From 77d8f3068c63ee0983f0b5ba3207d3f7cce11be4 Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Wed, 29 Jul 2020 16:00:10 +0800 Subject: [PATCH 0001/1894] clk: imx: scu: add two cells binding support This patch implements the new two cells binding for SCU clocks. The usage is as follows: clocks = <&uart0_clk IMX_SC_R_UART_0 IMX_SC_PM_CLK_PER> Due to each SCU clock is associated with a power domain, without power on the domain, the SCU clock can't work. So we create platform devices for each domain clock respectively and manually attach the required domain before register the clock devices, then we can register clocks in the clock platform driver accordingly. Note because we do not have power domain info in device tree and the SCU resource ID is the same for power domain and clock, so we use resource ID to find power domains. Later, we will also use this clock platform driver to support suspend/resume and runtime pm. Cc: Stephen Boyd Cc: Shawn Guo Cc: Sascha Hauer Cc: Michael Turquette Signed-off-by: Dong Aisheng Reviewed-by: Stephen Boyd Signed-off-by: Shawn Guo --- drivers/clk/imx/clk-imx8qxp.c | 136 ++++++++++++++++------------- drivers/clk/imx/clk-scu.c | 156 +++++++++++++++++++++++++++++++++- drivers/clk/imx/clk-scu.h | 27 ++++-- 3 files changed, 252 insertions(+), 67 deletions(-) diff --git a/drivers/clk/imx/clk-imx8qxp.c b/drivers/clk/imx/clk-imx8qxp.c index d650ca33cdc80..5b3d4ede7c7c4 100644 --- a/drivers/clk/imx/clk-imx8qxp.c +++ b/drivers/clk/imx/clk-imx8qxp.c @@ -22,9 +22,10 @@ static int imx8qxp_clk_probe(struct platform_device *pdev) struct device_node *ccm_node = pdev->dev.of_node; struct clk_hw_onecell_data *clk_data; struct clk_hw **clks; + u32 clk_cells; int ret, i; - ret = imx_clk_scu_init(); + ret = imx_clk_scu_init(ccm_node); if (ret) return ret; @@ -33,6 +34,9 @@ static int imx8qxp_clk_probe(struct platform_device *pdev) if (!clk_data) return -ENOMEM; + if (of_property_read_u32(ccm_node, "#clock-cells", &clk_cells)) + return -EINVAL; + clk_data->num = IMX_SCU_CLK_END; clks = clk_data->hws; @@ -55,78 +59,78 @@ static int imx8qxp_clk_probe(struct platform_device *pdev) clks[IMX_LSIO_BUS_CLK] = clk_hw_register_fixed_rate(NULL, "lsio_bus_clk_root", NULL, 0, 100000000); /* ARM core */ - clks[IMX_A35_CLK] = imx_clk_scu("a35_clk", IMX_SC_R_A35, IMX_SC_PM_CLK_CPU); + clks[IMX_A35_CLK] = imx_clk_scu("a35_clk", IMX_SC_R_A35, IMX_SC_PM_CLK_CPU, clk_cells); /* LSIO SS */ - clks[IMX_LSIO_PWM0_CLK] = imx_clk_scu("pwm0_clk", IMX_SC_R_PWM_0, IMX_SC_PM_CLK_PER); - clks[IMX_LSIO_PWM1_CLK] = imx_clk_scu("pwm1_clk", IMX_SC_R_PWM_1, IMX_SC_PM_CLK_PER); - clks[IMX_LSIO_PWM2_CLK] = imx_clk_scu("pwm2_clk", IMX_SC_R_PWM_2, IMX_SC_PM_CLK_PER); - clks[IMX_LSIO_PWM3_CLK] = imx_clk_scu("pwm3_clk", IMX_SC_R_PWM_3, IMX_SC_PM_CLK_PER); - clks[IMX_LSIO_PWM4_CLK] = imx_clk_scu("pwm4_clk", IMX_SC_R_PWM_4, IMX_SC_PM_CLK_PER); - clks[IMX_LSIO_PWM5_CLK] = imx_clk_scu("pwm5_clk", IMX_SC_R_PWM_5, IMX_SC_PM_CLK_PER); - clks[IMX_LSIO_PWM6_CLK] = imx_clk_scu("pwm6_clk", IMX_SC_R_PWM_6, IMX_SC_PM_CLK_PER); - clks[IMX_LSIO_PWM7_CLK] = imx_clk_scu("pwm7_clk", IMX_SC_R_PWM_7, IMX_SC_PM_CLK_PER); - clks[IMX_LSIO_GPT0_CLK] = imx_clk_scu("gpt0_clk", IMX_SC_R_GPT_0, IMX_SC_PM_CLK_PER); - clks[IMX_LSIO_GPT1_CLK] = imx_clk_scu("gpt1_clk", IMX_SC_R_GPT_1, IMX_SC_PM_CLK_PER); - clks[IMX_LSIO_GPT2_CLK] = imx_clk_scu("gpt2_clk", IMX_SC_R_GPT_2, IMX_SC_PM_CLK_PER); - clks[IMX_LSIO_GPT3_CLK] = imx_clk_scu("gpt3_clk", IMX_SC_R_GPT_3, IMX_SC_PM_CLK_PER); - clks[IMX_LSIO_GPT4_CLK] = imx_clk_scu("gpt4_clk", IMX_SC_R_GPT_4, IMX_SC_PM_CLK_PER); - clks[IMX_LSIO_FSPI0_CLK] = imx_clk_scu("fspi0_clk", IMX_SC_R_FSPI_0, IMX_SC_PM_CLK_PER); - clks[IMX_LSIO_FSPI1_CLK] = imx_clk_scu("fspi1_clk", IMX_SC_R_FSPI_1, IMX_SC_PM_CLK_PER); + clks[IMX_LSIO_PWM0_CLK] = imx_clk_scu("pwm0_clk", IMX_SC_R_PWM_0, IMX_SC_PM_CLK_PER, clk_cells); + clks[IMX_LSIO_PWM1_CLK] = imx_clk_scu("pwm1_clk", IMX_SC_R_PWM_1, IMX_SC_PM_CLK_PER, clk_cells); + clks[IMX_LSIO_PWM2_CLK] = imx_clk_scu("pwm2_clk", IMX_SC_R_PWM_2, IMX_SC_PM_CLK_PER, clk_cells); + clks[IMX_LSIO_PWM3_CLK] = imx_clk_scu("pwm3_clk", IMX_SC_R_PWM_3, IMX_SC_PM_CLK_PER, clk_cells); + clks[IMX_LSIO_PWM4_CLK] = imx_clk_scu("pwm4_clk", IMX_SC_R_PWM_4, IMX_SC_PM_CLK_PER, clk_cells); + clks[IMX_LSIO_PWM5_CLK] = imx_clk_scu("pwm5_clk", IMX_SC_R_PWM_5, IMX_SC_PM_CLK_PER, clk_cells); + clks[IMX_LSIO_PWM6_CLK] = imx_clk_scu("pwm6_clk", IMX_SC_R_PWM_6, IMX_SC_PM_CLK_PER, clk_cells); + clks[IMX_LSIO_PWM7_CLK] = imx_clk_scu("pwm7_clk", IMX_SC_R_PWM_7, IMX_SC_PM_CLK_PER, clk_cells); + clks[IMX_LSIO_GPT0_CLK] = imx_clk_scu("gpt0_clk", IMX_SC_R_GPT_0, IMX_SC_PM_CLK_PER, clk_cells); + clks[IMX_LSIO_GPT1_CLK] = imx_clk_scu("gpt1_clk", IMX_SC_R_GPT_1, IMX_SC_PM_CLK_PER, clk_cells); + clks[IMX_LSIO_GPT2_CLK] = imx_clk_scu("gpt2_clk", IMX_SC_R_GPT_2, IMX_SC_PM_CLK_PER, clk_cells); + clks[IMX_LSIO_GPT3_CLK] = imx_clk_scu("gpt3_clk", IMX_SC_R_GPT_3, IMX_SC_PM_CLK_PER, clk_cells); + clks[IMX_LSIO_GPT4_CLK] = imx_clk_scu("gpt4_clk", IMX_SC_R_GPT_4, IMX_SC_PM_CLK_PER, clk_cells); + clks[IMX_LSIO_FSPI0_CLK] = imx_clk_scu("fspi0_clk", IMX_SC_R_FSPI_0, IMX_SC_PM_CLK_PER, clk_cells); + clks[IMX_LSIO_FSPI1_CLK] = imx_clk_scu("fspi1_clk", IMX_SC_R_FSPI_1, IMX_SC_PM_CLK_PER, clk_cells); /* ADMA SS */ - clks[IMX_ADMA_UART0_CLK] = imx_clk_scu("uart0_clk", IMX_SC_R_UART_0, IMX_SC_PM_CLK_PER); - clks[IMX_ADMA_UART1_CLK] = imx_clk_scu("uart1_clk", IMX_SC_R_UART_1, IMX_SC_PM_CLK_PER); - clks[IMX_ADMA_UART2_CLK] = imx_clk_scu("uart2_clk", IMX_SC_R_UART_2, IMX_SC_PM_CLK_PER); - clks[IMX_ADMA_UART3_CLK] = imx_clk_scu("uart3_clk", IMX_SC_R_UART_3, IMX_SC_PM_CLK_PER); - clks[IMX_ADMA_SPI0_CLK] = imx_clk_scu("spi0_clk", IMX_SC_R_SPI_0, IMX_SC_PM_CLK_PER); - clks[IMX_ADMA_SPI1_CLK] = imx_clk_scu("spi1_clk", IMX_SC_R_SPI_1, IMX_SC_PM_CLK_PER); - clks[IMX_ADMA_SPI2_CLK] = imx_clk_scu("spi2_clk", IMX_SC_R_SPI_2, IMX_SC_PM_CLK_PER); - clks[IMX_ADMA_SPI3_CLK] = imx_clk_scu("spi3_clk", IMX_SC_R_SPI_3, IMX_SC_PM_CLK_PER); - clks[IMX_ADMA_CAN0_CLK] = imx_clk_scu("can0_clk", IMX_SC_R_CAN_0, IMX_SC_PM_CLK_PER); - clks[IMX_ADMA_I2C0_CLK] = imx_clk_scu("i2c0_clk", IMX_SC_R_I2C_0, IMX_SC_PM_CLK_PER); - clks[IMX_ADMA_I2C1_CLK] = imx_clk_scu("i2c1_clk", IMX_SC_R_I2C_1, IMX_SC_PM_CLK_PER); - clks[IMX_ADMA_I2C2_CLK] = imx_clk_scu("i2c2_clk", IMX_SC_R_I2C_2, IMX_SC_PM_CLK_PER); - clks[IMX_ADMA_I2C3_CLK] = imx_clk_scu("i2c3_clk", IMX_SC_R_I2C_3, IMX_SC_PM_CLK_PER); - clks[IMX_ADMA_FTM0_CLK] = imx_clk_scu("ftm0_clk", IMX_SC_R_FTM_0, IMX_SC_PM_CLK_PER); - clks[IMX_ADMA_FTM1_CLK] = imx_clk_scu("ftm1_clk", IMX_SC_R_FTM_1, IMX_SC_PM_CLK_PER); - clks[IMX_ADMA_ADC0_CLK] = imx_clk_scu("adc0_clk", IMX_SC_R_ADC_0, IMX_SC_PM_CLK_PER); - clks[IMX_ADMA_PWM_CLK] = imx_clk_scu("pwm_clk", IMX_SC_R_LCD_0_PWM_0, IMX_SC_PM_CLK_PER); - clks[IMX_ADMA_LCD_CLK] = imx_clk_scu("lcd_clk", IMX_SC_R_LCD_0, IMX_SC_PM_CLK_PER); + clks[IMX_ADMA_UART0_CLK] = imx_clk_scu("uart0_clk", IMX_SC_R_UART_0, IMX_SC_PM_CLK_PER, clk_cells); + clks[IMX_ADMA_UART1_CLK] = imx_clk_scu("uart1_clk", IMX_SC_R_UART_1, IMX_SC_PM_CLK_PER, clk_cells); + clks[IMX_ADMA_UART2_CLK] = imx_clk_scu("uart2_clk", IMX_SC_R_UART_2, IMX_SC_PM_CLK_PER, clk_cells); + clks[IMX_ADMA_UART3_CLK] = imx_clk_scu("uart3_clk", IMX_SC_R_UART_3, IMX_SC_PM_CLK_PER, clk_cells); + clks[IMX_ADMA_SPI0_CLK] = imx_clk_scu("spi0_clk", IMX_SC_R_SPI_0, IMX_SC_PM_CLK_PER, clk_cells); + clks[IMX_ADMA_SPI1_CLK] = imx_clk_scu("spi1_clk", IMX_SC_R_SPI_1, IMX_SC_PM_CLK_PER, clk_cells); + clks[IMX_ADMA_SPI2_CLK] = imx_clk_scu("spi2_clk", IMX_SC_R_SPI_2, IMX_SC_PM_CLK_PER, clk_cells); + clks[IMX_ADMA_SPI3_CLK] = imx_clk_scu("spi3_clk", IMX_SC_R_SPI_3, IMX_SC_PM_CLK_PER, clk_cells); + clks[IMX_ADMA_CAN0_CLK] = imx_clk_scu("can0_clk", IMX_SC_R_CAN_0, IMX_SC_PM_CLK_PER, clk_cells); + clks[IMX_ADMA_I2C0_CLK] = imx_clk_scu("i2c0_clk", IMX_SC_R_I2C_0, IMX_SC_PM_CLK_PER, clk_cells); + clks[IMX_ADMA_I2C1_CLK] = imx_clk_scu("i2c1_clk", IMX_SC_R_I2C_1, IMX_SC_PM_CLK_PER, clk_cells); + clks[IMX_ADMA_I2C2_CLK] = imx_clk_scu("i2c2_clk", IMX_SC_R_I2C_2, IMX_SC_PM_CLK_PER, clk_cells); + clks[IMX_ADMA_I2C3_CLK] = imx_clk_scu("i2c3_clk", IMX_SC_R_I2C_3, IMX_SC_PM_CLK_PER, clk_cells); + clks[IMX_ADMA_FTM0_CLK] = imx_clk_scu("ftm0_clk", IMX_SC_R_FTM_0, IMX_SC_PM_CLK_PER, clk_cells); + clks[IMX_ADMA_FTM1_CLK] = imx_clk_scu("ftm1_clk", IMX_SC_R_FTM_1, IMX_SC_PM_CLK_PER, clk_cells); + clks[IMX_ADMA_ADC0_CLK] = imx_clk_scu("adc0_clk", IMX_SC_R_ADC_0, IMX_SC_PM_CLK_PER, clk_cells); + clks[IMX_ADMA_PWM_CLK] = imx_clk_scu("pwm_clk", IMX_SC_R_LCD_0_PWM_0, IMX_SC_PM_CLK_PER, clk_cells); + clks[IMX_ADMA_LCD_CLK] = imx_clk_scu("lcd_clk", IMX_SC_R_LCD_0, IMX_SC_PM_CLK_PER, clk_cells); /* Connectivity */ - clks[IMX_CONN_SDHC0_CLK] = imx_clk_scu("sdhc0_clk", IMX_SC_R_SDHC_0, IMX_SC_PM_CLK_PER); - clks[IMX_CONN_SDHC1_CLK] = imx_clk_scu("sdhc1_clk", IMX_SC_R_SDHC_1, IMX_SC_PM_CLK_PER); - clks[IMX_CONN_SDHC2_CLK] = imx_clk_scu("sdhc2_clk", IMX_SC_R_SDHC_2, IMX_SC_PM_CLK_PER); - clks[IMX_CONN_ENET0_ROOT_CLK] = imx_clk_scu("enet0_clk", IMX_SC_R_ENET_0, IMX_SC_PM_CLK_PER); - clks[IMX_CONN_ENET0_BYPASS_CLK] = imx_clk_scu("enet0_bypass_clk", IMX_SC_R_ENET_0, IMX_SC_PM_CLK_BYPASS); - clks[IMX_CONN_ENET0_RGMII_CLK] = imx_clk_scu("enet0_rgmii_clk", IMX_SC_R_ENET_0, IMX_SC_PM_CLK_MISC0); - clks[IMX_CONN_ENET1_ROOT_CLK] = imx_clk_scu("enet1_clk", IMX_SC_R_ENET_1, IMX_SC_PM_CLK_PER); - clks[IMX_CONN_ENET1_BYPASS_CLK] = imx_clk_scu("enet1_bypass_clk", IMX_SC_R_ENET_1, IMX_SC_PM_CLK_BYPASS); - clks[IMX_CONN_ENET1_RGMII_CLK] = imx_clk_scu("enet1_rgmii_clk", IMX_SC_R_ENET_1, IMX_SC_PM_CLK_MISC0); - clks[IMX_CONN_GPMI_BCH_IO_CLK] = imx_clk_scu("gpmi_io_clk", IMX_SC_R_NAND, IMX_SC_PM_CLK_MST_BUS); - clks[IMX_CONN_GPMI_BCH_CLK] = imx_clk_scu("gpmi_bch_clk", IMX_SC_R_NAND, IMX_SC_PM_CLK_PER); - clks[IMX_CONN_USB2_ACLK] = imx_clk_scu("usb3_aclk_div", IMX_SC_R_USB_2, IMX_SC_PM_CLK_PER); - clks[IMX_CONN_USB2_BUS_CLK] = imx_clk_scu("usb3_bus_div", IMX_SC_R_USB_2, IMX_SC_PM_CLK_MST_BUS); - clks[IMX_CONN_USB2_LPM_CLK] = imx_clk_scu("usb3_lpm_div", IMX_SC_R_USB_2, IMX_SC_PM_CLK_MISC); + clks[IMX_CONN_SDHC0_CLK] = imx_clk_scu("sdhc0_clk", IMX_SC_R_SDHC_0, IMX_SC_PM_CLK_PER, clk_cells); + clks[IMX_CONN_SDHC1_CLK] = imx_clk_scu("sdhc1_clk", IMX_SC_R_SDHC_1, IMX_SC_PM_CLK_PER, clk_cells); + clks[IMX_CONN_SDHC2_CLK] = imx_clk_scu("sdhc2_clk", IMX_SC_R_SDHC_2, IMX_SC_PM_CLK_PER, clk_cells); + clks[IMX_CONN_ENET0_ROOT_CLK] = imx_clk_scu("enet0_clk", IMX_SC_R_ENET_0, IMX_SC_PM_CLK_PER, clk_cells); + clks[IMX_CONN_ENET0_BYPASS_CLK] = imx_clk_scu("enet0_bypass_clk", IMX_SC_R_ENET_0, IMX_SC_PM_CLK_BYPASS, clk_cells); + clks[IMX_CONN_ENET0_RGMII_CLK] = imx_clk_scu("enet0_rgmii_clk", IMX_SC_R_ENET_0, IMX_SC_PM_CLK_MISC0, clk_cells); + clks[IMX_CONN_ENET1_ROOT_CLK] = imx_clk_scu("enet1_clk", IMX_SC_R_ENET_1, IMX_SC_PM_CLK_PER, clk_cells); + clks[IMX_CONN_ENET1_BYPASS_CLK] = imx_clk_scu("enet1_bypass_clk", IMX_SC_R_ENET_1, IMX_SC_PM_CLK_BYPASS, clk_cells); + clks[IMX_CONN_ENET1_RGMII_CLK] = imx_clk_scu("enet1_rgmii_clk", IMX_SC_R_ENET_1, IMX_SC_PM_CLK_MISC0, clk_cells); + clks[IMX_CONN_GPMI_BCH_IO_CLK] = imx_clk_scu("gpmi_io_clk", IMX_SC_R_NAND, IMX_SC_PM_CLK_MST_BUS, clk_cells); + clks[IMX_CONN_GPMI_BCH_CLK] = imx_clk_scu("gpmi_bch_clk", IMX_SC_R_NAND, IMX_SC_PM_CLK_PER, clk_cells); + clks[IMX_CONN_USB2_ACLK] = imx_clk_scu("usb3_aclk_div", IMX_SC_R_USB_2, IMX_SC_PM_CLK_PER, clk_cells); + clks[IMX_CONN_USB2_BUS_CLK] = imx_clk_scu("usb3_bus_div", IMX_SC_R_USB_2, IMX_SC_PM_CLK_MST_BUS, clk_cells); + clks[IMX_CONN_USB2_LPM_CLK] = imx_clk_scu("usb3_lpm_div", IMX_SC_R_USB_2, IMX_SC_PM_CLK_MISC, clk_cells); /* Display controller SS */ - clks[IMX_DC0_DISP0_CLK] = imx_clk_scu("dc0_disp0_clk", IMX_SC_R_DC_0, IMX_SC_PM_CLK_MISC0); - clks[IMX_DC0_DISP1_CLK] = imx_clk_scu("dc0_disp1_clk", IMX_SC_R_DC_0, IMX_SC_PM_CLK_MISC1); + clks[IMX_DC0_DISP0_CLK] = imx_clk_scu("dc0_disp0_clk", IMX_SC_R_DC_0, IMX_SC_PM_CLK_MISC0, clk_cells); + clks[IMX_DC0_DISP1_CLK] = imx_clk_scu("dc0_disp1_clk", IMX_SC_R_DC_0, IMX_SC_PM_CLK_MISC1, clk_cells); /* MIPI-LVDS SS */ - clks[IMX_MIPI0_I2C0_CLK] = imx_clk_scu("mipi0_i2c0_clk", IMX_SC_R_MIPI_0_I2C_0, IMX_SC_PM_CLK_MISC2); - clks[IMX_MIPI0_I2C1_CLK] = imx_clk_scu("mipi0_i2c1_clk", IMX_SC_R_MIPI_0_I2C_1, IMX_SC_PM_CLK_MISC2); + clks[IMX_MIPI0_I2C0_CLK] = imx_clk_scu("mipi0_i2c0_clk", IMX_SC_R_MIPI_0_I2C_0, IMX_SC_PM_CLK_MISC2, clk_cells); + clks[IMX_MIPI0_I2C1_CLK] = imx_clk_scu("mipi0_i2c1_clk", IMX_SC_R_MIPI_0_I2C_1, IMX_SC_PM_CLK_MISC2, clk_cells); /* MIPI CSI SS */ - clks[IMX_CSI0_CORE_CLK] = imx_clk_scu("mipi_csi0_core_clk", IMX_SC_R_CSI_0, IMX_SC_PM_CLK_PER); - clks[IMX_CSI0_ESC_CLK] = imx_clk_scu("mipi_csi0_esc_clk", IMX_SC_R_CSI_0, IMX_SC_PM_CLK_MISC); - clks[IMX_CSI0_I2C0_CLK] = imx_clk_scu("mipi_csi0_i2c0_clk", IMX_SC_R_CSI_0_I2C_0, IMX_SC_PM_CLK_PER); - clks[IMX_CSI0_PWM0_CLK] = imx_clk_scu("mipi_csi0_pwm0_clk", IMX_SC_R_CSI_0_PWM_0, IMX_SC_PM_CLK_PER); + clks[IMX_CSI0_CORE_CLK] = imx_clk_scu("mipi_csi0_core_clk", IMX_SC_R_CSI_0, IMX_SC_PM_CLK_PER, clk_cells); + clks[IMX_CSI0_ESC_CLK] = imx_clk_scu("mipi_csi0_esc_clk", IMX_SC_R_CSI_0, IMX_SC_PM_CLK_MISC, clk_cells); + clks[IMX_CSI0_I2C0_CLK] = imx_clk_scu("mipi_csi0_i2c0_clk", IMX_SC_R_CSI_0_I2C_0, IMX_SC_PM_CLK_PER, clk_cells); + clks[IMX_CSI0_PWM0_CLK] = imx_clk_scu("mipi_csi0_pwm0_clk", IMX_SC_R_CSI_0_PWM_0, IMX_SC_PM_CLK_PER, clk_cells); /* GPU SS */ - clks[IMX_GPU0_CORE_CLK] = imx_clk_scu("gpu_core0_clk", IMX_SC_R_GPU_0_PID0, IMX_SC_PM_CLK_PER); - clks[IMX_GPU0_SHADER_CLK] = imx_clk_scu("gpu_shader0_clk", IMX_SC_R_GPU_0_PID0, IMX_SC_PM_CLK_MISC); + clks[IMX_GPU0_CORE_CLK] = imx_clk_scu("gpu_core0_clk", IMX_SC_R_GPU_0_PID0, IMX_SC_PM_CLK_PER, clk_cells); + clks[IMX_GPU0_SHADER_CLK] = imx_clk_scu("gpu_shader0_clk", IMX_SC_R_GPU_0_PID0, IMX_SC_PM_CLK_MISC, clk_cells); for (i = 0; i < clk_data->num; i++) { if (IS_ERR(clks[i])) @@ -134,7 +138,19 @@ static int imx8qxp_clk_probe(struct platform_device *pdev) i, PTR_ERR(clks[i])); } - return of_clk_add_hw_provider(ccm_node, of_clk_hw_onecell_get, clk_data); + if (clk_cells == 2) { + ret = of_clk_add_hw_provider(ccm_node, imx_scu_of_clk_src_get, imx_scu_clks); + if (ret) + imx_clk_scu_unregister(); + } else { + /* + * legacy binding code path doesn't unregister here because + * it will be removed later. + */ + ret = of_clk_add_hw_provider(ccm_node, of_clk_hw_onecell_get, clk_data); + } + + return ret; } static const struct of_device_id imx8qxp_match[] = { diff --git a/drivers/clk/imx/clk-scu.c b/drivers/clk/imx/clk-scu.c index b8b2072742a56..e5837e7caa50f 100644 --- a/drivers/clk/imx/clk-scu.c +++ b/drivers/clk/imx/clk-scu.c @@ -8,6 +8,9 @@ #include #include #include +#include +#include +#include #include #include "clk-scu.h" @@ -16,6 +19,20 @@ #define IMX_SIP_SET_CPUFREQ 0x00 static struct imx_sc_ipc *ccm_ipc_handle; +struct device_node *pd_np; + +struct imx_scu_clk_node { + const char *name; + u32 rsrc; + u8 clk_type; + const char * const *parents; + int num_parents; + + struct clk_hw *hw; + struct list_head node; +}; + +struct list_head imx_scu_clks[IMX_SC_R_LAST]; /* * struct clk_scu - Description of one SCU clock @@ -128,9 +145,35 @@ static inline struct clk_scu *to_clk_scu(struct clk_hw *hw) return container_of(hw, struct clk_scu, hw); } -int imx_clk_scu_init(void) +int imx_clk_scu_init(struct device_node *np) { - return imx_scu_get_handle(&ccm_ipc_handle); + struct platform_device *pd_dev; + u32 clk_cells; + int ret, i; + + ret = imx_scu_get_handle(&ccm_ipc_handle); + if (ret) + return ret; + + of_property_read_u32(np, "#clock-cells", &clk_cells); + + if (clk_cells == 2) { + for (i = 0; i < IMX_SC_R_LAST; i++) + INIT_LIST_HEAD(&imx_scu_clks[i]); + /* + * Note: SCU clock driver depends on SCU power domain to be ready + * first. As there're no power domains under scu clock node in dts, + * we can't use PROBE_DEFER automatically. + */ + pd_np = of_find_compatible_node(NULL, NULL, "fsl,scu-pd"); + pd_dev = of_find_device_by_node(pd_np); + if (!pd_dev || !device_is_bound(&pd_dev->dev)) { + of_node_put(pd_np); + return -EPROBE_DEFER; + } + } + + return 0; } /* @@ -387,3 +430,112 @@ struct clk_hw *__imx_clk_scu(const char *name, const char * const *parents, return hw; } + +struct clk_hw *imx_scu_of_clk_src_get(struct of_phandle_args *clkspec, + void *data) +{ + unsigned int rsrc = clkspec->args[0]; + unsigned int idx = clkspec->args[1]; + struct list_head *scu_clks = data; + struct imx_scu_clk_node *clk; + + list_for_each_entry(clk, &scu_clks[rsrc], node) { + if (clk->clk_type == idx) + return clk->hw; + } + + return ERR_PTR(-ENODEV); +} + +static int imx_clk_scu_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct imx_scu_clk_node *clk = dev_get_platdata(dev); + struct clk_hw *hw; + + hw = __imx_clk_scu(clk->name, clk->parents, clk->num_parents, + clk->rsrc, clk->clk_type); + if (IS_ERR(hw)) + return PTR_ERR(hw); + + clk->hw = hw; + list_add_tail(&clk->node, &imx_scu_clks[clk->rsrc]); + + dev_dbg(dev, "register SCU clock rsrc:%d type:%d\n", clk->rsrc, + clk->clk_type); + + return 0; +} + +static struct platform_driver imx_clk_scu_driver = { + .driver = { + .name = "imx-scu-clk", + .suppress_bind_attrs = true, + }, + .probe = imx_clk_scu_probe, +}; +builtin_platform_driver(imx_clk_scu_driver); + +static int imx_clk_scu_attach_pd(struct device *dev, u32 rsrc_id) +{ + struct of_phandle_args genpdspec = { + .np = pd_np, + .args_count = 1, + .args[0] = rsrc_id, + }; + + return of_genpd_add_device(&genpdspec, dev); +} + +struct clk_hw *imx_clk_scu_alloc_dev(const char *name, + const char * const *parents, + int num_parents, u32 rsrc_id, u8 clk_type) +{ + struct imx_scu_clk_node clk = { + .name = name, + .rsrc = rsrc_id, + .clk_type = clk_type, + .parents = parents, + .num_parents = num_parents, + }; + struct platform_device *pdev; + int ret; + + pdev = platform_device_alloc(name, PLATFORM_DEVID_NONE); + if (!pdev) { + pr_err("%s: failed to allocate scu clk dev rsrc %d type %d\n", + name, rsrc_id, clk_type); + return ERR_PTR(-ENOMEM); + } + + ret = platform_device_add_data(pdev, &clk, sizeof(clk)); + if (ret) { + platform_device_put(pdev); + return ERR_PTR(ret); + } + + pdev->driver_override = "imx-scu-clk"; + + ret = imx_clk_scu_attach_pd(&pdev->dev, rsrc_id); + if (ret) + pr_warn("%s: failed to attached the power domain %d\n", + name, ret); + + platform_device_add(pdev); + + /* For API backwards compatiblilty, simply return NULL for success */ + return NULL; +} + +void imx_clk_scu_unregister(void) +{ + struct imx_scu_clk_node *clk; + int i; + + for (i = 0; i < IMX_SC_R_LAST; i++) { + list_for_each_entry(clk, &imx_scu_clks[i], node) { + clk_hw_unregister(clk->hw); + kfree(clk); + } + } +} diff --git a/drivers/clk/imx/clk-scu.h b/drivers/clk/imx/clk-scu.h index 2bcfaf06a4586..f7480898ea74e 100644 --- a/drivers/clk/imx/clk-scu.h +++ b/drivers/clk/imx/clk-scu.h @@ -8,22 +8,39 @@ #define __IMX_CLK_SCU_H #include +#include -int imx_clk_scu_init(void); +extern struct list_head imx_scu_clks[]; + +int imx_clk_scu_init(struct device_node *np); +struct clk_hw *imx_scu_of_clk_src_get(struct of_phandle_args *clkspec, + void *data); +struct clk_hw *imx_clk_scu_alloc_dev(const char *name, + const char * const *parents, + int num_parents, u32 rsrc_id, u8 clk_type); struct clk_hw *__imx_clk_scu(const char *name, const char * const *parents, int num_parents, u32 rsrc_id, u8 clk_type); +void imx_clk_scu_unregister(void); + static inline struct clk_hw *imx_clk_scu(const char *name, u32 rsrc_id, - u8 clk_type) + u8 clk_type, u8 clk_cells) { - return __imx_clk_scu(name, NULL, 0, rsrc_id, clk_type); + if (clk_cells == 2) + return imx_clk_scu_alloc_dev(name, NULL, 0, rsrc_id, clk_type); + else + return __imx_clk_scu(name, NULL, 0, rsrc_id, clk_type); } static inline struct clk_hw *imx_clk_scu2(const char *name, const char * const *parents, - int num_parents, u32 rsrc_id, u8 clk_type) + int num_parents, u32 rsrc_id, u8 clk_type, + u8 clk_cells) { - return __imx_clk_scu(name, parents, num_parents, rsrc_id, clk_type); + if (clk_cells == 2) + return imx_clk_scu_alloc_dev(name, parents, num_parents, rsrc_id, clk_type); + else + return __imx_clk_scu(name, parents, num_parents, rsrc_id, clk_type); } struct clk_hw *imx_clk_lpcg_scu(const char *name, const char *parent_name, -- GitLab From 0d5f1f4731b52e294f25de193978d8b181b55faa Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Wed, 29 Jul 2020 16:00:11 +0800 Subject: [PATCH 0002/1894] clk: imx: scu: bypass cpu power domains Bypass cpu power domains which are owned by ATF. Reviewed-by: Stephen Boyd Signed-off-by: Dong Aisheng Signed-off-by: Shawn Guo --- drivers/clk/imx/clk-scu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/clk/imx/clk-scu.c b/drivers/clk/imx/clk-scu.c index e5837e7caa50f..dae2529bf64e0 100644 --- a/drivers/clk/imx/clk-scu.c +++ b/drivers/clk/imx/clk-scu.c @@ -484,6 +484,10 @@ static int imx_clk_scu_attach_pd(struct device *dev, u32 rsrc_id) .args[0] = rsrc_id, }; + if (rsrc_id == IMX_SC_R_A35 || rsrc_id == IMX_SC_R_A53 || + rsrc_id == IMX_SC_R_A72) + return 0; + return of_genpd_add_device(&genpdspec, dev); } -- GitLab From 2f1a2c1d00bc9417f5faa54777a23a52f054e9cf Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Wed, 29 Jul 2020 16:00:12 +0800 Subject: [PATCH 0003/1894] clk: imx: scu: allow scu clk to take device pointer Used to support runtime pm. Cc: Shawn Guo Cc: Sascha Hauer Cc: Michael Turquette Reviewed-by: Stephen Boyd Signed-off-by: Dong Aisheng Signed-off-by: Shawn Guo --- drivers/clk/imx/clk-scu.c | 9 +++++---- drivers/clk/imx/clk-scu.h | 9 +++++---- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/clk/imx/clk-scu.c b/drivers/clk/imx/clk-scu.c index dae2529bf64e0..a67cca0a32adc 100644 --- a/drivers/clk/imx/clk-scu.c +++ b/drivers/clk/imx/clk-scu.c @@ -387,8 +387,9 @@ static const struct clk_ops clk_scu_cpu_ops = { .unprepare = clk_scu_unprepare, }; -struct clk_hw *__imx_clk_scu(const char *name, const char * const *parents, - int num_parents, u32 rsrc_id, u8 clk_type) +struct clk_hw *__imx_clk_scu(struct device *dev, const char *name, + const char * const *parents, int num_parents, + u32 rsrc_id, u8 clk_type) { struct clk_init_data init; struct clk_scu *clk; @@ -422,7 +423,7 @@ struct clk_hw *__imx_clk_scu(const char *name, const char * const *parents, clk->hw.init = &init; hw = &clk->hw; - ret = clk_hw_register(NULL, hw); + ret = clk_hw_register(dev, hw); if (ret) { kfree(clk); hw = ERR_PTR(ret); @@ -453,7 +454,7 @@ static int imx_clk_scu_probe(struct platform_device *pdev) struct imx_scu_clk_node *clk = dev_get_platdata(dev); struct clk_hw *hw; - hw = __imx_clk_scu(clk->name, clk->parents, clk->num_parents, + hw = __imx_clk_scu(NULL, clk->name, clk->parents, clk->num_parents, clk->rsrc, clk->clk_type); if (IS_ERR(hw)) return PTR_ERR(hw); diff --git a/drivers/clk/imx/clk-scu.h b/drivers/clk/imx/clk-scu.h index f7480898ea74e..22976d64a96a6 100644 --- a/drivers/clk/imx/clk-scu.h +++ b/drivers/clk/imx/clk-scu.h @@ -19,8 +19,9 @@ struct clk_hw *imx_clk_scu_alloc_dev(const char *name, const char * const *parents, int num_parents, u32 rsrc_id, u8 clk_type); -struct clk_hw *__imx_clk_scu(const char *name, const char * const *parents, - int num_parents, u32 rsrc_id, u8 clk_type); +struct clk_hw *__imx_clk_scu(struct device *dev, const char *name, + const char * const *parents, int num_parents, + u32 rsrc_id, u8 clk_type); void imx_clk_scu_unregister(void); @@ -30,7 +31,7 @@ static inline struct clk_hw *imx_clk_scu(const char *name, u32 rsrc_id, if (clk_cells == 2) return imx_clk_scu_alloc_dev(name, NULL, 0, rsrc_id, clk_type); else - return __imx_clk_scu(name, NULL, 0, rsrc_id, clk_type); + return __imx_clk_scu(NULL, name, NULL, 0, rsrc_id, clk_type); } static inline struct clk_hw *imx_clk_scu2(const char *name, const char * const *parents, @@ -40,7 +41,7 @@ static inline struct clk_hw *imx_clk_scu2(const char *name, const char * const * if (clk_cells == 2) return imx_clk_scu_alloc_dev(name, parents, num_parents, rsrc_id, clk_type); else - return __imx_clk_scu(name, parents, num_parents, rsrc_id, clk_type); + return __imx_clk_scu(NULL, name, parents, num_parents, rsrc_id, clk_type); } struct clk_hw *imx_clk_lpcg_scu(const char *name, const char *parent_name, -- GitLab From 78edeb080330ca2bc6c9b20d388c8ceb7a2ef8c0 Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Wed, 29 Jul 2020 16:00:13 +0800 Subject: [PATCH 0004/1894] clk: imx: scu: add runtime pm support Add runtime pm support Cc: Shawn Guo Cc: Sascha Hauer Cc: Michael Turquette Reviewed-by: Stephen Boyd Signed-off-by: Dong Aisheng Signed-off-by: Shawn Guo --- drivers/clk/imx/clk-scu.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/clk/imx/clk-scu.c b/drivers/clk/imx/clk-scu.c index a67cca0a32adc..f5dc6731a2178 100644 --- a/drivers/clk/imx/clk-scu.c +++ b/drivers/clk/imx/clk-scu.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include "clk-scu.h" @@ -453,15 +454,32 @@ static int imx_clk_scu_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct imx_scu_clk_node *clk = dev_get_platdata(dev); struct clk_hw *hw; + int ret; + + pm_runtime_set_suspended(dev); + pm_runtime_set_autosuspend_delay(dev, 50); + pm_runtime_use_autosuspend(&pdev->dev); + pm_runtime_enable(dev); + + ret = pm_runtime_get_sync(dev); + if (ret) { + pm_runtime_disable(dev); + return ret; + } - hw = __imx_clk_scu(NULL, clk->name, clk->parents, clk->num_parents, + hw = __imx_clk_scu(dev, clk->name, clk->parents, clk->num_parents, clk->rsrc, clk->clk_type); - if (IS_ERR(hw)) + if (IS_ERR(hw)) { + pm_runtime_disable(dev); return PTR_ERR(hw); + } clk->hw = hw; list_add_tail(&clk->node, &imx_scu_clks[clk->rsrc]); + pm_runtime_mark_last_busy(&pdev->dev); + pm_runtime_put_autosuspend(&pdev->dev); + dev_dbg(dev, "register SCU clock rsrc:%d type:%d\n", clk->rsrc, clk->clk_type); -- GitLab From d0409631f466ae2e572a6a0ca684cced97fa1ade Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Wed, 29 Jul 2020 16:00:14 +0800 Subject: [PATCH 0005/1894] clk: imx: scu: add suspend/resume support Clock state will be lost when its power domain is completely off during system suspend/resume. So we save and restore the state accordingly in suspend/resume callback. Cc: Shawn Guo Cc: Sascha Hauer Cc: Michael Turquette Reviewed-by: Stephen Boyd Signed-off-by: Dong Aisheng Signed-off-by: Shawn Guo --- drivers/clk/imx/clk-scu.c | 49 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/drivers/clk/imx/clk-scu.c b/drivers/clk/imx/clk-scu.c index f5dc6731a2178..229a290ca5b68 100644 --- a/drivers/clk/imx/clk-scu.c +++ b/drivers/clk/imx/clk-scu.c @@ -45,6 +45,10 @@ struct clk_scu { struct clk_hw hw; u16 rsrc_id; u8 clk_type; + + /* for state save&restore */ + bool is_enabled; + u32 rate; }; /* @@ -430,6 +434,9 @@ struct clk_hw *__imx_clk_scu(struct device *dev, const char *name, hw = ERR_PTR(ret); } + if (dev) + dev_set_drvdata(dev, clk); + return hw; } @@ -486,10 +493,52 @@ static int imx_clk_scu_probe(struct platform_device *pdev) return 0; } +static int __maybe_unused imx_clk_scu_suspend(struct device *dev) +{ + struct clk_scu *clk = dev_get_drvdata(dev); + + clk->rate = clk_hw_get_rate(&clk->hw); + clk->is_enabled = clk_hw_is_enabled(&clk->hw); + + if (clk->rate) + dev_dbg(dev, "save rate %d\n", clk->rate); + + if (clk->is_enabled) + dev_dbg(dev, "save enabled state\n"); + + return 0; +} + +static int __maybe_unused imx_clk_scu_resume(struct device *dev) +{ + struct clk_scu *clk = dev_get_drvdata(dev); + int ret = 0; + + if (clk->rate) { + ret = clk_scu_set_rate(&clk->hw, clk->rate, 0); + dev_dbg(dev, "restore rate %d %s\n", clk->rate, + !ret ? "success" : "failed"); + } + + if (clk->is_enabled) { + ret = clk_scu_prepare(&clk->hw); + dev_dbg(dev, "restore enabled state %s\n", + !ret ? "success" : "failed"); + } + + return ret; +} + +static const struct dev_pm_ops imx_clk_scu_pm_ops = { + SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(imx_clk_scu_suspend, + imx_clk_scu_resume) +}; + static struct platform_driver imx_clk_scu_driver = { .driver = { .name = "imx-scu-clk", .suppress_bind_attrs = true, + .pm = &imx_clk_scu_pm_ops, }, .probe = imx_clk_scu_probe, }; -- GitLab From d5f1e6a2bb61db8d4bd269edac8b52a853b48ce8 Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Wed, 29 Jul 2020 16:00:15 +0800 Subject: [PATCH 0006/1894] clk: imx: imx8qxp-lpcg: add parsing clocks from device tree One LPCG controller supports up to 8 clock outputs while each of them is fixed to 4 bits. It supports only gating function with fixed bits. So we can use the clk-indices to fetch the corresponding clock idx from device tree. With this way, we can write a generic LPCG clock drivers. This patch add that support to parse clocks from device tree. Cc: Shawn Guo Cc: Sascha Hauer Cc: Michael Turquette Reviewed-by: Stephen Boyd Signed-off-by: Dong Aisheng Signed-off-by: Shawn Guo --- drivers/clk/imx/clk-imx8qxp-lpcg.c | 124 +++++++++++++++++++++++++++++ drivers/clk/imx/clk-lpcg-scu.c | 8 ++ drivers/clk/imx/clk-scu.h | 1 + 3 files changed, 133 insertions(+) diff --git a/drivers/clk/imx/clk-imx8qxp-lpcg.c b/drivers/clk/imx/clk-imx8qxp-lpcg.c index e947a70054acd..84ac374ae8ff1 100644 --- a/drivers/clk/imx/clk-imx8qxp-lpcg.c +++ b/drivers/clk/imx/clk-imx8qxp-lpcg.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -157,6 +158,122 @@ static const struct imx8qxp_ss_lpcg imx8qxp_ss_lsio = { .num_max = IMX_LSIO_LPCG_CLK_END, }; +#define IMX_LPCG_MAX_CLKS 8 + +static struct clk_hw *imx_lpcg_of_clk_src_get(struct of_phandle_args *clkspec, + void *data) +{ + struct clk_hw_onecell_data *hw_data = data; + unsigned int idx = clkspec->args[0] / 4; + + if (idx >= hw_data->num) { + pr_err("%s: invalid index %u\n", __func__, idx); + return ERR_PTR(-EINVAL); + } + + return hw_data->hws[idx]; +} + +static int imx_lpcg_parse_clks_from_dt(struct platform_device *pdev, + struct device_node *np) +{ + const char *output_names[IMX_LPCG_MAX_CLKS]; + const char *parent_names[IMX_LPCG_MAX_CLKS]; + unsigned int bit_offset[IMX_LPCG_MAX_CLKS]; + struct clk_hw_onecell_data *clk_data; + struct clk_hw **clk_hws; + struct resource *res; + void __iomem *base; + int count; + int idx; + int ret; + int i; + + if (!of_device_is_compatible(np, "fsl,imx8qxp-lpcg")) + return -EINVAL; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(base)) + return PTR_ERR(base); + + count = of_property_count_u32_elems(np, "clock-indices"); + if (count < 0) { + dev_err(&pdev->dev, "failed to count clocks\n"); + return -EINVAL; + } + + /* + * A trick here is that we set the num of clks to the MAX instead + * of the count from clock-indices because one LPCG supports up to + * 8 clock outputs which each of them is fixed to 4 bits. Then we can + * easily get the clock by clk-indices (bit-offset) / 4. + * And the cost is very limited few pointers. + */ + + clk_data = devm_kzalloc(&pdev->dev, struct_size(clk_data, hws, + IMX_LPCG_MAX_CLKS), GFP_KERNEL); + if (!clk_data) + return -ENOMEM; + + clk_data->num = IMX_LPCG_MAX_CLKS; + clk_hws = clk_data->hws; + + ret = of_property_read_u32_array(np, "clock-indices", bit_offset, + count); + if (ret < 0) { + dev_err(&pdev->dev, "failed to read clock-indices\n"); + return -EINVAL; + } + + ret = of_clk_parent_fill(np, parent_names, count); + if (ret != count) { + dev_err(&pdev->dev, "failed to get clock parent names\n"); + return count; + } + + ret = of_property_read_string_array(np, "clock-output-names", + output_names, count); + if (ret != count) { + dev_err(&pdev->dev, "failed to read clock-output-names\n"); + return -EINVAL; + } + + for (i = 0; i < count; i++) { + idx = bit_offset[i] / 4; + if (idx > IMX_LPCG_MAX_CLKS) { + dev_warn(&pdev->dev, "invalid bit offset of clock %d\n", + i); + ret = -EINVAL; + goto unreg; + } + + clk_hws[idx] = imx_clk_lpcg_scu(output_names[i], + parent_names[i], 0, base, + bit_offset[i], false); + if (IS_ERR(clk_hws[idx])) { + dev_warn(&pdev->dev, "failed to register clock %d\n", + idx); + ret = PTR_ERR(clk_hws[idx]); + goto unreg; + } + } + + ret = devm_of_clk_add_hw_provider(&pdev->dev, imx_lpcg_of_clk_src_get, + clk_data); + if (!ret) + return 0; + +unreg: + while (--i >= 0) { + idx = bit_offset[i] / 4; + if (clk_hws[idx]) + imx_clk_lpcg_scu_unregister(clk_hws[idx]); + } + + return ret; +} + static int imx8qxp_lpcg_clk_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -167,8 +284,14 @@ static int imx8qxp_lpcg_clk_probe(struct platform_device *pdev) struct resource *res; struct clk_hw **clks; void __iomem *base; + int ret; int i; + /* try new binding to parse clocks from device tree first */ + ret = imx_lpcg_parse_clks_from_dt(pdev, np); + if (!ret) + return 0; + ss_lpcg = of_device_get_match_data(dev); if (!ss_lpcg) return -ENODEV; @@ -219,6 +342,7 @@ static const struct of_device_id imx8qxp_lpcg_match[] = { { .compatible = "fsl,imx8qxp-lpcg-adma", &imx8qxp_ss_adma, }, { .compatible = "fsl,imx8qxp-lpcg-conn", &imx8qxp_ss_conn, }, { .compatible = "fsl,imx8qxp-lpcg-lsio", &imx8qxp_ss_lsio, }, + { .compatible = "fsl,imx8qxp-lpcg", NULL }, { /* sentinel */ } }; diff --git a/drivers/clk/imx/clk-lpcg-scu.c b/drivers/clk/imx/clk-lpcg-scu.c index 1f0e44f921aee..fab9c94a5e691 100644 --- a/drivers/clk/imx/clk-lpcg-scu.c +++ b/drivers/clk/imx/clk-lpcg-scu.c @@ -115,3 +115,11 @@ struct clk_hw *imx_clk_lpcg_scu(const char *name, const char *parent_name, return hw; } + +void imx_clk_lpcg_scu_unregister(struct clk_hw *hw) +{ + struct clk_lpcg_scu *clk = to_clk_lpcg_scu(hw); + + clk_hw_unregister(&clk->hw); + kfree(clk); +} diff --git a/drivers/clk/imx/clk-scu.h b/drivers/clk/imx/clk-scu.h index 22976d64a96a6..6a879526b3d5c 100644 --- a/drivers/clk/imx/clk-scu.h +++ b/drivers/clk/imx/clk-scu.h @@ -47,4 +47,5 @@ static inline struct clk_hw *imx_clk_scu2(const char *name, const char * const * struct clk_hw *imx_clk_lpcg_scu(const char *name, const char *parent_name, unsigned long flags, void __iomem *reg, u8 bit_idx, bool hw_gate); +void imx_clk_lpcg_scu_unregister(struct clk_hw *hw); #endif -- GitLab From a4bfc85ccf374fb4ff10c806a270bf241598a70a Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Wed, 29 Jul 2020 16:00:16 +0800 Subject: [PATCH 0007/1894] clk: imx: lpcg: allow lpcg clk to take device pointer Used to support runtime pm. Reviewed-by: Stephen Boyd Signed-off-by: Dong Aisheng Signed-off-by: Shawn Guo --- drivers/clk/imx/clk-lpcg-scu.c | 8 ++++---- drivers/clk/imx/clk-scu.h | 24 ++++++++++++++++++++---- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/drivers/clk/imx/clk-lpcg-scu.c b/drivers/clk/imx/clk-lpcg-scu.c index fab9c94a5e691..4363ab6f46d5c 100644 --- a/drivers/clk/imx/clk-lpcg-scu.c +++ b/drivers/clk/imx/clk-lpcg-scu.c @@ -81,9 +81,9 @@ static const struct clk_ops clk_lpcg_scu_ops = { .disable = clk_lpcg_scu_disable, }; -struct clk_hw *imx_clk_lpcg_scu(const char *name, const char *parent_name, - unsigned long flags, void __iomem *reg, - u8 bit_idx, bool hw_gate) +struct clk_hw *__imx_clk_lpcg_scu(struct device *dev, const char *name, + const char *parent_name, unsigned long flags, + void __iomem *reg, u8 bit_idx, bool hw_gate) { struct clk_lpcg_scu *clk; struct clk_init_data init; @@ -107,7 +107,7 @@ struct clk_hw *imx_clk_lpcg_scu(const char *name, const char *parent_name, clk->hw.init = &init; hw = &clk->hw; - ret = clk_hw_register(NULL, hw); + ret = clk_hw_register(dev, hw); if (ret) { kfree(clk); hw = ERR_PTR(ret); diff --git a/drivers/clk/imx/clk-scu.h b/drivers/clk/imx/clk-scu.h index 6a879526b3d5c..faee1ba5c4ee4 100644 --- a/drivers/clk/imx/clk-scu.h +++ b/drivers/clk/imx/clk-scu.h @@ -25,6 +25,11 @@ struct clk_hw *__imx_clk_scu(struct device *dev, const char *name, void imx_clk_scu_unregister(void); +struct clk_hw *__imx_clk_lpcg_scu(struct device *dev, const char *name, + const char *parent_name, unsigned long flags, + void __iomem *reg, u8 bit_idx, bool hw_gate); +void imx_clk_lpcg_scu_unregister(struct clk_hw *hw); + static inline struct clk_hw *imx_clk_scu(const char *name, u32 rsrc_id, u8 clk_type, u8 clk_cells) { @@ -44,8 +49,19 @@ static inline struct clk_hw *imx_clk_scu2(const char *name, const char * const * return __imx_clk_scu(NULL, name, parents, num_parents, rsrc_id, clk_type); } -struct clk_hw *imx_clk_lpcg_scu(const char *name, const char *parent_name, - unsigned long flags, void __iomem *reg, - u8 bit_idx, bool hw_gate); -void imx_clk_lpcg_scu_unregister(struct clk_hw *hw); +static inline struct clk_hw *imx_clk_lpcg_scu_dev(struct device *dev, const char *name, + const char *parent_name, unsigned long flags, + void __iomem *reg, u8 bit_idx, bool hw_gate) +{ + return __imx_clk_lpcg_scu(dev, name, parent_name, flags, reg, + bit_idx, hw_gate); +} + +static inline struct clk_hw *imx_clk_lpcg_scu(const char *name, const char *parent_name, + unsigned long flags, void __iomem *reg, + u8 bit_idx, bool hw_gate) +{ + return __imx_clk_lpcg_scu(NULL, name, parent_name, flags, reg, + bit_idx, hw_gate); +} #endif -- GitLab From 18cdbad40c6c138edf62273417180227e12b198a Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Wed, 29 Jul 2020 16:00:17 +0800 Subject: [PATCH 0008/1894] clk: imx: clk-imx8qxp-lpcg: add runtime pm support add runtime pm support Reviewed-by: Stephen Boyd Signed-off-by: Dong Aisheng Signed-off-by: Shawn Guo --- drivers/clk/imx/clk-imx8qxp-lpcg.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/clk/imx/clk-imx8qxp-lpcg.c b/drivers/clk/imx/clk-imx8qxp-lpcg.c index 84ac374ae8ff1..50ab66028f0b4 100644 --- a/drivers/clk/imx/clk-imx8qxp-lpcg.c +++ b/drivers/clk/imx/clk-imx8qxp-lpcg.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include "clk-scu.h" @@ -239,6 +240,12 @@ static int imx_lpcg_parse_clks_from_dt(struct platform_device *pdev, return -EINVAL; } + pm_runtime_get_noresume(&pdev->dev); + pm_runtime_set_active(&pdev->dev); + pm_runtime_set_autosuspend_delay(&pdev->dev, 500); + pm_runtime_use_autosuspend(&pdev->dev); + pm_runtime_enable(&pdev->dev); + for (i = 0; i < count; i++) { idx = bit_offset[i] / 4; if (idx > IMX_LPCG_MAX_CLKS) { @@ -248,9 +255,9 @@ static int imx_lpcg_parse_clks_from_dt(struct platform_device *pdev, goto unreg; } - clk_hws[idx] = imx_clk_lpcg_scu(output_names[i], - parent_names[i], 0, base, - bit_offset[i], false); + clk_hws[idx] = imx_clk_lpcg_scu_dev(&pdev->dev, output_names[i], + parent_names[i], 0, base, + bit_offset[i], false); if (IS_ERR(clk_hws[idx])) { dev_warn(&pdev->dev, "failed to register clock %d\n", idx); @@ -261,8 +268,13 @@ static int imx_lpcg_parse_clks_from_dt(struct platform_device *pdev, ret = devm_of_clk_add_hw_provider(&pdev->dev, imx_lpcg_of_clk_src_get, clk_data); - if (!ret) - return 0; + if (ret) + goto unreg; + + pm_runtime_mark_last_busy(&pdev->dev); + pm_runtime_put_autosuspend(&pdev->dev); + + return 0; unreg: while (--i >= 0) { @@ -271,6 +283,8 @@ unreg: imx_clk_lpcg_scu_unregister(clk_hws[idx]); } + pm_runtime_disable(&pdev->dev); + return ret; } -- GitLab From ea0c5cbaf8b70fd6fe0269fb0b951965c82229cc Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Wed, 29 Jul 2020 16:00:18 +0800 Subject: [PATCH 0009/1894] clk: imx: lpcg: add suspend/resume support LPCG clock state may be lost when it's power domain is completely off during system suspend/resume and we need save and restore the state properly. Reviewed-by: Stephen Boyd Signed-off-by: Dong Aisheng Signed-off-by: Shawn Guo --- drivers/clk/imx/clk-imx8qxp-lpcg.c | 1 + drivers/clk/imx/clk-lpcg-scu.c | 37 ++++++++++++++++++++++++++++++ drivers/clk/imx/clk-scu.h | 1 + 3 files changed, 39 insertions(+) diff --git a/drivers/clk/imx/clk-imx8qxp-lpcg.c b/drivers/clk/imx/clk-imx8qxp-lpcg.c index 50ab66028f0b4..d3e905cf867d7 100644 --- a/drivers/clk/imx/clk-imx8qxp-lpcg.c +++ b/drivers/clk/imx/clk-imx8qxp-lpcg.c @@ -364,6 +364,7 @@ static struct platform_driver imx8qxp_lpcg_clk_driver = { .driver = { .name = "imx8qxp-lpcg-clk", .of_match_table = imx8qxp_lpcg_match, + .pm = &imx_clk_lpcg_scu_pm_ops, .suppress_bind_attrs = true, }, .probe = imx8qxp_lpcg_clk_probe, diff --git a/drivers/clk/imx/clk-lpcg-scu.c b/drivers/clk/imx/clk-lpcg-scu.c index 4363ab6f46d5c..77be7632866d7 100644 --- a/drivers/clk/imx/clk-lpcg-scu.c +++ b/drivers/clk/imx/clk-lpcg-scu.c @@ -34,6 +34,9 @@ struct clk_lpcg_scu { void __iomem *reg; u8 bit_idx; bool hw_gate; + + /* for state save&restore */ + u32 state; }; #define to_clk_lpcg_scu(_hw) container_of(_hw, struct clk_lpcg_scu, hw) @@ -113,6 +116,9 @@ struct clk_hw *__imx_clk_lpcg_scu(struct device *dev, const char *name, hw = ERR_PTR(ret); } + if (dev) + dev_set_drvdata(dev, clk); + return hw; } @@ -123,3 +129,34 @@ void imx_clk_lpcg_scu_unregister(struct clk_hw *hw) clk_hw_unregister(&clk->hw); kfree(clk); } + +static int __maybe_unused imx_clk_lpcg_scu_suspend(struct device *dev) +{ + struct clk_lpcg_scu *clk = dev_get_drvdata(dev); + + clk->state = readl_relaxed(clk->reg); + dev_dbg(dev, "save lpcg state 0x%x\n", clk->state); + + return 0; +} + +static int __maybe_unused imx_clk_lpcg_scu_resume(struct device *dev) +{ + struct clk_lpcg_scu *clk = dev_get_drvdata(dev); + + /* + * FIXME: Sometimes writes don't work unless the CPU issues + * them twice + */ + + writel(clk->state, clk->reg); + writel(clk->state, clk->reg); + dev_dbg(dev, "restore lpcg state 0x%x\n", clk->state); + + return 0; +} + +const struct dev_pm_ops imx_clk_lpcg_scu_pm_ops = { + SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(imx_clk_lpcg_scu_suspend, + imx_clk_lpcg_scu_resume) +}; diff --git a/drivers/clk/imx/clk-scu.h b/drivers/clk/imx/clk-scu.h index faee1ba5c4ee4..e8352164923e0 100644 --- a/drivers/clk/imx/clk-scu.h +++ b/drivers/clk/imx/clk-scu.h @@ -11,6 +11,7 @@ #include extern struct list_head imx_scu_clks[]; +extern const struct dev_pm_ops imx_clk_lpcg_scu_pm_ops; int imx_clk_scu_init(struct device_node *np); struct clk_hw *imx_scu_of_clk_src_get(struct of_phandle_args *clkspec, -- GitLab From bc13809f1c47245cd584f4ad31ad06a5c5f40e54 Mon Sep 17 00:00:00 2001 From: Heinrich Schuchardt Date: Sat, 3 Oct 2020 08:03:56 +0200 Subject: [PATCH 0010/1894] efi/libstub/x86: simplify efi_is_native() CONFIG_EFI_MIXED depends on CONFIG_X86_64=y. There is no need to check CONFIG_X86_64 again. Signed-off-by: Heinrich Schuchardt Link: https://lore.kernel.org/r/20201003060356.4913-1-xypron.glpk@gmx.de Signed-off-by: Ard Biesheuvel --- arch/x86/include/asm/efi.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index bc9758ef292ef..7673dc833232e 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -213,8 +213,6 @@ static inline bool efi_is_64bit(void) static inline bool efi_is_native(void) { - if (!IS_ENABLED(CONFIG_X86_64)) - return true; return efi_is_64bit(); } -- GitLab From 688eb28211abdf82a3f51e8997f1c8137947227d Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Sun, 11 Oct 2020 10:20:12 -0400 Subject: [PATCH 0011/1894] efi/x86: Only copy the compressed kernel image in efi_relocate_kernel() The image_size argument to efi_relocate_kernel() is currently specified as init_size, but this is unnecessarily large. The compressed kernel is much smaller, in fact, its image only extends up to the start of _bss, since at this point, the .bss section is still uninitialized. Depending on compression level, this can reduce the amount of data copied by 4-5x. Signed-off-by: Arvind Sankar Link: https://lore.kernel.org/r/20201011142012.96493-1-nivedita@alum.mit.edu Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/x86-stub.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 3672539cb96eb..f14c4ff5839f9 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -715,8 +715,11 @@ unsigned long efi_main(efi_handle_t handle, (IS_ENABLED(CONFIG_X86_32) && buffer_end > KERNEL_IMAGE_SIZE) || (IS_ENABLED(CONFIG_X86_64) && buffer_end > MAXMEM_X86_64_4LEVEL) || (image_offset == 0)) { + extern char _bss[]; + status = efi_relocate_kernel(&bzimage_addr, - hdr->init_size, hdr->init_size, + (unsigned long)_bss - bzimage_addr, + hdr->init_size, hdr->pref_address, hdr->kernel_alignment, LOAD_PHYSICAL_ADDR); -- GitLab From 7f2c2f38c1c0dbfc5b1e13aa57678daa753e1c96 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 22 Sep 2020 14:00:36 +0200 Subject: [PATCH 0012/1894] clk: renesas: rcar-gen3: Remove stp_ck handling for SDHI There is no case (and none foreseen) where we would need to disable the SDn clock. So, for simplicity, remove its handling. Signed-off-by: Wolfram Sang Link: https://lore.kernel.org/r/20200922120036.10298-1-wsa+renesas@sang-engineering.com Signed-off-by: Geert Uytterhoeven --- drivers/clk/renesas/rcar-gen3-cpg.c | 51 ++++++++++++++--------------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/drivers/clk/renesas/rcar-gen3-cpg.c b/drivers/clk/renesas/rcar-gen3-cpg.c index 488f8b3980c55..a7debddf7d099 100644 --- a/drivers/clk/renesas/rcar-gen3-cpg.c +++ b/drivers/clk/renesas/rcar-gen3-cpg.c @@ -224,10 +224,9 @@ static struct clk * __init cpg_z_clk_register(const char *name, #define CPG_SD_STP_MASK (CPG_SD_STP_HCK | CPG_SD_STP_CK) #define CPG_SD_FC_MASK (0x7 << 2 | 0x3 << 0) -#define CPG_SD_DIV_TABLE_DATA(stp_hck, stp_ck, sd_srcfc, sd_fc, sd_div) \ +#define CPG_SD_DIV_TABLE_DATA(stp_hck, sd_srcfc, sd_fc, sd_div) \ { \ .val = ((stp_hck) ? CPG_SD_STP_HCK : 0) | \ - ((stp_ck) ? CPG_SD_STP_CK : 0) | \ ((sd_srcfc) << 2) | \ ((sd_fc) << 0), \ .div = (sd_div), \ @@ -247,36 +246,36 @@ struct sd_clock { }; /* SDn divider - * sd_srcfc sd_fc div - * stp_hck stp_ck (div) (div) = sd_srcfc x sd_fc - *------------------------------------------------------------------- - * 0 0 0 (1) 1 (4) 4 : SDR104 / HS200 / HS400 (8 TAP) - * 0 0 1 (2) 1 (4) 8 : SDR50 - * 1 0 2 (4) 1 (4) 16 : HS / SDR25 - * 1 0 3 (8) 1 (4) 32 : NS / SDR12 - * 1 0 4 (16) 1 (4) 64 - * 0 0 0 (1) 0 (2) 2 - * 0 0 1 (2) 0 (2) 4 : SDR104 / HS200 / HS400 (4 TAP) - * 1 0 2 (4) 0 (2) 8 - * 1 0 3 (8) 0 (2) 16 - * 1 0 4 (16) 0 (2) 32 + * sd_srcfc sd_fc div + * stp_hck (div) (div) = sd_srcfc x sd_fc + *--------------------------------------------------------- + * 0 0 (1) 1 (4) 4 : SDR104 / HS200 / HS400 (8 TAP) + * 0 1 (2) 1 (4) 8 : SDR50 + * 1 2 (4) 1 (4) 16 : HS / SDR25 + * 1 3 (8) 1 (4) 32 : NS / SDR12 + * 1 4 (16) 1 (4) 64 + * 0 0 (1) 0 (2) 2 + * 0 1 (2) 0 (2) 4 : SDR104 / HS200 / HS400 (4 TAP) + * 1 2 (4) 0 (2) 8 + * 1 3 (8) 0 (2) 16 + * 1 4 (16) 0 (2) 32 * * NOTE: There is a quirk option to ignore the first row of the dividers * table when searching for suitable settings. This is because HS400 on * early ES versions of H3 and M3-W requires a specific setting to work. */ static const struct sd_div_table cpg_sd_div_table[] = { -/* CPG_SD_DIV_TABLE_DATA(stp_hck, stp_ck, sd_srcfc, sd_fc, sd_div) */ - CPG_SD_DIV_TABLE_DATA(0, 0, 0, 1, 4), - CPG_SD_DIV_TABLE_DATA(0, 0, 1, 1, 8), - CPG_SD_DIV_TABLE_DATA(1, 0, 2, 1, 16), - CPG_SD_DIV_TABLE_DATA(1, 0, 3, 1, 32), - CPG_SD_DIV_TABLE_DATA(1, 0, 4, 1, 64), - CPG_SD_DIV_TABLE_DATA(0, 0, 0, 0, 2), - CPG_SD_DIV_TABLE_DATA(0, 0, 1, 0, 4), - CPG_SD_DIV_TABLE_DATA(1, 0, 2, 0, 8), - CPG_SD_DIV_TABLE_DATA(1, 0, 3, 0, 16), - CPG_SD_DIV_TABLE_DATA(1, 0, 4, 0, 32), +/* CPG_SD_DIV_TABLE_DATA(stp_hck, sd_srcfc, sd_fc, sd_div) */ + CPG_SD_DIV_TABLE_DATA(0, 0, 1, 4), + CPG_SD_DIV_TABLE_DATA(0, 1, 1, 8), + CPG_SD_DIV_TABLE_DATA(1, 2, 1, 16), + CPG_SD_DIV_TABLE_DATA(1, 3, 1, 32), + CPG_SD_DIV_TABLE_DATA(1, 4, 1, 64), + CPG_SD_DIV_TABLE_DATA(0, 0, 0, 2), + CPG_SD_DIV_TABLE_DATA(0, 1, 0, 4), + CPG_SD_DIV_TABLE_DATA(1, 2, 0, 8), + CPG_SD_DIV_TABLE_DATA(1, 3, 0, 16), + CPG_SD_DIV_TABLE_DATA(1, 4, 0, 32), }; #define to_sd_clock(_hw) container_of(_hw, struct sd_clock, hw) -- GitLab From 6e0781e092a150b040cc305fd1832730cf78580a Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 26 Oct 2020 11:17:20 +0000 Subject: [PATCH 0013/1894] clk: rockchip: Add appropriate arch dependencies There's no point offering support for 32-bit platforms to users configuring a 64-bit kernel - and vice-versa - unless they are explicitly interested in compile-testing. Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/72abb0f794b8ed77e274e8ee21c22e0bd3223dfd.1603710913.git.robin.murphy@arm.com Signed-off-by: Heiko Stuebner --- drivers/clk/rockchip/Kconfig | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/clk/rockchip/Kconfig b/drivers/clk/rockchip/Kconfig index 47cd6c5de8372..effd05032e85c 100644 --- a/drivers/clk/rockchip/Kconfig +++ b/drivers/clk/rockchip/Kconfig @@ -11,67 +11,77 @@ config COMMON_CLK_ROCKCHIP if COMMON_CLK_ROCKCHIP config CLK_PX30 bool "Rockchip PX30 clock controller support" + depends on (ARM64 || COMPILE_TEST) default y help Build the driver for PX30 Clock Driver. config CLK_RV110X bool "Rockchip RV110x clock controller support" + depends on (ARM || COMPILE_TEST) default y help Build the driver for RV110x Clock Driver. config CLK_RK3036 bool "Rockchip RK3036 clock controller support" + depends on (ARM || COMPILE_TEST) default y help Build the driver for RK3036 Clock Driver. config CLK_RK312X bool "Rockchip RK312x clock controller support" + depends on (ARM || COMPILE_TEST) default y help Build the driver for RK312x Clock Driver. config CLK_RK3188 bool "Rockchip RK3188 clock controller support" + depends on (ARM || COMPILE_TEST) default y help Build the driver for RK3188 Clock Driver. config CLK_RK322X bool "Rockchip RK322x clock controller support" + depends on (ARM || COMPILE_TEST) default y help Build the driver for RK322x Clock Driver. config CLK_RK3288 bool "Rockchip RK3288 clock controller support" - depends on ARM + depends on (ARM || COMPILE_TEST) default y help Build the driver for RK3288 Clock Driver. config CLK_RK3308 bool "Rockchip RK3308 clock controller support" + depends on (ARM64 || COMPILE_TEST) default y help Build the driver for RK3308 Clock Driver. config CLK_RK3328 bool "Rockchip RK3328 clock controller support" + depends on (ARM64 || COMPILE_TEST) default y help Build the driver for RK3328 Clock Driver. config CLK_RK3368 bool "Rockchip RK3368 clock controller support" + depends on (ARM64 || COMPILE_TEST) default y help Build the driver for RK3368 Clock Driver. config CLK_RK3399 tristate "Rockchip RK3399 clock controller support" + depends on (ARM64 || COMPILE_TEST) default y help Build the driver for RK3399 Clock Driver. -- GitLab From 5d78533a0c53af9659227c803df944ba27cd56e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 24 Sep 2020 12:52:55 +0200 Subject: [PATCH 0014/1894] rtc: pcf2127: move watchdog initialisation to a separate function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The obvious advantages are: - The linker can drop the watchdog functions if CONFIG_WATCHDOG is off. - All watchdog stuff grouped together with only a single function call left in generic code. - Watchdog register is only read when it is actually used. - Less #ifdefery Signed-off-by: Uwe Kleine-König Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20200924105256.18162-2-u.kleine-koenig@pengutronix.de --- drivers/rtc/rtc-pcf2127.c | 56 ++++++++++++++++++++++----------------- 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/drivers/rtc/rtc-pcf2127.c b/drivers/rtc/rtc-pcf2127.c index 07a5630ec841f..e7be77af5a973 100644 --- a/drivers/rtc/rtc-pcf2127.c +++ b/drivers/rtc/rtc-pcf2127.c @@ -335,6 +335,36 @@ static const struct watchdog_ops pcf2127_watchdog_ops = { .set_timeout = pcf2127_wdt_set_timeout, }; +static int pcf2127_watchdog_init(struct device *dev, struct pcf2127 *pcf2127) +{ + u32 wdd_timeout; + int ret; + + if (!IS_ENABLED(CONFIG_WATCHDOG)) + return 0; + + pcf2127->wdd.parent = dev; + pcf2127->wdd.info = &pcf2127_wdt_info; + pcf2127->wdd.ops = &pcf2127_watchdog_ops; + pcf2127->wdd.min_timeout = PCF2127_WD_VAL_MIN; + pcf2127->wdd.max_timeout = PCF2127_WD_VAL_MAX; + pcf2127->wdd.timeout = PCF2127_WD_VAL_DEFAULT; + pcf2127->wdd.min_hw_heartbeat_ms = 500; + pcf2127->wdd.status = WATCHDOG_NOWAYOUT_INIT_STATUS; + + watchdog_set_drvdata(&pcf2127->wdd, pcf2127); + + /* Test if watchdog timer is started by bootloader */ + ret = regmap_read(pcf2127->regmap, PCF2127_REG_WD_VAL, &wdd_timeout); + if (ret) + return ret; + + if (wdd_timeout) + set_bit(WDOG_HW_RUNNING, &pcf2127->wdd.status); + + return devm_watchdog_register_device(dev, &pcf2127->wdd); +} + /* Alarm */ static int pcf2127_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) { @@ -536,7 +566,6 @@ static int pcf2127_probe(struct device *dev, struct regmap *regmap, int alarm_irq, const char *name, bool has_nvmem) { struct pcf2127 *pcf2127; - u32 wdd_timeout; int ret = 0; dev_dbg(dev, "%s\n", __func__); @@ -575,17 +604,6 @@ static int pcf2127_probe(struct device *dev, struct regmap *regmap, pcf2127->rtc->ops = &pcf2127_rtc_alrm_ops; } - pcf2127->wdd.parent = dev; - pcf2127->wdd.info = &pcf2127_wdt_info; - pcf2127->wdd.ops = &pcf2127_watchdog_ops; - pcf2127->wdd.min_timeout = PCF2127_WD_VAL_MIN; - pcf2127->wdd.max_timeout = PCF2127_WD_VAL_MAX; - pcf2127->wdd.timeout = PCF2127_WD_VAL_DEFAULT; - pcf2127->wdd.min_hw_heartbeat_ms = 500; - pcf2127->wdd.status = WATCHDOG_NOWAYOUT_INIT_STATUS; - - watchdog_set_drvdata(&pcf2127->wdd, pcf2127); - if (has_nvmem) { struct nvmem_config nvmem_cfg = { .priv = pcf2127, @@ -615,19 +633,7 @@ static int pcf2127_probe(struct device *dev, struct regmap *regmap, return ret; } - /* Test if watchdog timer is started by bootloader */ - ret = regmap_read(pcf2127->regmap, PCF2127_REG_WD_VAL, &wdd_timeout); - if (ret) - return ret; - - if (wdd_timeout) - set_bit(WDOG_HW_RUNNING, &pcf2127->wdd.status); - -#ifdef CONFIG_WATCHDOG - ret = devm_watchdog_register_device(dev, &pcf2127->wdd); - if (ret) - return ret; -#endif /* CONFIG_WATCHDOG */ + pcf2127_watchdog_init(dev, pcf2127); /* * Disable battery low/switch-over timestamp and interrupts. -- GitLab From ba1c30bf3f2536f248d262c6f257b5a787305991 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 22 Oct 2020 10:04:51 +0300 Subject: [PATCH 0015/1894] rtc: pcf2127: fix pcf2127_nvmem_read/write() returns These functions should return zero on success. Non-zero returns are treated as error. On some paths, this doesn't matter but in nvmem_cell_read() a non-zero return would be passed to ERR_PTR() and lead to an Oops. Fixes: d6c3029f32f7 ("rtc: pcf2127: add support for accessing internal static RAM") Signed-off-by: Dan Carpenter Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201022070451.GA2817669@mwanda --- drivers/rtc/rtc-pcf2127.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/rtc/rtc-pcf2127.c b/drivers/rtc/rtc-pcf2127.c index e7be77af5a973..fd46860152e15 100644 --- a/drivers/rtc/rtc-pcf2127.c +++ b/drivers/rtc/rtc-pcf2127.c @@ -243,10 +243,8 @@ static int pcf2127_nvmem_read(void *priv, unsigned int offset, if (ret) return ret; - ret = regmap_bulk_read(pcf2127->regmap, PCF2127_REG_RAM_RD_CMD, - val, bytes); - - return ret ?: bytes; + return regmap_bulk_read(pcf2127->regmap, PCF2127_REG_RAM_RD_CMD, + val, bytes); } static int pcf2127_nvmem_write(void *priv, unsigned int offset, @@ -261,10 +259,8 @@ static int pcf2127_nvmem_write(void *priv, unsigned int offset, if (ret) return ret; - ret = regmap_bulk_write(pcf2127->regmap, PCF2127_REG_RAM_WRT_CMD, - val, bytes); - - return ret ?: bytes; + return regmap_bulk_write(pcf2127->regmap, PCF2127_REG_RAM_WRT_CMD, + val, bytes); } /* watchdog driver */ -- GitLab From e9a2f8b599d0bc22a1b13e69527246ac39c697b4 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 11 Oct 2020 10:20:16 +0100 Subject: [PATCH 0016/1894] ARM: 9011/1: centralize phys-to-virt conversion of DT/ATAGS address Before moving the DT mapping out of the linear region, let's prepare for this change by removing all the phys-to-virt translations of the __atags_pointer variable, and perform this translation only once at setup time. Tested-by: Linus Walleij Reviewed-by: Linus Walleij Acked-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King --- arch/arm/include/asm/prom.h | 4 ++-- arch/arm/kernel/atags.h | 4 ++-- arch/arm/kernel/atags_parse.c | 6 +++--- arch/arm/kernel/devtree.c | 6 +++--- arch/arm/kernel/setup.c | 14 +++++++++----- arch/arm/mm/mmu.c | 4 ++-- 6 files changed, 21 insertions(+), 17 deletions(-) diff --git a/arch/arm/include/asm/prom.h b/arch/arm/include/asm/prom.h index 1e36c40533c16..402e3f34c7ed8 100644 --- a/arch/arm/include/asm/prom.h +++ b/arch/arm/include/asm/prom.h @@ -9,12 +9,12 @@ #ifdef CONFIG_OF -extern const struct machine_desc *setup_machine_fdt(unsigned int dt_phys); +extern const struct machine_desc *setup_machine_fdt(void *dt_virt); extern void __init arm_dt_init_cpu_maps(void); #else /* CONFIG_OF */ -static inline const struct machine_desc *setup_machine_fdt(unsigned int dt_phys) +static inline const struct machine_desc *setup_machine_fdt(void *dt_virt) { return NULL; } diff --git a/arch/arm/kernel/atags.h b/arch/arm/kernel/atags.h index 067e12edc3419..f2819c25b6029 100644 --- a/arch/arm/kernel/atags.h +++ b/arch/arm/kernel/atags.h @@ -2,11 +2,11 @@ void convert_to_tag_list(struct tag *tags); #ifdef CONFIG_ATAGS -const struct machine_desc *setup_machine_tags(phys_addr_t __atags_pointer, +const struct machine_desc *setup_machine_tags(void *__atags_vaddr, unsigned int machine_nr); #else static inline const struct machine_desc * __init __noreturn -setup_machine_tags(phys_addr_t __atags_pointer, unsigned int machine_nr) +setup_machine_tags(void *__atags_vaddr, unsigned int machine_nr) { early_print("no ATAGS support: can't continue\n"); while (true); diff --git a/arch/arm/kernel/atags_parse.c b/arch/arm/kernel/atags_parse.c index 6c12d9fe694e3..373b61f9a4f01 100644 --- a/arch/arm/kernel/atags_parse.c +++ b/arch/arm/kernel/atags_parse.c @@ -174,7 +174,7 @@ static void __init squash_mem_tags(struct tag *tag) } const struct machine_desc * __init -setup_machine_tags(phys_addr_t __atags_pointer, unsigned int machine_nr) +setup_machine_tags(void *atags_vaddr, unsigned int machine_nr) { struct tag *tags = (struct tag *)&default_tags; const struct machine_desc *mdesc = NULL, *p; @@ -195,8 +195,8 @@ setup_machine_tags(phys_addr_t __atags_pointer, unsigned int machine_nr) if (!mdesc) return NULL; - if (__atags_pointer) - tags = phys_to_virt(__atags_pointer); + if (atags_vaddr) + tags = atags_vaddr; else if (mdesc->atag_offset) tags = (void *)(PAGE_OFFSET + mdesc->atag_offset); diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c index 7f0745a97e20f..28311dd0fee68 100644 --- a/arch/arm/kernel/devtree.c +++ b/arch/arm/kernel/devtree.c @@ -203,12 +203,12 @@ static const void * __init arch_get_next_mach(const char *const **match) /** * setup_machine_fdt - Machine setup when an dtb was passed to the kernel - * @dt_phys: physical address of dt blob + * @dt_virt: virtual address of dt blob * * If a dtb was passed to the kernel in r2, then use it to choose the * correct machine_desc and to setup the system. */ -const struct machine_desc * __init setup_machine_fdt(unsigned int dt_phys) +const struct machine_desc * __init setup_machine_fdt(void *dt_virt) { const struct machine_desc *mdesc, *mdesc_best = NULL; @@ -221,7 +221,7 @@ const struct machine_desc * __init setup_machine_fdt(unsigned int dt_phys) mdesc_best = &__mach_desc_GENERIC_DT; #endif - if (!dt_phys || !early_init_dt_verify(phys_to_virt(dt_phys))) + if (!dt_virt || !early_init_dt_verify(dt_virt)) return NULL; mdesc = of_flat_dt_match_machine(mdesc_best, arch_get_next_mach); diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 3f65d0ac9f632..306bcd9844be7 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -89,6 +89,7 @@ unsigned int cacheid __read_mostly; EXPORT_SYMBOL(cacheid); unsigned int __atags_pointer __initdata; +void *atags_vaddr __initdata; unsigned int system_rev; EXPORT_SYMBOL(system_rev); @@ -1081,19 +1082,22 @@ void __init hyp_mode_check(void) void __init setup_arch(char **cmdline_p) { - const struct machine_desc *mdesc; + const struct machine_desc *mdesc = NULL; + + if (__atags_pointer) + atags_vaddr = phys_to_virt(__atags_pointer); setup_processor(); - mdesc = setup_machine_fdt(__atags_pointer); + if (atags_vaddr) + mdesc = setup_machine_fdt(atags_vaddr); if (!mdesc) - mdesc = setup_machine_tags(__atags_pointer, __machine_arch_type); + mdesc = setup_machine_tags(atags_vaddr, __machine_arch_type); if (!mdesc) { early_print("\nError: invalid dtb and unrecognized/unsupported machine ID\n"); early_print(" r1=0x%08x, r2=0x%08x\n", __machine_arch_type, __atags_pointer); if (__atags_pointer) - early_print(" r2[]=%*ph\n", 16, - phys_to_virt(__atags_pointer)); + early_print(" r2[]=%*ph\n", 16, atags_vaddr); dump_machine_table(); } diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index ab69250a86bc3..55991fe60054e 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1489,7 +1489,7 @@ static void __init map_lowmem(void) } #ifdef CONFIG_ARM_PV_FIXUP -extern unsigned long __atags_pointer; +extern void *atags_vaddr; typedef void pgtables_remap(long long offset, unsigned long pgd, void *bdata); pgtables_remap lpae_pgtables_remap_asm; @@ -1520,7 +1520,7 @@ static void __init early_paging_init(const struct machine_desc *mdesc) */ lpae_pgtables_remap = (pgtables_remap *)(unsigned long)__pa(lpae_pgtables_remap_asm); pa_pgd = __pa(swapper_pg_dir); - boot_data = __va(__atags_pointer); + boot_data = atags_vaddr; barrier(); pr_info("Switching physical address space to 0x%08llx\n", -- GitLab From 7a1be318f5795cb66fa0dc86b3ace427fe68057f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 11 Oct 2020 10:21:37 +0100 Subject: [PATCH 0017/1894] ARM: 9012/1: move device tree mapping out of linear region On ARM, setting up the linear region is tricky, given the constraints around placement and alignment of the memblocks, and how the kernel itself as well as the DT are placed in physical memory. Let's simplify matters a bit, by moving the device tree mapping to the top of the address space, right between the end of the vmalloc region and the start of the the fixmap region, and create a read-only mapping for it that is independent of the size of the linear region, and how it is organized. Since this region was formerly used as a guard region, which will now be populated fully on LPAE builds by this read-only mapping (which will still be able to function as a guard region for stray writes), bump the start of the [underutilized] fixmap region by 512 KB as well, to ensure that there is always a proper guard region here. Doing so still leaves ample room for the fixmap space, even with NR_CPUS set to its maximum value of 32. Tested-by: Linus Walleij Reviewed-by: Linus Walleij Reviewed-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King --- Documentation/arm/memory.rst | 7 ++++++- arch/arm/include/asm/fixmap.h | 2 +- arch/arm/include/asm/memory.h | 5 +++++ arch/arm/kernel/head.S | 5 ++--- arch/arm/kernel/setup.c | 11 ++++++++--- arch/arm/mm/init.c | 1 - arch/arm/mm/mmu.c | 20 ++++++++++++++------ arch/arm/mm/pv-fixup-asm.S | 4 ++-- 8 files changed, 38 insertions(+), 17 deletions(-) diff --git a/Documentation/arm/memory.rst b/Documentation/arm/memory.rst index 0521b4ce5c961..34bb23c44a710 100644 --- a/Documentation/arm/memory.rst +++ b/Documentation/arm/memory.rst @@ -45,9 +45,14 @@ fffe8000 fffeffff DTCM mapping area for platforms with fffe0000 fffe7fff ITCM mapping area for platforms with ITCM mounted inside the CPU. -ffc00000 ffefffff Fixmap mapping region. Addresses provided +ffc80000 ffefffff Fixmap mapping region. Addresses provided by fix_to_virt() will be located here. +ffc00000 ffc7ffff Guard region + +ff800000 ffbfffff Permanent, fixed read-only mapping of the + firmware provided DT blob + fee00000 feffffff Mapping of PCI I/O space. This is a static mapping within the vmalloc space. diff --git a/arch/arm/include/asm/fixmap.h b/arch/arm/include/asm/fixmap.h index fc56fc3e19316..9575b404019c9 100644 --- a/arch/arm/include/asm/fixmap.h +++ b/arch/arm/include/asm/fixmap.h @@ -2,7 +2,7 @@ #ifndef _ASM_FIXMAP_H #define _ASM_FIXMAP_H -#define FIXADDR_START 0xffc00000UL +#define FIXADDR_START 0xffc80000UL #define FIXADDR_END 0xfff00000UL #define FIXADDR_TOP (FIXADDR_END - PAGE_SIZE) diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 99035b5891ef4..bb79e52aeb904 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -67,6 +67,10 @@ */ #define XIP_VIRT_ADDR(physaddr) (MODULES_VADDR + ((physaddr) & 0x000fffff)) +#define FDT_FIXED_BASE UL(0xff800000) +#define FDT_FIXED_SIZE (2 * PMD_SIZE) +#define FDT_VIRT_ADDR(physaddr) ((void *)(FDT_FIXED_BASE | (physaddr) % PMD_SIZE)) + #if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE) /* * Allow 16MB-aligned ioremap pages @@ -107,6 +111,7 @@ extern unsigned long vectors_base; #define MODULES_VADDR PAGE_OFFSET #define XIP_VIRT_ADDR(physaddr) (physaddr) +#define FDT_VIRT_ADDR(physaddr) ((void *)(physaddr)) #endif /* !CONFIG_MMU */ diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index f8904227e7fdc..9b18d8c661295 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -275,9 +275,8 @@ __create_page_tables: */ mov r0, r2, lsr #SECTION_SHIFT movs r0, r0, lsl #SECTION_SHIFT - subne r3, r0, r8 - addne r3, r3, #PAGE_OFFSET - addne r3, r4, r3, lsr #(SECTION_SHIFT - PMD_ORDER) + ldrne r3, =FDT_FIXED_BASE >> (SECTION_SHIFT - PMD_ORDER) + addne r3, r3, r4 orrne r6, r7, r0 strne r6, [r3], #1 << PMD_ORDER addne r6, r6, #1 << SECTION_SHIFT diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 306bcd9844be7..694aa6b4bd03d 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -89,7 +90,6 @@ unsigned int cacheid __read_mostly; EXPORT_SYMBOL(cacheid); unsigned int __atags_pointer __initdata; -void *atags_vaddr __initdata; unsigned int system_rev; EXPORT_SYMBOL(system_rev); @@ -1083,13 +1083,18 @@ void __init hyp_mode_check(void) void __init setup_arch(char **cmdline_p) { const struct machine_desc *mdesc = NULL; + void *atags_vaddr = NULL; if (__atags_pointer) - atags_vaddr = phys_to_virt(__atags_pointer); + atags_vaddr = FDT_VIRT_ADDR(__atags_pointer); setup_processor(); - if (atags_vaddr) + if (atags_vaddr) { mdesc = setup_machine_fdt(atags_vaddr); + if (mdesc) + memblock_reserve(__atags_pointer, + fdt_totalsize(atags_vaddr)); + } if (!mdesc) mdesc = setup_machine_tags(atags_vaddr, __machine_arch_type); if (!mdesc) { diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index d57112a276f5a..a391804c7ce3c 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -223,7 +223,6 @@ void __init arm_memblock_init(const struct machine_desc *mdesc) if (mdesc->reserve) mdesc->reserve(); - early_init_fdt_reserve_self(); early_init_fdt_scan_reserved_mem(); /* reserve memory for DMA contiguous allocations */ diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 55991fe60054e..fa259825310c5 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -39,6 +39,8 @@ #include "mm.h" #include "tcm.h" +extern unsigned long __atags_pointer; + /* * empty_zero_page is a special page that is used for * zero-initialized data and COW. @@ -946,7 +948,7 @@ static void __init create_mapping(struct map_desc *md) return; } - if ((md->type == MT_DEVICE || md->type == MT_ROM) && + if (md->type == MT_DEVICE && md->virtual >= PAGE_OFFSET && md->virtual < FIXADDR_START && (md->virtual < VMALLOC_START || md->virtual >= VMALLOC_END)) { pr_warn("BUG: mapping for 0x%08llx at 0x%08lx out of vmalloc space\n", @@ -1333,6 +1335,15 @@ static void __init devicemaps_init(const struct machine_desc *mdesc) for (addr = VMALLOC_START; addr < (FIXADDR_TOP & PMD_MASK); addr += PMD_SIZE) pmd_clear(pmd_off_k(addr)); + if (__atags_pointer) { + /* create a read-only mapping of the device tree */ + map.pfn = __phys_to_pfn(__atags_pointer & SECTION_MASK); + map.virtual = FDT_FIXED_BASE; + map.length = FDT_FIXED_SIZE; + map.type = MT_ROM; + create_mapping(&map); + } + /* * Map the kernel if it is XIP. * It is always first in the modulearea. @@ -1489,8 +1500,7 @@ static void __init map_lowmem(void) } #ifdef CONFIG_ARM_PV_FIXUP -extern void *atags_vaddr; -typedef void pgtables_remap(long long offset, unsigned long pgd, void *bdata); +typedef void pgtables_remap(long long offset, unsigned long pgd); pgtables_remap lpae_pgtables_remap_asm; /* @@ -1503,7 +1513,6 @@ static void __init early_paging_init(const struct machine_desc *mdesc) unsigned long pa_pgd; unsigned int cr, ttbcr; long long offset; - void *boot_data; if (!mdesc->pv_fixup) return; @@ -1520,7 +1529,6 @@ static void __init early_paging_init(const struct machine_desc *mdesc) */ lpae_pgtables_remap = (pgtables_remap *)(unsigned long)__pa(lpae_pgtables_remap_asm); pa_pgd = __pa(swapper_pg_dir); - boot_data = atags_vaddr; barrier(); pr_info("Switching physical address space to 0x%08llx\n", @@ -1556,7 +1564,7 @@ static void __init early_paging_init(const struct machine_desc *mdesc) * needs to be assembly. It's fairly simple, as we're using the * temporary tables setup by the initial assembly code. */ - lpae_pgtables_remap(offset, pa_pgd, boot_data); + lpae_pgtables_remap(offset, pa_pgd); /* Re-enable the caches and cacheable TLB walks */ asm volatile("mcr p15, 0, %0, c2, c0, 2" : : "r" (ttbcr)); diff --git a/arch/arm/mm/pv-fixup-asm.S b/arch/arm/mm/pv-fixup-asm.S index 8eade04167399..5c5e1952000ab 100644 --- a/arch/arm/mm/pv-fixup-asm.S +++ b/arch/arm/mm/pv-fixup-asm.S @@ -39,8 +39,8 @@ ENTRY(lpae_pgtables_remap_asm) /* Update level 2 entries for the boot data */ add r7, r2, #0x1000 - add r7, r7, r3, lsr #SECTION_SHIFT - L2_ORDER - bic r7, r7, #(1 << L2_ORDER) - 1 + movw r3, #FDT_FIXED_BASE >> (SECTION_SHIFT - L2_ORDER) + add r7, r7, r3 ldrd r4, r5, [r7] adds r4, r4, r0 adc r5, r5, r1 -- GitLab From d5d44e7e3507b0ad868f68e0c5bca6a57afa1b8b Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 25 Oct 2020 23:50:09 +0100 Subject: [PATCH 0018/1894] ARM: 9013/2: Disable KASan instrumentation for some code Disable instrumentation for arch/arm/boot/compressed/* since that code is executed before the kernel has even set up its mappings and definately out of scope for KASan. Disable instrumentation of arch/arm/vdso/* because that code is not linked with the kernel image, so the KASan management code would fail to link. Disable instrumentation of arch/arm/mm/physaddr.c. See commit ec6d06efb0ba ("arm64: Add support for CONFIG_DEBUG_VIRTUAL") for more details. Disable kasan check in the function unwind_pop_register because it does not matter that kasan checks failed when unwind_pop_register() reads the stack memory of a task. Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: kasan-dev@googlegroups.com Reviewed-by: Ard Biesheuvel Tested-by: Ard Biesheuvel # QEMU/KVM/mach-virt/LPAE/8G Tested-by: Florian Fainelli # Brahma SoCs Tested-by: Ahmad Fatoum # i.MX6Q Reported-by: Florian Fainelli Reported-by: Marc Zyngier Signed-off-by: Abbott Liu Signed-off-by: Florian Fainelli Signed-off-by: Linus Walleij Signed-off-by: Russell King --- arch/arm/boot/compressed/Makefile | 1 + arch/arm/kernel/unwind.c | 6 +++++- arch/arm/mm/Makefile | 2 ++ arch/arm/vdso/Makefile | 2 ++ 4 files changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 47f001ca5499d..a815b1ae990d2 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -24,6 +24,7 @@ OBJS += hyp-stub.o endif GCOV_PROFILE := n +KASAN_SANITIZE := n # Prevents link failures: __sanitizer_cov_trace_pc() is not linked in. KCOV_INSTRUMENT := n diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c index d2bd0df2318d6..f35eb584a18aa 100644 --- a/arch/arm/kernel/unwind.c +++ b/arch/arm/kernel/unwind.c @@ -236,7 +236,11 @@ static int unwind_pop_register(struct unwind_ctrl_block *ctrl, if (*vsp >= (unsigned long *)ctrl->sp_high) return -URC_FAILURE; - ctrl->vrs[reg] = *(*vsp)++; + /* Use READ_ONCE_NOCHECK here to avoid this memory access + * from being tracked by KASAN. + */ + ctrl->vrs[reg] = READ_ONCE_NOCHECK(*(*vsp)); + (*vsp)++; return URC_OK; } diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index 7cb1699fbfc4f..99699c32d8a5e 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -7,6 +7,7 @@ obj-y := extable.o fault.o init.o iomap.o obj-y += dma-mapping$(MMUEXT).o obj-$(CONFIG_MMU) += fault-armv.o flush.o idmap.o ioremap.o \ mmap.o pgd.o mmu.o pageattr.o +KASAN_SANITIZE_mmu.o := n ifneq ($(CONFIG_MMU),y) obj-y += nommu.o @@ -16,6 +17,7 @@ endif obj-$(CONFIG_ARM_PTDUMP_CORE) += dump.o obj-$(CONFIG_ARM_PTDUMP_DEBUGFS) += ptdump_debugfs.o obj-$(CONFIG_MODULES) += proc-syms.o +KASAN_SANITIZE_physaddr.o := n obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o obj-$(CONFIG_ALIGNMENT_TRAP) += alignment.o diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile index 150ce6e6a5d31..b558bee0e1f6b 100644 --- a/arch/arm/vdso/Makefile +++ b/arch/arm/vdso/Makefile @@ -42,6 +42,8 @@ GCOV_PROFILE := n # Prevents link failures: __sanitizer_cov_trace_pc() is not linked in. KCOV_INSTRUMENT := n +KASAN_SANITIZE := n + # Force dependency $(obj)/vdso.o : $(obj)/vdso.so -- GitLab From d6d51a96c7d63b7450860a3037f2d62388286a52 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 25 Oct 2020 23:52:08 +0100 Subject: [PATCH 0019/1894] ARM: 9014/2: Replace string mem* functions for KASan Functions like memset()/memmove()/memcpy() do a lot of memory accesses. If a bad pointer is passed to one of these functions it is important to catch this. Compiler instrumentation cannot do this since these functions are written in assembly. KASan replaces these memory functions with instrumented variants. The original functions are declared as weak symbols so that the strong definitions in mm/kasan/kasan.c can replace them. The original functions have aliases with a '__' prefix in their name, so we can call the non-instrumented variant if needed. We must use __memcpy()/__memset() in place of memcpy()/memset() when we copy .data to RAM and when we clear .bss, because kasan_early_init cannot be called before the initialization of .data and .bss. For the kernel compression and EFI libstub's custom string libraries we need a special quirk: even if these are built without KASan enabled, they rely on the global headers for their custom string libraries, which means that e.g. memcpy() will be defined to __memcpy() and we get link failures. Since these implementations are written i C rather than assembly we use e.g. __alias(memcpy) to redirected any users back to the local implementation. Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: kasan-dev@googlegroups.com Reviewed-by: Ard Biesheuvel Tested-by: Ard Biesheuvel # QEMU/KVM/mach-virt/LPAE/8G Tested-by: Florian Fainelli # Brahma SoCs Tested-by: Ahmad Fatoum # i.MX6Q Reported-by: Russell King - ARM Linux Signed-off-by: Ahmad Fatoum Signed-off-by: Abbott Liu Signed-off-by: Florian Fainelli Signed-off-by: Linus Walleij Signed-off-by: Russell King --- arch/arm/boot/compressed/string.c | 19 +++++++++++++++++++ arch/arm/include/asm/string.h | 26 ++++++++++++++++++++++++++ arch/arm/kernel/head-common.S | 4 ++-- arch/arm/lib/memcpy.S | 3 +++ arch/arm/lib/memmove.S | 5 ++++- arch/arm/lib/memset.S | 3 +++ 6 files changed, 57 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/compressed/string.c b/arch/arm/boot/compressed/string.c index ade5079bebbf9..8c0fa276d9946 100644 --- a/arch/arm/boot/compressed/string.c +++ b/arch/arm/boot/compressed/string.c @@ -7,6 +7,25 @@ #include +/* + * The decompressor is built without KASan but uses the same redirects as the + * rest of the kernel when CONFIG_KASAN is enabled, defining e.g. memcpy() + * to __memcpy() but since we are not linking with the main kernel string + * library in the decompressor, that will lead to link failures. + * + * Undefine KASan's versions, define the wrapped functions and alias them to + * the right names so that when e.g. __memcpy() appear in the code, it will + * still be linked to this local version of memcpy(). + */ +#ifdef CONFIG_KASAN +#undef memcpy +#undef memmove +#undef memset +void *__memcpy(void *__dest, __const void *__src, size_t __n) __alias(memcpy); +void *__memmove(void *__dest, __const void *__src, size_t count) __alias(memmove); +void *__memset(void *s, int c, size_t count) __alias(memset); +#endif + void *memcpy(void *__dest, __const void *__src, size_t __n) { int i = 0; diff --git a/arch/arm/include/asm/string.h b/arch/arm/include/asm/string.h index 111a1d8a41ddf..6c607c68f3ad7 100644 --- a/arch/arm/include/asm/string.h +++ b/arch/arm/include/asm/string.h @@ -5,6 +5,9 @@ /* * We don't do inline string functions, since the * optimised inline asm versions are not small. + * + * The __underscore versions of some functions are for KASan to be able + * to replace them with instrumented versions. */ #define __HAVE_ARCH_STRRCHR @@ -15,15 +18,18 @@ extern char * strchr(const char * s, int c); #define __HAVE_ARCH_MEMCPY extern void * memcpy(void *, const void *, __kernel_size_t); +extern void *__memcpy(void *dest, const void *src, __kernel_size_t n); #define __HAVE_ARCH_MEMMOVE extern void * memmove(void *, const void *, __kernel_size_t); +extern void *__memmove(void *dest, const void *src, __kernel_size_t n); #define __HAVE_ARCH_MEMCHR extern void * memchr(const void *, int, __kernel_size_t); #define __HAVE_ARCH_MEMSET extern void * memset(void *, int, __kernel_size_t); +extern void *__memset(void *s, int c, __kernel_size_t n); #define __HAVE_ARCH_MEMSET32 extern void *__memset32(uint32_t *, uint32_t v, __kernel_size_t); @@ -39,4 +45,24 @@ static inline void *memset64(uint64_t *p, uint64_t v, __kernel_size_t n) return __memset64(p, v, n * 8, v >> 32); } +/* + * For files that are not instrumented (e.g. mm/slub.c) we + * must use non-instrumented versions of the mem* + * functions named __memcpy() etc. All such kernel code has + * been tagged with KASAN_SANITIZE_file.o = n, which means + * that the address sanitization argument isn't passed to the + * compiler, and __SANITIZE_ADDRESS__ is not set. As a result + * these defines kick in. + */ +#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) +#define memcpy(dst, src, len) __memcpy(dst, src, len) +#define memmove(dst, src, len) __memmove(dst, src, len) +#define memset(s, c, n) __memset(s, c, n) + +#ifndef __NO_FORTIFY +#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */ +#endif + +#endif + #endif diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S index 4a3982812a401..6840c7c60a858 100644 --- a/arch/arm/kernel/head-common.S +++ b/arch/arm/kernel/head-common.S @@ -95,7 +95,7 @@ __mmap_switched: THUMB( ldmia r4!, {r0, r1, r2, r3} ) THUMB( mov sp, r3 ) sub r2, r2, r1 - bl memcpy @ copy .data to RAM + bl __memcpy @ copy .data to RAM #endif ARM( ldmia r4!, {r0, r1, sp} ) @@ -103,7 +103,7 @@ __mmap_switched: THUMB( mov sp, r3 ) sub r2, r1, r0 mov r1, #0 - bl memset @ clear .bss + bl __memset @ clear .bss ldmia r4, {r0, r1, r2, r3} str r9, [r0] @ Save processor ID diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S index 09a333153dc66..ad4625d16e117 100644 --- a/arch/arm/lib/memcpy.S +++ b/arch/arm/lib/memcpy.S @@ -58,6 +58,8 @@ /* Prototype: void *memcpy(void *dest, const void *src, size_t n); */ +.weak memcpy +ENTRY(__memcpy) ENTRY(mmiocpy) ENTRY(memcpy) @@ -65,3 +67,4 @@ ENTRY(memcpy) ENDPROC(memcpy) ENDPROC(mmiocpy) +ENDPROC(__memcpy) diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S index b50e5770fb44d..fd123ea5a5a4a 100644 --- a/arch/arm/lib/memmove.S +++ b/arch/arm/lib/memmove.S @@ -24,12 +24,14 @@ * occurring in the opposite direction. */ +.weak memmove +ENTRY(__memmove) ENTRY(memmove) UNWIND( .fnstart ) subs ip, r0, r1 cmphi r2, ip - bls memcpy + bls __memcpy stmfd sp!, {r0, r4, lr} UNWIND( .fnend ) @@ -222,3 +224,4 @@ ENTRY(memmove) 18: backward_copy_shift push=24 pull=8 ENDPROC(memmove) +ENDPROC(__memmove) diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S index 6ca4535c47fb6..0e7ff0423f50b 100644 --- a/arch/arm/lib/memset.S +++ b/arch/arm/lib/memset.S @@ -13,6 +13,8 @@ .text .align 5 +.weak memset +ENTRY(__memset) ENTRY(mmioset) ENTRY(memset) UNWIND( .fnstart ) @@ -132,6 +134,7 @@ UNWIND( .fnstart ) UNWIND( .fnend ) ENDPROC(memset) ENDPROC(mmioset) +ENDPROC(__memset) ENTRY(__memset32) UNWIND( .fnstart ) -- GitLab From c12366ba441da2f6f2b915410aca2b5b39c16514 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 25 Oct 2020 23:53:46 +0100 Subject: [PATCH 0020/1894] ARM: 9015/2: Define the virtual space of KASan's shadow region Define KASAN_SHADOW_OFFSET,KASAN_SHADOW_START and KASAN_SHADOW_END for the Arm kernel address sanitizer. We are "stealing" lowmem (the 4GB addressable by a 32bit architecture) out of the virtual address space to use as shadow memory for KASan as follows: +----+ 0xffffffff | | | | |-> Static kernel image (vmlinux) BSS and page table | |/ +----+ PAGE_OFFSET | | | | |-> Loadable kernel modules virtual address space area | |/ +----+ MODULES_VADDR = KASAN_SHADOW_END | | | | |-> The shadow area of kernel virtual address. | |/ +----+-> TASK_SIZE (start of kernel space) = KASAN_SHADOW_START the | | shadow address of MODULES_VADDR | | | | | | | | |-> The user space area in lowmem. The kernel address | | | sanitizer do not use this space, nor does it map it. | | | | | | | | | | | | | |/ ------ 0 0 .. TASK_SIZE is the memory that can be used by shared userspace/kernelspace. It us used for userspace processes and for passing parameters and memory buffers in system calls etc. We do not need to shadow this area. KASAN_SHADOW_START: This value begins with the MODULE_VADDR's shadow address. It is the start of kernel virtual space. Since we have modules to load, we need to cover also that area with shadow memory so we can find memory bugs in modules. KASAN_SHADOW_END This value is the 0x100000000's shadow address: the mapping that would be after the end of the kernel memory at 0xffffffff. It is the end of kernel address sanitizer shadow area. It is also the start of the module area. KASAN_SHADOW_OFFSET: This value is used to map an address to the corresponding shadow address by the following formula: shadow_addr = (address >> 3) + KASAN_SHADOW_OFFSET; As you would expect, >> 3 is equal to dividing by 8, meaning each byte in the shadow memory covers 8 bytes of kernel memory, so one bit shadow memory per byte of kernel memory is used. The KASAN_SHADOW_OFFSET is provided in a Kconfig option depending on the VMSPLIT layout of the system: the kernel and userspace can split up lowmem in different ways according to needs, so we calculate the shadow offset depending on this. When kasan is enabled, the definition of TASK_SIZE is not an 8-bit rotated constant, so we need to modify the TASK_SIZE access code in the *.s file. The kernel and modules may use different amounts of memory, according to the VMSPLIT configuration, which in turn determines the PAGE_OFFSET. We use the following KASAN_SHADOW_OFFSETs depending on how the virtual memory is split up: - 0x1f000000 if we have 1G userspace / 3G kernelspace split: - The kernel address space is 3G (0xc0000000) - PAGE_OFFSET is then set to 0x40000000 so the kernel static image (vmlinux) uses addresses 0x40000000 .. 0xffffffff - On top of that we have the MODULES_VADDR which under the worst case (using ARM instructions) is PAGE_OFFSET - 16M (0x01000000) = 0x3f000000 so the modules use addresses 0x3f000000 .. 0x3fffffff - So the addresses 0x3f000000 .. 0xffffffff need to be covered with shadow memory. That is 0xc1000000 bytes of memory. - 1/8 of that is needed for its shadow memory, so 0x18200000 bytes of shadow memory is needed. We "steal" that from the remaining lowmem. - The KASAN_SHADOW_START becomes 0x26e00000, to KASAN_SHADOW_END at 0x3effffff. - Now we can calculate the KASAN_SHADOW_OFFSET for any kernel address as 0x3f000000 needs to map to the first byte of shadow memory and 0xffffffff needs to map to the last byte of shadow memory. Since: SHADOW_ADDR = (address >> 3) + KASAN_SHADOW_OFFSET 0x26e00000 = (0x3f000000 >> 3) + KASAN_SHADOW_OFFSET KASAN_SHADOW_OFFSET = 0x26e00000 - (0x3f000000 >> 3) KASAN_SHADOW_OFFSET = 0x26e00000 - 0x07e00000 KASAN_SHADOW_OFFSET = 0x1f000000 - 0x5f000000 if we have 2G userspace / 2G kernelspace split: - The kernel space is 2G (0x80000000) - PAGE_OFFSET is set to 0x80000000 so the kernel static image uses 0x80000000 .. 0xffffffff. - On top of that we have the MODULES_VADDR which under the worst case (using ARM instructions) is PAGE_OFFSET - 16M (0x01000000) = 0x7f000000 so the modules use addresses 0x7f000000 .. 0x7fffffff - So the addresses 0x7f000000 .. 0xffffffff need to be covered with shadow memory. That is 0x81000000 bytes of memory. - 1/8 of that is needed for its shadow memory, so 0x10200000 bytes of shadow memory is needed. We "steal" that from the remaining lowmem. - The KASAN_SHADOW_START becomes 0x6ee00000, to KASAN_SHADOW_END at 0x7effffff. - Now we can calculate the KASAN_SHADOW_OFFSET for any kernel address as 0x7f000000 needs to map to the first byte of shadow memory and 0xffffffff needs to map to the last byte of shadow memory. Since: SHADOW_ADDR = (address >> 3) + KASAN_SHADOW_OFFSET 0x6ee00000 = (0x7f000000 >> 3) + KASAN_SHADOW_OFFSET KASAN_SHADOW_OFFSET = 0x6ee00000 - (0x7f000000 >> 3) KASAN_SHADOW_OFFSET = 0x6ee00000 - 0x0fe00000 KASAN_SHADOW_OFFSET = 0x5f000000 - 0x9f000000 if we have 3G userspace / 1G kernelspace split, and this is the default split for ARM: - The kernel address space is 1GB (0x40000000) - PAGE_OFFSET is set to 0xc0000000 so the kernel static image uses 0xc0000000 .. 0xffffffff. - On top of that we have the MODULES_VADDR which under the worst case (using ARM instructions) is PAGE_OFFSET - 16M (0x01000000) = 0xbf000000 so the modules use addresses 0xbf000000 .. 0xbfffffff - So the addresses 0xbf000000 .. 0xffffffff need to be covered with shadow memory. That is 0x41000000 bytes of memory. - 1/8 of that is needed for its shadow memory, so 0x08200000 bytes of shadow memory is needed. We "steal" that from the remaining lowmem. - The KASAN_SHADOW_START becomes 0xb6e00000, to KASAN_SHADOW_END at 0xbfffffff. - Now we can calculate the KASAN_SHADOW_OFFSET for any kernel address as 0xbf000000 needs to map to the first byte of shadow memory and 0xffffffff needs to map to the last byte of shadow memory. Since: SHADOW_ADDR = (address >> 3) + KASAN_SHADOW_OFFSET 0xb6e00000 = (0xbf000000 >> 3) + KASAN_SHADOW_OFFSET KASAN_SHADOW_OFFSET = 0xb6e00000 - (0xbf000000 >> 3) KASAN_SHADOW_OFFSET = 0xb6e00000 - 0x17e00000 KASAN_SHADOW_OFFSET = 0x9f000000 - 0x8f000000 if we have 3G userspace / 1G kernelspace with full 1 GB low memory (VMSPLIT_3G_OPT): - The kernel address space is 1GB (0x40000000) - PAGE_OFFSET is set to 0xb0000000 so the kernel static image uses 0xb0000000 .. 0xffffffff. - On top of that we have the MODULES_VADDR which under the worst case (using ARM instructions) is PAGE_OFFSET - 16M (0x01000000) = 0xaf000000 so the modules use addresses 0xaf000000 .. 0xaffffff - So the addresses 0xaf000000 .. 0xffffffff need to be covered with shadow memory. That is 0x51000000 bytes of memory. - 1/8 of that is needed for its shadow memory, so 0x0a200000 bytes of shadow memory is needed. We "steal" that from the remaining lowmem. - The KASAN_SHADOW_START becomes 0xa4e00000, to KASAN_SHADOW_END at 0xaeffffff. - Now we can calculate the KASAN_SHADOW_OFFSET for any kernel address as 0xaf000000 needs to map to the first byte of shadow memory and 0xffffffff needs to map to the last byte of shadow memory. Since: SHADOW_ADDR = (address >> 3) + KASAN_SHADOW_OFFSET 0xa4e00000 = (0xaf000000 >> 3) + KASAN_SHADOW_OFFSET KASAN_SHADOW_OFFSET = 0xa4e00000 - (0xaf000000 >> 3) KASAN_SHADOW_OFFSET = 0xa4e00000 - 0x15e00000 KASAN_SHADOW_OFFSET = 0x8f000000 - The default value of 0xffffffff for KASAN_SHADOW_OFFSET is an error value. We should always match one of the above shadow offsets. When we do this, TASK_SIZE will sometimes get a bit odd values that will not fit into immediate mov assembly instructions. To account for this, we need to rewrite some assembly using TASK_SIZE like this: - mov r1, #TASK_SIZE + ldr r1, =TASK_SIZE or - cmp r4, #TASK_SIZE + ldr r0, =TASK_SIZE + cmp r4, r0 this is done to avoid the immediate #TASK_SIZE that need to fit into a limited number of bits. Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: kasan-dev@googlegroups.com Cc: Mike Rapoport Reviewed-by: Ard Biesheuvel Tested-by: Ard Biesheuvel # QEMU/KVM/mach-virt/LPAE/8G Tested-by: Florian Fainelli # Brahma SoCs Tested-by: Ahmad Fatoum # i.MX6Q Reported-by: Ard Biesheuvel Signed-off-by: Abbott Liu Signed-off-by: Florian Fainelli Signed-off-by: Linus Walleij Signed-off-by: Russell King --- Documentation/arm/memory.rst | 5 ++ arch/arm/Kconfig | 9 ++++ arch/arm/include/asm/kasan_def.h | 81 ++++++++++++++++++++++++++++++ arch/arm/include/asm/memory.h | 5 ++ arch/arm/include/asm/uaccess-asm.h | 2 +- arch/arm/kernel/entry-armv.S | 3 +- arch/arm/kernel/entry-common.S | 9 ++-- arch/arm/mm/mmu.c | 18 +++++++ 8 files changed, 127 insertions(+), 5 deletions(-) create mode 100644 arch/arm/include/asm/kasan_def.h diff --git a/Documentation/arm/memory.rst b/Documentation/arm/memory.rst index 34bb23c44a710..0cb1e29388234 100644 --- a/Documentation/arm/memory.rst +++ b/Documentation/arm/memory.rst @@ -77,6 +77,11 @@ MODULES_VADDR MODULES_END-1 Kernel module space Kernel modules inserted via insmod are placed here using dynamic mappings. +TASK_SIZE MODULES_VADDR-1 KASAn shadow memory when KASan is in use. + The range from MODULES_VADDR to the top + of the memory is shadowed here with 1 bit + per byte of memory. + 00001000 TASK_SIZE-1 User space mappings Per-thread mappings are placed here via the mmap() system call. diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index fe2f17eb2b505..194032568b7a9 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1323,6 +1323,15 @@ config PAGE_OFFSET default 0xB0000000 if VMSPLIT_3G_OPT default 0xC0000000 +config KASAN_SHADOW_OFFSET + hex + depends on KASAN + default 0x1f000000 if PAGE_OFFSET=0x40000000 + default 0x5f000000 if PAGE_OFFSET=0x80000000 + default 0x9f000000 if PAGE_OFFSET=0xC0000000 + default 0x8f000000 if PAGE_OFFSET=0xB0000000 + default 0xffffffff + config NR_CPUS int "Maximum number of CPUs (2-32)" range 2 32 diff --git a/arch/arm/include/asm/kasan_def.h b/arch/arm/include/asm/kasan_def.h new file mode 100644 index 0000000000000..5739605aa7cf0 --- /dev/null +++ b/arch/arm/include/asm/kasan_def.h @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/arm/include/asm/kasan_def.h + * + * Copyright (c) 2018 Huawei Technologies Co., Ltd. + * + * Author: Abbott Liu + */ + +#ifndef __ASM_KASAN_DEF_H +#define __ASM_KASAN_DEF_H + +#ifdef CONFIG_KASAN + +/* + * Define KASAN_SHADOW_OFFSET,KASAN_SHADOW_START and KASAN_SHADOW_END for + * the Arm kernel address sanitizer. We are "stealing" lowmem (the 4GB + * addressable by a 32bit architecture) out of the virtual address + * space to use as shadow memory for KASan as follows: + * + * +----+ 0xffffffff + * | | \ + * | | |-> Static kernel image (vmlinux) BSS and page table + * | |/ + * +----+ PAGE_OFFSET + * | | \ + * | | |-> Loadable kernel modules virtual address space area + * | |/ + * +----+ MODULES_VADDR = KASAN_SHADOW_END + * | | \ + * | | |-> The shadow area of kernel virtual address. + * | |/ + * +----+-> TASK_SIZE (start of kernel space) = KASAN_SHADOW_START the + * | |\ shadow address of MODULES_VADDR + * | | | + * | | | + * | | |-> The user space area in lowmem. The kernel address + * | | | sanitizer do not use this space, nor does it map it. + * | | | + * | | | + * | | | + * | | | + * | |/ + * ------ 0 + * + * 1) KASAN_SHADOW_START + * This value begins with the MODULE_VADDR's shadow address. It is the + * start of kernel virtual space. Since we have modules to load, we need + * to cover also that area with shadow memory so we can find memory + * bugs in modules. + * + * 2) KASAN_SHADOW_END + * This value is the 0x100000000's shadow address: the mapping that would + * be after the end of the kernel memory at 0xffffffff. It is the end of + * kernel address sanitizer shadow area. It is also the start of the + * module area. + * + * 3) KASAN_SHADOW_OFFSET: + * This value is used to map an address to the corresponding shadow + * address by the following formula: + * + * shadow_addr = (address >> 3) + KASAN_SHADOW_OFFSET; + * + * As you would expect, >> 3 is equal to dividing by 8, meaning each + * byte in the shadow memory covers 8 bytes of kernel memory, so one + * bit shadow memory per byte of kernel memory is used. + * + * The KASAN_SHADOW_OFFSET is provided in a Kconfig option depending + * on the VMSPLIT layout of the system: the kernel and userspace can + * split up lowmem in different ways according to needs, so we calculate + * the shadow offset depending on this. + */ + +#define KASAN_SHADOW_SCALE_SHIFT 3 +#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL) +#define KASAN_SHADOW_END ((UL(1) << (32 - KASAN_SHADOW_SCALE_SHIFT)) \ + + KASAN_SHADOW_OFFSET) +#define KASAN_SHADOW_START ((KASAN_SHADOW_END >> 3) + KASAN_SHADOW_OFFSET) + +#endif +#endif diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index bb79e52aeb904..598dbdca2017f 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -18,6 +18,7 @@ #ifdef CONFIG_NEED_MACH_MEMORY_H #include #endif +#include /* PAGE_OFFSET - the virtual address of the start of the kernel image */ #define PAGE_OFFSET UL(CONFIG_PAGE_OFFSET) @@ -28,7 +29,11 @@ * TASK_SIZE - the maximum size of a user space task. * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area */ +#ifndef CONFIG_KASAN #define TASK_SIZE (UL(CONFIG_PAGE_OFFSET) - UL(SZ_16M)) +#else +#define TASK_SIZE (KASAN_SHADOW_START) +#endif #define TASK_UNMAPPED_BASE ALIGN(TASK_SIZE / 3, SZ_16M) /* diff --git a/arch/arm/include/asm/uaccess-asm.h b/arch/arm/include/asm/uaccess-asm.h index 907571fd05c65..e6eb7a2aaf1e3 100644 --- a/arch/arm/include/asm/uaccess-asm.h +++ b/arch/arm/include/asm/uaccess-asm.h @@ -85,7 +85,7 @@ */ .macro uaccess_entry, tsk, tmp0, tmp1, tmp2, disable ldr \tmp1, [\tsk, #TI_ADDR_LIMIT] - mov \tmp2, #TASK_SIZE + ldr \tmp2, =TASK_SIZE str \tmp2, [\tsk, #TI_ADDR_LIMIT] DACR( mrc p15, 0, \tmp0, c3, c0, 0) DACR( str \tmp0, [sp, #SVC_DACR]) diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 55a47df047738..c4220f51fcf3a 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -427,7 +427,8 @@ ENDPROC(__fiq_abt) @ if it was interrupted in a critical region. Here we @ perform a quick test inline since it should be false @ 99.9999% of the time. The rest is done out of line. - cmp r4, #TASK_SIZE + ldr r0, =TASK_SIZE + cmp r4, r0 blhs kuser_cmpxchg64_fixup #endif #endif diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 271cb8a1eba1e..fee279e28a72e 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -50,7 +50,8 @@ __ret_fast_syscall: UNWIND(.cantunwind ) disable_irq_notrace @ disable interrupts ldr r2, [tsk, #TI_ADDR_LIMIT] - cmp r2, #TASK_SIZE + ldr r1, =TASK_SIZE + cmp r2, r1 blne addr_limit_check_failed ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK @@ -87,7 +88,8 @@ __ret_fast_syscall: #endif disable_irq_notrace @ disable interrupts ldr r2, [tsk, #TI_ADDR_LIMIT] - cmp r2, #TASK_SIZE + ldr r1, =TASK_SIZE + cmp r2, r1 blne addr_limit_check_failed ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK @@ -128,7 +130,8 @@ ret_slow_syscall: disable_irq_notrace @ disable interrupts ENTRY(ret_to_user_from_irq) ldr r2, [tsk, #TI_ADDR_LIMIT] - cmp r2, #TASK_SIZE + ldr r1, =TASK_SIZE + cmp r2, r1 blne addr_limit_check_failed ldr r1, [tsk, #TI_FLAGS] tst r1, #_TIF_WORK_MASK diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index fa259825310c5..c06ebfbc48c4a 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -1255,8 +1256,25 @@ static inline void prepare_page_table(void) /* * Clear out all the mappings below the kernel image. */ +#ifdef CONFIG_KASAN + /* + * KASan's shadow memory inserts itself between the TASK_SIZE + * and MODULES_VADDR. Do not clear the KASan shadow memory mappings. + */ + for (addr = 0; addr < KASAN_SHADOW_START; addr += PMD_SIZE) + pmd_clear(pmd_off_k(addr)); + /* + * Skip over the KASan shadow area. KASAN_SHADOW_END is sometimes + * equal to MODULES_VADDR and then we exit the pmd clearing. If we + * are using a thumb-compiled kernel, there there will be 8MB more + * to clear as KASan always offset to 16 MB below MODULES_VADDR. + */ + for (addr = KASAN_SHADOW_END; addr < MODULES_VADDR; addr += PMD_SIZE) + pmd_clear(pmd_off_k(addr)); +#else for (addr = 0; addr < MODULES_VADDR; addr += PMD_SIZE) pmd_clear(pmd_off_k(addr)); +#endif #ifdef CONFIG_XIP_KERNEL /* The XIP kernel is mapped in the module area -- skip over it */ -- GitLab From 5615f69bc2097452ecc954f5264d784e158d6801 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 25 Oct 2020 23:55:16 +0100 Subject: [PATCH 0021/1894] ARM: 9016/2: Initialize the mapping of KASan shadow memory This patch initializes KASan shadow region's page table and memory. There are two stage for KASan initializing: 1. At early boot stage the whole shadow region is mapped to just one physical page (kasan_zero_page). It is finished by the function kasan_early_init which is called by __mmap_switched(arch/arm/kernel/ head-common.S) 2. After the calling of paging_init, we use kasan_zero_page as zero shadow for some memory that KASan does not need to track, and we allocate a new shadow space for the other memory that KASan need to track. These issues are finished by the function kasan_init which is call by setup_arch. When using KASan we also need to increase the THREAD_SIZE_ORDER from 1 to 2 as the extra calls for shadow memory uses quite a bit of stack. As we need to make a temporary copy of the PGD when setting up shadow memory we create a helpful PGD_SIZE definition for both LPAE and non-LPAE setups. The KASan core code unconditionally calls pud_populate() so this needs to be changed from BUG() to do {} while (0) when building with KASan enabled. After the initial development by Andre Ryabinin several modifications have been made to this code: Abbott Liu - Add support ARM LPAE: If LPAE is enabled, KASan shadow region's mapping table need be copied in the pgd_alloc() function. - Change kasan_pte_populate,kasan_pmd_populate,kasan_pud_populate, kasan_pgd_populate from .meminit.text section to .init.text section. Reported by Florian Fainelli Linus Walleij : - Drop the custom mainpulation of TTBR0 and just use cpu_switch_mm() to switch the pgd table. - Adopt to handle 4th level page tabel folding. - Rewrite the entire page directory and page entry initialization sequence to be recursive based on ARM64:s kasan_init.c. Ard Biesheuvel : - Necessary underlying fixes. - Crucial bug fixes to the memory set-up code. Co-developed-by: Andrey Ryabinin Co-developed-by: Abbott Liu Co-developed-by: Ard Biesheuvel Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: kasan-dev@googlegroups.com Cc: Mike Rapoport Acked-by: Mike Rapoport Reviewed-by: Ard Biesheuvel Tested-by: Ard Biesheuvel # QEMU/KVM/mach-virt/LPAE/8G Tested-by: Florian Fainelli # Brahma SoCs Tested-by: Ahmad Fatoum # i.MX6Q Reported-by: Russell King - ARM Linux Reported-by: Florian Fainelli Signed-off-by: Andrey Ryabinin Signed-off-by: Abbott Liu Signed-off-by: Florian Fainelli Signed-off-by: Ard Biesheuvel Signed-off-by: Linus Walleij Signed-off-by: Russell King --- arch/arm/include/asm/kasan.h | 33 ++++ arch/arm/include/asm/pgalloc.h | 8 +- arch/arm/include/asm/thread_info.h | 8 + arch/arm/kernel/head-common.S | 3 + arch/arm/kernel/setup.c | 2 + arch/arm/mm/Makefile | 3 + arch/arm/mm/kasan_init.c | 291 +++++++++++++++++++++++++++++ arch/arm/mm/pgd.c | 16 +- 8 files changed, 362 insertions(+), 2 deletions(-) create mode 100644 arch/arm/include/asm/kasan.h create mode 100644 arch/arm/mm/kasan_init.c diff --git a/arch/arm/include/asm/kasan.h b/arch/arm/include/asm/kasan.h new file mode 100644 index 0000000000000..303c35df3135f --- /dev/null +++ b/arch/arm/include/asm/kasan.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/arm/include/asm/kasan.h + * + * Copyright (c) 2015 Samsung Electronics Co., Ltd. + * Author: Andrey Ryabinin + * + */ + +#ifndef __ASM_KASAN_H +#define __ASM_KASAN_H + +#ifdef CONFIG_KASAN + +#include + +#define KASAN_SHADOW_SCALE_SHIFT 3 + +/* + * The compiler uses a shadow offset assuming that addresses start + * from 0. Kernel addresses don't start from 0, so shadow + * for kernel really starts from 'compiler's shadow offset' + + * ('kernel address space start' >> KASAN_SHADOW_SCALE_SHIFT) + */ + +asmlinkage void kasan_early_init(void); +extern void kasan_init(void); + +#else +static inline void kasan_init(void) { } +#endif + +#endif diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h index 15f4674715f84..fdee1f04f4f3d 100644 --- a/arch/arm/include/asm/pgalloc.h +++ b/arch/arm/include/asm/pgalloc.h @@ -21,6 +21,7 @@ #define _PAGE_KERNEL_TABLE (PMD_TYPE_TABLE | PMD_BIT4 | PMD_DOMAIN(DOMAIN_KERNEL)) #ifdef CONFIG_ARM_LPAE +#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t)) static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) { @@ -28,14 +29,19 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) } #else /* !CONFIG_ARM_LPAE */ +#define PGD_SIZE (PAGE_SIZE << 2) /* * Since we have only two-level page tables, these are trivial */ #define pmd_alloc_one(mm,addr) ({ BUG(); ((pmd_t *)2); }) #define pmd_free(mm, pmd) do { } while (0) +#ifdef CONFIG_KASAN +/* The KASan core unconditionally calls pud_populate() on all architectures */ +#define pud_populate(mm,pmd,pte) do { } while (0) +#else #define pud_populate(mm,pmd,pte) BUG() - +#endif #endif /* CONFIG_ARM_LPAE */ extern pgd_t *pgd_alloc(struct mm_struct *mm); diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index 536b6b979f634..56fae7861fd38 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -13,7 +13,15 @@ #include #include +#ifdef CONFIG_KASAN +/* + * KASan uses a lot of extra stack space so the thread size order needs to + * be increased. + */ +#define THREAD_SIZE_ORDER 2 +#else #define THREAD_SIZE_ORDER 1 +#endif #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) #define THREAD_START_SP (THREAD_SIZE - 8) diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S index 6840c7c60a858..89c80154b9efc 100644 --- a/arch/arm/kernel/head-common.S +++ b/arch/arm/kernel/head-common.S @@ -111,6 +111,9 @@ __mmap_switched: str r8, [r2] @ Save atags pointer cmp r3, #0 strne r10, [r3] @ Save control register values +#ifdef CONFIG_KASAN + bl kasan_early_init +#endif mov lr, #0 b start_kernel ENDPROC(__mmap_switched) diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 694aa6b4bd03d..f4bd8b6542550 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -59,6 +59,7 @@ #include #include #include +#include #include "atags.h" @@ -1145,6 +1146,7 @@ void __init setup_arch(char **cmdline_p) early_ioremap_reset(); paging_init(mdesc); + kasan_init(); request_standard_resources(mdesc); if (mdesc->restart) diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index 99699c32d8a5e..4536159bc8faa 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -113,3 +113,6 @@ obj-$(CONFIG_CACHE_L2X0_PMU) += cache-l2x0-pmu.o obj-$(CONFIG_CACHE_XSC3L2) += cache-xsc3l2.o obj-$(CONFIG_CACHE_TAUROS2) += cache-tauros2.o obj-$(CONFIG_CACHE_UNIPHIER) += cache-uniphier.o + +KASAN_SANITIZE_kasan_init.o := n +obj-$(CONFIG_KASAN) += kasan_init.o diff --git a/arch/arm/mm/kasan_init.c b/arch/arm/mm/kasan_init.c new file mode 100644 index 0000000000000..9c348042a7244 --- /dev/null +++ b/arch/arm/mm/kasan_init.c @@ -0,0 +1,291 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * This file contains kasan initialization code for ARM. + * + * Copyright (c) 2018 Samsung Electronics Co., Ltd. + * Author: Andrey Ryabinin + * Author: Linus Walleij + */ + +#define pr_fmt(fmt) "kasan: " fmt +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mm.h" + +static pgd_t tmp_pgd_table[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE); + +pmd_t tmp_pmd_table[PTRS_PER_PMD] __page_aligned_bss; + +static __init void *kasan_alloc_block(size_t size) +{ + return memblock_alloc_try_nid(size, size, __pa(MAX_DMA_ADDRESS), + MEMBLOCK_ALLOC_KASAN, NUMA_NO_NODE); +} + +static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr, + unsigned long end, bool early) +{ + unsigned long next; + pte_t *ptep = pte_offset_kernel(pmdp, addr); + + do { + pte_t entry; + void *p; + + next = addr + PAGE_SIZE; + + if (!early) { + if (!pte_none(READ_ONCE(*ptep))) + continue; + + p = kasan_alloc_block(PAGE_SIZE); + if (!p) { + panic("%s failed to allocate shadow page for address 0x%lx\n", + __func__, addr); + return; + } + memset(p, KASAN_SHADOW_INIT, PAGE_SIZE); + entry = pfn_pte(virt_to_pfn(p), + __pgprot(pgprot_val(PAGE_KERNEL))); + } else if (pte_none(READ_ONCE(*ptep))) { + /* + * The early shadow memory is mapping all KASan + * operations to one and the same page in memory, + * "kasan_early_shadow_page" so that the instrumentation + * will work on a scratch area until we can set up the + * proper KASan shadow memory. + */ + entry = pfn_pte(virt_to_pfn(kasan_early_shadow_page), + __pgprot(_L_PTE_DEFAULT | L_PTE_DIRTY | L_PTE_XN)); + } else { + /* + * Early shadow mappings are PMD_SIZE aligned, so if the + * first entry is already set, they must all be set. + */ + return; + } + + set_pte_at(&init_mm, addr, ptep, entry); + } while (ptep++, addr = next, addr != end); +} + +/* + * The pmd (page middle directory) is only used on LPAE + */ +static void __init kasan_pmd_populate(pud_t *pudp, unsigned long addr, + unsigned long end, bool early) +{ + unsigned long next; + pmd_t *pmdp = pmd_offset(pudp, addr); + + do { + if (pmd_none(*pmdp)) { + /* + * We attempt to allocate a shadow block for the PMDs + * used by the PTEs for this address if it isn't already + * allocated. + */ + void *p = early ? kasan_early_shadow_pte : + kasan_alloc_block(PAGE_SIZE); + + if (!p) { + panic("%s failed to allocate shadow block for address 0x%lx\n", + __func__, addr); + return; + } + pmd_populate_kernel(&init_mm, pmdp, p); + flush_pmd_entry(pmdp); + } + + next = pmd_addr_end(addr, end); + kasan_pte_populate(pmdp, addr, next, early); + } while (pmdp++, addr = next, addr != end); +} + +static void __init kasan_pgd_populate(unsigned long addr, unsigned long end, + bool early) +{ + unsigned long next; + pgd_t *pgdp; + p4d_t *p4dp; + pud_t *pudp; + + pgdp = pgd_offset_k(addr); + + do { + /* + * Allocate and populate the shadow block of p4d folded into + * pud folded into pmd if it doesn't already exist + */ + if (!early && pgd_none(*pgdp)) { + void *p = kasan_alloc_block(PAGE_SIZE); + + if (!p) { + panic("%s failed to allocate shadow block for address 0x%lx\n", + __func__, addr); + return; + } + pgd_populate(&init_mm, pgdp, p); + } + + next = pgd_addr_end(addr, end); + /* + * We just immediately jump over the p4d and pud page + * directories since we believe ARM32 will never gain four + * nor five level page tables. + */ + p4dp = p4d_offset(pgdp, addr); + pudp = pud_offset(p4dp, addr); + + kasan_pmd_populate(pudp, addr, next, early); + } while (pgdp++, addr = next, addr != end); +} + +extern struct proc_info_list *lookup_processor_type(unsigned int); + +void __init kasan_early_init(void) +{ + struct proc_info_list *list; + + /* + * locate processor in the list of supported processor + * types. The linker builds this table for us from the + * entries in arch/arm/mm/proc-*.S + */ + list = lookup_processor_type(read_cpuid_id()); + if (list) { +#ifdef MULTI_CPU + processor = *list->proc; +#endif + } + + BUILD_BUG_ON((KASAN_SHADOW_END - (1UL << 29)) != KASAN_SHADOW_OFFSET); + /* + * We walk the page table and set all of the shadow memory to point + * to the scratch page. + */ + kasan_pgd_populate(KASAN_SHADOW_START, KASAN_SHADOW_END, true); +} + +static void __init clear_pgds(unsigned long start, + unsigned long end) +{ + for (; start && start < end; start += PMD_SIZE) + pmd_clear(pmd_off_k(start)); +} + +static int __init create_mapping(void *start, void *end) +{ + void *shadow_start, *shadow_end; + + shadow_start = kasan_mem_to_shadow(start); + shadow_end = kasan_mem_to_shadow(end); + + pr_info("Mapping kernel virtual memory block: %px-%px at shadow: %px-%px\n", + start, end, shadow_start, shadow_end); + + kasan_pgd_populate((unsigned long)shadow_start & PAGE_MASK, + PAGE_ALIGN((unsigned long)shadow_end), false); + return 0; +} + +void __init kasan_init(void) +{ + phys_addr_t pa_start, pa_end; + u64 i; + + /* + * We are going to perform proper setup of shadow memory. + * + * At first we should unmap early shadow (clear_pgds() call bellow). + * However, instrumented code can't execute without shadow memory. + * + * To keep the early shadow memory MMU tables around while setting up + * the proper shadow memory, we copy swapper_pg_dir (the initial page + * table) to tmp_pgd_table and use that to keep the early shadow memory + * mapped until the full shadow setup is finished. Then we swap back + * to the proper swapper_pg_dir. + */ + + memcpy(tmp_pgd_table, swapper_pg_dir, sizeof(tmp_pgd_table)); +#ifdef CONFIG_ARM_LPAE + /* We need to be in the same PGD or this won't work */ + BUILD_BUG_ON(pgd_index(KASAN_SHADOW_START) != + pgd_index(KASAN_SHADOW_END)); + memcpy(tmp_pmd_table, + pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_START)), + sizeof(tmp_pmd_table)); + set_pgd(&tmp_pgd_table[pgd_index(KASAN_SHADOW_START)], + __pgd(__pa(tmp_pmd_table) | PMD_TYPE_TABLE | L_PGD_SWAPPER)); +#endif + cpu_switch_mm(tmp_pgd_table, &init_mm); + local_flush_tlb_all(); + + clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); + + kasan_populate_early_shadow(kasan_mem_to_shadow((void *)VMALLOC_START), + kasan_mem_to_shadow((void *)-1UL) + 1); + + for_each_mem_range(i, &pa_start, &pa_end) { + void *start = __va(pa_start); + void *end = __va(pa_end); + + /* Do not attempt to shadow highmem */ + if (pa_start >= arm_lowmem_limit) { + pr_info("Skip highmem block at %pa-%pa\n", &pa_start, &pa_end); + continue; + } + if (pa_end > arm_lowmem_limit) { + pr_info("Truncating shadow for memory block at %pa-%pa to lowmem region at %pa\n", + &pa_start, &pa_end, &arm_lowmem_limit); + end = __va(arm_lowmem_limit); + } + if (start >= end) { + pr_info("Skipping invalid memory block %pa-%pa (virtual %p-%p)\n", + &pa_start, &pa_end, start, end); + continue; + } + + create_mapping(start, end); + } + + /* + * 1. The module global variables are in MODULES_VADDR ~ MODULES_END, + * so we need to map this area. + * 2. PKMAP_BASE ~ PKMAP_BASE+PMD_SIZE's shadow and MODULES_VADDR + * ~ MODULES_END's shadow is in the same PMD_SIZE, so we can't + * use kasan_populate_zero_shadow. + */ + create_mapping((void *)MODULES_VADDR, (void *)(PKMAP_BASE + PMD_SIZE)); + + /* + * KAsan may reuse the contents of kasan_early_shadow_pte directly, so + * we should make sure that it maps the zero page read-only. + */ + for (i = 0; i < PTRS_PER_PTE; i++) + set_pte_at(&init_mm, KASAN_SHADOW_START + i*PAGE_SIZE, + &kasan_early_shadow_pte[i], + pfn_pte(virt_to_pfn(kasan_early_shadow_page), + __pgprot(pgprot_val(PAGE_KERNEL) + | L_PTE_RDONLY))); + + cpu_switch_mm(swapper_pg_dir, &init_mm); + local_flush_tlb_all(); + + memset(kasan_early_shadow_page, 0, PAGE_SIZE); + pr_info("Kernel address sanitizer initialized\n"); + init_task.kasan_depth = 0; +} diff --git a/arch/arm/mm/pgd.c b/arch/arm/mm/pgd.c index c5e1b27046a88..f8e9bc58a84f0 100644 --- a/arch/arm/mm/pgd.c +++ b/arch/arm/mm/pgd.c @@ -66,7 +66,21 @@ pgd_t *pgd_alloc(struct mm_struct *mm) new_pmd = pmd_alloc(mm, new_pud, 0); if (!new_pmd) goto no_pmd; -#endif +#ifdef CONFIG_KASAN + /* + * Copy PMD table for KASAN shadow mappings. + */ + init_pgd = pgd_offset_k(TASK_SIZE); + init_p4d = p4d_offset(init_pgd, TASK_SIZE); + init_pud = pud_offset(init_p4d, TASK_SIZE); + init_pmd = pmd_offset(init_pud, TASK_SIZE); + new_pmd = pmd_offset(new_pud, TASK_SIZE); + memcpy(new_pmd, init_pmd, + (pmd_index(MODULES_VADDR) - pmd_index(TASK_SIZE)) + * sizeof(pmd_t)); + clean_dcache_area(new_pmd, PTRS_PER_PMD * sizeof(pmd_t)); +#endif /* CONFIG_KASAN */ +#endif /* CONFIG_LPAE */ if (!vectors_high()) { /* -- GitLab From 421015713b306e47af95d4d61cdfbd96d462e4cb Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 25 Oct 2020 23:56:18 +0100 Subject: [PATCH 0022/1894] ARM: 9017/2: Enable KASan for ARM This patch enables the kernel address sanitizer for ARM. XIP_KERNEL has not been tested and is therefore not allowed for now. Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: kasan-dev@googlegroups.com Acked-by: Dmitry Vyukov Reviewed-by: Ard Biesheuvel Tested-by: Ard Biesheuvel # QEMU/KVM/mach-virt/LPAE/8G Tested-by: Florian Fainelli # Brahma SoCs Tested-by: Ahmad Fatoum # i.MX6Q Signed-off-by: Abbott Liu Signed-off-by: Florian Fainelli Signed-off-by: Linus Walleij Signed-off-by: Russell King --- Documentation/dev-tools/kasan.rst | 4 ++-- Documentation/features/debug/KASAN/arch-support.txt | 2 +- arch/arm/Kconfig | 1 + 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Documentation/dev-tools/kasan.rst b/Documentation/dev-tools/kasan.rst index c09c9ca2ff1cb..3edd3946d4c7a 100644 --- a/Documentation/dev-tools/kasan.rst +++ b/Documentation/dev-tools/kasan.rst @@ -18,8 +18,8 @@ out-of-bounds accesses for global variables is only supported since Clang 11. Tag-based KASAN is only supported in Clang. -Currently generic KASAN is supported for the x86_64, arm64, xtensa, s390 and -riscv architectures, and tag-based KASAN is supported only for arm64. +Currently generic KASAN is supported for the x86_64, arm, arm64, xtensa, s390 +and riscv architectures, and tag-based KASAN is supported only for arm64. Usage ----- diff --git a/Documentation/features/debug/KASAN/arch-support.txt b/Documentation/features/debug/KASAN/arch-support.txt index c3fe9b266e7bd..b2288dc14b726 100644 --- a/Documentation/features/debug/KASAN/arch-support.txt +++ b/Documentation/features/debug/KASAN/arch-support.txt @@ -8,7 +8,7 @@ ----------------------- | alpha: | TODO | | arc: | TODO | - | arm: | TODO | + | arm: | ok | | arm64: | ok | | c6x: | TODO | | csky: | TODO | diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 194032568b7a9..2c5eb5c2b3101 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -67,6 +67,7 @@ config ARM select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6 select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU + select HAVE_ARCH_KASAN if MMU && !XIP_KERNEL select HAVE_ARCH_MMAP_RND_BITS if MMU select HAVE_ARCH_SECCOMP select HAVE_ARCH_SECCOMP_FILTER if AEABI && !OABI_COMPAT -- GitLab From fc2933c133744305236793025b00c2f7d258b687 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 28 Oct 2020 14:20:55 +0100 Subject: [PATCH 0023/1894] ARM: 9020/1: mm: use correct section size macro to describe the FDT virtual address Commit 149a3ffe62b9dbc3 ("9012/1: move device tree mapping out of linear region") created a permanent, read-only section mapping of the device tree blob provided by the firmware, and added a set of macros to get the base and size of the virtually mapped FDT based on the physical address. However, while the mapping code uses the SECTION_SIZE macro correctly, the macros use PMD_SIZE instead, which means something entirely different on ARM when using short descriptors, and is therefore not the right quantity to use here. So replace PMD_SIZE with SECTION_SIZE. While at it, change the names of the macro and its parameter to clarify that it returns the virtual address of the start of the FDT, based on the physical address in memory. Tested-by: Joel Stanley Tested-by: Marek Szyprowski Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King --- arch/arm/include/asm/memory.h | 6 +++--- arch/arm/kernel/setup.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 598dbdca2017f..38a163f501301 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -73,8 +73,8 @@ #define XIP_VIRT_ADDR(physaddr) (MODULES_VADDR + ((physaddr) & 0x000fffff)) #define FDT_FIXED_BASE UL(0xff800000) -#define FDT_FIXED_SIZE (2 * PMD_SIZE) -#define FDT_VIRT_ADDR(physaddr) ((void *)(FDT_FIXED_BASE | (physaddr) % PMD_SIZE)) +#define FDT_FIXED_SIZE (2 * SECTION_SIZE) +#define FDT_VIRT_BASE(physbase) ((void *)(FDT_FIXED_BASE | (physbase) % SECTION_SIZE)) #if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE) /* @@ -116,7 +116,7 @@ extern unsigned long vectors_base; #define MODULES_VADDR PAGE_OFFSET #define XIP_VIRT_ADDR(physaddr) (physaddr) -#define FDT_VIRT_ADDR(physaddr) ((void *)(physaddr)) +#define FDT_VIRT_BASE(physbase) ((void *)(physbase)) #endif /* !CONFIG_MMU */ diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index f4bd8b6542550..d32f7652a5bf4 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -1087,7 +1087,7 @@ void __init setup_arch(char **cmdline_p) void *atags_vaddr = NULL; if (__atags_pointer) - atags_vaddr = FDT_VIRT_ADDR(__atags_pointer); + atags_vaddr = FDT_VIRT_BASE(__atags_pointer); setup_processor(); if (atags_vaddr) { -- GitLab From 4e79f0211b473f8e1eab8211a9fd50cc41a3a061 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 21 Sep 2020 00:10:16 +0200 Subject: [PATCH 0024/1894] ARM: p2v: fix handling of LPAE translation in BE mode When running in BE mode on LPAE hardware with a PA-to-VA translation that exceeds 4 GB, we patch bits 39:32 of the offset into the wrong byte of the opcode. So fix that, by rotating the offset in r0 to the right by 8 bits, which will put the 8-bit immediate in bits 31:24. Note that this will also move bit #22 in its correct place when applying the rotation to the constant #0x400000. Fixes: d9a790df8e984 ("ARM: 7883/1: fix mov to mvn conversion in case of 64 bit phys_addr_t and BE") Acked-by: Nicolas Pitre Reviewed-by: Linus Walleij Signed-off-by: Ard Biesheuvel --- arch/arm/kernel/head.S | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index f8904227e7fdc..98c1e68bdfcbb 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -671,12 +671,8 @@ ARM_BE8(rev16 ip, ip) ldrcc r7, [r4], #4 @ use branch for delay slot bcc 1b bx lr -#else -#ifdef CONFIG_CPU_ENDIAN_BE8 - moveq r0, #0x00004000 @ set bit 22, mov to mvn instruction #else moveq r0, #0x400000 @ set bit 22, mov to mvn instruction -#endif b 2f 1: ldr ip, [r7, r3] #ifdef CONFIG_CPU_ENDIAN_BE8 @@ -685,7 +681,7 @@ ARM_BE8(rev16 ip, ip) tst ip, #0x000f0000 @ check the rotation field orrne ip, ip, r6, lsl #24 @ mask in offset bits 31-24 biceq ip, ip, #0x00004000 @ clear bit 22 - orreq ip, ip, r0 @ mask in offset bits 7-0 + orreq ip, ip, r0, ror #8 @ mask in offset bits 7-0 #else bic ip, ip, #0x000000ff tst ip, #0xf00 @ check the rotation field -- GitLab From 0b1674638a5c69cbace63278625c199100955490 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 14 Sep 2020 11:23:39 +0300 Subject: [PATCH 0025/1894] ARM: assembler: introduce adr_l, ldr_l and str_l macros Like arm64, ARM supports position independent code sequences that produce symbol references with a greater reach than the ordinary adr/ldr instructions. Since on ARM, the adrl pseudo-instruction is only supported in ARM mode (and not at all when using Clang), having a adr_l macro like we do on arm64 is useful, and increases symmetry as well. Currently, we use open coded instruction sequences involving literals and arithmetic operations. Instead, we can use movw/movt pairs on v7 CPUs, circumventing the D-cache entirely. E.g., on v7+ CPUs, we can emit a PC-relative reference as follows: movw , #:lower16: - (1f + 8) movt , #:upper16: - (1f + 8) 1: add , , pc For older CPUs, we can emit the literal into a subsection, allowing it to be emitted out of line while retaining the ability to perform arithmetic on label offsets. E.g., on pre-v7 CPUs, we can emit a PC-relative reference as follows: ldr , 2f 1: add , , pc .subsection 1 2: .long - (1b + 8) .previous This is allowed by the assembler because, unlike ordinary sections, subsections are combined into a single section in the object file, and so the label references are not true cross-section references that are visible as relocations. (Subsections have been available in binutils since 2004 at least, so they should not cause any issues with older toolchains.) So use the above to implement the macros mov_l, adr_l, ldr_l and str_l, all of which will use movw/movt pairs on v7 and later CPUs, and use PC-relative literals otherwise. Reviewed-by: Nicolas Pitre Reviewed-by: Linus Walleij Signed-off-by: Ard Biesheuvel --- arch/arm/include/asm/assembler.h | 84 ++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index feac2c8b86f29..72627c5fb3b2c 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -494,4 +494,88 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) #define _ASM_NOKPROBE(entry) #endif + .macro __adldst_l, op, reg, sym, tmp, c + .if __LINUX_ARM_ARCH__ < 7 + ldr\c \tmp, .La\@ + .subsection 1 + .align 2 +.La\@: .long \sym - .Lpc\@ + .previous + .else + .ifnb \c + THUMB( ittt \c ) + .endif + movw\c \tmp, #:lower16:\sym - .Lpc\@ + movt\c \tmp, #:upper16:\sym - .Lpc\@ + .endif + +#ifndef CONFIG_THUMB2_KERNEL + .set .Lpc\@, . + 8 // PC bias + .ifc \op, add + add\c \reg, \tmp, pc + .else + \op\c \reg, [pc, \tmp] + .endif +#else +.Lb\@: add\c \tmp, \tmp, pc + /* + * In Thumb-2 builds, the PC bias depends on whether we are currently + * emitting into a .arm or a .thumb section. The size of the add opcode + * above will be 2 bytes when emitting in Thumb mode and 4 bytes when + * emitting in ARM mode, so let's use this to account for the bias. + */ + .set .Lpc\@, . + (. - .Lb\@) + + .ifnc \op, add + \op\c \reg, [\tmp] + .endif +#endif + .endm + + /* + * mov_l - move a constant value or [relocated] address into a register + */ + .macro mov_l, dst:req, imm:req + .if __LINUX_ARM_ARCH__ < 7 + ldr \dst, =\imm + .else + movw \dst, #:lower16:\imm + movt \dst, #:upper16:\imm + .endif + .endm + + /* + * adr_l - adr pseudo-op with unlimited range + * + * @dst: destination register + * @sym: name of the symbol + * @cond: conditional opcode suffix + */ + .macro adr_l, dst:req, sym:req, cond + __adldst_l add, \dst, \sym, \dst, \cond + .endm + + /* + * ldr_l - ldr pseudo-op with unlimited range + * + * @dst: destination register + * @sym: name of the symbol + * @cond: conditional opcode suffix + */ + .macro ldr_l, dst:req, sym:req, cond + __adldst_l ldr, \dst, \sym, \dst, \cond + .endm + + /* + * str_l - str pseudo-op with unlimited range + * + * @src: source register + * @sym: name of the symbol + * @tmp: mandatory scratch register + * @cond: conditional opcode suffix + */ + .macro str_l, src:req, sym:req, tmp:req, cond + __adldst_l str, \src, \sym, \tmp, \cond + .endm + #endif /* __ASM_ASSEMBLER_H__ */ -- GitLab From 22f2d23098f7d34fc5142531cffb241d14611684 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 14 Sep 2020 11:24:37 +0300 Subject: [PATCH 0026/1894] ARM: module: add support for place relative relocations When using the new adr_l/ldr_l/str_l macros to refer to external symbols from modules, the linker may emit place relative ELF relocations that need to be fixed up by the module loader. So add support for these. Reviewed-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel --- arch/arm/include/asm/elf.h | 5 +++++ arch/arm/kernel/module.c | 20 ++++++++++++++++++-- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h index b078d992414b7..0ac62a54b73c2 100644 --- a/arch/arm/include/asm/elf.h +++ b/arch/arm/include/asm/elf.h @@ -51,6 +51,7 @@ typedef struct user_fp elf_fpregset_t; #define R_ARM_NONE 0 #define R_ARM_PC24 1 #define R_ARM_ABS32 2 +#define R_ARM_REL32 3 #define R_ARM_CALL 28 #define R_ARM_JUMP24 29 #define R_ARM_TARGET1 38 @@ -58,11 +59,15 @@ typedef struct user_fp elf_fpregset_t; #define R_ARM_PREL31 42 #define R_ARM_MOVW_ABS_NC 43 #define R_ARM_MOVT_ABS 44 +#define R_ARM_MOVW_PREL_NC 45 +#define R_ARM_MOVT_PREL 46 #define R_ARM_THM_CALL 10 #define R_ARM_THM_JUMP24 30 #define R_ARM_THM_MOVW_ABS_NC 47 #define R_ARM_THM_MOVT_ABS 48 +#define R_ARM_THM_MOVW_PREL_NC 49 +#define R_ARM_THM_MOVT_PREL 50 /* * These are used to set parameters in the core dumps. diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index e15444b25ca05..beac45e89ba64 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -185,14 +185,24 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, *(u32 *)loc |= offset & 0x7fffffff; break; + case R_ARM_REL32: + *(u32 *)loc += sym->st_value - loc; + break; + case R_ARM_MOVW_ABS_NC: case R_ARM_MOVT_ABS: + case R_ARM_MOVW_PREL_NC: + case R_ARM_MOVT_PREL: offset = tmp = __mem_to_opcode_arm(*(u32 *)loc); offset = ((offset & 0xf0000) >> 4) | (offset & 0xfff); offset = (offset ^ 0x8000) - 0x8000; offset += sym->st_value; - if (ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_ABS) + if (ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_PREL || + ELF32_R_TYPE(rel->r_info) == R_ARM_MOVW_PREL_NC) + offset -= loc; + if (ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_ABS || + ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_PREL) offset >>= 16; tmp &= 0xfff0f000; @@ -283,6 +293,8 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, case R_ARM_THM_MOVW_ABS_NC: case R_ARM_THM_MOVT_ABS: + case R_ARM_THM_MOVW_PREL_NC: + case R_ARM_THM_MOVT_PREL: upper = __mem_to_opcode_thumb16(*(u16 *)loc); lower = __mem_to_opcode_thumb16(*(u16 *)(loc + 2)); @@ -302,7 +314,11 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, offset = (offset ^ 0x8000) - 0x8000; offset += sym->st_value; - if (ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_ABS) + if (ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_PREL || + ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVW_PREL_NC) + offset -= loc; + if (ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_ABS || + ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_PREL) offset >>= 16; upper = (u16)((upper & 0xfbf0) | -- GitLab From eae78e1a97201a81a851342ad9659b60f61a3951 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 20 Sep 2020 17:51:55 +0200 Subject: [PATCH 0027/1894] ARM: p2v: move patching code to separate assembler source file Move the phys2virt patching code into a separate .S file before doing some work on it. Suggested-by: Nicolas Pitre Reviewed-by: Linus Walleij Signed-off-by: Ard Biesheuvel --- arch/arm/kernel/Makefile | 1 + arch/arm/kernel/head.S | 138 -------------------------------- arch/arm/kernel/phys2virt.S | 151 ++++++++++++++++++++++++++++++++++++ 3 files changed, 152 insertions(+), 138 deletions(-) create mode 100644 arch/arm/kernel/phys2virt.S diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 89e5d864e9234..9e465efcc8b65 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -92,6 +92,7 @@ obj-$(CONFIG_PARAVIRT) += paravirt.o head-y := head$(MMUEXT).o obj-$(CONFIG_DEBUG_LL) += debug.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o +obj-$(CONFIG_ARM_PATCH_PHYS_VIRT) += phys2virt.o # This is executed very early using a temporary stack when no memory allocator # nor global data is available. Everything has to be allocated on the stack. diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 98c1e68bdfcbb..7e3f368090116 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -586,142 +586,4 @@ ENTRY(fixup_smp) ldmfd sp!, {r4 - r6, pc} ENDPROC(fixup_smp) -#ifdef __ARMEB__ -#define LOW_OFFSET 0x4 -#define HIGH_OFFSET 0x0 -#else -#define LOW_OFFSET 0x0 -#define HIGH_OFFSET 0x4 -#endif - -#ifdef CONFIG_ARM_PATCH_PHYS_VIRT - -/* __fixup_pv_table - patch the stub instructions with the delta between - * PHYS_OFFSET and PAGE_OFFSET, which is assumed to be 16MiB aligned and - * can be expressed by an immediate shifter operand. The stub instruction - * has a form of '(add|sub) rd, rn, #imm'. - */ - __HEAD -__fixup_pv_table: - adr r0, 1f - ldmia r0, {r3-r7} - mvn ip, #0 - subs r3, r0, r3 @ PHYS_OFFSET - PAGE_OFFSET - add r4, r4, r3 @ adjust table start address - add r5, r5, r3 @ adjust table end address - add r6, r6, r3 @ adjust __pv_phys_pfn_offset address - add r7, r7, r3 @ adjust __pv_offset address - mov r0, r8, lsr #PAGE_SHIFT @ convert to PFN - str r0, [r6] @ save computed PHYS_OFFSET to __pv_phys_pfn_offset - strcc ip, [r7, #HIGH_OFFSET] @ save to __pv_offset high bits - mov r6, r3, lsr #24 @ constant for add/sub instructions - teq r3, r6, lsl #24 @ must be 16MiB aligned -THUMB( it ne @ cross section branch ) - bne __error - str r3, [r7, #LOW_OFFSET] @ save to __pv_offset low bits - b __fixup_a_pv_table -ENDPROC(__fixup_pv_table) - - .align -1: .long . - .long __pv_table_begin - .long __pv_table_end -2: .long __pv_phys_pfn_offset - .long __pv_offset - - .text -__fixup_a_pv_table: - adr r0, 3f - ldr r6, [r0] - add r6, r6, r3 - ldr r0, [r6, #HIGH_OFFSET] @ pv_offset high word - ldr r6, [r6, #LOW_OFFSET] @ pv_offset low word - mov r6, r6, lsr #24 - cmn r0, #1 -#ifdef CONFIG_THUMB2_KERNEL - moveq r0, #0x200000 @ set bit 21, mov to mvn instruction - lsls r6, #24 - beq 2f - clz r7, r6 - lsr r6, #24 - lsl r6, r7 - bic r6, #0x0080 - lsrs r7, #1 - orrcs r6, #0x0080 - orr r6, r6, r7, lsl #12 - orr r6, #0x4000 - b 2f -1: add r7, r3 - ldrh ip, [r7, #2] -ARM_BE8(rev16 ip, ip) - tst ip, #0x4000 - and ip, #0x8f00 - orrne ip, r6 @ mask in offset bits 31-24 - orreq ip, r0 @ mask in offset bits 7-0 -ARM_BE8(rev16 ip, ip) - strh ip, [r7, #2] - bne 2f - ldrh ip, [r7] -ARM_BE8(rev16 ip, ip) - bic ip, #0x20 - orr ip, ip, r0, lsr #16 -ARM_BE8(rev16 ip, ip) - strh ip, [r7] -2: cmp r4, r5 - ldrcc r7, [r4], #4 @ use branch for delay slot - bcc 1b - bx lr -#else - moveq r0, #0x400000 @ set bit 22, mov to mvn instruction - b 2f -1: ldr ip, [r7, r3] -#ifdef CONFIG_CPU_ENDIAN_BE8 - @ in BE8, we load data in BE, but instructions still in LE - bic ip, ip, #0xff000000 - tst ip, #0x000f0000 @ check the rotation field - orrne ip, ip, r6, lsl #24 @ mask in offset bits 31-24 - biceq ip, ip, #0x00004000 @ clear bit 22 - orreq ip, ip, r0, ror #8 @ mask in offset bits 7-0 -#else - bic ip, ip, #0x000000ff - tst ip, #0xf00 @ check the rotation field - orrne ip, ip, r6 @ mask in offset bits 31-24 - biceq ip, ip, #0x400000 @ clear bit 22 - orreq ip, ip, r0 @ mask in offset bits 7-0 -#endif - str ip, [r7, r3] -2: cmp r4, r5 - ldrcc r7, [r4], #4 @ use branch for delay slot - bcc 1b - ret lr -#endif -ENDPROC(__fixup_a_pv_table) - - .align -3: .long __pv_offset - -ENTRY(fixup_pv_table) - stmfd sp!, {r4 - r7, lr} - mov r3, #0 @ no offset - mov r4, r0 @ r0 = table start - add r5, r0, r1 @ r1 = table size - bl __fixup_a_pv_table - ldmfd sp!, {r4 - r7, pc} -ENDPROC(fixup_pv_table) - - .data - .align 2 - .globl __pv_phys_pfn_offset - .type __pv_phys_pfn_offset, %object -__pv_phys_pfn_offset: - .word 0 - .size __pv_phys_pfn_offset, . -__pv_phys_pfn_offset - - .globl __pv_offset - .type __pv_offset, %object -__pv_offset: - .quad 0 - .size __pv_offset, . -__pv_offset -#endif - #include "head-common.S" diff --git a/arch/arm/kernel/phys2virt.S b/arch/arm/kernel/phys2virt.S new file mode 100644 index 0000000000000..7c17fbfeeedd1 --- /dev/null +++ b/arch/arm/kernel/phys2virt.S @@ -0,0 +1,151 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 1994-2002 Russell King + * Copyright (c) 2003 ARM Limited + * All Rights Reserved + */ + +#include +#include +#include +#include + +#ifdef __ARMEB__ +#define LOW_OFFSET 0x4 +#define HIGH_OFFSET 0x0 +#else +#define LOW_OFFSET 0x0 +#define HIGH_OFFSET 0x4 +#endif + +/* + * __fixup_pv_table - patch the stub instructions with the delta between + * PHYS_OFFSET and PAGE_OFFSET, which is assumed to be + * 16MiB aligned. + * + * Called from head.S, which expects the following registers to be preserved: + * r1 = machine no, r2 = atags or dtb, + * r8 = phys_offset, r9 = cpuid, r10 = procinfo + */ + __HEAD +ENTRY(__fixup_pv_table) + adr r0, 1f + ldmia r0, {r3-r7} + mvn ip, #0 + subs r3, r0, r3 @ PHYS_OFFSET - PAGE_OFFSET + add r4, r4, r3 @ adjust table start address + add r5, r5, r3 @ adjust table end address + add r6, r6, r3 @ adjust __pv_phys_pfn_offset address + add r7, r7, r3 @ adjust __pv_offset address + mov r0, r8, lsr #PAGE_SHIFT @ convert to PFN + str r0, [r6] @ save computed PHYS_OFFSET to __pv_phys_pfn_offset + strcc ip, [r7, #HIGH_OFFSET] @ save to __pv_offset high bits + mov r6, r3, lsr #24 @ constant for add/sub instructions + teq r3, r6, lsl #24 @ must be 16MiB aligned + bne 0f + str r3, [r7, #LOW_OFFSET] @ save to __pv_offset low bits + b __fixup_a_pv_table +0: mov r0, r0 @ deadloop on error + b 0b +ENDPROC(__fixup_pv_table) + + .align +1: .long . + .long __pv_table_begin + .long __pv_table_end +2: .long __pv_phys_pfn_offset + .long __pv_offset + + .text +__fixup_a_pv_table: + adr r0, 3f + ldr r6, [r0] + add r6, r6, r3 + ldr r0, [r6, #HIGH_OFFSET] @ pv_offset high word + ldr r6, [r6, #LOW_OFFSET] @ pv_offset low word + mov r6, r6, lsr #24 + cmn r0, #1 +#ifdef CONFIG_THUMB2_KERNEL + moveq r0, #0x200000 @ set bit 21, mov to mvn instruction + lsls r6, #24 + beq 2f + clz r7, r6 + lsr r6, #24 + lsl r6, r7 + bic r6, #0x0080 + lsrs r7, #1 + orrcs r6, #0x0080 + orr r6, r6, r7, lsl #12 + orr r6, #0x4000 + b 2f +1: add r7, r3 + ldrh ip, [r7, #2] +ARM_BE8(rev16 ip, ip) + tst ip, #0x4000 + and ip, #0x8f00 + orrne ip, r6 @ mask in offset bits 31-24 + orreq ip, r0 @ mask in offset bits 7-0 +ARM_BE8(rev16 ip, ip) + strh ip, [r7, #2] + bne 2f + ldrh ip, [r7] +ARM_BE8(rev16 ip, ip) + bic ip, #0x20 + orr ip, ip, r0, lsr #16 +ARM_BE8(rev16 ip, ip) + strh ip, [r7] +2: cmp r4, r5 + ldrcc r7, [r4], #4 @ use branch for delay slot + bcc 1b + bx lr +#else + moveq r0, #0x400000 @ set bit 22, mov to mvn instruction + b 2f +1: ldr ip, [r7, r3] +#ifdef CONFIG_CPU_ENDIAN_BE8 + @ in BE8, we load data in BE, but instructions still in LE + bic ip, ip, #0xff000000 + tst ip, #0x000f0000 @ check the rotation field + orrne ip, ip, r6, lsl #24 @ mask in offset bits 31-24 + biceq ip, ip, #0x00004000 @ clear bit 22 + orreq ip, ip, r0, ror #8 @ mask in offset bits 7-0 +#else + bic ip, ip, #0x000000ff + tst ip, #0xf00 @ check the rotation field + orrne ip, ip, r6 @ mask in offset bits 31-24 + biceq ip, ip, #0x400000 @ clear bit 22 + orreq ip, ip, r0 @ mask in offset bits 7-0 +#endif + str ip, [r7, r3] +2: cmp r4, r5 + ldrcc r7, [r4], #4 @ use branch for delay slot + bcc 1b + ret lr +#endif +ENDPROC(__fixup_a_pv_table) + + .align +3: .long __pv_offset + +ENTRY(fixup_pv_table) + stmfd sp!, {r4 - r7, lr} + mov r3, #0 @ no offset + mov r4, r0 @ r0 = table start + add r5, r0, r1 @ r1 = table size + bl __fixup_a_pv_table + ldmfd sp!, {r4 - r7, pc} +ENDPROC(fixup_pv_table) + + .data + .align 2 + .globl __pv_phys_pfn_offset + .type __pv_phys_pfn_offset, %object +__pv_phys_pfn_offset: + .word 0 + .size __pv_phys_pfn_offset, . -__pv_phys_pfn_offset + + .globl __pv_offset + .type __pv_offset, %object +__pv_offset: + .quad 0 + .size __pv_offset, . -__pv_offset -- GitLab From 4b16421c3e955f440eb45546db6ce33d47f29c78 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 17 Sep 2020 21:29:36 +0300 Subject: [PATCH 0028/1894] ARM: p2v: factor out shared loop processing The ARM and Thumb2 versions of the p2v patching loop have some overlap at the end of the loop, so factor that out. As numeric labels are not required to be unique, and may therefore be ambiguous, use named local labels for the start and end of the loop instead. Acked-by: Nicolas Pitre Reviewed-by: Linus Walleij Signed-off-by: Ard Biesheuvel --- arch/arm/kernel/phys2virt.S | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/arch/arm/kernel/phys2virt.S b/arch/arm/kernel/phys2virt.S index 7c17fbfeeedd1..8fb1f7bcc720d 100644 --- a/arch/arm/kernel/phys2virt.S +++ b/arch/arm/kernel/phys2virt.S @@ -68,7 +68,7 @@ __fixup_a_pv_table: #ifdef CONFIG_THUMB2_KERNEL moveq r0, #0x200000 @ set bit 21, mov to mvn instruction lsls r6, #24 - beq 2f + beq .Lnext clz r7, r6 lsr r6, #24 lsl r6, r7 @@ -77,8 +77,8 @@ __fixup_a_pv_table: orrcs r6, #0x0080 orr r6, r6, r7, lsl #12 orr r6, #0x4000 - b 2f -1: add r7, r3 + b .Lnext +.Lloop: add r7, r3 ldrh ip, [r7, #2] ARM_BE8(rev16 ip, ip) tst ip, #0x4000 @@ -87,21 +87,17 @@ ARM_BE8(rev16 ip, ip) orreq ip, r0 @ mask in offset bits 7-0 ARM_BE8(rev16 ip, ip) strh ip, [r7, #2] - bne 2f + bne .Lnext ldrh ip, [r7] ARM_BE8(rev16 ip, ip) bic ip, #0x20 orr ip, ip, r0, lsr #16 ARM_BE8(rev16 ip, ip) strh ip, [r7] -2: cmp r4, r5 - ldrcc r7, [r4], #4 @ use branch for delay slot - bcc 1b - bx lr #else moveq r0, #0x400000 @ set bit 22, mov to mvn instruction - b 2f -1: ldr ip, [r7, r3] + b .Lnext +.Lloop: ldr ip, [r7, r3] #ifdef CONFIG_CPU_ENDIAN_BE8 @ in BE8, we load data in BE, but instructions still in LE bic ip, ip, #0xff000000 @@ -117,11 +113,13 @@ ARM_BE8(rev16 ip, ip) orreq ip, ip, r0 @ mask in offset bits 7-0 #endif str ip, [r7, r3] -2: cmp r4, r5 +#endif + +.Lnext: + cmp r4, r5 ldrcc r7, [r4], #4 @ use branch for delay slot - bcc 1b + bcc .Lloop ret lr -#endif ENDPROC(__fixup_a_pv_table) .align -- GitLab From 7a94849e81b5c10e71f0a555300313c2789d9b0d Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 17 Sep 2020 21:36:46 +0300 Subject: [PATCH 0029/1894] ARM: p2v: factor out BE8 handling The big and little endian versions of the ARM p2v patching routine only differ in the values of the constants, so factor those out into macros so that we only have one version of the logic sequence to maintain. Acked-by: Nicolas Pitre Reviewed-by: Linus Walleij Signed-off-by: Ard Biesheuvel --- arch/arm/kernel/phys2virt.S | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/arch/arm/kernel/phys2virt.S b/arch/arm/kernel/phys2virt.S index 8fb1f7bcc720d..5031e5a2e78bf 100644 --- a/arch/arm/kernel/phys2virt.S +++ b/arch/arm/kernel/phys2virt.S @@ -95,23 +95,25 @@ ARM_BE8(rev16 ip, ip) ARM_BE8(rev16 ip, ip) strh ip, [r7] #else - moveq r0, #0x400000 @ set bit 22, mov to mvn instruction - b .Lnext -.Lloop: ldr ip, [r7, r3] #ifdef CONFIG_CPU_ENDIAN_BE8 - @ in BE8, we load data in BE, but instructions still in LE - bic ip, ip, #0xff000000 - tst ip, #0x000f0000 @ check the rotation field - orrne ip, ip, r6, lsl #24 @ mask in offset bits 31-24 - biceq ip, ip, #0x00004000 @ clear bit 22 - orreq ip, ip, r0, ror #8 @ mask in offset bits 7-0 +@ in BE8, we load data in BE, but instructions still in LE +#define PV_BIT22 0x00004000 +#define PV_IMM8_MASK 0xff000000 +#define PV_ROT_MASK 0x000f0000 #else - bic ip, ip, #0x000000ff - tst ip, #0xf00 @ check the rotation field - orrne ip, ip, r6 @ mask in offset bits 31-24 - biceq ip, ip, #0x400000 @ clear bit 22 - orreq ip, ip, r0 @ mask in offset bits 7-0 +#define PV_BIT22 0x00400000 +#define PV_IMM8_MASK 0x000000ff +#define PV_ROT_MASK 0xf00 #endif + + moveq r0, #0x400000 @ set bit 22, mov to mvn instruction + b .Lnext +.Lloop: ldr ip, [r7, r3] + bic ip, ip, #PV_IMM8_MASK + tst ip, #PV_ROT_MASK @ check the rotation field + orrne ip, ip, r6 ARM_BE8(, lsl #24) @ mask in offset bits 31-24 + biceq ip, ip, #PV_BIT22 @ clear bit 22 + orreq ip, ip, r0 ARM_BE8(, ror #8) @ mask in offset bits 7-0 (or bit 22) str ip, [r7, r3] #endif -- GitLab From 0869f3b9da38889faef2ccafcf675c713d4a3aa8 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 18 Sep 2020 10:00:24 +0300 Subject: [PATCH 0030/1894] ARM: p2v: drop redundant 'type' argument from __pv_stub We always pass the same value for 'type' so pull it into the __pv_stub macro itself. Acked-by: Nicolas Pitre Reviewed-by: Linus Walleij Signed-off-by: Ard Biesheuvel --- arch/arm/include/asm/memory.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 99035b5891ef4..eb3c8e6e960ab 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -183,14 +183,14 @@ extern const void *__pv_table_begin, *__pv_table_end; #define PHYS_OFFSET ((phys_addr_t)__pv_phys_pfn_offset << PAGE_SHIFT) #define PHYS_PFN_OFFSET (__pv_phys_pfn_offset) -#define __pv_stub(from,to,instr,type) \ +#define __pv_stub(from,to,instr) \ __asm__("@ __pv_stub\n" \ "1: " instr " %0, %1, %2\n" \ " .pushsection .pv_table,\"a\"\n" \ " .long 1b\n" \ " .popsection\n" \ : "=r" (to) \ - : "r" (from), "I" (type)) + : "r" (from), "I" (__PV_BITS_31_24)) #define __pv_stub_mov_hi(t) \ __asm__ volatile("@ __pv_stub_mov\n" \ @@ -217,7 +217,7 @@ static inline phys_addr_t __virt_to_phys_nodebug(unsigned long x) phys_addr_t t; if (sizeof(phys_addr_t) == 4) { - __pv_stub(x, t, "add", __PV_BITS_31_24); + __pv_stub(x, t, "add"); } else { __pv_stub_mov_hi(t); __pv_add_carry_stub(x, t); @@ -235,7 +235,7 @@ static inline unsigned long __phys_to_virt(phys_addr_t x) * assembler expression receives 32 bit argument * in place where 'r' 32 bit operand is expected. */ - __pv_stub((unsigned long) x, t, "sub", __PV_BITS_31_24); + __pv_stub((unsigned long) x, t, "sub"); return t; } -- GitLab From 2730e8eaa4f2baccc03296e0c5ee109c0673fe5f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 20 Sep 2020 18:23:35 +0200 Subject: [PATCH 0031/1894] ARM: p2v: use relative references in patch site arrays Free up a register in the p2v patching code by switching to relative references, which don't require keeping the phys-to-virt displacement live in a register. Acked-by: Nicolas Pitre Reviewed-by: Linus Walleij Signed-off-by: Ard Biesheuvel --- arch/arm/include/asm/memory.h | 6 +++--- arch/arm/kernel/phys2virt.S | 18 +++++++----------- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index eb3c8e6e960ab..4121662dea5a4 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -187,7 +187,7 @@ extern const void *__pv_table_begin, *__pv_table_end; __asm__("@ __pv_stub\n" \ "1: " instr " %0, %1, %2\n" \ " .pushsection .pv_table,\"a\"\n" \ - " .long 1b\n" \ + " .long 1b - .\n" \ " .popsection\n" \ : "=r" (to) \ : "r" (from), "I" (__PV_BITS_31_24)) @@ -196,7 +196,7 @@ extern const void *__pv_table_begin, *__pv_table_end; __asm__ volatile("@ __pv_stub_mov\n" \ "1: mov %R0, %1\n" \ " .pushsection .pv_table,\"a\"\n" \ - " .long 1b\n" \ + " .long 1b - .\n" \ " .popsection\n" \ : "=r" (t) \ : "I" (__PV_BITS_7_0)) @@ -206,7 +206,7 @@ extern const void *__pv_table_begin, *__pv_table_end; "1: adds %Q0, %1, %2\n" \ " adc %R0, %R0, #0\n" \ " .pushsection .pv_table,\"a\"\n" \ - " .long 1b\n" \ + " .long 1b - .\n" \ " .popsection\n" \ : "+r" (y) \ : "r" (x), "I" (__PV_BITS_31_24) \ diff --git a/arch/arm/kernel/phys2virt.S b/arch/arm/kernel/phys2virt.S index 5031e5a2e78bf..8e4be15e15599 100644 --- a/arch/arm/kernel/phys2virt.S +++ b/arch/arm/kernel/phys2virt.S @@ -58,9 +58,7 @@ ENDPROC(__fixup_pv_table) .text __fixup_a_pv_table: - adr r0, 3f - ldr r6, [r0] - add r6, r6, r3 + adr_l r6, __pv_offset ldr r0, [r6, #HIGH_OFFSET] @ pv_offset high word ldr r6, [r6, #LOW_OFFSET] @ pv_offset low word mov r6, r6, lsr #24 @@ -78,7 +76,8 @@ __fixup_a_pv_table: orr r6, r6, r7, lsl #12 orr r6, #0x4000 b .Lnext -.Lloop: add r7, r3 +.Lloop: add r7, r4 + adds r4, #4 ldrh ip, [r7, #2] ARM_BE8(rev16 ip, ip) tst ip, #0x4000 @@ -108,28 +107,25 @@ ARM_BE8(rev16 ip, ip) moveq r0, #0x400000 @ set bit 22, mov to mvn instruction b .Lnext -.Lloop: ldr ip, [r7, r3] +.Lloop: ldr ip, [r7, r4] bic ip, ip, #PV_IMM8_MASK tst ip, #PV_ROT_MASK @ check the rotation field orrne ip, ip, r6 ARM_BE8(, lsl #24) @ mask in offset bits 31-24 biceq ip, ip, #PV_BIT22 @ clear bit 22 orreq ip, ip, r0 ARM_BE8(, ror #8) @ mask in offset bits 7-0 (or bit 22) - str ip, [r7, r3] + str ip, [r7, r4] + add r4, r4, #4 #endif .Lnext: cmp r4, r5 - ldrcc r7, [r4], #4 @ use branch for delay slot + ldrcc r7, [r4] @ use branch for delay slot bcc .Lloop ret lr ENDPROC(__fixup_a_pv_table) - .align -3: .long __pv_offset - ENTRY(fixup_pv_table) stmfd sp!, {r4 - r7, lr} - mov r3, #0 @ no offset mov r4, r0 @ r0 = table start add r5, r0, r1 @ r1 = table size bl __fixup_a_pv_table -- GitLab From 0e3db6c9d7f6fd0ee263325027e8d3fdac5a4c9e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 20 Sep 2020 19:19:32 +0200 Subject: [PATCH 0032/1894] ARM: p2v: simplify __fixup_pv_table() Declutter the code in __fixup_pv_table() by using the new adr_l/str_l macros to take PC relative references to external symbols, and by using the value of PHYS_OFFSET passed in r8 to calculate the p2v offset. Acked-by: Nicolas Pitre Reviewed-by: Linus Walleij Signed-off-by: Ard Biesheuvel --- arch/arm/kernel/phys2virt.S | 34 ++++++++++++++-------------------- 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/arch/arm/kernel/phys2virt.S b/arch/arm/kernel/phys2virt.S index 8e4be15e15599..be8fb0d898776 100644 --- a/arch/arm/kernel/phys2virt.S +++ b/arch/arm/kernel/phys2virt.S @@ -29,33 +29,27 @@ */ __HEAD ENTRY(__fixup_pv_table) - adr r0, 1f - ldmia r0, {r3-r7} - mvn ip, #0 - subs r3, r0, r3 @ PHYS_OFFSET - PAGE_OFFSET - add r4, r4, r3 @ adjust table start address - add r5, r5, r3 @ adjust table end address - add r6, r6, r3 @ adjust __pv_phys_pfn_offset address - add r7, r7, r3 @ adjust __pv_offset address mov r0, r8, lsr #PAGE_SHIFT @ convert to PFN - str r0, [r6] @ save computed PHYS_OFFSET to __pv_phys_pfn_offset - strcc ip, [r7, #HIGH_OFFSET] @ save to __pv_offset high bits - mov r6, r3, lsr #24 @ constant for add/sub instructions - teq r3, r6, lsl #24 @ must be 16MiB aligned + str_l r0, __pv_phys_pfn_offset, r3 + + adr_l r0, __pv_offset + subs r3, r8, #PAGE_OFFSET @ PHYS_OFFSET - PAGE_OFFSET + mvn ip, #0 + strcc ip, [r0, #HIGH_OFFSET] @ save to __pv_offset high bits + str r3, [r0, #LOW_OFFSET] @ save to __pv_offset low bits + + mov r0, r3, lsr #24 @ constant for add/sub instructions + teq r3, r0, lsl #24 @ must be 16MiB aligned bne 0f - str r3, [r7, #LOW_OFFSET] @ save to __pv_offset low bits + + adr_l r4, __pv_table_begin + adr_l r5, __pv_table_end b __fixup_a_pv_table + 0: mov r0, r0 @ deadloop on error b 0b ENDPROC(__fixup_pv_table) - .align -1: .long . - .long __pv_table_begin - .long __pv_table_end -2: .long __pv_phys_pfn_offset - .long __pv_offset - .text __fixup_a_pv_table: adr_l r6, __pv_offset -- GitLab From e8e00f5afb087912fb3edb225ee373aa6499bb79 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 20 Sep 2020 23:02:25 +0200 Subject: [PATCH 0033/1894] ARM: p2v: switch to MOVW for Thumb2 and ARM/LPAE In preparation for reducing the phys-to-virt minimum relative alignment from 16 MiB to 2 MiB, switch to patchable sequences involving MOVW instructions that can more easily be manipulated to carry a 12-bit immediate. Note that the non-LPAE ARM sequence is not updated: MOVW may not be supported on non-LPAE platforms, and the sequence itself can be updated more easily to apply the 12 bits of displacement. For Thumb2, which has many more versions of opcodes, switch to a sequence that can be patched by the same patching code for both versions. Note that the Thumb2 opcodes for MOVW and MVN are unambiguous, and have no rotation bits in their immediate fields, so there is no need to use placeholder constants in the asm blocks. While at it, drop the 'volatile' qualifiers from the asm blocks: the code does not have any side effects that are invisible to the compiler, so it is free to omit these sequences if the outputs are not used. Suggested-by: Russell King Acked-by: Nicolas Pitre Reviewed-by: Linus Walleij Signed-off-by: Ard Biesheuvel --- arch/arm/include/asm/memory.h | 44 +++++++--- arch/arm/kernel/phys2virt.S | 147 +++++++++++++++++++++++++++------- 2 files changed, 148 insertions(+), 43 deletions(-) diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 4121662dea5a4..ccf55cef6ab95 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -183,6 +183,7 @@ extern const void *__pv_table_begin, *__pv_table_end; #define PHYS_OFFSET ((phys_addr_t)__pv_phys_pfn_offset << PAGE_SHIFT) #define PHYS_PFN_OFFSET (__pv_phys_pfn_offset) +#ifndef CONFIG_THUMB2_KERNEL #define __pv_stub(from,to,instr) \ __asm__("@ __pv_stub\n" \ "1: " instr " %0, %1, %2\n" \ @@ -192,25 +193,45 @@ extern const void *__pv_table_begin, *__pv_table_end; : "=r" (to) \ : "r" (from), "I" (__PV_BITS_31_24)) -#define __pv_stub_mov_hi(t) \ - __asm__ volatile("@ __pv_stub_mov\n" \ - "1: mov %R0, %1\n" \ +#define __pv_add_carry_stub(x, y) \ + __asm__("@ __pv_add_carry_stub\n" \ + "0: movw %R0, #0\n" \ + " adds %Q0, %1, %R0, lsl #24\n" \ + "1: mov %R0, %2\n" \ + " adc %R0, %R0, #0\n" \ " .pushsection .pv_table,\"a\"\n" \ - " .long 1b - .\n" \ + " .long 0b - ., 1b - .\n" \ " .popsection\n" \ - : "=r" (t) \ - : "I" (__PV_BITS_7_0)) + : "=&r" (y) \ + : "r" (x), "I" (__PV_BITS_7_0) \ + : "cc") + +#else +#define __pv_stub(from,to,instr) \ + __asm__("@ __pv_stub\n" \ + "0: movw %0, #0\n" \ + " lsl %0, #24\n" \ + " " instr " %0, %1, %0\n" \ + " .pushsection .pv_table,\"a\"\n" \ + " .long 0b - .\n" \ + " .popsection\n" \ + : "=&r" (to) \ + : "r" (from)) #define __pv_add_carry_stub(x, y) \ - __asm__ volatile("@ __pv_add_carry_stub\n" \ - "1: adds %Q0, %1, %2\n" \ + __asm__("@ __pv_add_carry_stub\n" \ + "0: movw %R0, #0\n" \ + " lsls %R0, #24\n" \ + " adds %Q0, %1, %R0\n" \ + "1: mvn %R0, #0\n" \ " adc %R0, %R0, #0\n" \ " .pushsection .pv_table,\"a\"\n" \ - " .long 1b - .\n" \ + " .long 0b - ., 1b - .\n" \ " .popsection\n" \ - : "+r" (y) \ - : "r" (x), "I" (__PV_BITS_31_24) \ + : "=&r" (y) \ + : "r" (x) \ : "cc") +#endif static inline phys_addr_t __virt_to_phys_nodebug(unsigned long x) { @@ -219,7 +240,6 @@ static inline phys_addr_t __virt_to_phys_nodebug(unsigned long x) if (sizeof(phys_addr_t) == 4) { __pv_stub(x, t, "add"); } else { - __pv_stub_mov_hi(t); __pv_add_carry_stub(x, t); } return t; diff --git a/arch/arm/kernel/phys2virt.S b/arch/arm/kernel/phys2virt.S index be8fb0d898776..a4e364689663d 100644 --- a/arch/arm/kernel/phys2virt.S +++ b/arch/arm/kernel/phys2virt.S @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 1994-2002 Russell King - * Copyright (c) 2003 ARM Limited + * Copyright (c) 2003, 2020 ARM Limited * All Rights Reserved */ @@ -58,55 +58,140 @@ __fixup_a_pv_table: mov r6, r6, lsr #24 cmn r0, #1 #ifdef CONFIG_THUMB2_KERNEL + @ + @ The Thumb-2 versions of the patchable sequences are + @ + @ phys-to-virt: movw , #offset<31:24> + @ lsl , #24 + @ sub , , + @ + @ virt-to-phys (non-LPAE): movw , #offset<31:24> + @ lsl , #24 + @ add , , + @ + @ virt-to-phys (LPAE): movw , #offset<31:24> + @ lsl , #24 + @ adds , , + @ mov , #offset<39:32> + @ adc , , #0 + @ + @ In the non-LPAE case, all patchable instructions are MOVW + @ instructions, where we need to patch in the offset into the + @ second halfword of the opcode (the 16-bit immediate is encoded + @ as imm4:i:imm3:imm8) + @ + @ 15 11 10 9 4 3 0 15 14 12 11 8 7 0 + @ +-----------+---+-------------+------++---+------+----+------+ + @ MOVW | 1 1 1 1 0 | i | 1 0 0 1 0 0 | imm4 || 0 | imm3 | Rd | imm8 | + @ +-----------+---+-------------+------++---+------+----+------+ + @ + @ In the LPAE case, we also need to patch in the high word of the + @ offset into the immediate field of the MOV instruction, or patch it + @ to a MVN instruction if the offset is negative. In this case, we + @ need to inspect the first halfword of the opcode, to check whether + @ it is MOVW or MOV/MVN, and to perform the MOV to MVN patching if + @ needed. The encoding of the immediate is rather complex for values + @ of i:imm3 != 0b0000, but fortunately, we never need more than 8 lower + @ order bits, which can be patched into imm8 directly (and i:imm3 + @ cleared) + @ + @ 15 11 10 9 5 0 15 14 12 11 8 7 0 + @ +-----------+---+---------------------++---+------+----+------+ + @ MOV | 1 1 1 1 0 | i | 0 0 0 1 0 0 1 1 1 1 || 0 | imm3 | Rd | imm8 | + @ MVN | 1 1 1 1 0 | i | 0 0 0 1 1 0 1 1 1 1 || 0 | imm3 | Rd | imm8 | + @ +-----------+---+---------------------++---+------+----+------+ + @ moveq r0, #0x200000 @ set bit 21, mov to mvn instruction - lsls r6, #24 - beq .Lnext - clz r7, r6 - lsr r6, #24 - lsl r6, r7 - bic r6, #0x0080 - lsrs r7, #1 - orrcs r6, #0x0080 - orr r6, r6, r7, lsl #12 - orr r6, #0x4000 b .Lnext .Lloop: add r7, r4 - adds r4, #4 - ldrh ip, [r7, #2] -ARM_BE8(rev16 ip, ip) - tst ip, #0x4000 - and ip, #0x8f00 - orrne ip, r6 @ mask in offset bits 31-24 - orreq ip, r0 @ mask in offset bits 7-0 -ARM_BE8(rev16 ip, ip) - strh ip, [r7, #2] - bne .Lnext + adds r4, #4 @ clears Z flag +#ifdef CONFIG_ARM_LPAE ldrh ip, [r7] ARM_BE8(rev16 ip, ip) - bic ip, #0x20 - orr ip, ip, r0, lsr #16 + tst ip, #0x200 @ MOVW has bit 9 set, MVN has it clear + bne 0f @ skip to MOVW handling (Z flag is clear) + bic ip, #0x20 @ clear bit 5 (MVN -> MOV) + orr ip, ip, r0, lsr #16 @ MOV -> MVN if offset < 0 ARM_BE8(rev16 ip, ip) strh ip, [r7] + @ Z flag is set +0: +#endif + ldrh ip, [r7, #2] +ARM_BE8(rev16 ip, ip) + and ip, #0xf00 @ clear everything except Rd field + orreq ip, r0 @ Z flag set -> MOV/MVN -> patch in high bits + orrne ip, r6 @ Z flag clear -> MOVW -> patch in low bits +ARM_BE8(rev16 ip, ip) + strh ip, [r7, #2] #else #ifdef CONFIG_CPU_ENDIAN_BE8 @ in BE8, we load data in BE, but instructions still in LE -#define PV_BIT22 0x00004000 +#define PV_BIT24 0x00000001 #define PV_IMM8_MASK 0xff000000 -#define PV_ROT_MASK 0x000f0000 #else -#define PV_BIT22 0x00400000 +#define PV_BIT24 0x01000000 #define PV_IMM8_MASK 0x000000ff -#define PV_ROT_MASK 0xf00 #endif + @ + @ The ARM versions of the patchable sequences are + @ + @ phys-to-virt: sub , , #offset<31:24>, lsl #24 + @ + @ virt-to-phys (non-LPAE): add , , #offset<31:24>, lsl #24 + @ + @ virt-to-phys (LPAE): movw , #offset<31:24> + @ adds , , , lsl #24 + @ mov , #offset<39:32> + @ adc , , #0 + @ + @ In the non-LPAE case, all patchable instructions are ADD or SUB + @ instructions, where we need to patch in the offset into the + @ immediate field of the opcode, which is emitted with the correct + @ rotation value. (The effective value of the immediate is imm12<7:0> + @ rotated right by [2 * imm12<11:8>] bits) + @ + @ 31 28 27 23 22 20 19 16 15 12 11 0 + @ +------+-----------------+------+------+-------+ + @ ADD | cond | 0 0 1 0 1 0 0 0 | Rn | Rd | imm12 | + @ SUB | cond | 0 0 1 0 0 1 0 0 | Rn | Rd | imm12 | + @ MOV | cond | 0 0 1 1 1 0 1 0 | Rn | Rd | imm12 | + @ MVN | cond | 0 0 1 1 1 1 1 0 | Rn | Rd | imm12 | + @ +------+-----------------+------+------+-------+ + @ + @ In the LPAE case, we use a MOVW instruction to carry the low offset + @ word, and patch in the high word of the offset into the immediate + @ field of the subsequent MOV instruction, or patch it to a MVN + @ instruction if the offset is negative. We can distinguish MOVW + @ instructions based on bits 23:22 of the opcode, and ADD/SUB can be + @ distinguished from MOV/MVN (all using the encodings above) using + @ bit 24. + @ + @ 31 28 27 23 22 20 19 16 15 12 11 0 + @ +------+-----------------+------+------+-------+ + @ MOVW | cond | 0 0 1 1 0 0 0 0 | imm4 | Rd | imm12 | + @ +------+-----------------+------+------+-------+ + @ moveq r0, #0x400000 @ set bit 22, mov to mvn instruction b .Lnext .Lloop: ldr ip, [r7, r4] +#ifdef CONFIG_ARM_LPAE + tst ip, #PV_BIT24 @ ADD/SUB have bit 24 clear + beq 1f +ARM_BE8(rev ip, ip) + tst ip, #0xc00000 @ MOVW has bits 23:22 clear + bic ip, ip, #0x400000 @ clear bit 22 + bfc ip, #0, #12 @ clear imm12 field of MOV[W] instruction + orreq ip, ip, r6 @ MOVW -> mask in offset bits 31-24 + orrne ip, ip, r0 @ MOV -> mask in offset bits 7-0 (or bit 22) +ARM_BE8(rev ip, ip) + b 2f +1: +#endif bic ip, ip, #PV_IMM8_MASK - tst ip, #PV_ROT_MASK @ check the rotation field - orrne ip, ip, r6 ARM_BE8(, lsl #24) @ mask in offset bits 31-24 - biceq ip, ip, #PV_BIT22 @ clear bit 22 - orreq ip, ip, r0 ARM_BE8(, ror #8) @ mask in offset bits 7-0 (or bit 22) + orr ip, ip, r6 ARM_BE8(, lsl #24) @ mask in offset bits 31-24 +2: str ip, [r7, r4] add r4, r4, #4 #endif -- GitLab From 9443076e4330a14ae2c6114307668b98a8293b77 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 18 Sep 2020 11:55:42 +0300 Subject: [PATCH 0034/1894] ARM: p2v: reduce p2v alignment requirement to 2 MiB The ARM kernel's linear map starts at PAGE_OFFSET, which maps to a physical address (PHYS_OFFSET) that is platform specific, and is discovered at boot. Since we don't want to slow down translations between physical and virtual addresses by keeping the offset in a variable in memory, we implement this by patching the code performing the translation, and putting the offset between PAGE_OFFSET and the start of physical RAM directly into the instruction opcodes. As we only patch up to 8 bits of offset, yielding 4 GiB >> 8 == 16 MiB of granularity, we have to round up PHYS_OFFSET to the next multiple if the start of physical RAM is not a multiple of 16 MiB. This wastes some physical RAM, since the memory that was skipped will now live below PAGE_OFFSET, making it inaccessible to the kernel. We can improve this by changing the patchable sequences and the patching logic to carry more bits of offset: 11 bits gives us 4 GiB >> 11 == 2 MiB of granularity, and so we will never waste more than that amount by rounding up the physical start of DRAM to the next multiple of 2 MiB. (Note that 2 MiB granularity guarantees that the linear mapping can be created efficiently, whereas less than 2 MiB may result in the linear mapping needing another level of page tables) This helps Zhen Lei's scenario, where the start of DRAM is known to be occupied. It also helps EFI boot, which relies on the firmware's page allocator to allocate space for the decompressed kernel as low as possible. And if the KASLR patches ever land for 32-bit, it will give us 3 more bits of randomization of the placement of the kernel inside the linear region. For the ARM code path, it simply comes down to using two add/sub instructions instead of one for the carryless version, and patching each of them with the correct immediate depending on the rotation field. For the LPAE calculation, which has to deal with a carry, it patches the MOVW instruction with up to 12 bits of offset (but we only need 11 bits anyway) For the Thumb2 code path, patching more than 11 bits of displacement would be somewhat cumbersome, but the 11 bits we need fit nicely into the second word of the u16[2] opcode, so we simply update the immediate assignment and the left shift to create an addend of the right magnitude. Suggested-by: Zhen Lei Acked-by: Nicolas Pitre Acked-by: Linus Walleij Signed-off-by: Ard Biesheuvel --- arch/arm/Kconfig | 2 +- arch/arm/include/asm/memory.h | 13 +++++++----- arch/arm/kernel/phys2virt.S | 40 +++++++++++++++++++++++------------ 3 files changed, 35 insertions(+), 20 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index fe2f17eb2b505..d8d62b8e72e4c 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -243,7 +243,7 @@ config ARM_PATCH_PHYS_VIRT kernel in system memory. This can only be used with non-XIP MMU kernels where the base - of physical memory is at a 16MB boundary. + of physical memory is at a 2 MiB boundary. Only disable this option if you know that you do not require this feature (eg, building a kernel for a single machine) and diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index ccf55cef6ab95..2611be35f26b3 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -173,6 +173,7 @@ extern unsigned long vectors_base; * so that all we need to do is modify the 8-bit constant field. */ #define __PV_BITS_31_24 0x81000000 +#define __PV_BITS_23_16 0x810000 #define __PV_BITS_7_0 0x81 extern unsigned long __pv_phys_pfn_offset; @@ -187,16 +188,18 @@ extern const void *__pv_table_begin, *__pv_table_end; #define __pv_stub(from,to,instr) \ __asm__("@ __pv_stub\n" \ "1: " instr " %0, %1, %2\n" \ + "2: " instr " %0, %0, %3\n" \ " .pushsection .pv_table,\"a\"\n" \ - " .long 1b - .\n" \ + " .long 1b - ., 2b - .\n" \ " .popsection\n" \ : "=r" (to) \ - : "r" (from), "I" (__PV_BITS_31_24)) + : "r" (from), "I" (__PV_BITS_31_24), \ + "I"(__PV_BITS_23_16)) #define __pv_add_carry_stub(x, y) \ __asm__("@ __pv_add_carry_stub\n" \ "0: movw %R0, #0\n" \ - " adds %Q0, %1, %R0, lsl #24\n" \ + " adds %Q0, %1, %R0, lsl #20\n" \ "1: mov %R0, %2\n" \ " adc %R0, %R0, #0\n" \ " .pushsection .pv_table,\"a\"\n" \ @@ -210,7 +213,7 @@ extern const void *__pv_table_begin, *__pv_table_end; #define __pv_stub(from,to,instr) \ __asm__("@ __pv_stub\n" \ "0: movw %0, #0\n" \ - " lsl %0, #24\n" \ + " lsl %0, #21\n" \ " " instr " %0, %1, %0\n" \ " .pushsection .pv_table,\"a\"\n" \ " .long 0b - .\n" \ @@ -221,7 +224,7 @@ extern const void *__pv_table_begin, *__pv_table_end; #define __pv_add_carry_stub(x, y) \ __asm__("@ __pv_add_carry_stub\n" \ "0: movw %R0, #0\n" \ - " lsls %R0, #24\n" \ + " lsls %R0, #21\n" \ " adds %Q0, %1, %R0\n" \ "1: mvn %R0, #0\n" \ " adc %R0, %R0, #0\n" \ diff --git a/arch/arm/kernel/phys2virt.S b/arch/arm/kernel/phys2virt.S index a4e364689663d..fb53db78fe785 100644 --- a/arch/arm/kernel/phys2virt.S +++ b/arch/arm/kernel/phys2virt.S @@ -21,7 +21,7 @@ /* * __fixup_pv_table - patch the stub instructions with the delta between * PHYS_OFFSET and PAGE_OFFSET, which is assumed to be - * 16MiB aligned. + * 2 MiB aligned. * * Called from head.S, which expects the following registers to be preserved: * r1 = machine no, r2 = atags or dtb, @@ -38,8 +38,8 @@ ENTRY(__fixup_pv_table) strcc ip, [r0, #HIGH_OFFSET] @ save to __pv_offset high bits str r3, [r0, #LOW_OFFSET] @ save to __pv_offset low bits - mov r0, r3, lsr #24 @ constant for add/sub instructions - teq r3, r0, lsl #24 @ must be 16MiB aligned + mov r0, r3, lsr #21 @ constant for add/sub instructions + teq r3, r0, lsl #21 @ must be 2 MiB aligned bne 0f adr_l r4, __pv_table_begin @@ -55,22 +55,21 @@ __fixup_a_pv_table: adr_l r6, __pv_offset ldr r0, [r6, #HIGH_OFFSET] @ pv_offset high word ldr r6, [r6, #LOW_OFFSET] @ pv_offset low word - mov r6, r6, lsr #24 cmn r0, #1 #ifdef CONFIG_THUMB2_KERNEL @ @ The Thumb-2 versions of the patchable sequences are @ - @ phys-to-virt: movw , #offset<31:24> - @ lsl , #24 + @ phys-to-virt: movw , #offset<31:21> + @ lsl , #21 @ sub , , @ - @ virt-to-phys (non-LPAE): movw , #offset<31:24> - @ lsl , #24 + @ virt-to-phys (non-LPAE): movw , #offset<31:21> + @ lsl , #21 @ add , , @ - @ virt-to-phys (LPAE): movw , #offset<31:24> - @ lsl , #24 + @ virt-to-phys (LPAE): movw , #offset<31:21> + @ lsl , #21 @ adds , , @ mov , #offset<39:32> @ adc , , #0 @@ -102,6 +101,9 @@ __fixup_a_pv_table: @ +-----------+---+---------------------++---+------+----+------+ @ moveq r0, #0x200000 @ set bit 21, mov to mvn instruction + lsrs r3, r6, #29 @ isolate top 3 bits of displacement + ubfx r6, r6, #21, #8 @ put bits 28:21 into the MOVW imm8 field + bfi r6, r3, #12, #3 @ put bits 31:29 into the MOVW imm3 field b .Lnext .Lloop: add r7, r4 adds r4, #4 @ clears Z flag @@ -129,20 +131,24 @@ ARM_BE8(rev16 ip, ip) @ in BE8, we load data in BE, but instructions still in LE #define PV_BIT24 0x00000001 #define PV_IMM8_MASK 0xff000000 +#define PV_IMMR_MSB 0x00080000 #else #define PV_BIT24 0x01000000 #define PV_IMM8_MASK 0x000000ff +#define PV_IMMR_MSB 0x00000800 #endif @ @ The ARM versions of the patchable sequences are @ @ phys-to-virt: sub , , #offset<31:24>, lsl #24 + @ sub , , #offset<23:16>, lsl #16 @ @ virt-to-phys (non-LPAE): add , , #offset<31:24>, lsl #24 + @ add , , #offset<23:16>, lsl #16 @ - @ virt-to-phys (LPAE): movw , #offset<31:24> - @ adds , , , lsl #24 + @ virt-to-phys (LPAE): movw , #offset<31:20> + @ adds , , , lsl #20 @ mov , #offset<39:32> @ adc , , #0 @ @@ -174,6 +180,9 @@ ARM_BE8(rev16 ip, ip) @ +------+-----------------+------+------+-------+ @ moveq r0, #0x400000 @ set bit 22, mov to mvn instruction + mov r3, r6, lsr #16 @ put offset bits 31-16 into r3 + mov r6, r6, lsr #24 @ put offset bits 31-24 into r6 + and r3, r3, #0xf0 @ only keep offset bits 23-20 in r3 b .Lnext .Lloop: ldr ip, [r7, r4] #ifdef CONFIG_ARM_LPAE @@ -183,14 +192,17 @@ ARM_BE8(rev ip, ip) tst ip, #0xc00000 @ MOVW has bits 23:22 clear bic ip, ip, #0x400000 @ clear bit 22 bfc ip, #0, #12 @ clear imm12 field of MOV[W] instruction - orreq ip, ip, r6 @ MOVW -> mask in offset bits 31-24 + orreq ip, ip, r6, lsl #4 @ MOVW -> mask in offset bits 31-24 + orreq ip, ip, r3, lsr #4 @ MOVW -> mask in offset bits 23-20 orrne ip, ip, r0 @ MOV -> mask in offset bits 7-0 (or bit 22) ARM_BE8(rev ip, ip) b 2f 1: #endif + tst ip, #PV_IMMR_MSB @ rotation value >= 16 ? bic ip, ip, #PV_IMM8_MASK - orr ip, ip, r6 ARM_BE8(, lsl #24) @ mask in offset bits 31-24 + orreq ip, ip, r6 ARM_BE8(, lsl #24) @ mask in offset bits 31-24 + orrne ip, ip, r3 ARM_BE8(, lsl #24) @ mask in offset bits 23-20 2: str ip, [r7, r4] add r4, r4, #4 -- GitLab From 67e3f828bd4bf5e4eb4214dc4eb227d8f1c8a877 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 14 Sep 2020 12:28:01 +0300 Subject: [PATCH 0035/1894] ARM: efistub: replace adrl pseudo-op with adr_l macro invocation The ARM 'adrl' pseudo instruction is a bit problematic, as it does not exist in Thumb mode, and it is not implemented by Clang either. Since the Thumb variant has a slightly bigger range, it is sometimes necessary to emit the 'adrl' variant in ARM mode where Thumb mode can use adr just fine. However, that still leaves the Clang issue, which does not appear to be supporting this any time soon. So let's switch to the adr_l macro, which works for both ARM and Thumb, and has unlimited range. Reviewed-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel --- arch/arm/boot/compressed/head.S | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 2e04ec5b54469..5b591dacbaaf6 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -1440,8 +1440,7 @@ ENTRY(efi_enter_kernel) mov r4, r0 @ preserve image base mov r8, r1 @ preserve DT pointer - ARM( adrl r0, call_cache_fn ) - THUMB( adr r0, call_cache_fn ) + adr_l r0, call_cache_fn adr r1, 0f @ clean the region of code we bl cache_clean_flush @ may run with the MMU off -- GitLab From 62c4a2e202b18e1d7176875b7e7af240f340596b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 14 Sep 2020 11:25:06 +0300 Subject: [PATCH 0036/1894] ARM: head-common.S: use PC-relative insn sequence for __proc_info Replace the open coded PC relative offset calculations with a pair of adr_l invocations. This removes some open coded arithmetic involving virtual addresses, avoids literal pools on v7+, and slightly reduces the footprint of the code. Reviewed-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel --- arch/arm/kernel/head-common.S | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S index 4a3982812a401..9a5ab6c19568e 100644 --- a/arch/arm/kernel/head-common.S +++ b/arch/arm/kernel/head-common.S @@ -170,11 +170,12 @@ ENDPROC(lookup_processor_type) * r9 = cpuid (preserved) */ __lookup_processor_type: - adr r3, __lookup_processor_type_data - ldmia r3, {r4 - r6} - sub r3, r3, r4 @ get offset between virt&phys - add r5, r5, r3 @ convert virt addresses to - add r6, r6, r3 @ physical address space + /* + * Look in for information about the __proc_info + * structure. + */ + adr_l r5, __proc_info_begin + adr_l r6, __proc_info_end 1: ldmia r5, {r3, r4} @ value, mask and r4, r4, r9 @ mask wanted bits teq r3, r4 @@ -186,17 +187,6 @@ __lookup_processor_type: 2: ret lr ENDPROC(__lookup_processor_type) -/* - * Look in for information about the __proc_info structure. - */ - .align 2 - .type __lookup_processor_type_data, %object -__lookup_processor_type_data: - .long . - .long __proc_info_begin - .long __proc_info_end - .size __lookup_processor_type_data, . - __lookup_processor_type_data - __error_lpae: #ifdef CONFIG_DEBUG_LL adr r0, str_lpae -- GitLab From 172c34c9ff0144c3e1d96a9b54d6fecfe5d17c3c Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 14 Sep 2020 11:25:16 +0300 Subject: [PATCH 0037/1894] ARM: head-common.S: use PC-relative insn sequence for idmap creation Replace the open coded PC relative offset calculations involving __turn_mmu_on and __turn_mmu_on_end with a pair of adr_l invocations. This removes some open coded arithmetic involving virtual addresses, avoids literal pools on v7+, and slightly reduces the footprint of the code. Reviewed-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel --- arch/arm/kernel/head.S | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 7e3f368090116..f5a636fee9d0d 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -224,11 +224,8 @@ __create_page_tables: * Create identity mapping to cater for __enable_mmu. * This identity mapping will be removed by paging_init(). */ - adr r0, __turn_mmu_on_loc - ldmia r0, {r3, r5, r6} - sub r0, r0, r3 @ virt->phys offset - add r5, r5, r0 @ phys __turn_mmu_on - add r6, r6, r0 @ phys __turn_mmu_on_end + adr_l r5, __turn_mmu_on @ _pa(__turn_mmu_on) + adr_l r6, __turn_mmu_on_end @ _pa(__turn_mmu_on_end) mov r5, r5, lsr #SECTION_SHIFT mov r6, r6, lsr #SECTION_SHIFT @@ -351,11 +348,6 @@ __create_page_tables: ret lr ENDPROC(__create_page_tables) .ltorg - .align -__turn_mmu_on_loc: - .long . - .long __turn_mmu_on - .long __turn_mmu_on_end #if defined(CONFIG_SMP) .text -- GitLab From 91580f0dbf24c6d616091526a900213bc7aa48fe Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 14 Sep 2020 11:25:23 +0300 Subject: [PATCH 0038/1894] ARM: head.S: use PC-relative insn sequence for secondary_data Replace the open coded PC relative offset calculations with adr_l and ldr_l invocations. This removes some open coded arithmetic involving virtual addresses, avoids literal pools on v7+, and slightly reduces the footprint of the code. Note that it also removes a stale comment about the contents of r6. Reviewed-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel --- arch/arm/kernel/head.S | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index f5a636fee9d0d..478506b2d51f7 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -383,10 +383,8 @@ ENTRY(secondary_startup) /* * Use the page tables supplied from __cpu_up. */ - adr r4, __secondary_data - ldmia r4, {r5, r7, r12} @ address to jump to after - sub lr, r4, r5 @ mmu has been enabled - add r3, r7, lr + adr_l r3, secondary_data + mov_l r12, __secondary_switched ldrd r4, r5, [r3, #0] @ get secondary_data.pgdir ARM_BE8(eor r4, r4, r5) @ Swap r5 and r4 in BE: ARM_BE8(eor r5, r4, r5) @ it can be done in 3 steps @@ -401,22 +399,13 @@ ARM_BE8(eor r4, r4, r5) @ without using a temp reg. ENDPROC(secondary_startup) ENDPROC(secondary_startup_arm) - /* - * r6 = &secondary_data - */ ENTRY(__secondary_switched) - ldr sp, [r7, #12] @ get secondary_data.stack + ldr_l r7, secondary_data + 12 @ get secondary_data.stack + mov sp, r7 mov fp, #0 b secondary_start_kernel ENDPROC(__secondary_switched) - .align - - .type __secondary_data, %object -__secondary_data: - .long . - .long secondary_data - .long __secondary_switched #endif /* defined(CONFIG_SMP) */ -- GitLab From 450abd38fe6c6313ce9bdd9dce81c1dd604f6fb0 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 14 Sep 2020 11:48:20 +0300 Subject: [PATCH 0039/1894] ARM: kernel: use relative references for UP/SMP alternatives Currently, the .alt.smp.init section contains the virtual addresses of the patch sites. Since patching may occur both before and after switching into virtual mode, this requires some manual handling of the address when applying the UP alternative. Let's simplify this by using relative offsets in the table entries: this allows us to simply add each entry's address to its contents, regardless of whether we are running in virtual mode or not. Reviewed-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel --- arch/arm/include/asm/assembler.h | 4 ++-- arch/arm/include/asm/processor.h | 2 +- arch/arm/kernel/head.S | 10 +++++----- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 72627c5fb3b2c..6ed30421f6979 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -259,7 +259,7 @@ */ #define ALT_UP(instr...) \ .pushsection ".alt.smp.init", "a" ;\ - .long 9998b ;\ + .long 9998b - . ;\ 9997: instr ;\ .if . - 9997b == 2 ;\ nop ;\ @@ -270,7 +270,7 @@ .popsection #define ALT_UP_B(label) \ .pushsection ".alt.smp.init", "a" ;\ - .long 9998b ;\ + .long 9998b - . ;\ W(b) . + (label - 9998b) ;\ .popsection #else diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h index b9241051e5cb2..9e6b972863077 100644 --- a/arch/arm/include/asm/processor.h +++ b/arch/arm/include/asm/processor.h @@ -96,7 +96,7 @@ unsigned long get_wchan(struct task_struct *p); #define __ALT_SMP_ASM(smp, up) \ "9998: " smp "\n" \ " .pushsection \".alt.smp.init\", \"a\"\n" \ - " .long 9998b\n" \ + " .long 9998b - .\n" \ " " up "\n" \ " .popsection\n" #else diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 478506b2d51f7..cdc79fcee43ea 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -546,14 +546,15 @@ smp_on_up: __do_fixup_smp_on_up: cmp r4, r5 reths lr - ldmia r4!, {r0, r6} - ARM( str r6, [r0, r3] ) - THUMB( add r0, r0, r3 ) + ldmia r4, {r0, r6} + ARM( str r6, [r0, r4] ) + THUMB( add r0, r0, r4 ) + add r4, r4, #8 #ifdef __ARMEB__ THUMB( mov r6, r6, ror #16 ) @ Convert word order for big-endian. #endif THUMB( strh r6, [r0], #2 ) @ For Thumb-2, store as two halfwords - THUMB( mov r6, r6, lsr #16 ) @ to be robust against misaligned r3. + THUMB( mov r6, r6, lsr #16 ) @ to be robust against misaligned r0. THUMB( strh r6, [r0] ) b __do_fixup_smp_on_up ENDPROC(__do_fixup_smp_on_up) @@ -562,7 +563,6 @@ ENTRY(fixup_smp) stmfd sp!, {r4 - r6, lr} mov r4, r0 add r5, r0, r1 - mov r3, #0 bl __do_fixup_smp_on_up ldmfd sp!, {r4 - r6, pc} ENDPROC(fixup_smp) -- GitLab From 59d2f2827dfdccf8911d5e51465136b52ba623c4 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 14 Sep 2020 11:25:29 +0300 Subject: [PATCH 0040/1894] ARM: head: use PC-relative insn sequence for __smp_alt Now that calling __do_fixup_smp_on_up() can be done without passing the physical-to-virtual offset in r3, we can replace the open coded PC relative offset calculations with a pair of adr_l invocations. This removes some open coded arithmetic involving virtual addresses, avoids literal pools on v7+, and slightly reduces the footprint of the code. Reviewed-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel --- arch/arm/kernel/head.S | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index cdc79fcee43ea..5e031a0bf9a96 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -520,19 +520,11 @@ ARM_BE8(rev r0, r0) @ byteswap if big endian retne lr __fixup_smp_on_up: - adr r0, 1f - ldmia r0, {r3 - r5} - sub r3, r0, r3 - add r4, r4, r3 - add r5, r5, r3 + adr_l r4, __smpalt_begin + adr_l r5, __smpalt_end b __do_fixup_smp_on_up ENDPROC(__fixup_smp) - .align -1: .word . - .word __smpalt_begin - .word __smpalt_end - .pushsection .data .align 2 .globl smp_on_up -- GitLab From d74d2b225018baa0e04e080ee9e80b21667ba3a2 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 14 Sep 2020 11:25:34 +0300 Subject: [PATCH 0041/1894] ARM: sleep.S: use PC-relative insn sequence for sleep_save_sp/mpidr_hash Replace the open coded PC relative offset calculations with adr_l and ldr_l invocations. This removes some open coded PC relative arithmetic, avoids literal pools on v7+, and slightly reduces the footprint of the code. Note that ALT_SMP() expects a single instruction so move the macro invocation after it. Reviewed-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel --- arch/arm/kernel/sleep.S | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S index 5dc8b80bb6938..43077e11dafda 100644 --- a/arch/arm/kernel/sleep.S +++ b/arch/arm/kernel/sleep.S @@ -72,8 +72,9 @@ ENTRY(__cpu_suspend) ldr r3, =sleep_save_sp stmfd sp!, {r0, r1} @ save suspend func arg and pointer ldr r3, [r3, #SLEEP_SAVE_SP_VIRT] - ALT_SMP(ldr r0, =mpidr_hash) + ALT_SMP(W(nop)) @ don't use adr_l inside ALT_SMP() ALT_UP_B(1f) + adr_l r0, mpidr_hash /* This ldmia relies on the memory layout of the mpidr_hash struct */ ldmia r0, {r1, r6-r8} @ r1 = mpidr mask (r6,r7,r8) = l[0,1,2] shifts compute_mpidr_hash r0, r6, r7, r8, r2, r1 @@ -147,9 +148,8 @@ no_hyp: mov r1, #0 ALT_SMP(mrc p15, 0, r0, c0, c0, 5) ALT_UP_B(1f) - adr r2, mpidr_hash_ptr - ldr r3, [r2] - add r2, r2, r3 @ r2 = struct mpidr_hash phys address + adr_l r2, mpidr_hash @ r2 = struct mpidr_hash phys address + /* * This ldmia relies on the memory layout of the mpidr_hash * struct mpidr_hash. @@ -157,10 +157,7 @@ no_hyp: ldmia r2, { r3-r6 } @ r3 = mpidr mask (r4,r5,r6) = l[0,1,2] shifts compute_mpidr_hash r1, r4, r5, r6, r0, r3 1: - adr r0, _sleep_save_sp - ldr r2, [r0] - add r0, r0, r2 - ldr r0, [r0, #SLEEP_SAVE_SP_PHYS] + ldr_l r0, sleep_save_sp + SLEEP_SAVE_SP_PHYS ldr r0, [r0, r1, lsl #2] @ load phys pgd, stack, resume fn @@ -177,12 +174,6 @@ ENDPROC(cpu_resume_arm) ENDPROC(cpu_resume_no_hyp) #endif - .align 2 -_sleep_save_sp: - .long sleep_save_sp - . -mpidr_hash_ptr: - .long mpidr_hash - . @ mpidr_hash struct offset - .data .align 2 .type sleep_save_sp, #object -- GitLab From 3bcf906b194cebb6817cbb2f07b69e12aa5d7f51 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 14 Sep 2020 11:25:46 +0300 Subject: [PATCH 0042/1894] ARM: head.S: use PC relative insn sequence to calculate PHYS_OFFSET Replace the open coded arithmetic with a simple adr_l/sub pair. This removes some open coded arithmetic involving virtual addresses, avoids literal pools on v7+, and slightly reduces the footprint of the code. Reviewed-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel --- arch/arm/kernel/head.S | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 5e031a0bf9a96..ae0b08b47f52c 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -103,10 +103,8 @@ ENTRY(stext) #endif #ifndef CONFIG_XIP_KERNEL - adr r3, 2f - ldmia r3, {r4, r8} - sub r4, r3, r4 @ (PHYS_OFFSET - PAGE_OFFSET) - add r8, r8, r4 @ PHYS_OFFSET + adr_l r8, _text @ __pa(_text) + sub r8, r8, #TEXT_OFFSET @ PHYS_OFFSET #else ldr r8, =PLAT_PHYS_OFFSET @ always constant in this case #endif @@ -158,10 +156,6 @@ ENTRY(stext) 1: b __enable_mmu ENDPROC(stext) .ltorg -#ifndef CONFIG_XIP_KERNEL -2: .long . - .long PAGE_OFFSET -#endif /* * Setup the initial page tables. We only setup the barest -- GitLab From aaac3733171fca948c4fb66b78257620e3885339 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 14 Sep 2020 11:25:52 +0300 Subject: [PATCH 0043/1894] ARM: kvm: replace open coded VA->PA calculations with adr_l call Replace the open coded calculations of the actual physical address of the KVM stub vector table with a single adr_l invocation. Reviewed-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel --- arch/arm/boot/compressed/head.S | 15 ++------------- arch/arm/kernel/hyp-stub.S | 27 ++++++++++++--------------- 2 files changed, 14 insertions(+), 28 deletions(-) diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 5b591dacbaaf6..9905fb7560df2 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -468,15 +468,10 @@ dtb_check_done: /* * Compute the address of the hyp vectors after relocation. - * This requires some arithmetic since we cannot directly - * reference __hyp_stub_vectors in a PC-relative way. * Call __hyp_set_vectors with the new address so that we * can HVC again after the copy. */ -0: adr r0, 0b - movw r1, #:lower16:__hyp_stub_vectors - 0b - movt r1, #:upper16:__hyp_stub_vectors - 0b - add r0, r0, r1 + adr_l r0, __hyp_stub_vectors sub r0, r0, r5 add r0, r0, r10 bl __hyp_set_vectors @@ -627,17 +622,11 @@ not_relocated: mov r0, #0 cmp r0, #HYP_MODE @ if not booted in HYP mode... bne __enter_kernel @ boot kernel directly - adr r12, .L__hyp_reentry_vectors_offset - ldr r0, [r12] - add r0, r0, r12 - + adr_l r0, __hyp_reentry_vectors bl __hyp_set_vectors __HVC(0) @ otherwise bounce to hyp mode b . @ should never be reached - - .align 2 -.L__hyp_reentry_vectors_offset: .long __hyp_reentry_vectors - . #else b __enter_kernel #endif diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S index 26d8e03b1dd33..103d0bdb2b7e4 100644 --- a/arch/arm/kernel/hyp-stub.S +++ b/arch/arm/kernel/hyp-stub.S @@ -24,41 +24,38 @@ ENTRY(__boot_cpu_mode) .text /* - * Save the primary CPU boot mode. Requires 3 scratch registers. + * Save the primary CPU boot mode. Requires 2 scratch registers. */ - .macro store_primary_cpu_mode reg1, reg2, reg3 + .macro store_primary_cpu_mode reg1, reg2 mrs \reg1, cpsr and \reg1, \reg1, #MODE_MASK - adr \reg2, .L__boot_cpu_mode_offset - ldr \reg3, [\reg2] - str \reg1, [\reg2, \reg3] + str_l \reg1, __boot_cpu_mode, \reg2 .endm /* * Compare the current mode with the one saved on the primary CPU. * If they don't match, record that fact. The Z bit indicates * if there's a match or not. - * Requires 3 additionnal scratch registers. + * Requires 2 additional scratch registers. */ - .macro compare_cpu_mode_with_primary mode, reg1, reg2, reg3 - adr \reg2, .L__boot_cpu_mode_offset - ldr \reg3, [\reg2] - ldr \reg1, [\reg2, \reg3] + .macro compare_cpu_mode_with_primary mode, reg1, reg2 + adr_l \reg2, __boot_cpu_mode + ldr \reg1, [\reg2] cmp \mode, \reg1 @ matches primary CPU boot mode? orrne \reg1, \reg1, #BOOT_CPU_MODE_MISMATCH - strne \reg1, [\reg2, \reg3] @ record what happened and give up + strne \reg1, [\reg2] @ record what happened and give up .endm #else /* ZIMAGE */ - .macro store_primary_cpu_mode reg1:req, reg2:req, reg3:req + .macro store_primary_cpu_mode reg1:req, reg2:req .endm /* * The zImage loader only runs on one CPU, so we don't bother with mult-CPU * consistency checking: */ - .macro compare_cpu_mode_with_primary mode, reg1, reg2, reg3 + .macro compare_cpu_mode_with_primary mode, reg1, reg2 cmp \mode, \mode .endm @@ -73,7 +70,7 @@ ENTRY(__boot_cpu_mode) */ @ Call this from the primary CPU ENTRY(__hyp_stub_install) - store_primary_cpu_mode r4, r5, r6 + store_primary_cpu_mode r4, r5 ENDPROC(__hyp_stub_install) @ fall through... @@ -87,7 +84,7 @@ ENTRY(__hyp_stub_install_secondary) * If the secondary has booted with a different mode, give up * immediately. */ - compare_cpu_mode_with_primary r4, r5, r6, r7 + compare_cpu_mode_with_primary r4, r5, r6 retne lr /* -- GitLab From 28d211919e422f58c1e6c900e5810eee4f1ce4c8 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Tue, 20 Oct 2020 14:12:26 +0800 Subject: [PATCH 0044/1894] rtc: sun6i: Fix memleak in sun6i_rtc_clk_init When clk_hw_register_fixed_rate_with_accuracy() fails, clk_data should be freed. It's the same for the subsequent two error paths, but we should also unregister the already registered clocks in them. Signed-off-by: Dinghao Liu Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201020061226.6572-1-dinghao.liu@zju.edu.cn --- drivers/rtc/rtc-sun6i.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/rtc/rtc-sun6i.c b/drivers/rtc/rtc-sun6i.c index e2b8b150bcb44..f2818cdd11d82 100644 --- a/drivers/rtc/rtc-sun6i.c +++ b/drivers/rtc/rtc-sun6i.c @@ -272,7 +272,7 @@ static void __init sun6i_rtc_clk_init(struct device_node *node, 300000000); if (IS_ERR(rtc->int_osc)) { pr_crit("Couldn't register the internal oscillator\n"); - return; + goto err; } parents[0] = clk_hw_get_name(rtc->int_osc); @@ -290,7 +290,7 @@ static void __init sun6i_rtc_clk_init(struct device_node *node, rtc->losc = clk_register(NULL, &rtc->hw); if (IS_ERR(rtc->losc)) { pr_crit("Couldn't register the LOSC clock\n"); - return; + goto err_register; } of_property_read_string_index(node, "clock-output-names", 1, @@ -301,7 +301,7 @@ static void __init sun6i_rtc_clk_init(struct device_node *node, &rtc->lock); if (IS_ERR(rtc->ext_losc)) { pr_crit("Couldn't register the LOSC external gate\n"); - return; + goto err_register; } clk_data->num = 2; @@ -314,6 +314,8 @@ static void __init sun6i_rtc_clk_init(struct device_node *node, of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); return; +err_register: + clk_hw_unregister_fixed_rate(rtc->int_osc); err: kfree(clk_data); } -- GitLab From 6edcf9dc2e1aff3aa1f5a69ee420fb30dd0e968a Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 28 Oct 2020 16:34:02 +0100 Subject: [PATCH 0045/1894] efi/libstub: EFI_GENERIC_STUB_INITRD_CMDLINE_LOADER should not default to yes EFI_GENERIC_STUB_INITRD_CMDLINE_LOADER is deprecated, so it should not be enabled by default. In light of commit 4da0b2b7e67524cc ("efi/libstub: Re-enable command line initrd loading for x86"), keep the default for X86. Fixes: cf6b83664895a5c7 ("efi/libstub: Make initrd file loader configurable") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20201028153402.1736103-1-geert+renesas@glider.be Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig index 36ec1f7188934..b452cfa2100b4 100644 --- a/drivers/firmware/efi/Kconfig +++ b/drivers/firmware/efi/Kconfig @@ -122,7 +122,7 @@ config EFI_ARMSTUB_DTB_LOADER config EFI_GENERIC_STUB_INITRD_CMDLINE_LOADER bool "Enable the command line initrd loader" if !X86 depends on EFI_STUB && (EFI_GENERIC_STUB || X86) - default y + default y if X86 depends on !RISCV help Select this config option to add support for the initrd= command -- GitLab From 8404c66140e209794b15ee5529d4559334b3d364 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Tue, 27 Oct 2020 11:57:56 -0700 Subject: [PATCH 0046/1894] clk: imx: remove unneeded semicolon A semicolon is not needed after a switch statement. Signed-off-by: Tom Rix Reviewed-by: Abel Vesa Signed-off-by: Shawn Guo --- drivers/clk/imx/clk-pll14xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/imx/clk-pll14xx.c b/drivers/clk/imx/clk-pll14xx.c index aba36e4217d2d..2b5ed86b9dbbb 100644 --- a/drivers/clk/imx/clk-pll14xx.c +++ b/drivers/clk/imx/clk-pll14xx.c @@ -416,7 +416,7 @@ struct clk_hw *imx_dev_clk_hw_pll14xx(struct device *dev, const char *name, __func__, name); kfree(pll); return ERR_PTR(-EINVAL); - }; + } pll->base = base; pll->hw.init = &init; -- GitLab From f2644bd7413c8f2fcef208c576e83335709c5120 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 29 Oct 2020 22:40:07 +0000 Subject: [PATCH 0047/1894] clk: imx: remove redundant assignment to pointer np Pointer np is being initialized with a value that is never read and it is being updated with a value later on. The initialization is redundant and can be removed. Addresses-Coverity: ("Unused value") Signed-off-by: Colin Ian King Reviewed-by: Dong Aisheng Signed-off-by: Shawn Guo --- drivers/clk/imx/clk-imx8mp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/imx/clk-imx8mp.c b/drivers/clk/imx/clk-imx8mp.c index 12ce4770f702a..3cb2bc46eae0a 100644 --- a/drivers/clk/imx/clk-imx8mp.c +++ b/drivers/clk/imx/clk-imx8mp.c @@ -425,7 +425,7 @@ static struct clk **uart_clks[ARRAY_SIZE(uart_clk_ids) + 1]; static int imx8mp_clocks_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct device_node *np = dev->of_node; + struct device_node *np; void __iomem *anatop_base, *ccm_base; int i; -- GitLab From 220175cd3979fdb860decf757cc7a5980fdd045f Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Fri, 30 Oct 2020 23:37:33 +0800 Subject: [PATCH 0048/1894] clk: imx: scu: fix build break when compiled as modules MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After commit e0d0d4d86c76 ("clk: imx8qxp: Support building i.MX8QXP clock driver as module"), clk-scu.c and clk-imx8qxp.c are complied in one module, thus there can be only one module_init() in those two files. Commit 77d8f3068c63 ("clk: imx: scu: add two cells binding support") introduced another module_init() in clk_scu.c which caused the errors below. To fix the issue, we can remove the unnecessary builtin_platform_driver from clk_scu.c and directly register the driver in imx_clk_scu_init(). CC [M] drivers/clk/imx/clk-scu.o In file included from ../include/linux/of_device.h:6, from ../include/linux/of_platform.h:12, from ../drivers/clk/imx/clk-scu.c:11: ../drivers/clk/imx/clk-scu.c: In function ‘imx_clk_scu_init’: ../drivers/clk/imx/clk-scu.c:176:35: error: ‘imx_clk_scu_driver’ undeclared (first use in this function); did you mean ‘imx_clk_scu_init’? 176 | return platform_driver_register(&imx_clk_scu_driver); | ^~~~~~~~~~~~~~~~~~ ../include/linux/platform_device.h:218:29: note: in definition of macro ‘platform_driver_register’ 218 | __platform_driver_register(drv, THIS_MODULE) | ^~~ ../drivers/clk/imx/clk-scu.c:176:35: note: each undeclared identifier is reported only once for each function it appears in 176 | return platform_driver_register(&imx_clk_scu_driver); | ^~~~~~~~~~~~~~~~~~ ../include/linux/platform_device.h:218:29: note: in definition of macro ‘platform_driver_register’ 218 | __platform_driver_register(drv, THIS_MODULE) | ^~~ ../drivers/clk/imx/clk-scu.c:177:1: error: control reaches end of non-void function [-Werror=return-type] 177 | } | ^ At top level: ../drivers/clk/imx/clk-scu.c:470:31: warning: ‘imx_clk_scu_driver’ defined but not used [-Wunused-variable] 470 | static struct platform_driver imx_clk_scu_driver = { Reported-by: kernel test robot Fixes: 77d8f3068c63 ("clk: imx: scu: add two cells binding support") Signed-off-by: Dong Aisheng Acked-by: Stephen Boyd Signed-off-by: Shawn Guo --- drivers/clk/imx/clk-scu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/imx/clk-scu.c b/drivers/clk/imx/clk-scu.c index 229a290ca5b68..d0243c52ccbcf 100644 --- a/drivers/clk/imx/clk-scu.c +++ b/drivers/clk/imx/clk-scu.c @@ -21,6 +21,7 @@ static struct imx_sc_ipc *ccm_ipc_handle; struct device_node *pd_np; +static struct platform_driver imx_clk_scu_driver; struct imx_scu_clk_node { const char *name; @@ -178,7 +179,7 @@ int imx_clk_scu_init(struct device_node *np) } } - return 0; + return platform_driver_register(&imx_clk_scu_driver); } /* @@ -542,7 +543,6 @@ static struct platform_driver imx_clk_scu_driver = { }, .probe = imx_clk_scu_probe, }; -builtin_platform_driver(imx_clk_scu_driver); static int imx_clk_scu_attach_pd(struct device *dev, u32 rsrc_id) { -- GitLab From 12309428c27737c21735fb28540c6c6f69f632f6 Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Wed, 28 Oct 2020 14:58:58 +0200 Subject: [PATCH 0049/1894] clk: imx: gate2: Remove the IMX_CLK_GATE2_SINGLE_BIT special case This was a hack which would allow multiple HW gates to be controlled by a single bit. The only user of this is the imx_dev_clk_hw_gate_shared which is not used anywhere as of now. Basically, complicates the logic of the driver for no reason. Signed-off-by: Abel Vesa Reviewed-by: Sascha Hauer Signed-off-by: Shawn Guo --- drivers/clk/imx/clk-gate2.c | 28 +++++++--------------------- drivers/clk/imx/clk.h | 5 +---- 2 files changed, 8 insertions(+), 25 deletions(-) diff --git a/drivers/clk/imx/clk-gate2.c b/drivers/clk/imx/clk-gate2.c index 7eed7083f46e2..49952eec4e282 100644 --- a/drivers/clk/imx/clk-gate2.c +++ b/drivers/clk/imx/clk-gate2.c @@ -49,14 +49,10 @@ static int clk_gate2_enable(struct clk_hw *hw) if (gate->share_count && (*gate->share_count)++ > 0) goto out; - if (gate->flags & IMX_CLK_GATE2_SINGLE_BIT) { - ret = clk_gate_ops.enable(hw); - } else { - reg = readl(gate->reg); - reg &= ~(3 << gate->bit_idx); - reg |= gate->cgr_val << gate->bit_idx; - writel(reg, gate->reg); - } + reg = readl(gate->reg); + reg &= ~(3 << gate->bit_idx); + reg |= gate->cgr_val << gate->bit_idx; + writel(reg, gate->reg); out: spin_unlock_irqrestore(gate->lock, flags); @@ -79,13 +75,9 @@ static void clk_gate2_disable(struct clk_hw *hw) goto out; } - if (gate->flags & IMX_CLK_GATE2_SINGLE_BIT) { - clk_gate_ops.disable(hw); - } else { - reg = readl(gate->reg); - reg &= ~(3 << gate->bit_idx); - writel(reg, gate->reg); - } + reg = readl(gate->reg); + reg &= ~(3 << gate->bit_idx); + writel(reg, gate->reg); out: spin_unlock_irqrestore(gate->lock, flags); @@ -105,9 +97,6 @@ static int clk_gate2_is_enabled(struct clk_hw *hw) { struct clk_gate2 *gate = to_clk_gate2(hw); - if (gate->flags & IMX_CLK_GATE2_SINGLE_BIT) - return clk_gate_ops.is_enabled(hw); - return clk_gate2_reg_is_enabled(gate->reg, gate->bit_idx); } @@ -117,9 +106,6 @@ static void clk_gate2_disable_unused(struct clk_hw *hw) unsigned long flags; u32 reg; - if (gate->flags & IMX_CLK_GATE2_SINGLE_BIT) - return; - spin_lock_irqsave(gate->lock, flags); if (!gate->share_count || *gate->share_count == 0) { diff --git a/drivers/clk/imx/clk.h b/drivers/clk/imx/clk.h index 3b796b3da2498..87b2744f72f75 100644 --- a/drivers/clk/imx/clk.h +++ b/drivers/clk/imx/clk.h @@ -6,8 +6,6 @@ #include #include -#define IMX_CLK_GATE2_SINGLE_BIT 1 - extern spinlock_t imx_ccm_lock; void imx_check_clocks(struct clk *clks[], unsigned int count); @@ -384,8 +382,7 @@ static inline struct clk_hw *imx_dev_clk_hw_gate_shared(struct device *dev, unsigned int *share_count) { return clk_hw_register_gate2(NULL, name, parent, CLK_SET_RATE_PARENT | - CLK_OPS_PARENT_ENABLE, reg, shift, 0x3, - IMX_CLK_GATE2_SINGLE_BIT, + CLK_OPS_PARENT_ENABLE, reg, shift, 0x1, 0, &imx_ccm_lock, share_count); } -- GitLab From 040adb5fe95ad39c0a714cf3b05950974caf42ed Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Wed, 28 Oct 2020 14:58:59 +0200 Subject: [PATCH 0050/1894] clk: imx: gate2: Keep the register writing in on place Move all the register writing to the newly added clk_gate2_do_shared_clks and call that everywhere need needed. Cleans up the code a little bit. Signed-off-by: Abel Vesa Reviewed-by: Sascha Hauer Signed-off-by: Shawn Guo --- drivers/clk/imx/clk-gate2.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/drivers/clk/imx/clk-gate2.c b/drivers/clk/imx/clk-gate2.c index 49952eec4e282..e1f6cd931a8d1 100644 --- a/drivers/clk/imx/clk-gate2.c +++ b/drivers/clk/imx/clk-gate2.c @@ -37,10 +37,21 @@ struct clk_gate2 { #define to_clk_gate2(_hw) container_of(_hw, struct clk_gate2, hw) -static int clk_gate2_enable(struct clk_hw *hw) +static void clk_gate2_do_shared_clks(struct clk_hw *hw, bool enable) { struct clk_gate2 *gate = to_clk_gate2(hw); u32 reg; + + reg = readl(gate->reg); + reg &= ~(3 << gate->bit_idx); + if (enable) + reg |= gate->cgr_val << gate->bit_idx; + writel(reg, gate->reg); +} + +static int clk_gate2_enable(struct clk_hw *hw) +{ + struct clk_gate2 *gate = to_clk_gate2(hw); unsigned long flags; int ret = 0; @@ -49,11 +60,7 @@ static int clk_gate2_enable(struct clk_hw *hw) if (gate->share_count && (*gate->share_count)++ > 0) goto out; - reg = readl(gate->reg); - reg &= ~(3 << gate->bit_idx); - reg |= gate->cgr_val << gate->bit_idx; - writel(reg, gate->reg); - + clk_gate2_do_shared_clks(hw, true); out: spin_unlock_irqrestore(gate->lock, flags); @@ -63,7 +70,6 @@ out: static void clk_gate2_disable(struct clk_hw *hw) { struct clk_gate2 *gate = to_clk_gate2(hw); - u32 reg; unsigned long flags; spin_lock_irqsave(gate->lock, flags); @@ -75,10 +81,7 @@ static void clk_gate2_disable(struct clk_hw *hw) goto out; } - reg = readl(gate->reg); - reg &= ~(3 << gate->bit_idx); - writel(reg, gate->reg); - + clk_gate2_do_shared_clks(hw, false); out: spin_unlock_irqrestore(gate->lock, flags); } @@ -104,15 +107,11 @@ static void clk_gate2_disable_unused(struct clk_hw *hw) { struct clk_gate2 *gate = to_clk_gate2(hw); unsigned long flags; - u32 reg; spin_lock_irqsave(gate->lock, flags); - if (!gate->share_count || *gate->share_count == 0) { - reg = readl(gate->reg); - reg &= ~(3 << gate->bit_idx); - writel(reg, gate->reg); - } + if (!gate->share_count || *gate->share_count == 0) + clk_gate2_do_shared_clks(hw, false); spin_unlock_irqrestore(gate->lock, flags); } -- GitLab From 03681d06a555a6c5f39de48d68082e7444db329f Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Wed, 28 Oct 2020 14:59:00 +0200 Subject: [PATCH 0051/1894] clk: imx: gate2: Check if clock is enabled against cgr_val Seems the logic here was wrong all along. For example, if the cgr_val is 2 (0b10), the clk_gate2_reg_is_enabled would report the clock as disabled. So check against cgr_val instead. Signed-off-by: Abel Vesa Reviewed-by: Sascha Hauer Signed-off-by: Shawn Guo --- drivers/clk/imx/clk-gate2.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/clk/imx/clk-gate2.c b/drivers/clk/imx/clk-gate2.c index e1f6cd931a8d1..40bcc2ddddba5 100644 --- a/drivers/clk/imx/clk-gate2.c +++ b/drivers/clk/imx/clk-gate2.c @@ -86,11 +86,11 @@ out: spin_unlock_irqrestore(gate->lock, flags); } -static int clk_gate2_reg_is_enabled(void __iomem *reg, u8 bit_idx) +static int clk_gate2_reg_is_enabled(void __iomem *reg, u8 bit_idx, u8 cgr_val) { u32 val = readl(reg); - if (((val >> bit_idx) & 1) == 1) + if (((val >> bit_idx) & 3) == cgr_val) return 1; return 0; @@ -100,7 +100,7 @@ static int clk_gate2_is_enabled(struct clk_hw *hw) { struct clk_gate2 *gate = to_clk_gate2(hw); - return clk_gate2_reg_is_enabled(gate->reg, gate->bit_idx); + return clk_gate2_reg_is_enabled(gate->reg, gate->bit_idx, gate->cgr_val); } static void clk_gate2_disable_unused(struct clk_hw *hw) -- GitLab From bcd418a632b621510ebc731cb707d8fe3e873119 Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Wed, 28 Oct 2020 14:59:01 +0200 Subject: [PATCH 0052/1894] clk: imx: gate2: Add cgr_mask for more flexible number of control bits On some i.MX8 platforms, there are HW gates that share the same bit. So in order to make this clock type more usable, use a mask to specify how many bits belong to those HW gates. Signed-off-by: Abel Vesa Reviewed-by: Sascha Hauer Signed-off-by: Shawn Guo --- drivers/clk/imx/clk-gate2.c | 16 ++++++++++------ drivers/clk/imx/clk.h | 24 ++++++++++++------------ 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/drivers/clk/imx/clk-gate2.c b/drivers/clk/imx/clk-gate2.c index 40bcc2ddddba5..7e4b5e82de7fc 100644 --- a/drivers/clk/imx/clk-gate2.c +++ b/drivers/clk/imx/clk-gate2.c @@ -30,6 +30,7 @@ struct clk_gate2 { void __iomem *reg; u8 bit_idx; u8 cgr_val; + u8 cgr_mask; u8 flags; spinlock_t *lock; unsigned int *share_count; @@ -43,9 +44,9 @@ static void clk_gate2_do_shared_clks(struct clk_hw *hw, bool enable) u32 reg; reg = readl(gate->reg); - reg &= ~(3 << gate->bit_idx); + reg &= ~(gate->cgr_mask << gate->bit_idx); if (enable) - reg |= gate->cgr_val << gate->bit_idx; + reg |= (gate->cgr_val & gate->cgr_mask) << gate->bit_idx; writel(reg, gate->reg); } @@ -86,11 +87,12 @@ out: spin_unlock_irqrestore(gate->lock, flags); } -static int clk_gate2_reg_is_enabled(void __iomem *reg, u8 bit_idx, u8 cgr_val) +static int clk_gate2_reg_is_enabled(void __iomem *reg, u8 bit_idx, + u8 cgr_val, u8 cgr_mask) { u32 val = readl(reg); - if (((val >> bit_idx) & 3) == cgr_val) + if (((val >> bit_idx) & cgr_mask) == cgr_val) return 1; return 0; @@ -100,7 +102,8 @@ static int clk_gate2_is_enabled(struct clk_hw *hw) { struct clk_gate2 *gate = to_clk_gate2(hw); - return clk_gate2_reg_is_enabled(gate->reg, gate->bit_idx, gate->cgr_val); + return clk_gate2_reg_is_enabled(gate->reg, gate->bit_idx, + gate->cgr_val, gate->cgr_mask); } static void clk_gate2_disable_unused(struct clk_hw *hw) @@ -125,7 +128,7 @@ static const struct clk_ops clk_gate2_ops = { struct clk_hw *clk_hw_register_gate2(struct device *dev, const char *name, const char *parent_name, unsigned long flags, - void __iomem *reg, u8 bit_idx, u8 cgr_val, + void __iomem *reg, u8 bit_idx, u8 cgr_val, u8 cgr_mask, u8 clk_gate2_flags, spinlock_t *lock, unsigned int *share_count) { @@ -142,6 +145,7 @@ struct clk_hw *clk_hw_register_gate2(struct device *dev, const char *name, gate->reg = reg; gate->bit_idx = bit_idx; gate->cgr_val = cgr_val; + gate->cgr_mask = cgr_mask; gate->flags = clk_gate2_flags; gate->lock = lock; gate->share_count = share_count; diff --git a/drivers/clk/imx/clk.h b/drivers/clk/imx/clk.h index 87b2744f72f75..3d8e40b4edbf0 100644 --- a/drivers/clk/imx/clk.h +++ b/drivers/clk/imx/clk.h @@ -66,9 +66,9 @@ extern struct imx_pll14xx_clk imx_1443x_dram_pll; to_clk(imx_clk_hw_cpu(name, parent_name, div, mux, pll, step)) #define clk_register_gate2(dev, name, parent_name, flags, reg, bit_idx, \ - cgr_val, clk_gate_flags, lock, share_count) \ + cgr_val, cgr_mask, clk_gate_flags, lock, share_count) \ to_clk(clk_hw_register_gate2(dev, name, parent_name, flags, reg, bit_idx, \ - cgr_val, clk_gate_flags, lock, share_count)) + cgr_val, cgr_mask, clk_gate_flags, lock, share_count)) #define imx_clk_pllv3(type, name, parent_name, base, div_mask) \ to_clk(imx_clk_hw_pllv3(type, name, parent_name, base, div_mask)) @@ -196,7 +196,7 @@ struct clk_hw *imx_clk_hw_pllv4(const char *name, const char *parent_name, struct clk_hw *clk_hw_register_gate2(struct device *dev, const char *name, const char *parent_name, unsigned long flags, - void __iomem *reg, u8 bit_idx, u8 cgr_val, + void __iomem *reg, u8 bit_idx, u8 cgr_val, u8 cgr_mask, u8 clk_gate_flags, spinlock_t *lock, unsigned int *share_count); @@ -349,14 +349,14 @@ static inline struct clk_hw *imx_clk_hw_gate2(const char *name, const char *pare void __iomem *reg, u8 shift) { return clk_hw_register_gate2(NULL, name, parent, CLK_SET_RATE_PARENT, reg, - shift, 0x3, 0, &imx_ccm_lock, NULL); + shift, 0x3, 0x3, 0, &imx_ccm_lock, NULL); } static inline struct clk_hw *imx_clk_hw_gate2_flags(const char *name, const char *parent, void __iomem *reg, u8 shift, unsigned long flags) { return clk_hw_register_gate2(NULL, name, parent, flags | CLK_SET_RATE_PARENT, reg, - shift, 0x3, 0, &imx_ccm_lock, NULL); + shift, 0x3, 0x3, 0, &imx_ccm_lock, NULL); } static inline struct clk_hw *imx_clk_hw_gate2_shared(const char *name, @@ -364,7 +364,7 @@ static inline struct clk_hw *imx_clk_hw_gate2_shared(const char *name, unsigned int *share_count) { return clk_hw_register_gate2(NULL, name, parent, CLK_SET_RATE_PARENT, reg, - shift, 0x3, 0, &imx_ccm_lock, share_count); + shift, 0x3, 0x3, 0, &imx_ccm_lock, share_count); } static inline struct clk_hw *imx_clk_hw_gate2_shared2(const char *name, @@ -372,7 +372,7 @@ static inline struct clk_hw *imx_clk_hw_gate2_shared2(const char *name, unsigned int *share_count) { return clk_hw_register_gate2(NULL, name, parent, CLK_SET_RATE_PARENT | - CLK_OPS_PARENT_ENABLE, reg, shift, 0x3, 0, + CLK_OPS_PARENT_ENABLE, reg, shift, 0x3, 0x3, 0, &imx_ccm_lock, share_count); } @@ -382,15 +382,15 @@ static inline struct clk_hw *imx_dev_clk_hw_gate_shared(struct device *dev, unsigned int *share_count) { return clk_hw_register_gate2(NULL, name, parent, CLK_SET_RATE_PARENT | - CLK_OPS_PARENT_ENABLE, reg, shift, 0x1, 0, - &imx_ccm_lock, share_count); + CLK_OPS_PARENT_ENABLE, reg, shift, 0x1, + 0x1, 0, &imx_ccm_lock, share_count); } static inline struct clk *imx_clk_gate2_cgr(const char *name, const char *parent, void __iomem *reg, u8 shift, u8 cgr_val) { return clk_register_gate2(NULL, name, parent, CLK_SET_RATE_PARENT, reg, - shift, cgr_val, 0, &imx_ccm_lock, NULL); + shift, cgr_val, 0x3, 0, &imx_ccm_lock, NULL); } static inline struct clk_hw *imx_clk_hw_gate3(const char *name, const char *parent, @@ -418,7 +418,7 @@ static inline struct clk_hw *imx_clk_hw_gate4(const char *name, const char *pare { return clk_hw_register_gate2(NULL, name, parent, CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE, - reg, shift, 0x3, 0, &imx_ccm_lock, NULL); + reg, shift, 0x3, 0x3, 0, &imx_ccm_lock, NULL); } static inline struct clk_hw *imx_clk_hw_gate4_flags(const char *name, @@ -427,7 +427,7 @@ static inline struct clk_hw *imx_clk_hw_gate4_flags(const char *name, { return clk_hw_register_gate2(NULL, name, parent, flags | CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE, - reg, shift, 0x3, 0, &imx_ccm_lock, NULL); + reg, shift, 0x3, 0x3, 0, &imx_ccm_lock, NULL); } #define imx_clk_gate4_flags(name, parent, reg, shift, flags) \ -- GitLab From 65188f07456d4a65b4a66069a701823878e098ff Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Wed, 28 Oct 2020 14:59:02 +0200 Subject: [PATCH 0053/1894] clk: imx: gate2: Add locking in is_enabled op Protect against enabling/disabling the gate while we're checking if it is enabled. Signed-off-by: Abel Vesa Reviewed-by: Sascha Hauer Signed-off-by: Shawn Guo --- drivers/clk/imx/clk-gate2.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/clk/imx/clk-gate2.c b/drivers/clk/imx/clk-gate2.c index 7e4b5e82de7fc..480a184207d56 100644 --- a/drivers/clk/imx/clk-gate2.c +++ b/drivers/clk/imx/clk-gate2.c @@ -101,9 +101,17 @@ static int clk_gate2_reg_is_enabled(void __iomem *reg, u8 bit_idx, static int clk_gate2_is_enabled(struct clk_hw *hw) { struct clk_gate2 *gate = to_clk_gate2(hw); + unsigned long flags; + int ret = 0; + + spin_lock_irqsave(gate->lock, flags); - return clk_gate2_reg_is_enabled(gate->reg, gate->bit_idx, + ret = clk_gate2_reg_is_enabled(gate->reg, gate->bit_idx, gate->cgr_val, gate->cgr_mask); + + spin_unlock_irqrestore(gate->lock, flags); + + return ret; } static void clk_gate2_disable_unused(struct clk_hw *hw) -- GitLab From 154372e67d4053e56591245eb413686621941333 Mon Sep 17 00:00:00 2001 From: Eric Van Hensbergen Date: Wed, 23 Sep 2020 22:11:43 +0800 Subject: [PATCH 0054/1894] fs/9p: fix create-unlink-getattr idiom Fixes several outstanding bug reports of not being able to getattr from an open file after an unlink. This patch cleans up transient fids on an unlink and will search open fids on a client if it detects a dentry that appears to have been unlinked. This search is necessary because fstat does not pass fd information through the VFS API to the filesystem, only the dentry which for 9p has an imperfect match to fids. Inherent in this patch is also a fix for the qid handling on create/open which apparently wasn't being set correctly and was necessary for the search to succeed. A possible optimization over this fix is to include accounting of open fids with the inode in the private data (in a similar fashion to the way we track transient fids with dentries). This would allow a much quicker search for a matching open fid. (changed v9fs_fid_find_global to v9fs_fid_find_inode in comment) Link: http://lkml.kernel.org/r/20200923141146.90046-2-jianyong.wu@arm.com Signed-off-by: Eric Van Hensbergen Signed-off-by: Greg Kurz Signed-off-by: Jianyong Wu Signed-off-by: Dominique Martinet --- fs/9p/fid.c | 30 ++++++++++++++++++++++++++++++ fs/9p/vfs_inode.c | 4 ++++ net/9p/client.c | 5 ++++- 3 files changed, 38 insertions(+), 1 deletion(-) diff --git a/fs/9p/fid.c b/fs/9p/fid.c index 3d681a2c27316..3304984c0fad7 100644 --- a/fs/9p/fid.c +++ b/fs/9p/fid.c @@ -38,6 +38,33 @@ void v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid) spin_unlock(&dentry->d_lock); } +/** + * v9fs_fid_find_inode - search for a fid off of the client list + * @inode: return a fid pointing to a specific inode + * @uid: return a fid belonging to the specified user + * + */ + +static struct p9_fid *v9fs_fid_find_inode(struct inode *inode, kuid_t uid) +{ + struct p9_client *clnt = v9fs_inode2v9ses(inode)->clnt; + struct p9_fid *fid, *fidptr, *ret = NULL; + unsigned long flags; + + p9_debug(P9_DEBUG_VFS, " inode: %p\n", inode); + + spin_lock_irqsave(&clnt->lock, flags); + list_for_each_entry_safe(fid, fidptr, &clnt->fidlist, flist) { + if (uid_eq(fid->uid, uid) && + (inode->i_ino == v9fs_qid2ino(&fid->qid))) { + ret = fid; + break; + } + } + spin_unlock_irqrestore(&clnt->lock, flags); + return ret; +} + /** * v9fs_fid_find - retrieve a fid that belongs to the specified uid * @dentry: dentry to look for fid in @@ -65,6 +92,9 @@ static struct p9_fid *v9fs_fid_find(struct dentry *dentry, kuid_t uid, int any) } } spin_unlock(&dentry->d_lock); + } else { + if (dentry->d_inode) + ret = v9fs_fid_find_inode(dentry->d_inode, uid); } return ret; diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index ae0c38ad1fcbe..31c2fddabb82a 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -570,6 +570,10 @@ static int v9fs_remove(struct inode *dir, struct dentry *dentry, int flags) v9fs_invalidate_inode_attr(inode); v9fs_invalidate_inode_attr(dir); + + /* invalidate all fids associated with dentry */ + /* NOTE: This will not include open fids */ + dentry->d_op->d_release(dentry); } return retval; } diff --git a/net/9p/client.c b/net/9p/client.c index 09f1ec589b80b..1a3f72bf45fc1 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1219,7 +1219,7 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname, if (nwname) memmove(&fid->qid, &wqids[nwqids - 1], sizeof(struct p9_qid)); else - fid->qid = oldfid->qid; + memmove(&fid->qid, &oldfid->qid, sizeof(struct p9_qid)); kfree(wqids); return fid; @@ -1272,6 +1272,7 @@ int p9_client_open(struct p9_fid *fid, int mode) p9_is_proto_dotl(clnt) ? "RLOPEN" : "ROPEN", qid.type, (unsigned long long)qid.path, qid.version, iounit); + memmove(&fid->qid, &qid, sizeof(struct p9_qid)); fid->mode = mode; fid->iounit = iounit; @@ -1317,6 +1318,7 @@ int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags, u32 (unsigned long long)qid->path, qid->version, iounit); + memmove(&ofid->qid, qid, sizeof(struct p9_qid)); ofid->mode = mode; ofid->iounit = iounit; @@ -1362,6 +1364,7 @@ int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode, (unsigned long long)qid.path, qid.version, iounit); + memmove(&fid->qid, &qid, sizeof(struct p9_qid)); fid->mode = mode; fid->iounit = iounit; -- GitLab From 987a64850996db22bbcf2c1d0a051446a343fa2c Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Wed, 23 Sep 2020 22:11:44 +0800 Subject: [PATCH 0055/1894] fs/9p: track open fids This patch adds accounting of open fids in a list hanging off the i_private field of the corresponding inode. This allows faster lookups compared to searching the full 9p client list. The lookup code is modified accordingly. Link: http://lkml.kernel.org/r/20200923141146.90046-3-jianyong.wu@arm.com Signed-off-by: Greg Kurz Signed-off-by: Jianyong Wu Signed-off-by: Dominique Martinet --- fs/9p/fid.c | 32 +++++++++++++++++++++++--------- fs/9p/fid.h | 1 + fs/9p/vfs_dir.c | 3 +++ fs/9p/vfs_file.c | 1 + fs/9p/vfs_inode.c | 6 +++++- fs/9p/vfs_inode_dotl.c | 1 + include/net/9p/client.h | 1 + 7 files changed, 35 insertions(+), 10 deletions(-) diff --git a/fs/9p/fid.c b/fs/9p/fid.c index 3304984c0fad7..d11dd430590dc 100644 --- a/fs/9p/fid.c +++ b/fs/9p/fid.c @@ -39,7 +39,7 @@ void v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid) } /** - * v9fs_fid_find_inode - search for a fid off of the client list + * v9fs_fid_find_inode - search for an open fid off of the inode list * @inode: return a fid pointing to a specific inode * @uid: return a fid belonging to the specified user * @@ -47,24 +47,38 @@ void v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid) static struct p9_fid *v9fs_fid_find_inode(struct inode *inode, kuid_t uid) { - struct p9_client *clnt = v9fs_inode2v9ses(inode)->clnt; - struct p9_fid *fid, *fidptr, *ret = NULL; - unsigned long flags; + struct hlist_head *h; + struct p9_fid *fid, *ret = NULL; p9_debug(P9_DEBUG_VFS, " inode: %p\n", inode); - spin_lock_irqsave(&clnt->lock, flags); - list_for_each_entry_safe(fid, fidptr, &clnt->fidlist, flist) { - if (uid_eq(fid->uid, uid) && - (inode->i_ino == v9fs_qid2ino(&fid->qid))) { + spin_lock(&inode->i_lock); + h = (struct hlist_head *)&inode->i_private; + hlist_for_each_entry(fid, h, ilist) { + if (uid_eq(fid->uid, uid)) { ret = fid; break; } } - spin_unlock_irqrestore(&clnt->lock, flags); + spin_unlock(&inode->i_lock); return ret; } +/** + * v9fs_open_fid_add - add an open fid to an inode + * @dentry: inode that the fid is being added to + * @fid: fid to add + * + */ + +void v9fs_open_fid_add(struct inode *inode, struct p9_fid *fid) +{ + spin_lock(&inode->i_lock); + hlist_add_head(&fid->ilist, (struct hlist_head *)&inode->i_private); + spin_unlock(&inode->i_lock); +} + + /** * v9fs_fid_find - retrieve a fid that belongs to the specified uid * @dentry: dentry to look for fid in diff --git a/fs/9p/fid.h b/fs/9p/fid.h index 928b1093f511c..dfa11df028182 100644 --- a/fs/9p/fid.h +++ b/fs/9p/fid.h @@ -15,6 +15,7 @@ static inline struct p9_fid *v9fs_parent_fid(struct dentry *dentry) } void v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid); struct p9_fid *v9fs_writeback_fid(struct dentry *dentry); +void v9fs_open_fid_add(struct inode *inode, struct p9_fid *fid); static inline struct p9_fid *clone_fid(struct p9_fid *fid) { return IS_ERR(fid) ? fid : p9_client_walk(fid, 0, NULL, 1); diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index 674d22bf4f6f7..d82d8a346f866 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c @@ -210,6 +210,9 @@ int v9fs_dir_release(struct inode *inode, struct file *filp) fid = filp->private_data; p9_debug(P9_DEBUG_VFS, "inode: %p filp: %p fid: %d\n", inode, filp, fid ? fid->fid : -1); + spin_lock(&inode->i_lock); + hlist_del(&fid->ilist); + spin_unlock(&inode->i_lock); if (fid) p9_client_clunk(fid); return 0; diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index b177fd3b1eb3b..b0ef225cecd00 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -96,6 +96,7 @@ int v9fs_file_open(struct inode *inode, struct file *file) mutex_unlock(&v9inode->v_mutex); if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) v9fs_cache_inode_set_cookie(inode, file); + v9fs_open_fid_add(inode, fid); return 0; out_error: p9_client_clunk(file->private_data); diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 31c2fddabb82a..6b243ffcbcf06 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -256,6 +256,7 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses, inode->i_rdev = rdev; inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); inode->i_mapping->a_ops = &v9fs_addr_operations; + inode->i_private = NULL; switch (mode & S_IFMT) { case S_IFIFO: @@ -796,6 +797,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, struct v9fs_session_info *v9ses; struct p9_fid *fid, *inode_fid; struct dentry *res = NULL; + struct inode *inode; if (d_in_lookup(dentry)) { res = v9fs_vfs_lookup(dir, dentry, 0); @@ -824,7 +826,8 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, } v9fs_invalidate_inode_attr(dir); - v9inode = V9FS_I(d_inode(dentry)); + inode = d_inode(dentry); + v9inode = V9FS_I(inode); mutex_lock(&v9inode->v_mutex); if ((v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) && !v9inode->writeback_fid && @@ -852,6 +855,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, file->private_data = fid; if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) v9fs_cache_inode_set_cookie(d_inode(dentry), file); + v9fs_open_fid_add(inode, fid); file->f_mode |= FMODE_CREATED; out: diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 0028eccb665a6..08f2e089fb0ec 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -342,6 +342,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, file->private_data = ofid; if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) v9fs_cache_inode_set_cookie(inode, file); + v9fs_open_fid_add(inode, ofid); file->f_mode |= FMODE_CREATED; out: v9fs_put_acl(dacl, pacl); diff --git a/include/net/9p/client.h b/include/net/9p/client.h index dd5b5bd781a49..ce7882da8e860 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -152,6 +152,7 @@ struct p9_fid { void *rdir; struct hlist_node dlist; /* list of all fids attached to a dentry */ + struct hlist_node ilist; }; /** -- GitLab From 478ba09edc1f2f2ee27180a06150cb2d1a686f9c Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Wed, 23 Sep 2020 22:11:45 +0800 Subject: [PATCH 0056/1894] fs/9p: search open fids first A previous patch fixed the "create-unlink-getattr" idiom: if getattr is called on an unlinked file, we try to find an open fid attached to the corresponding inode. We have a similar issue with file permissions and setattr: open("./test.txt", O_RDWR|O_CREAT, 0666) = 4 chmod("./test.txt", 0) = 0 truncate("./test.txt", 0) = -1 EACCES (Permission denied) ftruncate(4, 0) = -1 EACCES (Permission denied) The failure is expected with truncate() but not with ftruncate(). This happens because the lookup code does find a matching fid in the dentry list. Unfortunately, this is not an open fid and the server will be forced to rely on the path name, rather than on an open file descriptor. This is the case in QEMU: the setattr operation will use truncate() and fail because of bad write permissions. This patch changes the logic in the lookup code, so that we consider open fids first. It gives a chance to the server to match this open fid to an open file descriptor and use ftruncate() instead of truncate(). This does not change the current behaviour for truncate() and other path name based syscalls, since file permissions are checked earlier in the VFS layer. With this patch, we get: open("./test.txt", O_RDWR|O_CREAT, 0666) = 4 chmod("./test.txt", 0) = 0 truncate("./test.txt", 0) = -1 EACCES (Permission denied) ftruncate(4, 0) = 0 Link: http://lkml.kernel.org/r/20200923141146.90046-4-jianyong.wu@arm.com Signed-off-by: Greg Kurz Signed-off-by: Jianyong Wu Signed-off-by: Dominique Martinet --- fs/9p/fid.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/9p/fid.c b/fs/9p/fid.c index d11dd430590dc..0b23b0fe6c515 100644 --- a/fs/9p/fid.c +++ b/fs/9p/fid.c @@ -95,8 +95,12 @@ static struct p9_fid *v9fs_fid_find(struct dentry *dentry, kuid_t uid, int any) dentry, dentry, from_kuid(&init_user_ns, uid), any); ret = NULL; + + if (d_inode(dentry)) + ret = v9fs_fid_find_inode(d_inode(dentry), uid); + /* we'll recheck under lock if there's anything to look in */ - if (dentry->d_fsdata) { + if (!ret && dentry->d_fsdata) { struct hlist_head *h = (struct hlist_head *)&dentry->d_fsdata; spin_lock(&dentry->d_lock); hlist_for_each_entry(fid, h, dlist) { @@ -106,9 +110,6 @@ static struct p9_fid *v9fs_fid_find(struct dentry *dentry, kuid_t uid, int any) } } spin_unlock(&dentry->d_lock); - } else { - if (dentry->d_inode) - ret = v9fs_fid_find_inode(dentry->d_inode, uid); } return ret; -- GitLab From 4cb3fb1cd96f9e9a2c5095db42c2d7adbd5c5af9 Mon Sep 17 00:00:00 2001 From: Alexey Budankov Date: Mon, 19 Oct 2020 20:16:49 +0300 Subject: [PATCH 0057/1894] doc/admin-guide: Note credentials consolidation under CAP_PERFMON Add note that starting from Linux v5.9 CAP_PERFMON Linux capability is enough to conduct performance monitoring and observability using perf_events API. Signed-off-by: Alexey Budankov Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: linux-doc@vger.kernel.org Cc: linux-man@vger.kernel.org Cc: linux-security-module@vger.kernel.org Link: http://lore.kernel.org/lkml/2b1a92a1-84ce-5c70-837d-8ffe96849588@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- Documentation/admin-guide/perf-security.rst | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/Documentation/admin-guide/perf-security.rst b/Documentation/admin-guide/perf-security.rst index 1307b5274a0f9..57a65e27eeb9e 100644 --- a/Documentation/admin-guide/perf-security.rst +++ b/Documentation/admin-guide/perf-security.rst @@ -84,11 +84,14 @@ capabilities then providing the process with CAP_PERFMON capability singly is recommended as the preferred secure approach to resolve double access denial logging related to usage of performance monitoring and observability. -Unprivileged processes using perf_events system call are also subject -for PTRACE_MODE_READ_REALCREDS ptrace access mode check [7]_ , whose -outcome determines whether monitoring is permitted. So unprivileged -processes provided with CAP_SYS_PTRACE capability are effectively -permitted to pass the check. +Prior Linux v5.9 unprivileged processes using perf_events system call +are also subject for PTRACE_MODE_READ_REALCREDS ptrace access mode check +[7]_ , whose outcome determines whether monitoring is permitted. +So unprivileged processes provided with CAP_SYS_PTRACE capability are +effectively permitted to pass the check. Starting from Linux v5.9 +CAP_SYS_PTRACE capability is not required and CAP_PERFMON is enough to +be provided for processes to make performance monitoring and observability +operations. Other capabilities being granted to unprivileged processes can effectively enable capturing of additional data required for later -- GitLab From 1dd88c195d59b79f0a974618cdf723f74c192b52 Mon Sep 17 00:00:00 2001 From: Alexey Budankov Date: Mon, 19 Oct 2020 20:18:12 +0300 Subject: [PATCH 0058/1894] doc/admin-guide: Document creation of CAP_PERFMON privileged shell Document steps to create CAP_PERFMON privileged shell to unblock Perf tool usage in cases when capabilities can't be assigned to an executable due to limitations of used file system. Suggested-by: Andi Kleen Signed-off-by: Alexey Budankov Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: linux-doc@vger.kernel.org Cc: linux-man@vger.kernel.org Cc: linux-security-module@vger.kernel.org Link: http://lore.kernel.org/lkml/0abda956-de6c-95b1-61e8-49e146501079@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- Documentation/admin-guide/perf-security.rst | 68 +++++++++++++++++++-- 1 file changed, 62 insertions(+), 6 deletions(-) diff --git a/Documentation/admin-guide/perf-security.rst b/Documentation/admin-guide/perf-security.rst index 57a65e27eeb9e..904e4eb37f99c 100644 --- a/Documentation/admin-guide/perf-security.rst +++ b/Documentation/admin-guide/perf-security.rst @@ -102,11 +102,11 @@ CAP_SYSLOG capability permits reading kernel space memory addresses from Privileged Perf users groups --------------------------------- -Mechanisms of capabilities, privileged capability-dumb files [6]_ and -file system ACLs [10]_ can be used to create dedicated groups of -privileged Perf users who are permitted to execute performance monitoring -and observability without scope limits. The following steps can be -taken to create such groups of privileged Perf users. +Mechanisms of capabilities, privileged capability-dumb files [6]_, +file system ACLs [10]_ and sudo [15]_ utility can be used to create +dedicated groups of privileged Perf users who are permitted to execute +performance monitoring and observability without limits. The following +steps can be taken to create such groups of privileged Perf users. 1. Create perf_users group of privileged Perf users, assign perf_users group to Perf tool executable and limit access to the executable for @@ -136,7 +136,7 @@ taken to create such groups of privileged Perf users. # getcap perf perf = cap_sys_ptrace,cap_syslog,cap_perfmon+ep -If the libcap installed doesn't yet support "cap_perfmon", use "38" instead, +If the libcap [16]_ installed doesn't yet support "cap_perfmon", use "38" instead, i.e.: :: @@ -162,6 +162,60 @@ performance monitoring and observability by using functionality of the configured Perf tool executable that, when executes, passes perf_events subsystem scope checks. +In case Perf tool executable can't be assigned required capabilities (e.g. +file system is mounted with nosuid option or extended attributes are +not supported by the file system) then creation of the capabilities +privileged environment, naturally shell, is possible. The shell provides +inherent processes with CAP_PERFMON and other required capabilities so that +performance monitoring and observability operations are available in the +environment without limits. Access to the environment can be open via sudo +utility for members of perf_users group only. In order to create such +environment: + +1. Create shell script that uses capsh utility [16]_ to assign CAP_PERFMON + and other required capabilities into ambient capability set of the shell + process, lock the process security bits after enabling SECBIT_NO_SETUID_FIXUP, + SECBIT_NOROOT and SECBIT_NO_CAP_AMBIENT_RAISE bits and then change + the process identity to sudo caller of the script who should essentially + be a member of perf_users group: + +:: + + # ls -alh /usr/local/bin/perf.shell + -rwxr-xr-x. 1 root root 83 Oct 13 23:57 /usr/local/bin/perf.shell + # cat /usr/local/bin/perf.shell + exec /usr/sbin/capsh --iab=^cap_perfmon --secbits=239 --user=$SUDO_USER -- -l + +2. Extend sudo policy at /etc/sudoers file with a rule for perf_users group: + +:: + + # grep perf_users /etc/sudoers + %perf_users ALL=/usr/local/bin/perf.shell + +3. Check that members of perf_users group have access to the privileged + shell and have CAP_PERFMON and other required capabilities enabled + in permitted, effective and ambient capability sets of an inherent process: + +:: + + $ id + uid=1003(capsh_test) gid=1004(capsh_test) groups=1004(capsh_test),1000(perf_users) context=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 + $ sudo perf.shell + [sudo] password for capsh_test: + $ grep Cap /proc/self/status + CapInh: 0000004000000000 + CapPrm: 0000004000000000 + CapEff: 0000004000000000 + CapBnd: 000000ffffffffff + CapAmb: 0000004000000000 + $ capsh --decode=0000004000000000 + 0x0000004000000000=cap_perfmon + +As a result, members of perf_users group have access to the privileged +environment where they can use tools employing performance monitoring APIs +governed by CAP_PERFMON Linux capability. + This specific access control management is only available to superuser or root running processes with CAP_SETPCAP, CAP_SETFCAP [6]_ capabilities. @@ -267,3 +321,5 @@ Bibliography .. [12] ``_ .. [13] ``_ .. [14] ``_ +.. [15] ``_ +.. [16] ``_ -- GitLab From a701d28e2d997705ae4376753af6e35b20029cef Mon Sep 17 00:00:00 2001 From: Dengcheng Zhu Date: Mon, 19 Oct 2020 15:21:24 +0800 Subject: [PATCH 0059/1894] perf annotate mips: Add perf arch instructions annotate handlers Support the MIPS architecture using the ins_ops association method. With this patch, perf-annotate can work well on MIPS. Testing it with a perf.data file collected on a mips machine: $./perf annotate -i perf.data : Disassembly of section .text: : : 00000000000be6a0 : : get_next_seq(): 0.00 : be6a0: lw v0,0(a0) 0.00 : be6a4: daddiu sp,sp,-128 0.00 : be6a8: ld a7,72(a0) 0.00 : be6ac: gssq s5,s4,80(sp) 0.00 : be6b0: gssq s1,s0,48(sp) 0.00 : be6b4: gssq s8,gp,112(sp) 0.00 : be6b8: gssq s7,s6,96(sp) 0.00 : be6bc: gssq s3,s2,64(sp) 0.00 : be6c0: sd a3,0(sp) 0.00 : be6c4: move s0,a0 0.00 : be6c8: sd v0,32(sp) 0.00 : be6cc: sd a5,8(sp) 0.00 : be6d0: sd zero,8(a0) 0.00 : be6d4: sd a6,16(sp) 0.00 : be6d8: ld s2,48(a0) 8.53 : be6dc: ld s1,40(a0) 9.42 : be6e0: ld v1,32(a0) 0.00 : be6e4: nop 0.00 : be6e8: ld s4,24(a0) 0.00 : be6ec: ld s5,16(a0) 0.00 : be6f0: sd a7,40(sp) 10.11 : be6f4: ld s6,64(a0) ... The original patch link: https://lore.kernel.org/patchwork/patch/1180480/ Signed-off-by: Dengcheng Zhu Cc: Dengcheng Zhu Cc: Jiaxun Yang Cc: Peter Zijlstra Cc: Xuefeng Li Cc: linux-mips@vger.kernel.org [ fanpeng@loongson.cn: Add missing "bgtzl", "bltzl", "bgezl", "blezl", "beql" and "bnel" for pre-R6processors ] Signed-off-by: Peng Fan Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/mips/Build | 2 +- tools/perf/arch/mips/annotate/instructions.c | 46 ++++++++++++++++++++ tools/perf/util/annotate.c | 8 ++++ 3 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 tools/perf/arch/mips/annotate/instructions.c diff --git a/tools/perf/arch/mips/Build b/tools/perf/arch/mips/Build index 1bb8bf6d7fd4c..e4e5f33c84d86 100644 --- a/tools/perf/arch/mips/Build +++ b/tools/perf/arch/mips/Build @@ -1 +1 @@ -# empty +perf-y += util/ diff --git a/tools/perf/arch/mips/annotate/instructions.c b/tools/perf/arch/mips/annotate/instructions.c new file mode 100644 index 0000000000000..340993f2a897a --- /dev/null +++ b/tools/perf/arch/mips/annotate/instructions.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 + +static +struct ins_ops *mips__associate_ins_ops(struct arch *arch, const char *name) +{ + struct ins_ops *ops = NULL; + + if (!strncmp(name, "bal", 3) || + !strncmp(name, "bgezal", 6) || + !strncmp(name, "bltzal", 6) || + !strncmp(name, "bgtzal", 6) || + !strncmp(name, "blezal", 6) || + !strncmp(name, "beqzal", 6) || + !strncmp(name, "bnezal", 6) || + !strncmp(name, "bgtzl", 5) || + !strncmp(name, "bltzl", 5) || + !strncmp(name, "bgezl", 5) || + !strncmp(name, "blezl", 5) || + !strncmp(name, "jialc", 5) || + !strncmp(name, "beql", 4) || + !strncmp(name, "bnel", 4) || + !strncmp(name, "jal", 3)) + ops = &call_ops; + else if (!strncmp(name, "jr", 2)) + ops = &ret_ops; + else if (name[0] == 'j' || name[0] == 'b') + ops = &jump_ops; + else + return NULL; + + arch__associate_ins_ops(arch, name, ops); + + return ops; +} + +static +int mips__annotate_init(struct arch *arch, char *cpuid __maybe_unused) +{ + if (!arch->initialized) { + arch->associate_instruction_ops = mips__associate_ins_ops; + arch->initialized = true; + arch->objdump.comment_char = '#'; + } + + return 0; +} diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 6c8575e182ed1..e52053a6ad425 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -152,6 +152,7 @@ static int arch__associate_ins_ops(struct arch* arch, const char *name, struct i #include "arch/arm/annotate/instructions.c" #include "arch/arm64/annotate/instructions.c" #include "arch/csky/annotate/instructions.c" +#include "arch/mips/annotate/instructions.c" #include "arch/x86/annotate/instructions.c" #include "arch/powerpc/annotate/instructions.c" #include "arch/s390/annotate/instructions.c" @@ -174,6 +175,13 @@ static struct arch architectures[] = { .name = "csky", .init = csky__annotate_init, }, + { + .name = "mips", + .init = mips__annotate_init, + .objdump = { + .comment_char = '#', + }, + }, { .name = "x86", .init = x86__annotate_init, -- GitLab From a7c77c4f52c80fffc53b4c616a95f96d57170933 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 19 Oct 2020 16:25:45 -0700 Subject: [PATCH 0060/1894] perf version: Add a feature for libpfm4 If perf is built with libpfm4 (LIBPFM4=1) then advertise it in perf -vv. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201019232545.4047264-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-version.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/builtin-version.c b/tools/perf/builtin-version.c index d09ec2f030719..9cd074a3d825d 100644 --- a/tools/perf/builtin-version.c +++ b/tools/perf/builtin-version.c @@ -80,6 +80,7 @@ static void library_status(void) STATUS(HAVE_LIBBPF_SUPPORT, bpf); STATUS(HAVE_AIO_SUPPORT, aio); STATUS(HAVE_ZSTD_SUPPORT, zstd); + STATUS(HAVE_LIBPFM, libpfm4); } int cmd_version(int argc, const char **argv) -- GitLab From 0ee281e1e4e12f8c09b99f80a2482a55cd7d6bca Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Mon, 19 Oct 2020 08:36:13 +0800 Subject: [PATCH 0061/1894] perf mem2node: Improve warning if detected no memory nodes Some archs (e.g. x86 and Arm64) don't enable the configuration CONFIG_MEMORY_HOTPLUG by default, if this configuration is not enabled when build the kernel image, the SysFS for memory nodes will be missed. This results in perf tool has no chance to catpure the memory nodes information, when perf tool reports the result and detects no memory nodes, it outputs "assertion failed at util/mem2node.c:99". The output log doesn't give out reason for the failure and users have no clue for how to fix it. This patch changes to use explicit way for warning: it tells user that detected no memory nodes and suggests to enable CONFIG_MEMORY_HOTPLUG for kernel building. Signed-off-by: Leo Yan Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201019003613.8399-1-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/mem2node.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/mem2node.c b/tools/perf/util/mem2node.c index c84f5841c7abd..03a7d7b277377 100644 --- a/tools/perf/util/mem2node.c +++ b/tools/perf/util/mem2node.c @@ -96,7 +96,8 @@ int mem2node__init(struct mem2node *map, struct perf_env *env) /* Cut unused entries, due to merging. */ tmp_entries = realloc(entries, sizeof(*entries) * j); - if (tmp_entries || WARN_ON_ONCE(j == 0)) + if (tmp_entries || + WARN_ONCE(j == 0, "No memory nodes, is CONFIG_MEMORY_HOTPLUG enabled?\n")) entries = tmp_entries; for (i = 0; i < j; i++) { -- GitLab From 3989bbf9607d6716900d9df91c46a2ce8a504b93 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Mon, 19 Oct 2020 18:02:35 +0800 Subject: [PATCH 0062/1894] perf tests tsc: Make tsc testing as a common testing x86 arch provides the testing for conversion between tsc and perf time, the testing is located in x86 arch folder. Move this testing out from x86 arch folder and place it into the common testing folder, so allows to execute tsc testing on other architectures (e.g. Arm64). This patch removes the inclusion of "arch-tests.h" from the testing code, this can avoid building failure if any arch has no this header file. Committer testing: $ perf test -v tsc Couldn't bump rlimit(MEMLOCK), failures may take place when creating BPF maps, etc 70: Convert perf time to TSC : --- start --- test child forked, pid 4032834 mmap size 528384B 1st event perf time 165409788843605 tsc 336578703793868 rdtsc time 165409788854986 tsc 336578703837038 2nd event perf time 165409788855487 tsc 336578703838935 test child finished with 0 ---- end ---- Convert perf time to TSC: Ok $ Signed-off-by: Leo Yan Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Link: https://lore.kernel.org/r/20201019100236.23675-2-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/include/arch-tests.h | 1 - tools/perf/arch/x86/tests/Build | 1 - tools/perf/arch/x86/tests/arch-tests.c | 4 ---- tools/perf/tests/Build | 1 + tools/perf/tests/builtin-test.c | 4 ++++ tools/perf/{arch/x86 => }/tests/perf-time-to-tsc.c | 6 ++---- tools/perf/tests/tests.h | 1 + 7 files changed, 8 insertions(+), 10 deletions(-) rename tools/perf/{arch/x86 => }/tests/perf-time-to-tsc.c (98%) diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h index c41c5affe4be7..6a54b94f1c25b 100644 --- a/tools/perf/arch/x86/include/arch-tests.h +++ b/tools/perf/arch/x86/include/arch-tests.h @@ -7,7 +7,6 @@ struct test; /* Tests */ int test__rdpmc(struct test *test __maybe_unused, int subtest); -int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest); int test__insn_x86(struct test *test __maybe_unused, int subtest); int test__intel_pt_pkt_decoder(struct test *test, int subtest); int test__bp_modify(struct test *test, int subtest); diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build index 2997c506550c1..36d4f248b51db 100644 --- a/tools/perf/arch/x86/tests/Build +++ b/tools/perf/arch/x86/tests/Build @@ -3,6 +3,5 @@ perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o perf-y += arch-tests.o perf-y += rdpmc.o -perf-y += perf-time-to-tsc.o perf-$(CONFIG_AUXTRACE) += insn-x86.o intel-pt-pkt-decoder-test.o perf-$(CONFIG_X86_64) += bp-modify.o diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c index 6763135aec17b..bc25d727b4e9a 100644 --- a/tools/perf/arch/x86/tests/arch-tests.c +++ b/tools/perf/arch/x86/tests/arch-tests.c @@ -8,10 +8,6 @@ struct test arch_tests[] = { .desc = "x86 rdpmc", .func = test__rdpmc, }, - { - .desc = "Convert perf time to TSC", - .func = test__perf_time_to_tsc, - }, #ifdef HAVE_DWARF_UNWIND_SUPPORT { .desc = "DWARF unwind", diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 4d15bf6041fb9..aa4dc4f5abde0 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -62,6 +62,7 @@ perf-y += pfm.o perf-y += parse-metric.o perf-y += pe-file-parsing.o perf-y += expand-cgroup.o +perf-y += perf-time-to-tsc.o $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build $(call rule_mkdir) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 132bdb3e6c31a..02e7bbf70419a 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -349,6 +349,10 @@ static struct test generic_tests[] = { .desc = "Event expansion for cgroups", .func = test__expand_cgroup_events, }, + { + .desc = "Convert perf time to TSC", + .func = test__perf_time_to_tsc, + }, { .func = NULL, }, diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c similarity index 98% rename from tools/perf/arch/x86/tests/perf-time-to-tsc.c rename to tools/perf/tests/perf-time-to-tsc.c index 026d32ed078e6..aee97c16c0d9b 100644 --- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c +++ b/tools/perf/tests/perf-time-to-tsc.c @@ -18,10 +18,8 @@ #include "thread_map.h" #include "record.h" #include "tsc.h" -#include "util/mmap.h" -#include "tests/tests.h" - -#include "arch-tests.h" +#include "mmap.h" +#include "tests.h" #define CHECK__(x) { \ while ((x) < 0) { \ diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index c85a2c08e4072..c9b180e640e5f 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -124,6 +124,7 @@ int test__pfm_subtest_get_nr(void); int test__parse_metric(struct test *test, int subtest); int test__pe_file_parsing(struct test *test, int subtest); int test__expand_cgroup_events(struct test *test, int subtest); +int test__perf_time_to_tsc(struct test *test, int subtest); bool test__bp_signal_is_supported(void); bool test__bp_account_is_supported(void); -- GitLab From 248dd9b591db5bc5fb46a0e015753cfcfe60a345 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Mon, 19 Oct 2020 18:02:36 +0800 Subject: [PATCH 0063/1894] perf tests tsc: Add checking helper is_supported() So far tsc is enabled on x86_64, i386 and Arm64 architectures, add checking helper to skip this testing for other architectures. Signed-off-by: Leo Yan Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Link: https://lore.kernel.org/r/20201019100236.23675-3-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/builtin-test.c | 1 + tools/perf/tests/perf-time-to-tsc.c | 13 +++++++++++++ tools/perf/tests/tests.h | 1 + 3 files changed, 15 insertions(+) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 02e7bbf70419a..a185904c47f3b 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -352,6 +352,7 @@ static struct test generic_tests[] = { { .desc = "Convert perf time to TSC", .func = test__perf_time_to_tsc, + .is_supported = test__tsc_is_supported, }, { .func = NULL, diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c index aee97c16c0d9b..a9560e0f63609 100644 --- a/tools/perf/tests/perf-time-to-tsc.c +++ b/tools/perf/tests/perf-time-to-tsc.c @@ -169,3 +169,16 @@ out_err: evlist__delete(evlist); return err; } + +bool test__tsc_is_supported(void) +{ + /* + * Except x86_64/i386 and Arm64, other archs don't support TSC in perf. + * Just enable the test for x86_64/i386 and Arm64 archs. + */ +#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) + return true; +#else + return false; +#endif +} diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index c9b180e640e5f..b1f2aac93b33d 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -129,6 +129,7 @@ int test__perf_time_to_tsc(struct test *test, int subtest); bool test__bp_signal_is_supported(void); bool test__bp_account_is_supported(void); bool test__wp_is_supported(void); +bool test__tsc_is_supported(void); #if defined(__arm__) || defined(__aarch64__) #ifdef HAVE_DWARF_UNWIND_SUPPORT -- GitLab From cc3b964d5eb49d0c9da08760f8760bb6945f1df5 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Fri, 16 Oct 2020 16:16:50 +0300 Subject: [PATCH 0064/1894] perf test: Implement skip_reason callback for watchpoint tests Currently reason for skipping the read only watchpoint test is only seen when running in verbose mode: $ perf test watchpoint 23: Watchpoint : 23.1: Read Only Watchpoint : Skip 23.2: Write Only Watchpoint : Ok 23.3: Read / Write Watchpoint : Ok 23.4: Modify Watchpoint : Ok $ perf test -v watchpoint 23: Watchpoint : 23.1: Read Only Watchpoint : --- start --- test child forked, pid 60204 Hardware does not support read only watchpoints. test child finished with -2 Implement skip_reason callback for the watchpoint tests, so that it's easy to see reason why the test is skipped: $ perf test watchpoint 23: Watchpoint : 23.1: Read Only Watchpoint : Skip (missing hardware support) 23.2: Write Only Watchpoint : Ok 23.3: Read / Write Watchpoint : Ok 23.4: Modify Watchpoint : Ok Signed-off-by: Tommi Rantala Tested-by: Arnaldo Carvalho de Melo Acked-by: Namhyung Kim Link: https://lore.kernel.org/r/20201016131650.72476-1-tommi.t.rantala@nokia.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/builtin-test.c | 1 + tools/perf/tests/tests.h | 1 + tools/perf/tests/wp.c | 21 +++++++++++++++------ 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index a185904c47f3b..7273823d0d025 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -142,6 +142,7 @@ static struct test generic_tests[] = { .skip_if_fail = false, .get_nr = test__wp_subtest_get_nr, .get_desc = test__wp_subtest_get_desc, + .skip_reason = test__wp_subtest_skip_reason, }, }, { diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index b1f2aac93b33d..8e24a61fe4c28 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -66,6 +66,7 @@ int test__bp_signal_overflow(struct test *test, int subtest); int test__bp_accounting(struct test *test, int subtest); int test__wp(struct test *test, int subtest); const char *test__wp_subtest_get_desc(int subtest); +const char *test__wp_subtest_skip_reason(int subtest); int test__wp_subtest_get_nr(void); int test__task_exit(struct test *test, int subtest); int test__mem(struct test *test, int subtest); diff --git a/tools/perf/tests/wp.c b/tools/perf/tests/wp.c index d262d6639829d..9387fa76faa50 100644 --- a/tools/perf/tests/wp.c +++ b/tools/perf/tests/wp.c @@ -174,10 +174,12 @@ static bool wp_ro_supported(void) #endif } -static void wp_ro_skip_msg(void) +static const char *wp_ro_skip_msg(void) { #if defined (__x86_64__) || defined (__i386__) - pr_debug("Hardware does not support read only watchpoints.\n"); + return "missing hardware support"; +#else + return NULL; #endif } @@ -185,7 +187,7 @@ static struct { const char *desc; int (*target_func)(void); bool (*is_supported)(void); - void (*skip_msg)(void); + const char *(*skip_msg)(void); } wp_testcase_table[] = { { .desc = "Read Only Watchpoint", @@ -219,16 +221,23 @@ const char *test__wp_subtest_get_desc(int i) return wp_testcase_table[i].desc; } +const char *test__wp_subtest_skip_reason(int i) +{ + if (i < 0 || i >= (int)ARRAY_SIZE(wp_testcase_table)) + return NULL; + if (!wp_testcase_table[i].skip_msg) + return NULL; + return wp_testcase_table[i].skip_msg(); +} + int test__wp(struct test *test __maybe_unused, int i) { if (i < 0 || i >= (int)ARRAY_SIZE(wp_testcase_table)) return TEST_FAIL; if (wp_testcase_table[i].is_supported && - !wp_testcase_table[i].is_supported()) { - wp_testcase_table[i].skip_msg(); + !wp_testcase_table[i].is_supported()) return TEST_SKIP; - } return !wp_testcase_table[i].target_func() ? TEST_OK : TEST_FAIL; } -- GitLab From c18cf78d7969db89934587fa476220eefe7bd4bd Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 20 Oct 2020 14:12:37 -0300 Subject: [PATCH 0065/1894] perf bpf: Enclose libbpf.h include within HAVE_LIBBPF_SUPPORT As it uses the 'deprecated' attribute in a way that breaks the build with old gcc compilers, so to continue being able to build in such systems where NO_LIBBPF=1 is being used, enclose it under HAVE_LIBBPF_SUPPORT. 1 centos:6 : FAIL gcc (GCC) 4.4.7 20120313 (Red Hat 4.4.7-23) 2 oraclelinux:6 : FAIL gcc (GCC) 4.4.7 20120313 (Red Hat 4.4.7-23.0.1) CC /tmp/build/perf/builtin-record.o In file included from util/bpf-loader.h:11, from builtin-record.c:39: /git/linux/tools/lib/bpf/libbpf.h:203: error: wrong number of arguments specified for 'deprecated' attribute Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf-loader.h | 3 +++ tools/perf/util/parse-events.c | 25 +++++++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h index 25251d63164ce..5d1c725cea295 100644 --- a/tools/perf/util/bpf-loader.h +++ b/tools/perf/util/bpf-loader.h @@ -8,6 +8,8 @@ #include #include + +#ifdef HAVE_LIBBPF_SUPPORT #include enum bpf_loader_errno { @@ -38,6 +40,7 @@ enum bpf_loader_errno { BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG, /* Index too large */ __BPF_LOADER_ERRNO__END, }; +#endif // HAVE_LIBBPF_SUPPORT struct evsel; struct evlist; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 3b273580fb840..3b581d7b32131 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -668,6 +668,7 @@ static int add_tracepoint_multi_sys(struct list_head *list, int *idx, return ret; } +#ifdef HAVE_LIBBPF_SUPPORT struct __add_bpf_event_param { struct parse_events_state *parse_state; struct list_head *list; @@ -900,6 +901,30 @@ int parse_events_load_bpf(struct parse_events_state *parse_state, list_splice_tail(&obj_head_config, head_config); return err; } +#else // HAVE_LIBBPF_SUPPORT +int parse_events_load_bpf_obj(struct parse_events_state *parse_state, + struct list_head *list __maybe_unused, + struct bpf_object *obj __maybe_unused, + struct list_head *head_config __maybe_unused) +{ + parse_events__handle_error(parse_state->error, 0, + strdup("BPF support is not compiled"), + strdup("Make sure libbpf-devel is available at build time.")); + return -ENOTSUP; +} + +int parse_events_load_bpf(struct parse_events_state *parse_state, + struct list_head *list __maybe_unused, + char *bpf_file_name __maybe_unused, + bool source __maybe_unused, + struct list_head *head_config __maybe_unused) +{ + parse_events__handle_error(parse_state->error, 0, + strdup("BPF support is not compiled"), + strdup("Make sure libbpf-devel is available at build time.")); + return -ENOTSUP; +} +#endif // HAVE_LIBBPF_SUPPORT static int parse_breakpoint_type(const char *type, struct perf_event_attr *attr) -- GitLab From 38219f24116ace9b0e604f2ced9c7dbef3041058 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 20 Oct 2020 15:00:51 -0300 Subject: [PATCH 0066/1894] perf tests: Skip the llvm and bpf tests if HAVE_LIBBPF_SUPPORT isn't defined If either NO_LIBBPF=1 is passed, explicitely disabling it or if libbpf is not available due to some missing dependency, skip its tests, telling the user the feature isn't available. # perf test 40: LLVM search and compile : Skip (not compiled in) 41: Session topology : Ok 42: BPF filter : Skip (not compiled in) Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/bpf.c | 4 ++-- tools/perf/tests/llvm.c | 30 +++++++++++++++++++----------- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index cd77e334e5777..d880c588a951f 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -9,12 +9,10 @@ #include #include #include -#include #include #include #include #include -#include #include #include "tests.h" #include "llvm.h" @@ -25,6 +23,8 @@ #define PERF_TEST_BPF_PATH "/sys/fs/bpf/perf_test" #ifdef HAVE_LIBBPF_SUPPORT +#include +#include static int epoll_pwait_loop(void) { diff --git a/tools/perf/tests/llvm.c b/tools/perf/tests/llvm.c index ae6cda81c2093..98da8a8757abc 100644 --- a/tools/perf/tests/llvm.c +++ b/tools/perf/tests/llvm.c @@ -2,13 +2,13 @@ #include #include #include -#include -#include -#include "llvm.h" #include "tests.h" #include "debug.h" #ifdef HAVE_LIBBPF_SUPPORT +#include +#include +#include "llvm.h" static int test__bpf_parsing(void *obj_buf, size_t obj_buf_sz) { struct bpf_object *obj; @@ -19,14 +19,6 @@ static int test__bpf_parsing(void *obj_buf, size_t obj_buf_sz) bpf_object__close(obj); return TEST_OK; } -#else -static int test__bpf_parsing(void *obj_buf __maybe_unused, - size_t obj_buf_sz __maybe_unused) -{ - pr_debug("Skip bpf parsing\n"); - return TEST_OK; -} -#endif static struct { const char *source; @@ -170,3 +162,19 @@ const char *test__llvm_subtest_get_desc(int subtest) return bpf_source_table[subtest].desc; } +#else //HAVE_LIBBPF_SUPPORT +int test__llvm(struct test *test __maybe_unused, int subtest __maybe_unused) +{ + return TEST_SKIP; +} + +int test__llvm_subtest_get_nr(void) +{ + return 0; +} + +const char *test__llvm_subtest_get_desc(int subtest __maybe_unused) +{ + return NULL; +} +#endif // HAVE_LIBBPF_SUPPORT -- GitLab From 20e88c6076fc50ebf0560e730349000ff2da94fd Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 20 Oct 2020 15:48:23 -0300 Subject: [PATCH 0067/1894] perf annotate: Move bpf header inclusion to inside HAVE_LIBBPF_SUPPORT No need to include it otherwise. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index e52053a6ad425..ce8c07bc8c562 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -10,10 +10,6 @@ #include #include #include -#include -#include -#include -#include #include "util.h" // hex_width() #include "ui/ui.h" #include "sort.h" @@ -1684,6 +1680,10 @@ fallback: #define PACKAGE "perf" #include #include +#include +#include +#include +#include static int symbol__disassemble_bpf(struct symbol *sym, struct annotate_args *args) -- GitLab From ef0580ecd8b0306acf09b7a7508d72cafc67896d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 20 Oct 2020 15:57:21 -0300 Subject: [PATCH 0068/1894] perf env: Conditionally compile BPF support code on having HAVE_LIBBPF_SUPPORT If libbpf isn't selected, no need for a bunch of related code, that were not even being used, as code using these perf_env methods was also enclosed in HAVE_LIBBPF_SUPPORT. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dso.c | 14 ++++++++++---- tools/perf/util/env.c | 15 ++++++++++++--- tools/perf/util/env.h | 4 ++-- tools/perf/util/header.c | 21 ++++++++------------- 4 files changed, 32 insertions(+), 22 deletions(-) diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 55c11e854fe46..89b5fd2b5de3b 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -11,8 +11,10 @@ #include #include #include +#ifdef HAVE_LIBBPF_SUPPORT #include #include "bpf-event.h" +#endif #include "compress.h" #include "env.h" #include "namespaces.h" @@ -728,6 +730,7 @@ bool dso__data_status_seen(struct dso *dso, enum dso_data_status_seen by) return false; } +#ifdef HAVE_LIBBPF_SUPPORT static ssize_t bpf_read(struct dso *dso, u64 offset, char *data) { struct bpf_prog_info_node *node; @@ -765,6 +768,7 @@ static int bpf_size(struct dso *dso) dso->data.file_size = node->info_linear->info.jited_prog_len; return 0; } +#endif // HAVE_LIBBPF_SUPPORT static void dso_cache__free(struct dso *dso) @@ -894,10 +898,12 @@ static struct dso_cache *dso_cache__populate(struct dso *dso, *ret = -ENOMEM; return NULL; } - +#ifdef HAVE_LIBBPF_SUPPORT if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO) *ret = bpf_read(dso, cache_offset, cache->data); - else if (dso->binary_type == DSO_BINARY_TYPE__OOL) + else +#endif + if (dso->binary_type == DSO_BINARY_TYPE__OOL) *ret = DSO__DATA_CACHE_SIZE; else *ret = file_read(dso, machine, cache_offset, cache->data); @@ -1018,10 +1024,10 @@ int dso__data_file_size(struct dso *dso, struct machine *machine) if (dso->data.status == DSO_DATA_STATUS_ERROR) return -1; - +#ifdef HAVE_LIBBPF_SUPPORT if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO) return bpf_size(dso); - +#endif return file_size(dso, machine); } diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index fadc59708ece0..9130f6fad8d54 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -5,16 +5,18 @@ #include "util/header.h" #include #include -#include "bpf-event.h" #include "cgroup.h" #include #include -#include #include #include struct perf_env perf_env; +#ifdef HAVE_LIBBPF_SUPPORT +#include "bpf-event.h" +#include + void perf_env__insert_bpf_prog_info(struct perf_env *env, struct bpf_prog_info_node *info_node) { @@ -163,6 +165,11 @@ static void perf_env__purge_bpf(struct perf_env *env) up_write(&env->bpf_progs.lock); } +#else // HAVE_LIBBPF_SUPPORT +static void perf_env__purge_bpf(struct perf_env *env __maybe_unused) +{ +} +#endif // HAVE_LIBBPF_SUPPORT void perf_env__exit(struct perf_env *env) { @@ -197,11 +204,13 @@ void perf_env__exit(struct perf_env *env) zfree(&env->memory_nodes); } -void perf_env__init(struct perf_env *env) +void perf_env__init(struct perf_env *env __maybe_unused) { +#ifdef HAVE_LIBBPF_SUPPORT env->bpf_progs.infos = RB_ROOT; env->bpf_progs.btfs = RB_ROOT; init_rwsem(&env->bpf_progs.lock); +#endif } int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]) diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index a129726520064..ca249bf5e9844 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -77,7 +77,7 @@ struct perf_env { struct numa_node *numa_nodes; struct memory_node *memory_nodes; unsigned long long memory_bsize; - +#ifdef HAVE_LIBBPF_SUPPORT /* * bpf_info_lock protects bpf rbtrees. This is needed because the * trees are accessed by different threads in perf-top @@ -89,7 +89,7 @@ struct perf_env { struct rb_root btfs; u32 btfs_cnt; } bpf_progs; - +#endif // HAVE_LIBBPF_SUPPORT /* same reason as above (for perf-top) */ struct { struct rw_semaphore lock; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index be850e9f88520..598285a21dadd 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -19,7 +19,9 @@ #include #include #include +#ifdef HAVE_LIBBPF_SUPPORT #include +#endif #include #include "dso.h" @@ -987,13 +989,6 @@ out: up_read(&env->bpf_progs.lock); return ret; } -#else // HAVE_LIBBPF_SUPPORT -static int write_bpf_prog_info(struct feat_fd *ff __maybe_unused, - struct evlist *evlist __maybe_unused) -{ - return 0; -} -#endif // HAVE_LIBBPF_SUPPORT static int write_bpf_btf(struct feat_fd *ff, struct evlist *evlist __maybe_unused) @@ -1027,6 +1022,7 @@ out: up_read(&env->bpf_progs.lock); return ret; } +#endif // HAVE_LIBBPF_SUPPORT static int cpu_cache_level__sort(const void *a, const void *b) { @@ -1638,6 +1634,7 @@ static void print_dir_format(struct feat_fd *ff, FILE *fp) fprintf(fp, "# directory data version : %"PRIu64"\n", data->dir.version); } +#ifdef HAVE_LIBBPF_SUPPORT static void print_bpf_prog_info(struct feat_fd *ff, FILE *fp) { struct perf_env *env = &ff->ph->env; @@ -1683,6 +1680,7 @@ static void print_bpf_btf(struct feat_fd *ff, FILE *fp) up_read(&env->bpf_progs.lock); } +#endif // HAVE_LIBBPF_SUPPORT static void free_event_desc(struct evsel *events) { @@ -2938,12 +2936,6 @@ out: up_write(&env->bpf_progs.lock); return err; } -#else // HAVE_LIBBPF_SUPPORT -static int process_bpf_prog_info(struct feat_fd *ff __maybe_unused, void *data __maybe_unused) -{ - return 0; -} -#endif // HAVE_LIBBPF_SUPPORT static int process_bpf_btf(struct feat_fd *ff, void *data __maybe_unused) { @@ -2990,6 +2982,7 @@ out: free(node); return err; } +#endif // HAVE_LIBBPF_SUPPORT static int process_compressed(struct feat_fd *ff, void *data __maybe_unused) @@ -3120,8 +3113,10 @@ const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = { FEAT_OPR(MEM_TOPOLOGY, mem_topology, true), FEAT_OPR(CLOCKID, clockid, false), FEAT_OPN(DIR_FORMAT, dir_format, false), +#ifdef HAVE_LIBBPF_SUPPORT FEAT_OPR(BPF_PROG_INFO, bpf_prog_info, false), FEAT_OPR(BPF_BTF, bpf_btf, false), +#endif FEAT_OPR(COMPRESSED, compressed, false), FEAT_OPR(CPU_PMU_CAPS, cpu_pmu_caps, false), FEAT_OPR(CLOCK_DATA, clock_data, false), -- GitLab From 1218838d68f5e9cc195685f17375be96a54832c7 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 27 Oct 2020 15:24:21 +0900 Subject: [PATCH 0069/1894] perf kvm: Add kvm-stat for arm64 Add support for 'perf kvm stat' on arm64 platform. Example: # perf kvm stat report Analyze events for all VMs, all VCPUs: VM-EXIT Samples Samples% Time% Min Time Max Time Avg time DABT_LOW 661867 98.91% 40.45% 2.19us 3364.65us 6.24us ( +- 0.34% ) IRQ 4598 0.69% 57.44% 2.89us 3397.59us 1276.27us ( +- 1.61% ) WFx 1475 0.22% 1.71% 2.22us 3388.63us 118.31us ( +- 8.69% ) IABT_LOW 1018 0.15% 0.38% 2.22us 2742.07us 38.29us ( +- 12.55% ) SYS64 180 0.03% 0.01% 2.07us 112.91us 6.57us ( +- 14.95% ) HVC64 17 0.00% 0.01% 2.19us 322.35us 42.95us ( +- 58.98% ) Total Samples:669155, Total events handled time:10216387.86us. Signed-off-by: Sergey Senozhatsky Reviewed-by: Leo Yan Tested-by: Leo Yan Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: Suleiman Souhlal Link: http://lore.kernel.org/lkml/20201027062421.463355-1-sergey.senozhatsky@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm64/Makefile | 1 + tools/perf/arch/arm64/util/Build | 1 + .../arch/arm64/util/arm64_exception_types.h | 92 +++++++++++++++++++ tools/perf/arch/arm64/util/kvm-stat.c | 85 +++++++++++++++++ 4 files changed, 179 insertions(+) create mode 100644 tools/perf/arch/arm64/util/arm64_exception_types.h create mode 100644 tools/perf/arch/arm64/util/kvm-stat.c diff --git a/tools/perf/arch/arm64/Makefile b/tools/perf/arch/arm64/Makefile index dbef716a19135..fab3095fb5d08 100644 --- a/tools/perf/arch/arm64/Makefile +++ b/tools/perf/arch/arm64/Makefile @@ -4,6 +4,7 @@ PERF_HAVE_DWARF_REGS := 1 endif PERF_HAVE_JITDUMP := 1 PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 +HAVE_KVM_STAT_SUPPORT := 1 # # Syscall table generation for perf diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build index b53294d74b011..8d2b9bcfffcaf 100644 --- a/tools/perf/arch/arm64/util/Build +++ b/tools/perf/arch/arm64/util/Build @@ -2,6 +2,7 @@ perf-y += header.o perf-y += machine.o perf-y += perf_regs.o perf-y += tsc.o +perf-y += kvm-stat.o perf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/arm64/util/arm64_exception_types.h b/tools/perf/arch/arm64/util/arm64_exception_types.h new file mode 100644 index 0000000000000..27c981ebe4012 --- /dev/null +++ b/tools/perf/arch/arm64/util/arm64_exception_types.h @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef ARCH_PERF_ARM64_EXCEPTION_TYPES_H +#define ARCH_PERF_ARM64_EXCEPTION_TYPES_H + +/* Per asm/virt.h */ +#define HVC_STUB_ERR 0xbadca11 + +/* Per asm/kvm_asm.h */ +#define ARM_EXCEPTION_IRQ 0 +#define ARM_EXCEPTION_EL1_SERROR 1 +#define ARM_EXCEPTION_TRAP 2 +#define ARM_EXCEPTION_IL 3 +/* The hyp-stub will return this for any kvm_call_hyp() call */ +#define ARM_EXCEPTION_HYP_GONE HVC_STUB_ERR + +#define kvm_arm_exception_type \ + {ARM_EXCEPTION_IRQ, "IRQ" }, \ + {ARM_EXCEPTION_EL1_SERROR, "SERROR" }, \ + {ARM_EXCEPTION_TRAP, "TRAP" }, \ + {ARM_EXCEPTION_IL, "ILLEGAL" }, \ + {ARM_EXCEPTION_HYP_GONE, "HYP_GONE" } + +/* Per asm/esr.h */ +#define ESR_ELx_EC_UNKNOWN (0x00) +#define ESR_ELx_EC_WFx (0x01) +/* Unallocated EC: 0x02 */ +#define ESR_ELx_EC_CP15_32 (0x03) +#define ESR_ELx_EC_CP15_64 (0x04) +#define ESR_ELx_EC_CP14_MR (0x05) +#define ESR_ELx_EC_CP14_LS (0x06) +#define ESR_ELx_EC_FP_ASIMD (0x07) +#define ESR_ELx_EC_CP10_ID (0x08) /* EL2 only */ +#define ESR_ELx_EC_PAC (0x09) /* EL2 and above */ +/* Unallocated EC: 0x0A - 0x0B */ +#define ESR_ELx_EC_CP14_64 (0x0C) +/* Unallocated EC: 0x0d */ +#define ESR_ELx_EC_ILL (0x0E) +/* Unallocated EC: 0x0F - 0x10 */ +#define ESR_ELx_EC_SVC32 (0x11) +#define ESR_ELx_EC_HVC32 (0x12) /* EL2 only */ +#define ESR_ELx_EC_SMC32 (0x13) /* EL2 and above */ +/* Unallocated EC: 0x14 */ +#define ESR_ELx_EC_SVC64 (0x15) +#define ESR_ELx_EC_HVC64 (0x16) /* EL2 and above */ +#define ESR_ELx_EC_SMC64 (0x17) /* EL2 and above */ +#define ESR_ELx_EC_SYS64 (0x18) +#define ESR_ELx_EC_SVE (0x19) +#define ESR_ELx_EC_ERET (0x1a) /* EL2 only */ +/* Unallocated EC: 0x1b - 0x1E */ +#define ESR_ELx_EC_IMP_DEF (0x1f) /* EL3 only */ +#define ESR_ELx_EC_IABT_LOW (0x20) +#define ESR_ELx_EC_IABT_CUR (0x21) +#define ESR_ELx_EC_PC_ALIGN (0x22) +/* Unallocated EC: 0x23 */ +#define ESR_ELx_EC_DABT_LOW (0x24) +#define ESR_ELx_EC_DABT_CUR (0x25) +#define ESR_ELx_EC_SP_ALIGN (0x26) +/* Unallocated EC: 0x27 */ +#define ESR_ELx_EC_FP_EXC32 (0x28) +/* Unallocated EC: 0x29 - 0x2B */ +#define ESR_ELx_EC_FP_EXC64 (0x2C) +/* Unallocated EC: 0x2D - 0x2E */ +#define ESR_ELx_EC_SERROR (0x2F) +#define ESR_ELx_EC_BREAKPT_LOW (0x30) +#define ESR_ELx_EC_BREAKPT_CUR (0x31) +#define ESR_ELx_EC_SOFTSTP_LOW (0x32) +#define ESR_ELx_EC_SOFTSTP_CUR (0x33) +#define ESR_ELx_EC_WATCHPT_LOW (0x34) +#define ESR_ELx_EC_WATCHPT_CUR (0x35) +/* Unallocated EC: 0x36 - 0x37 */ +#define ESR_ELx_EC_BKPT32 (0x38) +/* Unallocated EC: 0x39 */ +#define ESR_ELx_EC_VECTOR32 (0x3A) /* EL2 only */ +/* Unallocated EC: 0x3B */ +#define ESR_ELx_EC_BRK64 (0x3C) +/* Unallocated EC: 0x3D - 0x3F */ +#define ESR_ELx_EC_MAX (0x3F) + +#define ECN(x) { ESR_ELx_EC_##x, #x } + +#define kvm_arm_exception_class \ + ECN(UNKNOWN), ECN(WFx), ECN(CP15_32), ECN(CP15_64), ECN(CP14_MR), \ + ECN(CP14_LS), ECN(FP_ASIMD), ECN(CP10_ID), ECN(PAC), ECN(CP14_64), \ + ECN(SVC64), ECN(HVC64), ECN(SMC64), ECN(SYS64), ECN(SVE), \ + ECN(IMP_DEF), ECN(IABT_LOW), ECN(IABT_CUR), \ + ECN(PC_ALIGN), ECN(DABT_LOW), ECN(DABT_CUR), \ + ECN(SP_ALIGN), ECN(FP_EXC32), ECN(FP_EXC64), ECN(SERROR), \ + ECN(BREAKPT_LOW), ECN(BREAKPT_CUR), ECN(SOFTSTP_LOW), \ + ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \ + ECN(BKPT32), ECN(VECTOR32), ECN(BRK64) + +#endif /* ARCH_PERF_ARM64_EXCEPTION_TYPES_H */ diff --git a/tools/perf/arch/arm64/util/kvm-stat.c b/tools/perf/arch/arm64/util/kvm-stat.c new file mode 100644 index 0000000000000..50376b9062c1b --- /dev/null +++ b/tools/perf/arch/arm64/util/kvm-stat.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include "../../util/evsel.h" +#include "../../util/kvm-stat.h" +#include "arm64_exception_types.h" +#include "debug.h" + +define_exit_reasons_table(arm64_exit_reasons, kvm_arm_exception_type); +define_exit_reasons_table(arm64_trap_exit_reasons, kvm_arm_exception_class); + +const char *kvm_trap_exit_reason = "esr_ec"; +const char *vcpu_id_str = "id"; +const int decode_str_len = 20; +const char *kvm_exit_reason = "ret"; +const char *kvm_entry_trace = "kvm:kvm_entry"; +const char *kvm_exit_trace = "kvm:kvm_exit"; + +const char *kvm_events_tp[] = { + "kvm:kvm_entry", + "kvm:kvm_exit", + NULL, +}; + +static void event_get_key(struct evsel *evsel, + struct perf_sample *sample, + struct event_key *key) +{ + key->info = 0; + key->key = evsel__intval(evsel, sample, kvm_exit_reason); + key->exit_reasons = arm64_exit_reasons; + + /* + * TRAP exceptions carry exception class info in esr_ec field + * and, hence, we need to use a different exit_reasons table to + * properly decode event's est_ec. + */ + if (key->key == ARM_EXCEPTION_TRAP) { + key->key = evsel__intval(evsel, sample, kvm_trap_exit_reason); + key->exit_reasons = arm64_trap_exit_reasons; + } +} + +static bool event_begin(struct evsel *evsel, + struct perf_sample *sample __maybe_unused, + struct event_key *key __maybe_unused) +{ + return !strcmp(evsel->name, kvm_entry_trace); +} + +static bool event_end(struct evsel *evsel, + struct perf_sample *sample, + struct event_key *key) +{ + if (!strcmp(evsel->name, kvm_exit_trace)) { + event_get_key(evsel, sample, key); + return true; + } + return false; +} + +static struct kvm_events_ops exit_events = { + .is_begin_event = event_begin, + .is_end_event = event_end, + .decode_key = exit_event_decode_key, + .name = "VM-EXIT" +}; + +struct kvm_reg_events_ops kvm_reg_events_ops[] = { + { + .name = "vmexit", + .ops = &exit_events, + }, + { NULL }, +}; + +const char * const kvm_skip_events[] = { + NULL, +}; + +int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid __maybe_unused) +{ + kvm->exit_reasons_isa = "arm64"; + return 0; +} -- GitLab From 9b0a7836359443227c9af101f7aea8412e739458 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 27 Oct 2020 16:28:54 +0900 Subject: [PATCH 0070/1894] perf test: Use generic event for expand_libpfm_events() I found that the UNHALTED_CORE_CYCLES event is only available in the Intel machines and it makes other vendors/archs fail on the test. As libpfm4 can parse the generic events like cycles, let's use them. Fixes: 40b74c30ffb9 ("perf test: Add expand cgroup event test") Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201027072855.655449-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/expand-cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/expand-cgroup.c b/tools/perf/tests/expand-cgroup.c index d5771e4d094f8..4c59f3ae438fc 100644 --- a/tools/perf/tests/expand-cgroup.c +++ b/tools/perf/tests/expand-cgroup.c @@ -145,7 +145,7 @@ static int expand_libpfm_events(void) int ret; struct evlist *evlist; struct rblist metric_events; - const char event_str[] = "UNHALTED_CORE_CYCLES"; + const char event_str[] = "CYCLES"; struct option opt = { .value = &evlist, }; -- GitLab From bb1c15b60b981d1065d7766ccf9de6c32beedfa3 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 27 Oct 2020 16:28:55 +0900 Subject: [PATCH 0071/1894] perf stat: Support regex pattern in --for-each-cgroup To make the command line even more compact with cgroups, support regex pattern matching in cgroup names. $ perf stat -a -e cpu-clock,cycles --for-each-cgroup ^foo sleep 1 3,000.73 msec cpu-clock foo # 2.998 CPUs utilized 12,530,992,699 cycles foo # 7.517 GHz (100.00%) 1,000.61 msec cpu-clock foo/bar # 1.000 CPUs utilized 4,178,529,579 cycles foo/bar # 2.506 GHz (100.00%) 1,000.03 msec cpu-clock foo/baz # 0.999 CPUs utilized 4,176,104,315 cycles foo/baz # 2.505 GHz (100.00%) 1.000892614 seconds time elapsed Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201027072855.655449-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-stat.txt | 5 +- tools/perf/builtin-stat.c | 5 +- tools/perf/util/cgroup.c | 198 ++++++++++++++++++++++--- 3 files changed, 182 insertions(+), 26 deletions(-) diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 9f9f29025e49a..2b44c08b3b234 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -168,8 +168,9 @@ command line can be used: 'perf stat -e cycles -G cgroup_name -a -e cycles'. --for-each-cgroup name:: Expand event list for each cgroup in "name" (allow multiple cgroups separated -by comma). This has same effect that repeating -e option and -G option for -each event x name. This option cannot be used with -G/--cgroup option. +by comma). It also support regex patterns to match multiple groups. This has same +effect that repeating -e option and -G option for each event x name. This option +cannot be used with -G/--cgroup option. -o file:: --output file:: diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index b01af171d94f5..6709578128c92 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -2235,8 +2235,11 @@ int cmd_stat(int argc, const char **argv) } if (evlist__expand_cgroup(evsel_list, stat_config.cgroup_list, - &stat_config.metric_events, true) < 0) + &stat_config.metric_events, true) < 0) { + parse_options_usage(stat_usage, stat_options, + "for-each-cgroup", 0); goto out; + } } target__validate(&target); diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index b81324a13a2b6..704333748549f 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -13,9 +13,19 @@ #include #include #include +#include +#include int nr_cgroups; +/* used to match cgroup name with patterns */ +struct cgroup_name { + struct list_head list; + bool used; + char name[]; +}; +static LIST_HEAD(cgroup_list); + static int open_cgroup(const char *name) { char path[PATH_MAX + 1]; @@ -149,6 +159,137 @@ void evlist__set_default_cgroup(struct evlist *evlist, struct cgroup *cgroup) evsel__set_default_cgroup(evsel, cgroup); } +/* helper function for ftw() in match_cgroups and list_cgroups */ +static int add_cgroup_name(const char *fpath, const struct stat *sb __maybe_unused, + int typeflag) +{ + struct cgroup_name *cn; + + if (typeflag != FTW_D) + return 0; + + cn = malloc(sizeof(*cn) + strlen(fpath) + 1); + if (cn == NULL) + return -1; + + cn->used = false; + strcpy(cn->name, fpath); + + list_add_tail(&cn->list, &cgroup_list); + return 0; +} + +static void release_cgroup_list(void) +{ + struct cgroup_name *cn; + + while (!list_empty(&cgroup_list)) { + cn = list_first_entry(&cgroup_list, struct cgroup_name, list); + list_del(&cn->list); + free(cn); + } +} + +/* collect given cgroups only */ +static int list_cgroups(const char *str) +{ + const char *p, *e, *eos = str + strlen(str); + struct cgroup_name *cn; + char *s; + + /* use given name as is - for testing purpose */ + for (;;) { + p = strchr(str, ','); + e = p ? p : eos; + + if (e - str) { + int ret; + + s = strndup(str, e - str); + if (!s) + return -1; + /* pretend if it's added by ftw() */ + ret = add_cgroup_name(s, NULL, FTW_D); + free(s); + if (ret) + return -1; + } else { + if (add_cgroup_name("", NULL, FTW_D) < 0) + return -1; + } + + if (!p) + break; + str = p+1; + } + + /* these groups will be used */ + list_for_each_entry(cn, &cgroup_list, list) + cn->used = true; + + return 0; +} + +/* collect all cgroups first and then match with the pattern */ +static int match_cgroups(const char *str) +{ + char mnt[PATH_MAX]; + const char *p, *e, *eos = str + strlen(str); + struct cgroup_name *cn; + regex_t reg; + int prefix_len; + char *s; + + if (cgroupfs_find_mountpoint(mnt, sizeof(mnt), "perf_event")) + return -1; + + /* cgroup_name will have a full path, skip the root directory */ + prefix_len = strlen(mnt); + + /* collect all cgroups in the cgroup_list */ + if (ftw(mnt, add_cgroup_name, 20) < 0) + return -1; + + for (;;) { + p = strchr(str, ','); + e = p ? p : eos; + + /* allow empty cgroups, i.e., skip */ + if (e - str) { + /* termination added */ + s = strndup(str, e - str); + if (!s) + return -1; + if (regcomp(®, s, REG_NOSUB)) { + free(s); + return -1; + } + + /* check cgroup name with the pattern */ + list_for_each_entry(cn, &cgroup_list, list) { + char *name = cn->name + prefix_len; + + if (name[0] == '/' && name[1]) + name++; + if (!regexec(®, name, 0, NULL, 0)) + cn->used = true; + } + regfree(®); + free(s); + } else { + /* first entry to root cgroup */ + cn = list_first_entry(&cgroup_list, struct cgroup_name, + list); + cn->used = true; + } + + if (!p) + break; + str = p+1; + } + return prefix_len; +} + int parse_cgroups(const struct option *opt, const char *str, int unset __maybe_unused) { @@ -201,6 +342,11 @@ int parse_cgroups(const struct option *opt, const char *str, return 0; } +static bool has_pattern_string(const char *str) +{ + return !!strpbrk(str, "{}[]()|*+?^$"); +} + int evlist__expand_cgroup(struct evlist *evlist, const char *str, struct rblist *metric_events, bool open_cgroup) { @@ -208,8 +354,9 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str, struct evsel *pos, *evsel, *leader; struct rblist orig_metric_events; struct cgroup *cgrp = NULL; - const char *p, *e, *eos = str + strlen(str); + struct cgroup_name *cn; int ret = -1; + int prefix_len; if (evlist->core.nr_entries == 0) { fprintf(stderr, "must define events before cgroups\n"); @@ -234,24 +381,27 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str, rblist__init(&orig_metric_events); } - for (;;) { - p = strchr(str, ','); - e = p ? p : eos; + if (has_pattern_string(str)) + prefix_len = match_cgroups(str); + else + prefix_len = list_cgroups(str); - /* allow empty cgroups, i.e., skip */ - if (e - str) { - /* termination added */ - char *name = strndup(str, e - str); - if (!name) - goto out_err; + if (prefix_len < 0) + goto out_err; - cgrp = cgroup__new(name, open_cgroup); - free(name); - if (cgrp == NULL) - goto out_err; - } else { - cgrp = NULL; - } + list_for_each_entry(cn, &cgroup_list, list) { + char *name; + + if (!cn->used) + continue; + + /* cgroup_name might have a full path, skip the prefix */ + name = cn->name + prefix_len; + if (name[0] == '/' && name[1]) + name++; + cgrp = cgroup__new(name, open_cgroup); + if (cgrp == NULL) + goto out_err; leader = NULL; evlist__for_each_entry(orig_list, pos) { @@ -277,23 +427,25 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str, if (metricgroup__copy_metric_events(tmp_list, cgrp, metric_events, &orig_metric_events) < 0) - break; + goto out_err; } perf_evlist__splice_list_tail(evlist, &tmp_list->core.entries); tmp_list->core.nr_entries = 0; + } - if (!p) { - ret = 0; - break; - } - str = p+1; + if (list_empty(&evlist->core.entries)) { + fprintf(stderr, "no cgroup matched: %s\n", str); + goto out_err; } + ret = 0; + out_err: evlist__delete(orig_list); evlist__delete(tmp_list); rblist__exit(&orig_metric_events); + release_cgroup_list(); return ret; } -- GitLab From 55a4de94c64bacffbcd802c954764e0de2ab217f Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 26 Oct 2020 17:27:36 -0700 Subject: [PATCH 0072/1894] perf stat: Add --quiet option Add a new --quiet option to 'perf stat'. This is useful with 'perf stat record' to write the data only to the perf.data file, which can lower measurement overhead because the data doesn't need to be formatted. On my 4C desktop: % time ./perf stat record -e $(python -c 'print ",".join(["cycles"]*1000)') -a -I 1000 sleep 5 ... real 0m5.377s user 0m0.238s sys 0m0.452s % time ./perf stat record --quiet -e $(python -c 'print ",".join(["cycles"]*1000)') -a -I 1000 sleep 5 real 0m5.452s user 0m0.183s sys 0m0.423s In this example it cuts the user time by 20%. On systems with more cores the savings are higher. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Cc: Alexey Budankov Link: http://lore.kernel.org/lkml/20201027002737.30942-1-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-stat.txt | 4 ++++ tools/perf/builtin-stat.c | 6 +++++- tools/perf/util/stat.h | 1 + 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 2b44c08b3b234..5d4a673d7621a 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -317,6 +317,10 @@ small group that need not have multiplexing is lowered. This option forbids the event merging logic from sharing events between groups and may be used to increase accuracy in this case. +--quiet:: +Don't print output. This is useful with perf stat record below to only +write data to the perf.data file. + STAT RECORD ----------- Stores stat data into perf data file. diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 6709578128c92..f15b2f8aa14d8 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -972,6 +972,8 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) /* Do not print anything if we record to the pipe. */ if (STAT_RECORD && perf_stat.data.is_pipe) return; + if (stat_config.quiet) + return; perf_evlist__print_counters(evsel_list, &stat_config, &target, ts, argc, argv); @@ -1171,6 +1173,8 @@ static struct option stat_options[] = { "threads of same physical core"), OPT_BOOLEAN(0, "summary", &stat_config.summary, "print summary for interval mode"), + OPT_BOOLEAN(0, "quiet", &stat_config.quiet, + "don't print output (useful with record)"), #ifdef HAVE_LIBPFM OPT_CALLBACK(0, "pfm-events", &evsel_list, "event", "libpfm4 event selector. use 'perf list' to list available events", @@ -2132,7 +2136,7 @@ int cmd_stat(int argc, const char **argv) goto out; } - if (!output) { + if (!output && !stat_config.quiet) { struct timespec tm; mode = append_file ? "a" : "w"; diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 487010c624be9..05adf81650259 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -122,6 +122,7 @@ struct perf_stat_config { bool metric_no_group; bool metric_no_merge; bool stop_read_counter; + bool quiet; FILE *output; unsigned int interval; unsigned int timeout; -- GitLab From c5e6bc23355a3b33ffc170f92e315102f1e6a59c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 23 Oct 2020 11:06:28 +0900 Subject: [PATCH 0073/1894] perf trace beauty: Allow header files in a different path Current script to generate mmap flags and prot checks headers from the uapi/asm-generic directory but it might come from a different directory in some environment. So change the pattern to accept it. Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201023020628.346257-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/mmap_flags.sh | 4 ++-- tools/perf/trace/beauty/mmap_prot.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/trace/beauty/mmap_flags.sh b/tools/perf/trace/beauty/mmap_flags.sh index 39eb2595983b1..76825710c725f 100755 --- a/tools/perf/trace/beauty/mmap_flags.sh +++ b/tools/perf/trace/beauty/mmap_flags.sh @@ -28,12 +28,12 @@ egrep -q $regex ${linux_mman} && \ egrep -vw 'MAP_(UNINITIALIZED|TYPE|SHARED_VALIDATE)' | \ sed -r "s/$regex/\2 \1 \1 \1 \2/g" | \ xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n#ifndef MAP_%s\n#define MAP_%s %s\n#endif\n") -([ ! -f ${arch_mman} ] || egrep -q '#[[:space:]]*include[[:space:]]+.*' ${arch_mman}) && +([ ! -f ${arch_mman} ] || egrep -q '#[[:space:]]*include[[:space:]]+.*uapi/asm-generic/mman.h>.*' ${arch_mman}) && (egrep $regex ${header_dir}/mman.h | \ sed -r "s/$regex/\2 \1 \1 \1 \2/g" | \ xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n#ifndef MAP_%s\n#define MAP_%s %s\n#endif\n") diff --git a/tools/perf/trace/beauty/mmap_prot.sh b/tools/perf/trace/beauty/mmap_prot.sh index 28f638f8d2161..664d8d534a509 100755 --- a/tools/perf/trace/beauty/mmap_prot.sh +++ b/tools/perf/trace/beauty/mmap_prot.sh @@ -17,7 +17,7 @@ prefix="PROT" printf "static const char *mmap_prot[] = {\n" regex=`printf '^[[:space:]]*#[[:space:]]*define[[:space:]]+%s_([[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*' ${prefix}` -([ ! -f ${arch_mman} ] || egrep -q '#[[:space:]]*include[[:space:]]+ Date: Thu, 22 Oct 2020 19:02:26 +0800 Subject: [PATCH 0074/1894] perf jevents: Tidy error handling There is much duplication in the error handling for directory transvering for prcessing JSONs. Factor out the common code to tidy a bit. Signed-off-by: John Garry Reviewed-By: Kajol Jain Link: https://lore.kernel.org/r/1603364547-197086-2-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/jevents.c | 83 ++++++++++++++------------------- 1 file changed, 35 insertions(+), 48 deletions(-) diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index e47644cab3fab..7326c14c46233 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -1100,12 +1100,13 @@ static int process_one_file(const char *fpath, const struct stat *sb, */ int main(int argc, char *argv[]) { - int rc, ret = 0; + int rc, ret = 0, empty_map = 0; int maxfds; char ldirname[PATH_MAX]; const char *arch; const char *output_file; const char *start_dirname; + char *err_string_ext = ""; struct stat stbuf; prog = basename(argv[0]); @@ -1133,7 +1134,8 @@ int main(int argc, char *argv[]) /* If architecture does not have any event lists, bail out */ if (stat(ldirname, &stbuf) < 0) { pr_info("%s: Arch %s has no PMU event lists\n", prog, arch); - goto empty_map; + empty_map = 1; + goto err_close_eventsfp; } /* Include pmu-events.h first */ @@ -1150,75 +1152,60 @@ int main(int argc, char *argv[]) */ maxfds = get_maxfds(); - mapfile = NULL; rc = nftw(ldirname, preprocess_arch_std_files, maxfds, 0); - if (rc && verbose) { - pr_info("%s: Error preprocessing arch standard files %s\n", - prog, ldirname); - goto empty_map; - } else if (rc < 0) { - /* Make build fail */ - fclose(eventsfp); - free_arch_std_events(); - return 1; - } else if (rc) { - goto empty_map; - } + if (rc) + goto err_processing_std_arch_event_dir; rc = nftw(ldirname, process_one_file, maxfds, 0); - if (rc && verbose) { - pr_info("%s: Error walking file tree %s\n", prog, ldirname); - goto empty_map; - } else if (rc < 0) { - /* Make build fail */ - fclose(eventsfp); - free_arch_std_events(); - ret = 1; - goto out_free_mapfile; - } else if (rc) { - goto empty_map; - } + if (rc) + goto err_processing_dir; sprintf(ldirname, "%s/test", start_dirname); rc = nftw(ldirname, process_one_file, maxfds, 0); - if (rc && verbose) { - pr_info("%s: Error walking file tree %s rc=%d for test\n", - prog, ldirname, rc); - goto empty_map; - } else if (rc < 0) { - /* Make build fail */ - free_arch_std_events(); - ret = 1; - goto out_free_mapfile; - } else if (rc) { - goto empty_map; - } + if (rc) + goto err_processing_dir; if (close_table) print_events_table_suffix(eventsfp); if (!mapfile) { pr_info("%s: No CPU->JSON mapping?\n", prog); - goto empty_map; + empty_map = 1; + goto err_close_eventsfp; } - if (process_mapfile(eventsfp, mapfile)) { + rc = process_mapfile(eventsfp, mapfile); + fclose(eventsfp); + if (rc) { pr_info("%s: Error processing mapfile %s\n", prog, mapfile); /* Make build fail */ - fclose(eventsfp); - free_arch_std_events(); ret = 1; + goto err_out; } + free_arch_std_events(); + free(mapfile); + return 0; - goto out_free_mapfile; - -empty_map: +err_processing_std_arch_event_dir: + err_string_ext = " for std arch event"; +err_processing_dir: + if (verbose) { + pr_info("%s: Error walking file tree %s%s\n", prog, ldirname, + err_string_ext); + empty_map = 1; + } else if (rc < 0) { + ret = 1; + } else { + empty_map = 1; + } +err_close_eventsfp: fclose(eventsfp); - create_empty_mapping(output_file); + if (empty_map) + create_empty_mapping(output_file); +err_out: free_arch_std_events(); -out_free_mapfile: free(mapfile); return ret; } -- GitLab From 644bf4b0f7acde641d3db200b4db66977e96c3bd Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 22 Oct 2020 19:02:27 +0800 Subject: [PATCH 0075/1894] perf jevents: Add test for arch std events Recently there was an undetected breakage for std arch event support. Add support in "PMU events" testcase to detect such breakages. For this, the "test" arch needs has support added to process std arch events. And a test event is added for the test, ifself. Also add a few code comments to help understand the code a bit better. Committer testing: Before: # perf test -vv pmu |& grep l3_cache_rd # After: # perf test -vv pmu |& grep l3_cache_rd testing event table l3_cache_rd: pass testing aliases PMU cpu: matched event l3_cache_rd # Signed-off-by: John Garry Reviewed-By: Kajol Jain Tested-by: Arnaldo Carvalho de Melo Link: https://lore.kernel.org/r/1603364547-197086-3-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/pmu-events/arch/test/arch-std-events.json | 8 ++++++++ .../perf/pmu-events/arch/test/test_cpu/cache.json | 5 +++++ tools/perf/pmu-events/jevents.c | 4 ++++ tools/perf/tests/pmu-events.c | 14 ++++++++++++++ 4 files changed, 31 insertions(+) create mode 100644 tools/perf/pmu-events/arch/test/arch-std-events.json create mode 100644 tools/perf/pmu-events/arch/test/test_cpu/cache.json diff --git a/tools/perf/pmu-events/arch/test/arch-std-events.json b/tools/perf/pmu-events/arch/test/arch-std-events.json new file mode 100644 index 0000000000000..43f6f729d6aef --- /dev/null +++ b/tools/perf/pmu-events/arch/test/arch-std-events.json @@ -0,0 +1,8 @@ +[ + { + "PublicDescription": "Attributable Level 3 cache access, read", + "EventCode": "0x40", + "EventName": "L3_CACHE_RD", + "BriefDescription": "L3 cache access, read" + } +] diff --git a/tools/perf/pmu-events/arch/test/test_cpu/cache.json b/tools/perf/pmu-events/arch/test/test_cpu/cache.json new file mode 100644 index 0000000000000..036d0efdb2bb0 --- /dev/null +++ b/tools/perf/pmu-events/arch/test/test_cpu/cache.json @@ -0,0 +1,5 @@ +[ + { + "ArchStdEvent": "L3_CACHE_RD" + } +] \ No newline at end of file diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 7326c14c46233..72cfa3b5046d3 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -1162,6 +1162,10 @@ int main(int argc, char *argv[]) sprintf(ldirname, "%s/test", start_dirname); + rc = nftw(ldirname, preprocess_arch_std_files, maxfds, 0); + if (rc) + goto err_processing_std_arch_event_dir; + rc = nftw(ldirname, process_one_file, maxfds, 0); if (rc) goto err_processing_dir; diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c index d3517a74d95e3..ad2b21591275b 100644 --- a/tools/perf/tests/pmu-events.c +++ b/tools/perf/tests/pmu-events.c @@ -14,8 +14,10 @@ #include "util/parse-events.h" struct perf_pmu_test_event { + /* used for matching against events from generated pmu-events.c */ struct pmu_event event; + /* used for matching against event aliases */ /* extra events for aliases */ const char *alias_str; @@ -78,6 +80,17 @@ static struct perf_pmu_test_event test_cpu_events[] = { .alias_str = "umask=0,(null)=0x30d40,event=0x3a", .alias_long_desc = "Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions", }, + { + .event = { + .name = "l3_cache_rd", + .event = "event=0x40", + .desc = "L3 cache access, read", + .long_desc = "Attributable Level 3 cache access, read", + .topic = "cache", + }, + .alias_str = "event=0x40", + .alias_long_desc = "Attributable Level 3 cache access, read", + }, { /* sentinel */ .event = { .name = NULL, @@ -357,6 +370,7 @@ static int __test__pmu_event_aliases(char *pmu_name, int *count) } +/* Test that aliases generated are as expected */ static int test_aliases(void) { struct perf_pmu *pmu = NULL; -- GitLab From e1ac4b2406d94eddce8ac2c5ab4235f6075a9602 Mon Sep 17 00:00:00 2001 From: Chester Lin Date: Fri, 30 Oct 2020 14:08:38 +0800 Subject: [PATCH 0076/1894] efi: generalize efi_get_secureboot Generalize the efi_get_secureboot() function so not only efistub but also other subsystems can use it. Note that the MokSbState handling is not factored out: the variable is boot time only, and so it cannot be parameterized as easily. Also, the IMA code will switch to this version in a future patch, and it does not incorporate the MokSbState exception in the first place. Note that the new efi_get_secureboot_mode() helper treats any failures to read SetupMode as setup mode being disabled. Co-developed-by: Chester Lin Signed-off-by: Chester Lin Acked-by: Mimi Zohar Signed-off-by: Ard Biesheuvel --- arch/x86/boot/compressed/Makefile | 2 +- drivers/firmware/efi/libstub/efistub.h | 2 ++ drivers/firmware/efi/libstub/secureboot.c | 41 +++++++++-------------- include/linux/efi.h | 23 ++++++++++++- 4 files changed, 40 insertions(+), 28 deletions(-) diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index ee249088cbfe2..8d358a6fe6ecd 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -35,7 +35,7 @@ cflags-$(CONFIG_X86_32) := -march=i386 cflags-$(CONFIG_X86_64) := -mcmodel=small -mno-red-zone KBUILD_CFLAGS += $(cflags-y) KBUILD_CFLAGS += -mno-mmx -mno-sse -KBUILD_CFLAGS += -ffreestanding +KBUILD_CFLAGS += -ffreestanding -fshort-wchar KBUILD_CFLAGS += -fno-stack-protector KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) KBUILD_CFLAGS += $(call cc-disable-warning, gnu) diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index 2d7abcd99de9b..b8ec29d6a74ac 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -848,4 +848,6 @@ asmlinkage void __noreturn efi_enter_kernel(unsigned long entrypoint, void efi_handle_post_ebs_state(void); +enum efi_secureboot_mode efi_get_secureboot(void); + #endif diff --git a/drivers/firmware/efi/libstub/secureboot.c b/drivers/firmware/efi/libstub/secureboot.c index 5efc524b14bef..af18d86c16041 100644 --- a/drivers/firmware/efi/libstub/secureboot.c +++ b/drivers/firmware/efi/libstub/secureboot.c @@ -12,15 +12,16 @@ #include "efistub.h" -/* BIOS variables */ -static const efi_guid_t efi_variable_guid = EFI_GLOBAL_VARIABLE_GUID; -static const efi_char16_t efi_SecureBoot_name[] = L"SecureBoot"; -static const efi_char16_t efi_SetupMode_name[] = L"SetupMode"; - /* SHIM variables */ static const efi_guid_t shim_guid = EFI_SHIM_LOCK_GUID; static const efi_char16_t shim_MokSBState_name[] = L"MokSBState"; +static efi_status_t get_var(efi_char16_t *name, efi_guid_t *vendor, u32 *attr, + unsigned long *data_size, void *data) +{ + return get_efi_var(name, vendor, attr, data_size, data); +} + /* * Determine whether we're in secure boot mode. * @@ -30,26 +31,18 @@ static const efi_char16_t shim_MokSBState_name[] = L"MokSBState"; enum efi_secureboot_mode efi_get_secureboot(void) { u32 attr; - u8 secboot, setupmode, moksbstate; unsigned long size; + enum efi_secureboot_mode mode; efi_status_t status; + u8 moksbstate; - size = sizeof(secboot); - status = get_efi_var(efi_SecureBoot_name, &efi_variable_guid, - NULL, &size, &secboot); - if (status == EFI_NOT_FOUND) - return efi_secureboot_mode_disabled; - if (status != EFI_SUCCESS) - goto out_efi_err; - - size = sizeof(setupmode); - status = get_efi_var(efi_SetupMode_name, &efi_variable_guid, - NULL, &size, &setupmode); - if (status != EFI_SUCCESS) - goto out_efi_err; - - if (secboot == 0 || setupmode == 1) - return efi_secureboot_mode_disabled; + mode = efi_get_secureboot_mode(get_var); + if (mode == efi_secureboot_mode_unknown) { + efi_err("Could not determine UEFI Secure Boot status.\n"); + return efi_secureboot_mode_unknown; + } + if (mode != efi_secureboot_mode_enabled) + return mode; /* * See if a user has put the shim into insecure mode. If so, and if the @@ -69,8 +62,4 @@ enum efi_secureboot_mode efi_get_secureboot(void) secure_boot_enabled: efi_info("UEFI Secure Boot is enabled.\n"); return efi_secureboot_mode_enabled; - -out_efi_err: - efi_err("Could not determine UEFI Secure Boot status.\n"); - return efi_secureboot_mode_unknown; } diff --git a/include/linux/efi.h b/include/linux/efi.h index d7c0e73af2b97..1cd5d91d8ca18 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1089,7 +1089,28 @@ enum efi_secureboot_mode { efi_secureboot_mode_disabled, efi_secureboot_mode_enabled, }; -enum efi_secureboot_mode efi_get_secureboot(void); + +static inline +enum efi_secureboot_mode efi_get_secureboot_mode(efi_get_variable_t *get_var) +{ + u8 secboot, setupmode = 0; + efi_status_t status; + unsigned long size; + + size = sizeof(secboot); + status = get_var(L"SecureBoot", &EFI_GLOBAL_VARIABLE_GUID, NULL, &size, + &secboot); + if (status == EFI_NOT_FOUND) + return efi_secureboot_mode_disabled; + if (status != EFI_SUCCESS) + return efi_secureboot_mode_unknown; + + size = sizeof(setupmode); + get_var(L"SetupMode", &EFI_GLOBAL_VARIABLE_GUID, NULL, &size, &setupmode); + if (secboot == 0 || setupmode == 1) + return efi_secureboot_mode_disabled; + return efi_secureboot_mode_enabled; +} #ifdef CONFIG_RESET_ATTACK_MITIGATION void efi_enable_reset_attack_mitigation(void); -- GitLab From 9d1c94a69d70f1b02bdf06b231cd16ad47ef06cd Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 23 Oct 2020 18:33:25 +0200 Subject: [PATCH 0077/1894] clk: fix a kernel-doc markup clk_get_duty_cycle -> clk_get_scaled_duty_cycle Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/b2336f3f3cdfe6e1a2d3a7a056ab7ccc7a81b945.1603469755.git.mchehab+huawei@kernel.org Signed-off-by: Stephen Boyd --- include/linux/clk.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/clk.h b/include/linux/clk.h index 7fd6a1febcf4f..5f8d5f4931c04 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -150,7 +150,7 @@ int clk_get_phase(struct clk *clk); int clk_set_duty_cycle(struct clk *clk, unsigned int num, unsigned int den); /** - * clk_get_duty_cycle - return the duty cycle ratio of a clock signal + * clk_get_scaled_duty_cycle - return the duty cycle ratio of a clock signal * @clk: clock signal source * @scale: scaling factor to be applied to represent the ratio as an integer * -- GitLab From 61a31292002b85529021a46e6288b29caf674fbe Mon Sep 17 00:00:00 2001 From: Taniya Das Date: Sat, 17 Oct 2020 00:13:32 +0530 Subject: [PATCH 0078/1894] clk: qcom: clk-alpha-pll: Add support for helper functions Introduce clk_alpha_pll_write_config and alpha_pll_check_rate_margin helper functions to be across PLL configure functions and PLL set rate functions. Signed-off-by: Taniya Das Link: https://lore.kernel.org/r/1602873815-1677-2-git-send-email-tdas@codeaurora.org Signed-off-by: Stephen Boyd --- drivers/clk/qcom/clk-alpha-pll.c | 155 +++++++++++++------------------ 1 file changed, 66 insertions(+), 89 deletions(-) diff --git a/drivers/clk/qcom/clk-alpha-pll.c b/drivers/clk/qcom/clk-alpha-pll.c index 564431130a760..f3b8b54fefd59 100644 --- a/drivers/clk/qcom/clk-alpha-pll.c +++ b/drivers/clk/qcom/clk-alpha-pll.c @@ -207,6 +207,13 @@ static int wait_for_pll(struct clk_alpha_pll *pll, u32 mask, bool inverse, #define wait_for_pll_update_ack_clear(pll) \ wait_for_pll(pll, ALPHA_PLL_ACK_LATCH, 1, "update_ack_clear") +static void clk_alpha_pll_write_config(struct regmap *regmap, unsigned int reg, + unsigned int val) +{ + if (val) + regmap_write(regmap, reg, val); +} + void clk_alpha_pll_configure(struct clk_alpha_pll *pll, struct regmap *regmap, const struct alpha_pll_config *config) { @@ -1004,33 +1011,19 @@ void clk_fabia_pll_configure(struct clk_alpha_pll *pll, struct regmap *regmap, { u32 val, mask; - if (config->l) - regmap_write(regmap, PLL_L_VAL(pll), config->l); - - if (config->alpha) - regmap_write(regmap, PLL_FRAC(pll), config->alpha); - - if (config->config_ctl_val) - regmap_write(regmap, PLL_CONFIG_CTL(pll), + clk_alpha_pll_write_config(regmap, PLL_L_VAL(pll), config->l); + clk_alpha_pll_write_config(regmap, PLL_FRAC(pll), config->alpha); + clk_alpha_pll_write_config(regmap, PLL_CONFIG_CTL(pll), config->config_ctl_val); - - if (config->config_ctl_hi_val) - regmap_write(regmap, PLL_CONFIG_CTL_U(pll), + clk_alpha_pll_write_config(regmap, PLL_CONFIG_CTL_U(pll), config->config_ctl_hi_val); - - if (config->user_ctl_val) - regmap_write(regmap, PLL_USER_CTL(pll), config->user_ctl_val); - - if (config->user_ctl_hi_val) - regmap_write(regmap, PLL_USER_CTL_U(pll), + clk_alpha_pll_write_config(regmap, PLL_USER_CTL(pll), + config->user_ctl_val); + clk_alpha_pll_write_config(regmap, PLL_USER_CTL_U(pll), config->user_ctl_hi_val); - - if (config->test_ctl_val) - regmap_write(regmap, PLL_TEST_CTL(pll), + clk_alpha_pll_write_config(regmap, PLL_TEST_CTL(pll), config->test_ctl_val); - - if (config->test_ctl_hi_val) - regmap_write(regmap, PLL_TEST_CTL_U(pll), + clk_alpha_pll_write_config(regmap, PLL_TEST_CTL_U(pll), config->test_ctl_hi_val); if (config->post_div_mask) { @@ -1145,25 +1138,38 @@ static unsigned long alpha_pll_fabia_recalc_rate(struct clk_hw *hw, return alpha_pll_calc_rate(parent_rate, l, frac, alpha_width); } +/* + * Due to limited number of bits for fractional rate programming, the + * rounded up rate could be marginally higher than the requested rate. + */ +static int alpha_pll_check_rate_margin(struct clk_hw *hw, + unsigned long rrate, unsigned long rate) +{ + unsigned long rate_margin = rate + PLL_RATE_MARGIN; + + if (rrate > rate_margin || rrate < rate) { + pr_err("%s: Rounded rate %lu not within range [%lu, %lu)\n", + clk_hw_get_name(hw), rrate, rate, rate_margin); + return -EINVAL; + } + + return 0; +} + static int alpha_pll_fabia_set_rate(struct clk_hw *hw, unsigned long rate, unsigned long prate) { struct clk_alpha_pll *pll = to_clk_alpha_pll(hw); u32 l, alpha_width = pll_alpha_width(pll); + unsigned long rrate; + int ret; u64 a; - unsigned long rrate, max = rate + PLL_RATE_MARGIN; rrate = alpha_pll_round_rate(rate, prate, &l, &a, alpha_width); - /* - * Due to limited number of bits for fractional rate programming, the - * rounded up rate could be marginally higher than the requested rate. - */ - if (rrate > (rate + PLL_RATE_MARGIN) || rrate < rate) { - pr_err("%s: Rounded rate %lu not within range [%lu, %lu)\n", - clk_hw_get_name(hw), rrate, rate, max); - return -EINVAL; - } + ret = alpha_pll_check_rate_margin(hw, rrate, rate); + if (ret < 0) + return ret; regmap_write(pll->clkr.regmap, PLL_L_VAL(pll), l); regmap_write(pll->clkr.regmap, PLL_FRAC(pll), a); @@ -1206,12 +1212,10 @@ static int alpha_pll_fabia_prepare(struct clk_hw *hw) rrate = alpha_pll_round_rate(cal_freq, clk_hw_get_rate(parent_hw), &cal_l, &a, alpha_width); - /* - * Due to a limited number of bits for fractional rate programming, the - * rounded up rate could be marginally higher than the requested rate. - */ - if (rrate > (cal_freq + PLL_RATE_MARGIN) || rrate < cal_freq) - return -EINVAL; + + ret = alpha_pll_check_rate_margin(hw, rrate, cal_freq); + if (ret < 0) + return ret; /* Setup PLL for calibration frequency */ regmap_write(pll->clkr.regmap, PLL_ALPHA_VAL(pll), cal_l); @@ -1388,49 +1392,27 @@ EXPORT_SYMBOL_GPL(clk_alpha_pll_postdiv_fabia_ops); void clk_trion_pll_configure(struct clk_alpha_pll *pll, struct regmap *regmap, const struct alpha_pll_config *config) { - if (config->l) - regmap_write(regmap, PLL_L_VAL(pll), config->l); - + clk_alpha_pll_write_config(regmap, PLL_L_VAL(pll), config->l); regmap_write(regmap, PLL_CAL_L_VAL(pll), TRION_PLL_CAL_VAL); - - if (config->alpha) - regmap_write(regmap, PLL_ALPHA_VAL(pll), config->alpha); - - if (config->config_ctl_val) - regmap_write(regmap, PLL_CONFIG_CTL(pll), - config->config_ctl_val); - - if (config->config_ctl_hi_val) - regmap_write(regmap, PLL_CONFIG_CTL_U(pll), - config->config_ctl_hi_val); - - if (config->config_ctl_hi1_val) - regmap_write(regmap, PLL_CONFIG_CTL_U1(pll), - config->config_ctl_hi1_val); - - if (config->user_ctl_val) - regmap_write(regmap, PLL_USER_CTL(pll), - config->user_ctl_val); - - if (config->user_ctl_hi_val) - regmap_write(regmap, PLL_USER_CTL_U(pll), - config->user_ctl_hi_val); - - if (config->user_ctl_hi1_val) - regmap_write(regmap, PLL_USER_CTL_U1(pll), - config->user_ctl_hi1_val); - - if (config->test_ctl_val) - regmap_write(regmap, PLL_TEST_CTL(pll), - config->test_ctl_val); - - if (config->test_ctl_hi_val) - regmap_write(regmap, PLL_TEST_CTL_U(pll), - config->test_ctl_hi_val); - - if (config->test_ctl_hi1_val) - regmap_write(regmap, PLL_TEST_CTL_U1(pll), - config->test_ctl_hi1_val); + clk_alpha_pll_write_config(regmap, PLL_ALPHA_VAL(pll), config->alpha); + clk_alpha_pll_write_config(regmap, PLL_CONFIG_CTL(pll), + config->config_ctl_val); + clk_alpha_pll_write_config(regmap, PLL_CONFIG_CTL_U(pll), + config->config_ctl_hi_val); + clk_alpha_pll_write_config(regmap, PLL_CONFIG_CTL_U1(pll), + config->config_ctl_hi1_val); + clk_alpha_pll_write_config(regmap, PLL_USER_CTL(pll), + config->user_ctl_val); + clk_alpha_pll_write_config(regmap, PLL_USER_CTL_U(pll), + config->user_ctl_hi_val); + clk_alpha_pll_write_config(regmap, PLL_USER_CTL_U1(pll), + config->user_ctl_hi1_val); + clk_alpha_pll_write_config(regmap, PLL_TEST_CTL(pll), + config->test_ctl_val); + clk_alpha_pll_write_config(regmap, PLL_TEST_CTL_U(pll), + config->test_ctl_hi_val); + clk_alpha_pll_write_config(regmap, PLL_TEST_CTL_U1(pll), + config->test_ctl_hi1_val); regmap_update_bits(regmap, PLL_MODE(pll), PLL_UPDATE_BYPASS, PLL_UPDATE_BYPASS); @@ -1490,14 +1472,9 @@ static int alpha_pll_trion_set_rate(struct clk_hw *hw, unsigned long rate, rrate = alpha_pll_round_rate(rate, prate, &l, &a, alpha_width); - /* - * Due to a limited number of bits for fractional rate programming, the - * rounded up rate could be marginally higher than the requested rate. - */ - if (rrate > (rate + PLL_RATE_MARGIN) || rrate < rate) { - pr_err("Call set rate on the PLL with rounded rates!\n"); - return -EINVAL; - } + ret = alpha_pll_check_rate_margin(hw, rrate, rate); + if (ret < 0) + return ret; regmap_write(pll->clkr.regmap, PLL_L_VAL(pll), l); regmap_write(pll->clkr.regmap, PLL_ALPHA_VAL(pll), a); -- GitLab From a2b57943a570a69679abf82e1de01c806db084d1 Mon Sep 17 00:00:00 2001 From: Taniya Das Date: Sat, 17 Oct 2020 00:13:33 +0530 Subject: [PATCH 0079/1894] clk: qcom: clk-alpha-pll: Add support for controlling Agera PLLs Add programming sequence support for managing the Agera PLLs. Signed-off-by: Taniya Das Link: https://lore.kernel.org/r/1602873815-1677-3-git-send-email-tdas@codeaurora.org Signed-off-by: Stephen Boyd --- drivers/clk/qcom/clk-alpha-pll.c | 62 ++++++++++++++++++++++++++++++++ drivers/clk/qcom/clk-alpha-pll.h | 4 +++ 2 files changed, 66 insertions(+) diff --git a/drivers/clk/qcom/clk-alpha-pll.c b/drivers/clk/qcom/clk-alpha-pll.c index f3b8b54fefd59..21c357c26ec42 100644 --- a/drivers/clk/qcom/clk-alpha-pll.c +++ b/drivers/clk/qcom/clk-alpha-pll.c @@ -116,6 +116,16 @@ const u8 clk_alpha_pll_regs[][PLL_OFF_MAX_REGS] = { [PLL_OFF_OPMODE] = 0x38, [PLL_OFF_ALPHA_VAL] = 0x40, }, + [CLK_ALPHA_PLL_TYPE_AGERA] = { + [PLL_OFF_L_VAL] = 0x04, + [PLL_OFF_ALPHA_VAL] = 0x08, + [PLL_OFF_USER_CTL] = 0x0c, + [PLL_OFF_CONFIG_CTL] = 0x10, + [PLL_OFF_CONFIG_CTL_U] = 0x14, + [PLL_OFF_TEST_CTL] = 0x18, + [PLL_OFF_TEST_CTL_U] = 0x1c, + [PLL_OFF_STATUS] = 0x2c, + }, }; EXPORT_SYMBOL_GPL(clk_alpha_pll_regs); @@ -1538,3 +1548,55 @@ const struct clk_ops clk_alpha_pll_postdiv_lucid_ops = { .set_rate = clk_alpha_pll_postdiv_fabia_set_rate, }; EXPORT_SYMBOL_GPL(clk_alpha_pll_postdiv_lucid_ops); + +void clk_agera_pll_configure(struct clk_alpha_pll *pll, struct regmap *regmap, + const struct alpha_pll_config *config) +{ + clk_alpha_pll_write_config(regmap, PLL_L_VAL(pll), config->l); + clk_alpha_pll_write_config(regmap, PLL_ALPHA_VAL(pll), config->alpha); + clk_alpha_pll_write_config(regmap, PLL_USER_CTL(pll), + config->user_ctl_val); + clk_alpha_pll_write_config(regmap, PLL_CONFIG_CTL(pll), + config->config_ctl_val); + clk_alpha_pll_write_config(regmap, PLL_CONFIG_CTL_U(pll), + config->config_ctl_hi_val); + clk_alpha_pll_write_config(regmap, PLL_TEST_CTL(pll), + config->test_ctl_val); + clk_alpha_pll_write_config(regmap, PLL_TEST_CTL_U(pll), + config->test_ctl_hi_val); +} +EXPORT_SYMBOL_GPL(clk_agera_pll_configure); + +static int clk_alpha_pll_agera_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long prate) +{ + struct clk_alpha_pll *pll = to_clk_alpha_pll(hw); + u32 l, alpha_width = pll_alpha_width(pll); + int ret; + unsigned long rrate; + u64 a; + + rrate = alpha_pll_round_rate(rate, prate, &l, &a, alpha_width); + ret = alpha_pll_check_rate_margin(hw, rrate, rate); + if (ret < 0) + return ret; + + /* change L_VAL without having to go through the power on sequence */ + regmap_write(pll->clkr.regmap, PLL_L_VAL(pll), l); + regmap_write(pll->clkr.regmap, PLL_ALPHA_VAL(pll), a); + + if (clk_hw_is_enabled(hw)) + return wait_for_pll_enable_lock(pll); + + return 0; +} + +const struct clk_ops clk_alpha_pll_agera_ops = { + .enable = clk_alpha_pll_enable, + .disable = clk_alpha_pll_disable, + .is_enabled = clk_alpha_pll_is_enabled, + .recalc_rate = alpha_pll_fabia_recalc_rate, + .round_rate = clk_alpha_pll_round_rate, + .set_rate = clk_alpha_pll_agera_set_rate, +}; +EXPORT_SYMBOL_GPL(clk_alpha_pll_agera_ops); diff --git a/drivers/clk/qcom/clk-alpha-pll.h b/drivers/clk/qcom/clk-alpha-pll.h index d3201b87c0cd4..0ea30d2f3da1c 100644 --- a/drivers/clk/qcom/clk-alpha-pll.h +++ b/drivers/clk/qcom/clk-alpha-pll.h @@ -15,6 +15,7 @@ enum { CLK_ALPHA_PLL_TYPE_FABIA, CLK_ALPHA_PLL_TYPE_TRION, CLK_ALPHA_PLL_TYPE_LUCID = CLK_ALPHA_PLL_TYPE_TRION, + CLK_ALPHA_PLL_TYPE_AGERA, CLK_ALPHA_PLL_TYPE_MAX, }; @@ -141,6 +142,7 @@ extern const struct clk_ops clk_alpha_pll_postdiv_trion_ops; extern const struct clk_ops clk_alpha_pll_lucid_ops; #define clk_alpha_pll_fixed_lucid_ops clk_alpha_pll_fixed_trion_ops extern const struct clk_ops clk_alpha_pll_postdiv_lucid_ops; +extern const struct clk_ops clk_alpha_pll_agera_ops; void clk_alpha_pll_configure(struct clk_alpha_pll *pll, struct regmap *regmap, const struct alpha_pll_config *config); @@ -148,6 +150,8 @@ void clk_fabia_pll_configure(struct clk_alpha_pll *pll, struct regmap *regmap, const struct alpha_pll_config *config); void clk_trion_pll_configure(struct clk_alpha_pll *pll, struct regmap *regmap, const struct alpha_pll_config *config); +void clk_agera_pll_configure(struct clk_alpha_pll *pll, struct regmap *regmap, + const struct alpha_pll_config *config); #define clk_lucid_pll_configure(pll, regmap, config) \ clk_trion_pll_configure(pll, regmap, config) -- GitLab From 57b971907eb07fbc8917f9f7a2df37da6304a296 Mon Sep 17 00:00:00 2001 From: Taniya Das Date: Sat, 17 Oct 2020 00:13:34 +0530 Subject: [PATCH 0080/1894] dt-bindings: clock: Add YAML schemas for the QCOM Camera clock bindings. The Camera Subsystem clock provider have a bunch of generic properties that are needed in a device tree. Add a YAML schemas for those. Add clock ids for camera clocks which are required to bring the camera subsystem out of reset. Signed-off-by: Taniya Das Link: https://lore.kernel.org/r/1602873815-1677-4-git-send-email-tdas@codeaurora.org Reviewed-by: Rob Herring Signed-off-by: Stephen Boyd --- .../bindings/clock/qcom,sc7180-camcc.yaml | 73 +++++++++++ include/dt-bindings/clock/qcom,camcc-sc7180.h | 121 ++++++++++++++++++ 2 files changed, 194 insertions(+) create mode 100644 Documentation/devicetree/bindings/clock/qcom,sc7180-camcc.yaml create mode 100644 include/dt-bindings/clock/qcom,camcc-sc7180.h diff --git a/Documentation/devicetree/bindings/clock/qcom,sc7180-camcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sc7180-camcc.yaml new file mode 100644 index 0000000000000..f49027edfc443 --- /dev/null +++ b/Documentation/devicetree/bindings/clock/qcom,sc7180-camcc.yaml @@ -0,0 +1,73 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/qcom,sc7180-camcc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm Camera Clock & Reset Controller Binding for SC7180 + +maintainers: + - Taniya Das + +description: | + Qualcomm camera clock control module which supports the clocks, resets and + power domains on SC7180. + + See also: + - dt-bindings/clock/qcom,camcc-sc7180.h + +properties: + compatible: + const: qcom,sc7180-camcc + + clocks: + items: + - description: Board XO source + - description: Camera_ahb clock from GCC + - description: Camera XO clock from GCC + + clock-names: + items: + - const: bi_tcxo + - const: iface + - const: xo + + '#clock-cells': + const: 1 + + '#reset-cells': + const: 1 + + '#power-domain-cells': + const: 1 + + reg: + maxItems: 1 + +required: + - compatible + - reg + - clocks + - clock-names + - '#clock-cells' + - '#reset-cells' + - '#power-domain-cells' + +additionalProperties: false + +examples: + - | + #include + #include + clock-controller@ad00000 { + compatible = "qcom,sc7180-camcc"; + reg = <0x0ad00000 0x10000>; + clocks = <&rpmhcc RPMH_CXO_CLK>, + <&gcc GCC_CAMERA_AHB_CLK>, + <&gcc GCC_CAMERA_XO_CLK>; + clock-names = "bi_tcxo", "iface", "xo"; + #clock-cells = <1>; + #reset-cells = <1>; + #power-domain-cells = <1>; + }; +... diff --git a/include/dt-bindings/clock/qcom,camcc-sc7180.h b/include/dt-bindings/clock/qcom,camcc-sc7180.h new file mode 100644 index 0000000000000..ef7d3a041b88d --- /dev/null +++ b/include/dt-bindings/clock/qcom,camcc-sc7180.h @@ -0,0 +1,121 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2020, The Linux Foundation. All rights reserved. + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_CAM_CC_SC7180_H +#define _DT_BINDINGS_CLK_QCOM_CAM_CC_SC7180_H + +/* CAM_CC clocks */ +#define CAM_CC_PLL2_OUT_EARLY 0 +#define CAM_CC_PLL0 1 +#define CAM_CC_PLL1 2 +#define CAM_CC_PLL2 3 +#define CAM_CC_PLL2_OUT_AUX 4 +#define CAM_CC_PLL3 5 +#define CAM_CC_CAMNOC_AXI_CLK 6 +#define CAM_CC_CCI_0_CLK 7 +#define CAM_CC_CCI_0_CLK_SRC 8 +#define CAM_CC_CCI_1_CLK 9 +#define CAM_CC_CCI_1_CLK_SRC 10 +#define CAM_CC_CORE_AHB_CLK 11 +#define CAM_CC_CPAS_AHB_CLK 12 +#define CAM_CC_CPHY_RX_CLK_SRC 13 +#define CAM_CC_CSI0PHYTIMER_CLK 14 +#define CAM_CC_CSI0PHYTIMER_CLK_SRC 15 +#define CAM_CC_CSI1PHYTIMER_CLK 16 +#define CAM_CC_CSI1PHYTIMER_CLK_SRC 17 +#define CAM_CC_CSI2PHYTIMER_CLK 18 +#define CAM_CC_CSI2PHYTIMER_CLK_SRC 19 +#define CAM_CC_CSI3PHYTIMER_CLK 20 +#define CAM_CC_CSI3PHYTIMER_CLK_SRC 21 +#define CAM_CC_CSIPHY0_CLK 22 +#define CAM_CC_CSIPHY1_CLK 23 +#define CAM_CC_CSIPHY2_CLK 24 +#define CAM_CC_CSIPHY3_CLK 25 +#define CAM_CC_FAST_AHB_CLK_SRC 26 +#define CAM_CC_ICP_APB_CLK 27 +#define CAM_CC_ICP_ATB_CLK 28 +#define CAM_CC_ICP_CLK 29 +#define CAM_CC_ICP_CLK_SRC 30 +#define CAM_CC_ICP_CTI_CLK 31 +#define CAM_CC_ICP_TS_CLK 32 +#define CAM_CC_IFE_0_AXI_CLK 33 +#define CAM_CC_IFE_0_CLK 34 +#define CAM_CC_IFE_0_CLK_SRC 35 +#define CAM_CC_IFE_0_CPHY_RX_CLK 36 +#define CAM_CC_IFE_0_CSID_CLK 37 +#define CAM_CC_IFE_0_CSID_CLK_SRC 38 +#define CAM_CC_IFE_0_DSP_CLK 39 +#define CAM_CC_IFE_1_AXI_CLK 40 +#define CAM_CC_IFE_1_CLK 41 +#define CAM_CC_IFE_1_CLK_SRC 42 +#define CAM_CC_IFE_1_CPHY_RX_CLK 43 +#define CAM_CC_IFE_1_CSID_CLK 44 +#define CAM_CC_IFE_1_CSID_CLK_SRC 45 +#define CAM_CC_IFE_1_DSP_CLK 46 +#define CAM_CC_IFE_LITE_CLK 47 +#define CAM_CC_IFE_LITE_CLK_SRC 48 +#define CAM_CC_IFE_LITE_CPHY_RX_CLK 49 +#define CAM_CC_IFE_LITE_CSID_CLK 50 +#define CAM_CC_IFE_LITE_CSID_CLK_SRC 51 +#define CAM_CC_IPE_0_AHB_CLK 52 +#define CAM_CC_IPE_0_AREG_CLK 53 +#define CAM_CC_IPE_0_AXI_CLK 54 +#define CAM_CC_IPE_0_CLK 55 +#define CAM_CC_IPE_0_CLK_SRC 56 +#define CAM_CC_JPEG_CLK 57 +#define CAM_CC_JPEG_CLK_SRC 58 +#define CAM_CC_LRME_CLK 59 +#define CAM_CC_LRME_CLK_SRC 60 +#define CAM_CC_MCLK0_CLK 61 +#define CAM_CC_MCLK0_CLK_SRC 62 +#define CAM_CC_MCLK1_CLK 63 +#define CAM_CC_MCLK1_CLK_SRC 64 +#define CAM_CC_MCLK2_CLK 65 +#define CAM_CC_MCLK2_CLK_SRC 66 +#define CAM_CC_MCLK3_CLK 67 +#define CAM_CC_MCLK3_CLK_SRC 68 +#define CAM_CC_MCLK4_CLK 69 +#define CAM_CC_MCLK4_CLK_SRC 70 +#define CAM_CC_BPS_AHB_CLK 71 +#define CAM_CC_BPS_AREG_CLK 72 +#define CAM_CC_BPS_AXI_CLK 73 +#define CAM_CC_BPS_CLK 74 +#define CAM_CC_BPS_CLK_SRC 75 +#define CAM_CC_SLOW_AHB_CLK_SRC 76 +#define CAM_CC_SOC_AHB_CLK 77 +#define CAM_CC_SYS_TMR_CLK 78 + +/* CAM_CC power domains */ +#define BPS_GDSC 0 +#define IFE_0_GDSC 1 +#define IFE_1_GDSC 2 +#define IPE_0_GDSC 3 +#define TITAN_TOP_GDSC 4 + +/* CAM_CC resets */ +#define CAM_CC_BPS_BCR 0 +#define CAM_CC_CAMNOC_BCR 1 +#define CAM_CC_CCI_0_BCR 2 +#define CAM_CC_CCI_1_BCR 3 +#define CAM_CC_CPAS_BCR 4 +#define CAM_CC_CSI0PHY_BCR 5 +#define CAM_CC_CSI1PHY_BCR 6 +#define CAM_CC_CSI2PHY_BCR 7 +#define CAM_CC_CSI3PHY_BCR 8 +#define CAM_CC_ICP_BCR 9 +#define CAM_CC_IFE_0_BCR 10 +#define CAM_CC_IFE_1_BCR 11 +#define CAM_CC_IFE_LITE_BCR 12 +#define CAM_CC_IPE_0_BCR 13 +#define CAM_CC_JPEG_BCR 14 +#define CAM_CC_LRME_BCR 15 +#define CAM_CC_MCLK0_BCR 16 +#define CAM_CC_MCLK1_BCR 17 +#define CAM_CC_MCLK2_BCR 18 +#define CAM_CC_MCLK3_BCR 19 +#define CAM_CC_MCLK4_BCR 20 +#define CAM_CC_TITAN_TOP_BCR 21 + +#endif -- GitLab From 15d09e830bbc16880840ac8b01941465602807f4 Mon Sep 17 00:00:00 2001 From: Taniya Das Date: Sat, 17 Oct 2020 00:13:35 +0530 Subject: [PATCH 0081/1894] clk: qcom: camcc: Add camera clock controller driver for SC7180 Add support for the camera clock controller found on SC7180 based devices. This would allow camera drivers to probe and control their clocks. Signed-off-by: Taniya Das Link: https://lore.kernel.org/r/1602873815-1677-5-git-send-email-tdas@codeaurora.org [sboyd@kernel.org: Mark hw array static, add UL to big vco numbers] Signed-off-by: Stephen Boyd --- drivers/clk/qcom/Kconfig | 9 + drivers/clk/qcom/Makefile | 1 + drivers/clk/qcom/camcc-sc7180.c | 1736 +++++++++++++++++++++++++++++++ 3 files changed, 1746 insertions(+) create mode 100644 drivers/clk/qcom/camcc-sc7180.c diff --git a/drivers/clk/qcom/Kconfig b/drivers/clk/qcom/Kconfig index 3a965bd326d5f..886e8610b3214 100644 --- a/drivers/clk/qcom/Kconfig +++ b/drivers/clk/qcom/Kconfig @@ -290,6 +290,15 @@ config QCS_GCC_404 Say Y if you want to use multimedia devices or peripheral devices such as UART, SPI, I2C, USB, SD/eMMC, PCIe etc. +config SC_CAMCC_7180 + tristate "SC7180 Camera Clock Controller" + select SC_GCC_7180 + help + Support for the camera clock controller on Qualcomm Technologies, Inc + SC7180 devices. + Say Y if you want to support camera devices and functionality such as + capturing pictures. + config SC_DISPCC_7180 tristate "SC7180 Display Clock Controller" select SC_GCC_7180 diff --git a/drivers/clk/qcom/Makefile b/drivers/clk/qcom/Makefile index 11ae86febe873..ee02fafbae278 100644 --- a/drivers/clk/qcom/Makefile +++ b/drivers/clk/qcom/Makefile @@ -51,6 +51,7 @@ obj-$(CONFIG_QCOM_CLK_SMD_RPM) += clk-smd-rpm.o obj-$(CONFIG_QCS_GCC_404) += gcc-qcs404.o obj-$(CONFIG_QCS_Q6SSTOP_404) += q6sstop-qcs404.o obj-$(CONFIG_QCS_TURING_404) += turingcc-qcs404.o +obj-$(CONFIG_SC_CAMCC_7180) += camcc-sc7180.o obj-$(CONFIG_SC_DISPCC_7180) += dispcc-sc7180.o obj-$(CONFIG_SC_GCC_7180) += gcc-sc7180.o obj-$(CONFIG_SC_GPUCC_7180) += gpucc-sc7180.o diff --git a/drivers/clk/qcom/camcc-sc7180.c b/drivers/clk/qcom/camcc-sc7180.c new file mode 100644 index 0000000000000..f51bf5b6decc6 --- /dev/null +++ b/drivers/clk/qcom/camcc-sc7180.c @@ -0,0 +1,1736 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2020, The Linux Foundation. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "clk-alpha-pll.h" +#include "clk-branch.h" +#include "clk-rcg.h" +#include "clk-regmap.h" +#include "common.h" +#include "gdsc.h" +#include "reset.h" + +enum { + P_BI_TCXO, + P_CAM_CC_PLL0_OUT_EVEN, + P_CAM_CC_PLL1_OUT_EVEN, + P_CAM_CC_PLL2_OUT_AUX, + P_CAM_CC_PLL2_OUT_EARLY, + P_CAM_CC_PLL3_OUT_MAIN, + P_CORE_BI_PLL_TEST_SE, +}; + +static const struct pll_vco agera_vco[] = { + { 600000000, 3300000000UL, 0 }, +}; + +static const struct pll_vco fabia_vco[] = { + { 249600000, 2000000000UL, 0 }, +}; + +/* 600MHz configuration */ +static const struct alpha_pll_config cam_cc_pll0_config = { + .l = 0x1f, + .alpha = 0x4000, + .config_ctl_val = 0x20485699, + .config_ctl_hi_val = 0x00002067, + .test_ctl_val = 0x40000000, + .user_ctl_hi_val = 0x00004805, + .user_ctl_val = 0x00000001, +}; + +static struct clk_alpha_pll cam_cc_pll0 = { + .offset = 0x0, + .vco_table = fabia_vco, + .num_vco = ARRAY_SIZE(fabia_vco), + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA], + .clkr = { + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_pll0", + .parent_data = &(const struct clk_parent_data){ + .fw_name = "bi_tcxo", + }, + .num_parents = 1, + .ops = &clk_alpha_pll_fabia_ops, + }, + }, +}; + +/* 860MHz configuration */ +static const struct alpha_pll_config cam_cc_pll1_config = { + .l = 0x2a, + .alpha = 0x1555, + .config_ctl_val = 0x20485699, + .config_ctl_hi_val = 0x00002067, + .test_ctl_val = 0x40000000, + .user_ctl_hi_val = 0x00004805, +}; + +static struct clk_alpha_pll cam_cc_pll1 = { + .offset = 0x1000, + .vco_table = fabia_vco, + .num_vco = ARRAY_SIZE(fabia_vco), + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA], + .clkr = { + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_pll1", + .parent_data = &(const struct clk_parent_data){ + .fw_name = "bi_tcxo", + }, + .num_parents = 1, + .ops = &clk_alpha_pll_fabia_ops, + }, + }, +}; + +/* 1920MHz configuration */ +static const struct alpha_pll_config cam_cc_pll2_config = { + .l = 0x64, + .config_ctl_val = 0x20000800, + .config_ctl_hi_val = 0x400003D2, + .test_ctl_val = 0x04000400, + .test_ctl_hi_val = 0x00004000, + .user_ctl_val = 0x0000030F, +}; + +static struct clk_alpha_pll cam_cc_pll2 = { + .offset = 0x2000, + .vco_table = agera_vco, + .num_vco = ARRAY_SIZE(agera_vco), + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_AGERA], + .clkr = { + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_pll2", + .parent_data = &(const struct clk_parent_data){ + .fw_name = "bi_tcxo", + }, + .num_parents = 1, + .ops = &clk_alpha_pll_agera_ops, + }, + }, +}; + +static struct clk_fixed_factor cam_cc_pll2_out_early = { + .mult = 1, + .div = 2, + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_pll2_out_early", + .parent_names = (const char *[]){ "cam_cc_pll2" }, + .num_parents = 1, + .ops = &clk_fixed_factor_ops, + }, +}; + +static const struct clk_div_table post_div_table_cam_cc_pll2_out_aux[] = { + { 0x3, 4 }, + { } +}; + +static struct clk_alpha_pll_postdiv cam_cc_pll2_out_aux = { + .offset = 0x2000, + .post_div_shift = 8, + .post_div_table = post_div_table_cam_cc_pll2_out_aux, + .num_post_div = ARRAY_SIZE(post_div_table_cam_cc_pll2_out_aux), + .width = 2, + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_AGERA], + .clkr.hw.init = &(struct clk_init_data){ + .name = "cam_cc_pll2_out_aux", + .parent_data = &(const struct clk_parent_data){ + .hw = &cam_cc_pll2.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_alpha_pll_postdiv_ops, + }, +}; + +/* 1080MHz configuration */ +static const struct alpha_pll_config cam_cc_pll3_config = { + .l = 0x38, + .alpha = 0x4000, + .config_ctl_val = 0x20485699, + .config_ctl_hi_val = 0x00002067, + .test_ctl_val = 0x40000000, + .user_ctl_hi_val = 0x00004805, +}; + +static struct clk_alpha_pll cam_cc_pll3 = { + .offset = 0x3000, + .vco_table = fabia_vco, + .num_vco = ARRAY_SIZE(fabia_vco), + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA], + .clkr = { + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_pll3", + .parent_data = &(const struct clk_parent_data){ + .fw_name = "bi_tcxo", + }, + .num_parents = 1, + .ops = &clk_alpha_pll_fabia_ops, + }, + }, +}; + +static const struct parent_map cam_cc_parent_map_0[] = { + { P_BI_TCXO, 0 }, + { P_CAM_CC_PLL1_OUT_EVEN, 2 }, + { P_CAM_CC_PLL0_OUT_EVEN, 6 }, + { P_CORE_BI_PLL_TEST_SE, 7 }, +}; + +static const struct clk_parent_data cam_cc_parent_data_0[] = { + { .fw_name = "bi_tcxo" }, + { .hw = &cam_cc_pll1.clkr.hw }, + { .hw = &cam_cc_pll0.clkr.hw }, + { .fw_name = "core_bi_pll_test_se", .name = "core_bi_pll_test_se" }, +}; + +static const struct parent_map cam_cc_parent_map_1[] = { + { P_BI_TCXO, 0 }, + { P_CAM_CC_PLL2_OUT_AUX, 1 }, + { P_CORE_BI_PLL_TEST_SE, 7 }, +}; + +static const struct clk_parent_data cam_cc_parent_data_1[] = { + { .fw_name = "bi_tcxo" }, + { .hw = &cam_cc_pll2_out_aux.clkr.hw }, + { .fw_name = "core_bi_pll_test_se", .name = "core_bi_pll_test_se" }, +}; + +static const struct parent_map cam_cc_parent_map_2[] = { + { P_BI_TCXO, 0 }, + { P_CAM_CC_PLL2_OUT_EARLY, 4 }, + { P_CAM_CC_PLL3_OUT_MAIN, 5 }, + { P_CAM_CC_PLL0_OUT_EVEN, 6 }, + { P_CORE_BI_PLL_TEST_SE, 7 }, +}; + +static const struct clk_parent_data cam_cc_parent_data_2[] = { + { .fw_name = "bi_tcxo" }, + { .hw = &cam_cc_pll2_out_early.hw }, + { .hw = &cam_cc_pll3.clkr.hw }, + { .hw = &cam_cc_pll0.clkr.hw }, + { .fw_name = "core_bi_pll_test_se", .name = "core_bi_pll_test_se" }, +}; + +static const struct parent_map cam_cc_parent_map_3[] = { + { P_BI_TCXO, 0 }, + { P_CAM_CC_PLL1_OUT_EVEN, 2 }, + { P_CAM_CC_PLL2_OUT_EARLY, 4 }, + { P_CAM_CC_PLL3_OUT_MAIN, 5 }, + { P_CAM_CC_PLL0_OUT_EVEN, 6 }, + { P_CORE_BI_PLL_TEST_SE, 7 }, +}; + +static const struct clk_parent_data cam_cc_parent_data_3[] = { + { .fw_name = "bi_tcxo" }, + { .hw = &cam_cc_pll1.clkr.hw }, + { .hw = &cam_cc_pll2_out_early.hw }, + { .hw = &cam_cc_pll3.clkr.hw }, + { .hw = &cam_cc_pll0.clkr.hw }, + { .fw_name = "core_bi_pll_test_se", .name = "core_bi_pll_test_se" }, +}; + +static const struct parent_map cam_cc_parent_map_4[] = { + { P_BI_TCXO, 0 }, + { P_CAM_CC_PLL3_OUT_MAIN, 5 }, + { P_CAM_CC_PLL0_OUT_EVEN, 6 }, + { P_CORE_BI_PLL_TEST_SE, 7 }, +}; + +static const struct clk_parent_data cam_cc_parent_data_4[] = { + { .fw_name = "bi_tcxo" }, + { .hw = &cam_cc_pll3.clkr.hw }, + { .hw = &cam_cc_pll0.clkr.hw }, + { .fw_name = "core_bi_pll_test_se", .name = "core_bi_pll_test_se" }, +}; + +static const struct parent_map cam_cc_parent_map_5[] = { + { P_BI_TCXO, 0 }, + { P_CAM_CC_PLL0_OUT_EVEN, 6 }, + { P_CORE_BI_PLL_TEST_SE, 7 }, +}; + +static const struct clk_parent_data cam_cc_parent_data_5[] = { + { .fw_name = "bi_tcxo" }, + { .hw = &cam_cc_pll0.clkr.hw }, + { .fw_name = "core_bi_pll_test_se", .name = "core_bi_pll_test_se" }, +}; + +static const struct parent_map cam_cc_parent_map_6[] = { + { P_BI_TCXO, 0 }, + { P_CAM_CC_PLL1_OUT_EVEN, 2 }, + { P_CAM_CC_PLL3_OUT_MAIN, 5 }, + { P_CAM_CC_PLL0_OUT_EVEN, 6 }, + { P_CORE_BI_PLL_TEST_SE, 7 }, +}; + +static const struct clk_parent_data cam_cc_parent_data_6[] = { + { .fw_name = "bi_tcxo" }, + { .hw = &cam_cc_pll1.clkr.hw }, + { .hw = &cam_cc_pll3.clkr.hw }, + { .hw = &cam_cc_pll0.clkr.hw }, + { .fw_name = "core_bi_pll_test_se", .name = "core_bi_pll_test_se" }, +}; + +static const struct freq_tbl ftbl_cam_cc_bps_clk_src[] = { + F(200000000, P_CAM_CC_PLL0_OUT_EVEN, 3, 0, 0), + F(360000000, P_CAM_CC_PLL3_OUT_MAIN, 3, 0, 0), + F(432000000, P_CAM_CC_PLL3_OUT_MAIN, 2.5, 0, 0), + F(480000000, P_CAM_CC_PLL2_OUT_EARLY, 2, 0, 0), + F(600000000, P_CAM_CC_PLL0_OUT_EVEN, 1, 0, 0), + { } +}; + +static struct clk_rcg2 cam_cc_bps_clk_src = { + .cmd_rcgr = 0x6010, + .mnd_width = 0, + .hid_width = 5, + .parent_map = cam_cc_parent_map_2, + .freq_tbl = ftbl_cam_cc_bps_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "cam_cc_bps_clk_src", + .parent_data = cam_cc_parent_data_2, + .num_parents = 5, + .ops = &clk_rcg2_ops, + }, +}; + +static const struct freq_tbl ftbl_cam_cc_cci_0_clk_src[] = { + F(37500000, P_CAM_CC_PLL0_OUT_EVEN, 16, 0, 0), + F(50000000, P_CAM_CC_PLL0_OUT_EVEN, 12, 0, 0), + F(100000000, P_CAM_CC_PLL0_OUT_EVEN, 6, 0, 0), + { } +}; + +static struct clk_rcg2 cam_cc_cci_0_clk_src = { + .cmd_rcgr = 0xb0d8, + .mnd_width = 8, + .hid_width = 5, + .parent_map = cam_cc_parent_map_5, + .freq_tbl = ftbl_cam_cc_cci_0_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "cam_cc_cci_0_clk_src", + .parent_data = cam_cc_parent_data_5, + .num_parents = 3, + .ops = &clk_rcg2_ops, + }, +}; + +static struct clk_rcg2 cam_cc_cci_1_clk_src = { + .cmd_rcgr = 0xb14c, + .mnd_width = 8, + .hid_width = 5, + .parent_map = cam_cc_parent_map_5, + .freq_tbl = ftbl_cam_cc_cci_0_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "cam_cc_cci_1_clk_src", + .parent_data = cam_cc_parent_data_5, + .num_parents = 3, + .ops = &clk_rcg2_ops, + }, +}; + +static const struct freq_tbl ftbl_cam_cc_cphy_rx_clk_src[] = { + F(150000000, P_CAM_CC_PLL0_OUT_EVEN, 4, 0, 0), + F(270000000, P_CAM_CC_PLL3_OUT_MAIN, 4, 0, 0), + F(360000000, P_CAM_CC_PLL3_OUT_MAIN, 3, 0, 0), + { } +}; + +static struct clk_rcg2 cam_cc_cphy_rx_clk_src = { + .cmd_rcgr = 0x9064, + .mnd_width = 0, + .hid_width = 5, + .parent_map = cam_cc_parent_map_3, + .freq_tbl = ftbl_cam_cc_cphy_rx_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "cam_cc_cphy_rx_clk_src", + .parent_data = cam_cc_parent_data_3, + .num_parents = 6, + .ops = &clk_rcg2_ops, + }, +}; + +static const struct freq_tbl ftbl_cam_cc_csi0phytimer_clk_src[] = { + F(300000000, P_CAM_CC_PLL0_OUT_EVEN, 2, 0, 0), + { } +}; + +static struct clk_rcg2 cam_cc_csi0phytimer_clk_src = { + .cmd_rcgr = 0x5004, + .mnd_width = 0, + .hid_width = 5, + .parent_map = cam_cc_parent_map_0, + .freq_tbl = ftbl_cam_cc_csi0phytimer_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "cam_cc_csi0phytimer_clk_src", + .parent_data = cam_cc_parent_data_0, + .num_parents = 4, + .ops = &clk_rcg2_ops, + }, +}; + +static struct clk_rcg2 cam_cc_csi1phytimer_clk_src = { + .cmd_rcgr = 0x5028, + .mnd_width = 0, + .hid_width = 5, + .parent_map = cam_cc_parent_map_0, + .freq_tbl = ftbl_cam_cc_csi0phytimer_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "cam_cc_csi1phytimer_clk_src", + .parent_data = cam_cc_parent_data_0, + .num_parents = 4, + .ops = &clk_rcg2_ops, + }, +}; + +static struct clk_rcg2 cam_cc_csi2phytimer_clk_src = { + .cmd_rcgr = 0x504c, + .mnd_width = 0, + .hid_width = 5, + .parent_map = cam_cc_parent_map_0, + .freq_tbl = ftbl_cam_cc_csi0phytimer_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "cam_cc_csi2phytimer_clk_src", + .parent_data = cam_cc_parent_data_0, + .num_parents = 4, + .ops = &clk_rcg2_ops, + }, +}; + +static struct clk_rcg2 cam_cc_csi3phytimer_clk_src = { + .cmd_rcgr = 0x5070, + .mnd_width = 0, + .hid_width = 5, + .parent_map = cam_cc_parent_map_0, + .freq_tbl = ftbl_cam_cc_csi0phytimer_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "cam_cc_csi3phytimer_clk_src", + .parent_data = cam_cc_parent_data_0, + .num_parents = 4, + .ops = &clk_rcg2_ops, + }, +}; + +static const struct freq_tbl ftbl_cam_cc_fast_ahb_clk_src[] = { + F(100000000, P_CAM_CC_PLL0_OUT_EVEN, 6, 0, 0), + F(200000000, P_CAM_CC_PLL0_OUT_EVEN, 3, 0, 0), + F(300000000, P_CAM_CC_PLL0_OUT_EVEN, 2, 0, 0), + F(404000000, P_CAM_CC_PLL1_OUT_EVEN, 2, 0, 0), + { } +}; + +static struct clk_rcg2 cam_cc_fast_ahb_clk_src = { + .cmd_rcgr = 0x603c, + .mnd_width = 0, + .hid_width = 5, + .parent_map = cam_cc_parent_map_0, + .freq_tbl = ftbl_cam_cc_fast_ahb_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "cam_cc_fast_ahb_clk_src", + .parent_data = cam_cc_parent_data_0, + .num_parents = 4, + .ops = &clk_rcg2_ops, + }, +}; + +static const struct freq_tbl ftbl_cam_cc_icp_clk_src[] = { + F(240000000, P_CAM_CC_PLL0_OUT_EVEN, 2.5, 0, 0), + F(360000000, P_CAM_CC_PLL3_OUT_MAIN, 3, 0, 0), + F(432000000, P_CAM_CC_PLL3_OUT_MAIN, 2.5, 0, 0), + F(480000000, P_CAM_CC_PLL2_OUT_EARLY, 2, 0, 0), + F(600000000, P_CAM_CC_PLL0_OUT_EVEN, 1, 0, 0), + { } +}; + +static struct clk_rcg2 cam_cc_icp_clk_src = { + .cmd_rcgr = 0xb088, + .mnd_width = 0, + .hid_width = 5, + .parent_map = cam_cc_parent_map_2, + .freq_tbl = ftbl_cam_cc_icp_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "cam_cc_icp_clk_src", + .parent_data = cam_cc_parent_data_2, + .num_parents = 5, + .ops = &clk_rcg2_ops, + }, +}; + +static const struct freq_tbl ftbl_cam_cc_ife_0_clk_src[] = { + F(240000000, P_CAM_CC_PLL0_OUT_EVEN, 2.5, 0, 0), + F(360000000, P_CAM_CC_PLL3_OUT_MAIN, 3, 0, 0), + F(432000000, P_CAM_CC_PLL3_OUT_MAIN, 2.5, 0, 0), + F(600000000, P_CAM_CC_PLL0_OUT_EVEN, 1, 0, 0), + { } +}; + +static struct clk_rcg2 cam_cc_ife_0_clk_src = { + .cmd_rcgr = 0x9010, + .mnd_width = 0, + .hid_width = 5, + .parent_map = cam_cc_parent_map_4, + .freq_tbl = ftbl_cam_cc_ife_0_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "cam_cc_ife_0_clk_src", + .parent_data = cam_cc_parent_data_4, + .num_parents = 4, + .ops = &clk_rcg2_ops, + }, +}; + +static const struct freq_tbl ftbl_cam_cc_ife_0_csid_clk_src[] = { + F(150000000, P_CAM_CC_PLL0_OUT_EVEN, 4, 0, 0), + F(270000000, P_CAM_CC_PLL3_OUT_MAIN, 4, 0, 0), + F(360000000, P_CAM_CC_PLL3_OUT_MAIN, 3, 0, 0), + F(480000000, P_CAM_CC_PLL2_OUT_EARLY, 2, 0, 0), + { } +}; + +static struct clk_rcg2 cam_cc_ife_0_csid_clk_src = { + .cmd_rcgr = 0x903c, + .mnd_width = 0, + .hid_width = 5, + .parent_map = cam_cc_parent_map_3, + .freq_tbl = ftbl_cam_cc_ife_0_csid_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "cam_cc_ife_0_csid_clk_src", + .parent_data = cam_cc_parent_data_3, + .num_parents = 6, + .ops = &clk_rcg2_ops, + }, +}; + +static struct clk_rcg2 cam_cc_ife_1_clk_src = { + .cmd_rcgr = 0xa010, + .mnd_width = 0, + .hid_width = 5, + .parent_map = cam_cc_parent_map_4, + .freq_tbl = ftbl_cam_cc_ife_0_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "cam_cc_ife_1_clk_src", + .parent_data = cam_cc_parent_data_4, + .num_parents = 4, + .ops = &clk_rcg2_ops, + }, +}; + +static struct clk_rcg2 cam_cc_ife_1_csid_clk_src = { + .cmd_rcgr = 0xa034, + .mnd_width = 0, + .hid_width = 5, + .parent_map = cam_cc_parent_map_3, + .freq_tbl = ftbl_cam_cc_ife_0_csid_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "cam_cc_ife_1_csid_clk_src", + .parent_data = cam_cc_parent_data_3, + .num_parents = 6, + .ops = &clk_rcg2_ops, + }, +}; + +static struct clk_rcg2 cam_cc_ife_lite_clk_src = { + .cmd_rcgr = 0xb004, + .mnd_width = 0, + .hid_width = 5, + .parent_map = cam_cc_parent_map_4, + .freq_tbl = ftbl_cam_cc_ife_0_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "cam_cc_ife_lite_clk_src", + .parent_data = cam_cc_parent_data_4, + .num_parents = 4, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_rcg2_ops, + }, +}; + +static struct clk_rcg2 cam_cc_ife_lite_csid_clk_src = { + .cmd_rcgr = 0xb024, + .mnd_width = 0, + .hid_width = 5, + .parent_map = cam_cc_parent_map_3, + .freq_tbl = ftbl_cam_cc_ife_0_csid_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "cam_cc_ife_lite_csid_clk_src", + .parent_data = cam_cc_parent_data_3, + .num_parents = 6, + .ops = &clk_rcg2_ops, + }, +}; + +static const struct freq_tbl ftbl_cam_cc_ipe_0_clk_src[] = { + F(240000000, P_CAM_CC_PLL0_OUT_EVEN, 2.5, 0, 0), + F(360000000, P_CAM_CC_PLL3_OUT_MAIN, 3, 0, 0), + F(432000000, P_CAM_CC_PLL3_OUT_MAIN, 2.5, 0, 0), + F(540000000, P_CAM_CC_PLL3_OUT_MAIN, 2, 0, 0), + F(600000000, P_CAM_CC_PLL0_OUT_EVEN, 1, 0, 0), + { } +}; + +static struct clk_rcg2 cam_cc_ipe_0_clk_src = { + .cmd_rcgr = 0x7010, + .mnd_width = 0, + .hid_width = 5, + .parent_map = cam_cc_parent_map_2, + .freq_tbl = ftbl_cam_cc_ipe_0_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "cam_cc_ipe_0_clk_src", + .parent_data = cam_cc_parent_data_2, + .num_parents = 5, + .ops = &clk_rcg2_ops, + }, +}; + +static const struct freq_tbl ftbl_cam_cc_jpeg_clk_src[] = { + F(66666667, P_CAM_CC_PLL0_OUT_EVEN, 9, 0, 0), + F(133333333, P_CAM_CC_PLL0_OUT_EVEN, 4.5, 0, 0), + F(216000000, P_CAM_CC_PLL3_OUT_MAIN, 5, 0, 0), + F(320000000, P_CAM_CC_PLL2_OUT_EARLY, 3, 0, 0), + F(600000000, P_CAM_CC_PLL0_OUT_EVEN, 1, 0, 0), + { } +}; + +static struct clk_rcg2 cam_cc_jpeg_clk_src = { + .cmd_rcgr = 0xb04c, + .mnd_width = 0, + .hid_width = 5, + .parent_map = cam_cc_parent_map_2, + .freq_tbl = ftbl_cam_cc_jpeg_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "cam_cc_jpeg_clk_src", + .parent_data = cam_cc_parent_data_2, + .num_parents = 5, + .ops = &clk_rcg2_ops, + }, +}; + +static const struct freq_tbl ftbl_cam_cc_lrme_clk_src[] = { + F(200000000, P_CAM_CC_PLL0_OUT_EVEN, 3, 0, 0), + F(216000000, P_CAM_CC_PLL3_OUT_MAIN, 5, 0, 0), + F(300000000, P_CAM_CC_PLL0_OUT_EVEN, 2, 0, 0), + F(404000000, P_CAM_CC_PLL1_OUT_EVEN, 2, 0, 0), + { } +}; + +static struct clk_rcg2 cam_cc_lrme_clk_src = { + .cmd_rcgr = 0xb0f8, + .mnd_width = 0, + .hid_width = 5, + .parent_map = cam_cc_parent_map_6, + .freq_tbl = ftbl_cam_cc_lrme_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "cam_cc_lrme_clk_src", + .parent_data = cam_cc_parent_data_6, + .num_parents = 5, + .ops = &clk_rcg2_ops, + }, +}; + +static const struct freq_tbl ftbl_cam_cc_mclk0_clk_src[] = { + F(19200000, P_BI_TCXO, 1, 0, 0), + F(24000000, P_CAM_CC_PLL2_OUT_AUX, 10, 1, 2), + F(64000000, P_CAM_CC_PLL2_OUT_AUX, 7.5, 0, 0), + { } +}; + +static struct clk_rcg2 cam_cc_mclk0_clk_src = { + .cmd_rcgr = 0x4004, + .mnd_width = 8, + .hid_width = 5, + .parent_map = cam_cc_parent_map_1, + .freq_tbl = ftbl_cam_cc_mclk0_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "cam_cc_mclk0_clk_src", + .parent_data = cam_cc_parent_data_1, + .num_parents = 3, + .ops = &clk_rcg2_ops, + }, +}; + +static struct clk_rcg2 cam_cc_mclk1_clk_src = { + .cmd_rcgr = 0x4024, + .mnd_width = 8, + .hid_width = 5, + .parent_map = cam_cc_parent_map_1, + .freq_tbl = ftbl_cam_cc_mclk0_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "cam_cc_mclk1_clk_src", + .parent_data = cam_cc_parent_data_1, + .num_parents = 3, + .ops = &clk_rcg2_ops, + }, +}; + +static struct clk_rcg2 cam_cc_mclk2_clk_src = { + .cmd_rcgr = 0x4044, + .mnd_width = 8, + .hid_width = 5, + .parent_map = cam_cc_parent_map_1, + .freq_tbl = ftbl_cam_cc_mclk0_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "cam_cc_mclk2_clk_src", + .parent_data = cam_cc_parent_data_1, + .num_parents = 3, + .ops = &clk_rcg2_ops, + }, +}; + +static struct clk_rcg2 cam_cc_mclk3_clk_src = { + .cmd_rcgr = 0x4064, + .mnd_width = 8, + .hid_width = 5, + .parent_map = cam_cc_parent_map_1, + .freq_tbl = ftbl_cam_cc_mclk0_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "cam_cc_mclk3_clk_src", + .parent_data = cam_cc_parent_data_1, + .num_parents = 3, + .ops = &clk_rcg2_ops, + }, +}; + +static struct clk_rcg2 cam_cc_mclk4_clk_src = { + .cmd_rcgr = 0x4084, + .mnd_width = 8, + .hid_width = 5, + .parent_map = cam_cc_parent_map_1, + .freq_tbl = ftbl_cam_cc_mclk0_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "cam_cc_mclk4_clk_src", + .parent_data = cam_cc_parent_data_1, + .num_parents = 3, + .ops = &clk_rcg2_ops, + }, +}; + +static const struct freq_tbl ftbl_cam_cc_slow_ahb_clk_src[] = { + F(80000000, P_CAM_CC_PLL0_OUT_EVEN, 7.5, 0, 0), + { } +}; + +static struct clk_rcg2 cam_cc_slow_ahb_clk_src = { + .cmd_rcgr = 0x6058, + .mnd_width = 0, + .hid_width = 5, + .parent_map = cam_cc_parent_map_0, + .freq_tbl = ftbl_cam_cc_slow_ahb_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "cam_cc_slow_ahb_clk_src", + .parent_data = cam_cc_parent_data_0, + .num_parents = 4, + .flags = CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE, + .ops = &clk_rcg2_ops, + }, +}; + +static struct clk_branch cam_cc_bps_ahb_clk = { + .halt_reg = 0x6070, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x6070, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_bps_ahb_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &cam_cc_slow_ahb_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch cam_cc_bps_areg_clk = { + .halt_reg = 0x6054, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x6054, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_bps_areg_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &cam_cc_fast_ahb_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch cam_cc_bps_axi_clk = { + .halt_reg = 0x6038, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x6038, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_bps_axi_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch cam_cc_bps_clk = { + .halt_reg = 0x6028, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x6028, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_bps_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &cam_cc_bps_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch cam_cc_camnoc_axi_clk = { + .halt_reg = 0xb124, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0xb124, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_camnoc_axi_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch cam_cc_cci_0_clk = { + .halt_reg = 0xb0f0, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0xb0f0, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_cci_0_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &cam_cc_cci_0_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch cam_cc_cci_1_clk = { + .halt_reg = 0xb164, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0xb164, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_cci_1_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &cam_cc_cci_1_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch cam_cc_core_ahb_clk = { + .halt_reg = 0xb144, + .halt_check = BRANCH_HALT_DELAY, + .clkr = { + .enable_reg = 0xb144, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_core_ahb_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &cam_cc_slow_ahb_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch cam_cc_cpas_ahb_clk = { + .halt_reg = 0xb11c, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0xb11c, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_cpas_ahb_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &cam_cc_slow_ahb_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch cam_cc_csi0phytimer_clk = { + .halt_reg = 0x501c, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x501c, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_csi0phytimer_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &cam_cc_csi0phytimer_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch cam_cc_csi1phytimer_clk = { + .halt_reg = 0x5040, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x5040, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_csi1phytimer_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &cam_cc_csi1phytimer_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch cam_cc_csi2phytimer_clk = { + .halt_reg = 0x5064, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x5064, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_csi2phytimer_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &cam_cc_csi2phytimer_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch cam_cc_csi3phytimer_clk = { + .halt_reg = 0x5088, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x5088, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_csi3phytimer_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &cam_cc_csi3phytimer_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch cam_cc_csiphy0_clk = { + .halt_reg = 0x5020, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x5020, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_csiphy0_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &cam_cc_cphy_rx_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch cam_cc_csiphy1_clk = { + .halt_reg = 0x5044, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x5044, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_csiphy1_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &cam_cc_cphy_rx_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch cam_cc_csiphy2_clk = { + .halt_reg = 0x5068, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x5068, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_csiphy2_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &cam_cc_cphy_rx_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch cam_cc_csiphy3_clk = { + .halt_reg = 0x508c, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x508c, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_csiphy3_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &cam_cc_cphy_rx_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch cam_cc_icp_clk = { + .halt_reg = 0xb0a0, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0xb0a0, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_icp_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &cam_cc_icp_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch cam_cc_ife_0_axi_clk = { + .halt_reg = 0x9080, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x9080, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_ife_0_axi_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch cam_cc_ife_0_clk = { + .halt_reg = 0x9028, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x9028, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_ife_0_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &cam_cc_ife_0_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch cam_cc_ife_0_cphy_rx_clk = { + .halt_reg = 0x907c, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x907c, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_ife_0_cphy_rx_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &cam_cc_cphy_rx_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch cam_cc_ife_0_csid_clk = { + .halt_reg = 0x9054, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x9054, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_ife_0_csid_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &cam_cc_ife_0_csid_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch cam_cc_ife_0_dsp_clk = { + .halt_reg = 0x9038, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x9038, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_ife_0_dsp_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &cam_cc_ife_0_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch cam_cc_ife_1_axi_clk = { + .halt_reg = 0xa058, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0xa058, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_ife_1_axi_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch cam_cc_ife_1_clk = { + .halt_reg = 0xa028, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0xa028, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_ife_1_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &cam_cc_ife_1_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch cam_cc_ife_1_cphy_rx_clk = { + .halt_reg = 0xa054, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0xa054, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_ife_1_cphy_rx_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &cam_cc_cphy_rx_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch cam_cc_ife_1_csid_clk = { + .halt_reg = 0xa04c, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0xa04c, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_ife_1_csid_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &cam_cc_ife_1_csid_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch cam_cc_ife_1_dsp_clk = { + .halt_reg = 0xa030, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0xa030, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_ife_1_dsp_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &cam_cc_ife_1_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch cam_cc_ife_lite_clk = { + .halt_reg = 0xb01c, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0xb01c, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_ife_lite_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &cam_cc_ife_lite_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch cam_cc_ife_lite_cphy_rx_clk = { + .halt_reg = 0xb044, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0xb044, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_ife_lite_cphy_rx_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &cam_cc_cphy_rx_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch cam_cc_ife_lite_csid_clk = { + .halt_reg = 0xb03c, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0xb03c, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_ife_lite_csid_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &cam_cc_ife_lite_csid_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch cam_cc_ipe_0_ahb_clk = { + .halt_reg = 0x7040, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x7040, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_ipe_0_ahb_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &cam_cc_slow_ahb_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch cam_cc_ipe_0_areg_clk = { + .halt_reg = 0x703c, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x703c, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_ipe_0_areg_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &cam_cc_fast_ahb_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch cam_cc_ipe_0_axi_clk = { + .halt_reg = 0x7038, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x7038, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_ipe_0_axi_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch cam_cc_ipe_0_clk = { + .halt_reg = 0x7028, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x7028, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_ipe_0_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &cam_cc_ipe_0_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch cam_cc_jpeg_clk = { + .halt_reg = 0xb064, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0xb064, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_jpeg_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &cam_cc_jpeg_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch cam_cc_lrme_clk = { + .halt_reg = 0xb110, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0xb110, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_lrme_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &cam_cc_lrme_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch cam_cc_mclk0_clk = { + .halt_reg = 0x401c, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x401c, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_mclk0_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &cam_cc_mclk0_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch cam_cc_mclk1_clk = { + .halt_reg = 0x403c, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x403c, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_mclk1_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &cam_cc_mclk1_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch cam_cc_mclk2_clk = { + .halt_reg = 0x405c, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x405c, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_mclk2_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &cam_cc_mclk2_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch cam_cc_mclk3_clk = { + .halt_reg = 0x407c, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x407c, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_mclk3_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &cam_cc_mclk3_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch cam_cc_mclk4_clk = { + .halt_reg = 0x409c, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x409c, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_mclk4_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &cam_cc_mclk4_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch cam_cc_soc_ahb_clk = { + .halt_reg = 0xb140, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0xb140, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_soc_ahb_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch cam_cc_sys_tmr_clk = { + .halt_reg = 0xb0a8, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0xb0a8, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "cam_cc_sys_tmr_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct gdsc bps_gdsc = { + .gdscr = 0x6004, + .pd = { + .name = "bps_gdsc", + }, + .pwrsts = PWRSTS_OFF_ON, + .flags = HW_CTRL, +}; + +static struct gdsc ife_0_gdsc = { + .gdscr = 0x9004, + .pd = { + .name = "ife_0_gdsc", + }, + .pwrsts = PWRSTS_OFF_ON, +}; + +static struct gdsc ife_1_gdsc = { + .gdscr = 0xa004, + .pd = { + .name = "ife_1_gdsc", + }, + .pwrsts = PWRSTS_OFF_ON, +}; + +static struct gdsc ipe_0_gdsc = { + .gdscr = 0x7004, + .pd = { + .name = "ipe_0_gdsc", + }, + .pwrsts = PWRSTS_OFF_ON, + .flags = HW_CTRL, +}; + +static struct gdsc titan_top_gdsc = { + .gdscr = 0xb134, + .pd = { + .name = "titan_top_gdsc", + }, + .pwrsts = PWRSTS_OFF_ON, +}; + +static struct clk_hw *cam_cc_sc7180_hws[] = { + [CAM_CC_PLL2_OUT_EARLY] = &cam_cc_pll2_out_early.hw, +}; + +static struct clk_regmap *cam_cc_sc7180_clocks[] = { + [CAM_CC_BPS_AHB_CLK] = &cam_cc_bps_ahb_clk.clkr, + [CAM_CC_BPS_AREG_CLK] = &cam_cc_bps_areg_clk.clkr, + [CAM_CC_BPS_AXI_CLK] = &cam_cc_bps_axi_clk.clkr, + [CAM_CC_BPS_CLK] = &cam_cc_bps_clk.clkr, + [CAM_CC_BPS_CLK_SRC] = &cam_cc_bps_clk_src.clkr, + [CAM_CC_CAMNOC_AXI_CLK] = &cam_cc_camnoc_axi_clk.clkr, + [CAM_CC_CCI_0_CLK] = &cam_cc_cci_0_clk.clkr, + [CAM_CC_CCI_0_CLK_SRC] = &cam_cc_cci_0_clk_src.clkr, + [CAM_CC_CCI_1_CLK] = &cam_cc_cci_1_clk.clkr, + [CAM_CC_CCI_1_CLK_SRC] = &cam_cc_cci_1_clk_src.clkr, + [CAM_CC_CORE_AHB_CLK] = &cam_cc_core_ahb_clk.clkr, + [CAM_CC_CPAS_AHB_CLK] = &cam_cc_cpas_ahb_clk.clkr, + [CAM_CC_CPHY_RX_CLK_SRC] = &cam_cc_cphy_rx_clk_src.clkr, + [CAM_CC_CSI0PHYTIMER_CLK] = &cam_cc_csi0phytimer_clk.clkr, + [CAM_CC_CSI0PHYTIMER_CLK_SRC] = &cam_cc_csi0phytimer_clk_src.clkr, + [CAM_CC_CSI1PHYTIMER_CLK] = &cam_cc_csi1phytimer_clk.clkr, + [CAM_CC_CSI1PHYTIMER_CLK_SRC] = &cam_cc_csi1phytimer_clk_src.clkr, + [CAM_CC_CSI2PHYTIMER_CLK] = &cam_cc_csi2phytimer_clk.clkr, + [CAM_CC_CSI2PHYTIMER_CLK_SRC] = &cam_cc_csi2phytimer_clk_src.clkr, + [CAM_CC_CSI3PHYTIMER_CLK] = &cam_cc_csi3phytimer_clk.clkr, + [CAM_CC_CSI3PHYTIMER_CLK_SRC] = &cam_cc_csi3phytimer_clk_src.clkr, + [CAM_CC_CSIPHY0_CLK] = &cam_cc_csiphy0_clk.clkr, + [CAM_CC_CSIPHY1_CLK] = &cam_cc_csiphy1_clk.clkr, + [CAM_CC_CSIPHY2_CLK] = &cam_cc_csiphy2_clk.clkr, + [CAM_CC_CSIPHY3_CLK] = &cam_cc_csiphy3_clk.clkr, + [CAM_CC_FAST_AHB_CLK_SRC] = &cam_cc_fast_ahb_clk_src.clkr, + [CAM_CC_ICP_CLK] = &cam_cc_icp_clk.clkr, + [CAM_CC_ICP_CLK_SRC] = &cam_cc_icp_clk_src.clkr, + [CAM_CC_IFE_0_AXI_CLK] = &cam_cc_ife_0_axi_clk.clkr, + [CAM_CC_IFE_0_CLK] = &cam_cc_ife_0_clk.clkr, + [CAM_CC_IFE_0_CLK_SRC] = &cam_cc_ife_0_clk_src.clkr, + [CAM_CC_IFE_0_CPHY_RX_CLK] = &cam_cc_ife_0_cphy_rx_clk.clkr, + [CAM_CC_IFE_0_CSID_CLK] = &cam_cc_ife_0_csid_clk.clkr, + [CAM_CC_IFE_0_CSID_CLK_SRC] = &cam_cc_ife_0_csid_clk_src.clkr, + [CAM_CC_IFE_0_DSP_CLK] = &cam_cc_ife_0_dsp_clk.clkr, + [CAM_CC_IFE_1_AXI_CLK] = &cam_cc_ife_1_axi_clk.clkr, + [CAM_CC_IFE_1_CLK] = &cam_cc_ife_1_clk.clkr, + [CAM_CC_IFE_1_CLK_SRC] = &cam_cc_ife_1_clk_src.clkr, + [CAM_CC_IFE_1_CPHY_RX_CLK] = &cam_cc_ife_1_cphy_rx_clk.clkr, + [CAM_CC_IFE_1_CSID_CLK] = &cam_cc_ife_1_csid_clk.clkr, + [CAM_CC_IFE_1_CSID_CLK_SRC] = &cam_cc_ife_1_csid_clk_src.clkr, + [CAM_CC_IFE_1_DSP_CLK] = &cam_cc_ife_1_dsp_clk.clkr, + [CAM_CC_IFE_LITE_CLK] = &cam_cc_ife_lite_clk.clkr, + [CAM_CC_IFE_LITE_CLK_SRC] = &cam_cc_ife_lite_clk_src.clkr, + [CAM_CC_IFE_LITE_CPHY_RX_CLK] = &cam_cc_ife_lite_cphy_rx_clk.clkr, + [CAM_CC_IFE_LITE_CSID_CLK] = &cam_cc_ife_lite_csid_clk.clkr, + [CAM_CC_IFE_LITE_CSID_CLK_SRC] = &cam_cc_ife_lite_csid_clk_src.clkr, + [CAM_CC_IPE_0_AHB_CLK] = &cam_cc_ipe_0_ahb_clk.clkr, + [CAM_CC_IPE_0_AREG_CLK] = &cam_cc_ipe_0_areg_clk.clkr, + [CAM_CC_IPE_0_AXI_CLK] = &cam_cc_ipe_0_axi_clk.clkr, + [CAM_CC_IPE_0_CLK] = &cam_cc_ipe_0_clk.clkr, + [CAM_CC_IPE_0_CLK_SRC] = &cam_cc_ipe_0_clk_src.clkr, + [CAM_CC_JPEG_CLK] = &cam_cc_jpeg_clk.clkr, + [CAM_CC_JPEG_CLK_SRC] = &cam_cc_jpeg_clk_src.clkr, + [CAM_CC_LRME_CLK] = &cam_cc_lrme_clk.clkr, + [CAM_CC_LRME_CLK_SRC] = &cam_cc_lrme_clk_src.clkr, + [CAM_CC_MCLK0_CLK] = &cam_cc_mclk0_clk.clkr, + [CAM_CC_MCLK0_CLK_SRC] = &cam_cc_mclk0_clk_src.clkr, + [CAM_CC_MCLK1_CLK] = &cam_cc_mclk1_clk.clkr, + [CAM_CC_MCLK1_CLK_SRC] = &cam_cc_mclk1_clk_src.clkr, + [CAM_CC_MCLK2_CLK] = &cam_cc_mclk2_clk.clkr, + [CAM_CC_MCLK2_CLK_SRC] = &cam_cc_mclk2_clk_src.clkr, + [CAM_CC_MCLK3_CLK] = &cam_cc_mclk3_clk.clkr, + [CAM_CC_MCLK3_CLK_SRC] = &cam_cc_mclk3_clk_src.clkr, + [CAM_CC_MCLK4_CLK] = &cam_cc_mclk4_clk.clkr, + [CAM_CC_MCLK4_CLK_SRC] = &cam_cc_mclk4_clk_src.clkr, + [CAM_CC_PLL0] = &cam_cc_pll0.clkr, + [CAM_CC_PLL1] = &cam_cc_pll1.clkr, + [CAM_CC_PLL2] = &cam_cc_pll2.clkr, + [CAM_CC_PLL2_OUT_AUX] = &cam_cc_pll2_out_aux.clkr, + [CAM_CC_PLL3] = &cam_cc_pll3.clkr, + [CAM_CC_SLOW_AHB_CLK_SRC] = &cam_cc_slow_ahb_clk_src.clkr, + [CAM_CC_SOC_AHB_CLK] = &cam_cc_soc_ahb_clk.clkr, + [CAM_CC_SYS_TMR_CLK] = &cam_cc_sys_tmr_clk.clkr, +}; +static struct gdsc *cam_cc_sc7180_gdscs[] = { + [BPS_GDSC] = &bps_gdsc, + [IFE_0_GDSC] = &ife_0_gdsc, + [IFE_1_GDSC] = &ife_1_gdsc, + [IPE_0_GDSC] = &ipe_0_gdsc, + [TITAN_TOP_GDSC] = &titan_top_gdsc, +}; + +static const struct regmap_config cam_cc_sc7180_regmap_config = { + .reg_bits = 32, + .reg_stride = 4, + .val_bits = 32, + .max_register = 0xd028, + .fast_io = true, +}; + +static const struct qcom_cc_desc cam_cc_sc7180_desc = { + .config = &cam_cc_sc7180_regmap_config, + .clk_hws = cam_cc_sc7180_hws, + .num_clk_hws = ARRAY_SIZE(cam_cc_sc7180_hws), + .clks = cam_cc_sc7180_clocks, + .num_clks = ARRAY_SIZE(cam_cc_sc7180_clocks), + .gdscs = cam_cc_sc7180_gdscs, + .num_gdscs = ARRAY_SIZE(cam_cc_sc7180_gdscs), +}; + +static const struct of_device_id cam_cc_sc7180_match_table[] = { + { .compatible = "qcom,sc7180-camcc" }, + { } +}; +MODULE_DEVICE_TABLE(of, cam_cc_sc7180_match_table); + +static int cam_cc_sc7180_probe(struct platform_device *pdev) +{ + struct regmap *regmap; + int ret; + + pm_runtime_enable(&pdev->dev); + ret = pm_clk_create(&pdev->dev); + if (ret < 0) + return ret; + + ret = pm_clk_add(&pdev->dev, "xo"); + if (ret < 0) { + dev_err(&pdev->dev, "Failed to acquire XO clock\n"); + goto disable_pm_runtime; + } + + ret = pm_clk_add(&pdev->dev, "iface"); + if (ret < 0) { + dev_err(&pdev->dev, "Failed to acquire iface clock\n"); + goto disable_pm_runtime; + } + + ret = pm_clk_runtime_resume(&pdev->dev); + if (ret < 0) { + dev_err(&pdev->dev, "pm runtime resume failed\n"); + goto destroy_pm_clk; + } + + regmap = qcom_cc_map(pdev, &cam_cc_sc7180_desc); + if (IS_ERR(regmap)) { + ret = PTR_ERR(regmap); + pm_clk_runtime_suspend(&pdev->dev); + goto destroy_pm_clk; + } + + clk_fabia_pll_configure(&cam_cc_pll0, regmap, &cam_cc_pll0_config); + clk_fabia_pll_configure(&cam_cc_pll1, regmap, &cam_cc_pll1_config); + clk_agera_pll_configure(&cam_cc_pll2, regmap, &cam_cc_pll2_config); + clk_fabia_pll_configure(&cam_cc_pll3, regmap, &cam_cc_pll3_config); + + ret = qcom_cc_really_probe(pdev, &cam_cc_sc7180_desc, regmap); + + pm_clk_runtime_suspend(&pdev->dev); + + if (ret < 0) { + dev_err(&pdev->dev, "Failed to register CAM CC clocks\n"); + goto destroy_pm_clk; + } + + return 0; + +destroy_pm_clk: + pm_clk_destroy(&pdev->dev); + +disable_pm_runtime: + pm_runtime_disable(&pdev->dev); + + return ret; +} + +static const struct dev_pm_ops cam_cc_pm_ops = { + SET_RUNTIME_PM_OPS(pm_clk_suspend, pm_clk_resume, NULL) +}; + +static struct platform_driver cam_cc_sc7180_driver = { + .probe = cam_cc_sc7180_probe, + .driver = { + .name = "cam_cc-sc7180", + .of_match_table = cam_cc_sc7180_match_table, + .pm = &cam_cc_pm_ops, + }, +}; + +static int __init cam_cc_sc7180_init(void) +{ + return platform_driver_register(&cam_cc_sc7180_driver); +} +subsys_initcall(cam_cc_sc7180_init); + +static void __exit cam_cc_sc7180_exit(void) +{ + platform_driver_unregister(&cam_cc_sc7180_driver); +} +module_exit(cam_cc_sc7180_exit); + +MODULE_DESCRIPTION("QTI CAM_CC SC7180 Driver"); +MODULE_LICENSE("GPL v2"); -- GitLab From 7635622b77b53985d816b7f7c1a04e718c9db814 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Mon, 19 Oct 2020 15:49:34 -0700 Subject: [PATCH 0082/1894] clk: qcom: lpasscc-sc7810: Use devm in probe Let's convert the lpass clock control driver to use devm. This is a few more lines of code, but it will be useful in a later patch which disentangles the two devices handled by this driver. Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20201019154857.v5.1.I4567b5e7e17bbb15ef063d447cb83fd43746cb18@changeid Signed-off-by: Stephen Boyd --- drivers/clk/qcom/lpasscorecc-sc7180.c | 38 +++++++++++++++------------ 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/drivers/clk/qcom/lpasscorecc-sc7180.c b/drivers/clk/qcom/lpasscorecc-sc7180.c index 228d08f5d26fa..2d15e33ec8376 100644 --- a/drivers/clk/qcom/lpasscorecc-sc7180.c +++ b/drivers/clk/qcom/lpasscorecc-sc7180.c @@ -356,6 +356,16 @@ static const struct qcom_cc_desc lpass_audio_hm_sc7180_desc = { .num_gdscs = ARRAY_SIZE(lpass_audio_hm_sc7180_gdscs), }; +static void lpass_pm_runtime_disable(void *data) +{ + pm_runtime_disable(data); +} + +static void lpass_pm_clk_destroy(void *data) +{ + pm_clk_destroy(data); +} + static int lpass_core_cc_sc7180_probe(struct platform_device *pdev) { const struct qcom_cc_desc *desc; @@ -418,34 +428,28 @@ static int lpass_core_sc7180_probe(struct platform_device *pdev) int ret; pm_runtime_enable(&pdev->dev); + ret = devm_add_action_or_reset(&pdev->dev, lpass_pm_runtime_disable, &pdev->dev); + if (ret) + return ret; + ret = pm_clk_create(&pdev->dev); if (ret) - goto disable_pm_runtime; + return ret; + ret = devm_add_action_or_reset(&pdev->dev, lpass_pm_clk_destroy, &pdev->dev); + if (ret) + return ret; ret = pm_clk_add(&pdev->dev, "iface"); if (ret < 0) { dev_err(&pdev->dev, "failed to acquire iface clock\n"); - goto destroy_pm_clk; + return ret; } - ret = -EINVAL; clk_probe = of_device_get_match_data(&pdev->dev); if (!clk_probe) - goto destroy_pm_clk; - - ret = clk_probe(pdev); - if (ret) - goto destroy_pm_clk; - - return 0; - -destroy_pm_clk: - pm_clk_destroy(&pdev->dev); - -disable_pm_runtime: - pm_runtime_disable(&pdev->dev); + return -EINVAL; - return ret; + return clk_probe(pdev); } static const struct dev_pm_ops lpass_core_cc_pm_ops = { -- GitLab From 4ee9fe3e292bafc915872fce5eacd2c185d7711f Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Mon, 19 Oct 2020 15:49:35 -0700 Subject: [PATCH 0083/1894] clk: qcom: lpass-sc7180: Disentangle the two clock devices The sc7180 lpass clock driver manages two different devices. These two devices were tangled together, using one probe and a lookup to figure out the real probe. I think it's cleaner to really separate the probe for these two devices since they're really different things, just both managed by the same driver. Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20201019154857.v5.2.I75c409497d4dea9daefa53ec5f93824081c4ecbe@changeid Reviewed-by: Taniya Das Signed-off-by: Stephen Boyd --- drivers/clk/qcom/lpasscorecc-sc7180.c | 103 ++++++++++++++++---------- 1 file changed, 64 insertions(+), 39 deletions(-) diff --git a/drivers/clk/qcom/lpasscorecc-sc7180.c b/drivers/clk/qcom/lpasscorecc-sc7180.c index 2d15e33ec8376..1a3925badd7c4 100644 --- a/drivers/clk/qcom/lpasscorecc-sc7180.c +++ b/drivers/clk/qcom/lpasscorecc-sc7180.c @@ -366,12 +366,39 @@ static void lpass_pm_clk_destroy(void *data) pm_clk_destroy(data); } +static int lpass_create_pm_clks(struct platform_device *pdev) +{ + int ret; + + pm_runtime_enable(&pdev->dev); + ret = devm_add_action_or_reset(&pdev->dev, lpass_pm_runtime_disable, &pdev->dev); + if (ret) + return ret; + + ret = pm_clk_create(&pdev->dev); + if (ret) + return ret; + ret = devm_add_action_or_reset(&pdev->dev, lpass_pm_clk_destroy, &pdev->dev); + if (ret) + return ret; + + ret = pm_clk_add(&pdev->dev, "iface"); + if (ret < 0) + dev_err(&pdev->dev, "failed to acquire iface clock\n"); + + return ret; +} + static int lpass_core_cc_sc7180_probe(struct platform_device *pdev) { const struct qcom_cc_desc *desc; struct regmap *regmap; int ret; + ret = lpass_create_pm_clks(pdev); + if (ret) + return ret; + lpass_core_cc_sc7180_regmap_config.name = "lpass_audio_cc"; desc = &lpass_audio_hm_sc7180_desc; ret = qcom_cc_probe_by_index(pdev, 1, desc); @@ -402,6 +429,11 @@ static int lpass_core_cc_sc7180_probe(struct platform_device *pdev) static int lpass_hm_core_probe(struct platform_device *pdev) { const struct qcom_cc_desc *desc; + int ret; + + ret = lpass_create_pm_clks(pdev); + if (ret) + return ret; lpass_core_cc_sc7180_regmap_config.name = "lpass_hm_core"; desc = &lpass_core_hm_sc7180_desc; @@ -409,55 +441,28 @@ static int lpass_hm_core_probe(struct platform_device *pdev) return qcom_cc_probe_by_index(pdev, 0, desc); } -static const struct of_device_id lpass_core_cc_sc7180_match_table[] = { +static const struct of_device_id lpass_hm_sc7180_match_table[] = { { .compatible = "qcom,sc7180-lpasshm", - .data = lpass_hm_core_probe, }, + { } +}; +MODULE_DEVICE_TABLE(of, lpass_hm_sc7180_match_table); + +static const struct of_device_id lpass_core_cc_sc7180_match_table[] = { { .compatible = "qcom,sc7180-lpasscorecc", - .data = lpass_core_cc_sc7180_probe, }, { } }; MODULE_DEVICE_TABLE(of, lpass_core_cc_sc7180_match_table); -static int lpass_core_sc7180_probe(struct platform_device *pdev) -{ - int (*clk_probe)(struct platform_device *p); - int ret; - - pm_runtime_enable(&pdev->dev); - ret = devm_add_action_or_reset(&pdev->dev, lpass_pm_runtime_disable, &pdev->dev); - if (ret) - return ret; - - ret = pm_clk_create(&pdev->dev); - if (ret) - return ret; - ret = devm_add_action_or_reset(&pdev->dev, lpass_pm_clk_destroy, &pdev->dev); - if (ret) - return ret; - - ret = pm_clk_add(&pdev->dev, "iface"); - if (ret < 0) { - dev_err(&pdev->dev, "failed to acquire iface clock\n"); - return ret; - } - - clk_probe = of_device_get_match_data(&pdev->dev); - if (!clk_probe) - return -EINVAL; - - return clk_probe(pdev); -} - static const struct dev_pm_ops lpass_core_cc_pm_ops = { SET_RUNTIME_PM_OPS(pm_clk_suspend, pm_clk_resume, NULL) }; static struct platform_driver lpass_core_cc_sc7180_driver = { - .probe = lpass_core_sc7180_probe, + .probe = lpass_core_cc_sc7180_probe, .driver = { .name = "lpass_core_cc-sc7180", .of_match_table = lpass_core_cc_sc7180_match_table, @@ -465,17 +470,37 @@ static struct platform_driver lpass_core_cc_sc7180_driver = { }, }; -static int __init lpass_core_cc_sc7180_init(void) +static const struct dev_pm_ops lpass_hm_pm_ops = { + SET_RUNTIME_PM_OPS(pm_clk_suspend, pm_clk_resume, NULL) +}; + +static struct platform_driver lpass_hm_sc7180_driver = { + .probe = lpass_hm_core_probe, + .driver = { + .name = "lpass_hm-sc7180", + .of_match_table = lpass_hm_sc7180_match_table, + .pm = &lpass_hm_pm_ops, + }, +}; + +static int __init lpass_sc7180_init(void) { - return platform_driver_register(&lpass_core_cc_sc7180_driver); + int ret; + + ret = platform_driver_register(&lpass_core_cc_sc7180_driver); + if (ret) + return ret; + + return platform_driver_register(&lpass_hm_sc7180_driver); } -subsys_initcall(lpass_core_cc_sc7180_init); +subsys_initcall(lpass_sc7180_init); -static void __exit lpass_core_cc_sc7180_exit(void) +static void __exit lpass_sc7180_exit(void) { + platform_driver_unregister(&lpass_hm_sc7180_driver); platform_driver_unregister(&lpass_core_cc_sc7180_driver); } -module_exit(lpass_core_cc_sc7180_exit); +module_exit(lpass_sc7180_exit); MODULE_DESCRIPTION("QTI LPASS_CORE_CC SC7180 Driver"); MODULE_LICENSE("GPL v2"); -- GitLab From a6dee2fe778b9f79f75bfc203945ace79172623e Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Mon, 26 Oct 2020 12:02:18 +0000 Subject: [PATCH 0084/1894] dt-bindings: clock: Add support for LPASS Audio Clock Controller Audio Clock controller is a block inside LPASS which controls 2 Glitch free muxes to LPASS codec Macros. Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20201026120221.18984-2-srinivas.kandagatla@linaro.org Reviewed-by: Rob Herring Signed-off-by: Stephen Boyd --- .../bindings/clock/qcom,audiocc-sm8250.yaml | 58 +++++++++++++++++++ .../clock/qcom,sm8250-lpass-audiocc.h | 13 +++++ 2 files changed, 71 insertions(+) create mode 100644 Documentation/devicetree/bindings/clock/qcom,audiocc-sm8250.yaml create mode 100644 include/dt-bindings/clock/qcom,sm8250-lpass-audiocc.h diff --git a/Documentation/devicetree/bindings/clock/qcom,audiocc-sm8250.yaml b/Documentation/devicetree/bindings/clock/qcom,audiocc-sm8250.yaml new file mode 100644 index 0000000000000..915d76206ad07 --- /dev/null +++ b/Documentation/devicetree/bindings/clock/qcom,audiocc-sm8250.yaml @@ -0,0 +1,58 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/qcom,audiocc-sm8250.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Clock bindings for LPASS Audio Clock Controller on SM8250 SoCs + +maintainers: + - Srinivas Kandagatla + +description: | + The clock consumer should specify the desired clock by having the clock + ID in its "clocks" phandle cell. + See include/dt-bindings/clock/qcom,sm8250-lpass-audiocc.h for the full list + of Audio Clock controller clock IDs. + +properties: + compatible: + const: qcom,sm8250-lpass-audiocc + + reg: + maxItems: 1 + + '#clock-cells': + const: 1 + + clocks: + items: + - description: LPASS Core voting clock + - description: Glitch Free Mux register clock + + clock-names: + items: + - const: core + - const: bus + +required: + - compatible + - reg + - '#clock-cells' + - clocks + - clock-names + +additionalProperties: false + +examples: + - | + #include + #include + clock-controller@3300000 { + #clock-cells = <1>; + compatible = "qcom,sm8250-lpass-audiocc"; + reg = <0x03300000 0x30000>; + clocks = <&q6afecc LPASS_HW_MACRO_VOTE LPASS_CLK_ATTRIBUTE_COUPLE_NO>, + <&q6afecc LPASS_CLK_ID_TX_CORE_MCLK LPASS_CLK_ATTRIBUTE_COUPLE_NO>; + clock-names = "core", "bus"; + }; diff --git a/include/dt-bindings/clock/qcom,sm8250-lpass-audiocc.h b/include/dt-bindings/clock/qcom,sm8250-lpass-audiocc.h new file mode 100644 index 0000000000000..a1aa6cb5d8400 --- /dev/null +++ b/include/dt-bindings/clock/qcom,sm8250-lpass-audiocc.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _DT_BINDINGS_CLK_LPASS_AUDIOCC_SM8250_H +#define _DT_BINDINGS_CLK_LPASS_AUDIOCC_SM8250_H + +/* From AudioCC */ +#define LPASS_CDC_WSA_NPL 0 +#define LPASS_CDC_WSA_MCLK 1 +#define LPASS_CDC_RX_MCLK 2 +#define LPASS_CDC_RX_NPL 3 +#define LPASS_CDC_RX_MCLK_MCLK2 4 + +#endif /* _DT_BINDINGS_CLK_LPASS_AUDIOCC_SM8250_H */ -- GitLab From 7dbe5a7a3f990d642a3166b5d161db429d9f7271 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Mon, 26 Oct 2020 12:02:19 +0000 Subject: [PATCH 0085/1894] dt-bindings: clock: Add support for LPASS Always ON Controller Always ON Clock controller is a block inside LPASS which controls 1 Glitch free muxes to LPASS codec Macros. Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20201026120221.18984-3-srinivas.kandagatla@linaro.org Reviewed-by: Rob Herring Signed-off-by: Stephen Boyd --- .../bindings/clock/qcom,aoncc-sm8250.yaml | 58 +++++++++++++++++++ .../clock/qcom,sm8250-lpass-aoncc.h | 11 ++++ 2 files changed, 69 insertions(+) create mode 100644 Documentation/devicetree/bindings/clock/qcom,aoncc-sm8250.yaml create mode 100644 include/dt-bindings/clock/qcom,sm8250-lpass-aoncc.h diff --git a/Documentation/devicetree/bindings/clock/qcom,aoncc-sm8250.yaml b/Documentation/devicetree/bindings/clock/qcom,aoncc-sm8250.yaml new file mode 100644 index 0000000000000..c40a74b5d6725 --- /dev/null +++ b/Documentation/devicetree/bindings/clock/qcom,aoncc-sm8250.yaml @@ -0,0 +1,58 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/qcom,aoncc-sm8250.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Clock bindings for LPASS Always ON Clock Controller on SM8250 SoCs + +maintainers: + - Srinivas Kandagatla + +description: | + The clock consumer should specify the desired clock by having the clock + ID in its "clocks" phandle cell. + See include/dt-bindings/clock/qcom,sm8250-lpass-aoncc.h for the full list + of Audio Clock controller clock IDs. + +properties: + compatible: + const: qcom,sm8250-lpass-aon + + reg: + maxItems: 1 + + '#clock-cells': + const: 1 + + clocks: + items: + - description: LPASS Core voting clock + - description: Glitch Free Mux register clock + + clock-names: + items: + - const: core + - const: bus + +required: + - compatible + - reg + - '#clock-cells' + - clocks + - clock-names + +additionalProperties: false + +examples: + - | + #include + #include + clock-controller@3800000 { + #clock-cells = <1>; + compatible = "qcom,sm8250-lpass-aon"; + reg = <0x03380000 0x40000>; + clocks = <&q6afecc LPASS_HW_MACRO_VOTE LPASS_CLK_ATTRIBUTE_COUPLE_NO>, + <&q6afecc LPASS_CLK_ID_TX_CORE_MCLK LPASS_CLK_ATTRIBUTE_COUPLE_NO>; + clock-names = "core", "bus"; + }; diff --git a/include/dt-bindings/clock/qcom,sm8250-lpass-aoncc.h b/include/dt-bindings/clock/qcom,sm8250-lpass-aoncc.h new file mode 100644 index 0000000000000..f5a1cfac86129 --- /dev/null +++ b/include/dt-bindings/clock/qcom,sm8250-lpass-aoncc.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _DT_BINDINGS_CLK_LPASS_AONCC_SM8250_H +#define _DT_BINDINGS_CLK_LPASS_AONCC_SM8250_H + +/* from AOCC */ +#define LPASS_CDC_VA_MCLK 0 +#define LPASS_CDC_TX_NPL 1 +#define LPASS_CDC_TX_MCLK 2 + +#endif /* _DT_BINDINGS_CLK_LPASS_AONCC_SM8250_H */ -- GitLab From a2d8f507803ee858c718b2a8d54c00ac9c5c5f09 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Mon, 26 Oct 2020 12:02:20 +0000 Subject: [PATCH 0086/1894] clk: qcom: Add support to LPASS AUDIO_CC Glitch Free Mux clocks GFM Muxes in AUDIO_CC control clocks to LPASS WSA and RX Codec Macros. This patch adds support to these muxes. Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20201026120221.18984-4-srinivas.kandagatla@linaro.org Signed-off-by: Stephen Boyd --- drivers/clk/qcom/Kconfig | 6 + drivers/clk/qcom/Makefile | 1 + drivers/clk/qcom/lpass-gfm-sm8250.c | 257 ++++++++++++++++++++++++++++ 3 files changed, 264 insertions(+) create mode 100644 drivers/clk/qcom/lpass-gfm-sm8250.c diff --git a/drivers/clk/qcom/Kconfig b/drivers/clk/qcom/Kconfig index 886e8610b3214..48c624a1eff12 100644 --- a/drivers/clk/qcom/Kconfig +++ b/drivers/clk/qcom/Kconfig @@ -511,4 +511,10 @@ config KRAITCC Support for the Krait CPU clocks on Qualcomm devices. Say Y if you want to support CPU frequency scaling. +config CLK_GFM_LPASS_SM8250 + tristate "SM8250 GFM LPASS Clocks" + help + Support for the Glitch Free Mux (GFM) Low power audio + subsystem (LPASS) clocks found on SM8250 SoCs. + endif diff --git a/drivers/clk/qcom/Makefile b/drivers/clk/qcom/Makefile index ee02fafbae278..57b6349a3ee8e 100644 --- a/drivers/clk/qcom/Makefile +++ b/drivers/clk/qcom/Makefile @@ -19,6 +19,7 @@ clk-qcom-$(CONFIG_QCOM_GDSC) += gdsc.o # Keep alphabetically sorted by config obj-$(CONFIG_APQ_GCC_8084) += gcc-apq8084.o obj-$(CONFIG_APQ_MMCC_8084) += mmcc-apq8084.o +obj-$(CONFIG_CLK_GFM_LPASS_SM8250) += lpass-gfm-sm8250.o obj-$(CONFIG_IPQ_APSS_PLL) += apss-ipq-pll.o obj-$(CONFIG_IPQ_APSS_6018) += apss-ipq6018.o obj-$(CONFIG_IPQ_GCC_4019) += gcc-ipq4019.o diff --git a/drivers/clk/qcom/lpass-gfm-sm8250.c b/drivers/clk/qcom/lpass-gfm-sm8250.c new file mode 100644 index 0000000000000..48a73dd97d0d6 --- /dev/null +++ b/drivers/clk/qcom/lpass-gfm-sm8250.c @@ -0,0 +1,257 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * LPASS Audio CC and Always ON CC Glitch Free Mux clock driver + * + * Copyright (c) 2020 Linaro Ltd. + * Author: Srinivas Kandagatla + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct lpass_gfm { + struct device *dev; + void __iomem *base; +}; + +struct clk_gfm { + unsigned int mux_reg; + unsigned int mux_mask; + struct clk_hw hw; + struct lpass_gfm *priv; + void __iomem *gfm_mux; +}; + +#define GFM_MASK BIT(1) +#define to_clk_gfm(_hw) container_of(_hw, struct clk_gfm, hw) + +static u8 clk_gfm_get_parent(struct clk_hw *hw) +{ + struct clk_gfm *clk = to_clk_gfm(hw); + + return readl(clk->gfm_mux) & GFM_MASK; +} + +static int clk_gfm_set_parent(struct clk_hw *hw, u8 index) +{ + struct clk_gfm *clk = to_clk_gfm(hw); + unsigned int val; + + val = readl(clk->gfm_mux); + + if (index) + val |= GFM_MASK; + else + val &= ~GFM_MASK; + + writel(val, clk->gfm_mux); + + return 0; +} + +static const struct clk_ops clk_gfm_ops = { + .get_parent = clk_gfm_get_parent, + .set_parent = clk_gfm_set_parent, + .determine_rate = __clk_mux_determine_rate, +}; + +static struct clk_gfm lpass_gfm_wsa_mclk = { + .mux_reg = 0x220d8, + .mux_mask = BIT(0), + .hw.init = &(struct clk_init_data) { + .name = "WSA_MCLK", + .ops = &clk_gfm_ops, + .flags = CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE, + .parent_data = (const struct clk_parent_data[]){ + { + .index = 0, + .fw_name = "LPASS_CLK_ID_TX_CORE_MCLK", + }, { + .index = 1, + .fw_name = "LPASS_CLK_ID_WSA_CORE_MCLK", + }, + }, + .num_parents = 2, + }, +}; + +static struct clk_gfm lpass_gfm_wsa_npl = { + .mux_reg = 0x220d8, + .mux_mask = BIT(0), + .hw.init = &(struct clk_init_data) { + .name = "WSA_NPL", + .ops = &clk_gfm_ops, + .flags = CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE, + .parent_data = (const struct clk_parent_data[]){ + { + .index = 0, + .fw_name = "LPASS_CLK_ID_TX_CORE_NPL_MCLK", + }, { + .index = 1, + .fw_name = "LPASS_CLK_ID_WSA_CORE_NPL_MCLK", + }, + }, + .num_parents = 2, + }, +}; + +static struct clk_gfm lpass_gfm_rx_mclk_mclk2 = { + .mux_reg = 0x240d8, + .mux_mask = BIT(0), + .hw.init = &(struct clk_init_data) { + .name = "RX_MCLK_MCLK2", + .ops = &clk_gfm_ops, + .flags = CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE, + .parent_data = (const struct clk_parent_data[]){ + { + .index = 0, + .fw_name = "LPASS_CLK_ID_TX_CORE_MCLK", + }, { + .index = 1, + .fw_name = "LPASS_CLK_ID_RX_CORE_MCLK", + }, + }, + .num_parents = 2, + }, +}; + +static struct clk_gfm lpass_gfm_rx_npl = { + .mux_reg = 0x240d8, + .mux_mask = BIT(0), + .hw.init = &(struct clk_init_data) { + .name = "RX_NPL", + .ops = &clk_gfm_ops, + .flags = CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE, + .parent_data = (const struct clk_parent_data[]){ + { + .index = 0, + .fw_name = "LPASS_CLK_ID_TX_CORE_NPL_MCLK", + }, { + .index = 1, + .fw_name = "LPASS_CLK_ID_RX_CORE_NPL_MCLK", + }, + }, + .num_parents = 2, + }, +}; + +static struct clk_gfm *audiocc_gfm_clks[] = { + [LPASS_CDC_WSA_NPL] = &lpass_gfm_wsa_npl, + [LPASS_CDC_WSA_MCLK] = &lpass_gfm_wsa_mclk, + [LPASS_CDC_RX_NPL] = &lpass_gfm_rx_npl, + [LPASS_CDC_RX_MCLK_MCLK2] = &lpass_gfm_rx_mclk_mclk2, +}; + +static struct clk_hw_onecell_data audiocc_hw_onecell_data = { + .hws = { + [LPASS_CDC_WSA_NPL] = &lpass_gfm_wsa_npl.hw, + [LPASS_CDC_WSA_MCLK] = &lpass_gfm_wsa_mclk.hw, + [LPASS_CDC_RX_NPL] = &lpass_gfm_rx_npl.hw, + [LPASS_CDC_RX_MCLK_MCLK2] = &lpass_gfm_rx_mclk_mclk2.hw, + }, + .num = ARRAY_SIZE(audiocc_gfm_clks), +}; + +struct lpass_gfm_data { + struct clk_hw_onecell_data *onecell_data; + struct clk_gfm **gfm_clks; +}; + +static struct lpass_gfm_data audiocc_data = { + .onecell_data = &audiocc_hw_onecell_data, + .gfm_clks = audiocc_gfm_clks, +}; + +static int lpass_gfm_clk_driver_probe(struct platform_device *pdev) +{ + const struct lpass_gfm_data *data; + struct device *dev = &pdev->dev; + struct clk_gfm *gfm; + struct lpass_gfm *cc; + int err, i; + + data = of_device_get_match_data(dev); + if (!data) + return -EINVAL; + + cc = devm_kzalloc(dev, sizeof(*cc), GFP_KERNEL); + if (!cc) + return -ENOMEM; + + cc->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(cc->base)) + return PTR_ERR(cc->base); + + pm_runtime_enable(dev); + err = pm_clk_create(dev); + if (err) + goto pm_clk_err; + + err = of_pm_clk_add_clks(dev); + if (err < 0) { + dev_dbg(dev, "Failed to get lpass core voting clocks\n"); + goto clk_reg_err; + } + + for (i = 0; i < data->onecell_data->num; i++) { + if (!data->gfm_clks[i]) + continue; + + gfm = data->gfm_clks[i]; + gfm->priv = cc; + gfm->gfm_mux = cc->base; + gfm->gfm_mux = gfm->gfm_mux + data->gfm_clks[i]->mux_reg; + + err = devm_clk_hw_register(dev, &data->gfm_clks[i]->hw); + if (err) + goto clk_reg_err; + + } + + err = devm_of_clk_add_hw_provider(dev, of_clk_hw_onecell_get, + data->onecell_data); + if (err) + goto clk_reg_err; + + return 0; + +clk_reg_err: + pm_clk_destroy(dev); +pm_clk_err: + pm_runtime_disable(dev); + return err; +} + +static const struct of_device_id lpass_gfm_clk_match_table[] = { + { + .compatible = "qcom,sm8250-lpass-audiocc", + .data = &audiocc_data, + }, + { } +}; +MODULE_DEVICE_TABLE(of, lpass_gfm_clk_match_table); + +static const struct dev_pm_ops lpass_gfm_pm_ops = { + SET_RUNTIME_PM_OPS(pm_clk_suspend, pm_clk_resume, NULL) +}; + +static struct platform_driver lpass_gfm_clk_driver = { + .probe = lpass_gfm_clk_driver_probe, + .driver = { + .name = "lpass-gfm-clk", + .of_match_table = lpass_gfm_clk_match_table, + .pm = &lpass_gfm_pm_ops, + }, +}; +module_platform_driver(lpass_gfm_clk_driver); +MODULE_LICENSE("GPL v2"); -- GitLab From 794aa56a78052d195641b1ce43e5538574bedf41 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Mon, 26 Oct 2020 12:02:21 +0000 Subject: [PATCH 0087/1894] clk: qcom: Add support to LPASS AON_CC Glitch Free Mux clocks LPASS Always ON Clock controller has one GFM mux to control VA and TX clocks to codec macro on LPASS. This patch adds support to this mux. Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20201026120221.18984-5-srinivas.kandagatla@linaro.org Signed-off-by: Stephen Boyd --- drivers/clk/qcom/lpass-gfm-sm8250.c | 63 +++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/drivers/clk/qcom/lpass-gfm-sm8250.c b/drivers/clk/qcom/lpass-gfm-sm8250.c index 48a73dd97d0d6..d366c7c2abc77 100644 --- a/drivers/clk/qcom/lpass-gfm-sm8250.c +++ b/drivers/clk/qcom/lpass-gfm-sm8250.c @@ -18,6 +18,7 @@ #include #include #include +#include struct lpass_gfm { struct device *dev; @@ -65,6 +66,46 @@ static const struct clk_ops clk_gfm_ops = { .determine_rate = __clk_mux_determine_rate, }; +static struct clk_gfm lpass_gfm_va_mclk = { + .mux_reg = 0x20000, + .mux_mask = BIT(0), + .hw.init = &(struct clk_init_data) { + .name = "VA_MCLK", + .ops = &clk_gfm_ops, + .flags = CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE, + .num_parents = 2, + .parent_data = (const struct clk_parent_data[]){ + { + .index = 0, + .fw_name = "LPASS_CLK_ID_TX_CORE_MCLK", + }, { + .index = 1, + .fw_name = "LPASS_CLK_ID_VA_CORE_MCLK", + }, + }, + }, +}; + +static struct clk_gfm lpass_gfm_tx_npl = { + .mux_reg = 0x20000, + .mux_mask = BIT(0), + .hw.init = &(struct clk_init_data) { + .name = "TX_NPL", + .ops = &clk_gfm_ops, + .flags = CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE, + .parent_data = (const struct clk_parent_data[]){ + { + .index = 0, + .fw_name = "LPASS_CLK_ID_TX_CORE_NPL_MCLK", + }, { + .index = 1, + .fw_name = "LPASS_CLK_ID_VA_CORE_2X_MCLK", + }, + }, + .num_parents = 2, + }, +}; + static struct clk_gfm lpass_gfm_wsa_mclk = { .mux_reg = 0x220d8, .mux_mask = BIT(0), @@ -145,6 +186,19 @@ static struct clk_gfm lpass_gfm_rx_npl = { }, }; +static struct clk_gfm *aoncc_gfm_clks[] = { + [LPASS_CDC_VA_MCLK] = &lpass_gfm_va_mclk, + [LPASS_CDC_TX_NPL] = &lpass_gfm_tx_npl, +}; + +static struct clk_hw_onecell_data aoncc_hw_onecell_data = { + .hws = { + [LPASS_CDC_VA_MCLK] = &lpass_gfm_va_mclk.hw, + [LPASS_CDC_TX_NPL] = &lpass_gfm_tx_npl.hw, + }, + .num = ARRAY_SIZE(aoncc_gfm_clks), +}; + static struct clk_gfm *audiocc_gfm_clks[] = { [LPASS_CDC_WSA_NPL] = &lpass_gfm_wsa_npl, [LPASS_CDC_WSA_MCLK] = &lpass_gfm_wsa_mclk, @@ -172,6 +226,11 @@ static struct lpass_gfm_data audiocc_data = { .gfm_clks = audiocc_gfm_clks, }; +static struct lpass_gfm_data aoncc_data = { + .onecell_data = &aoncc_hw_onecell_data, + .gfm_clks = aoncc_gfm_clks, +}; + static int lpass_gfm_clk_driver_probe(struct platform_device *pdev) { const struct lpass_gfm_data *data; @@ -233,6 +292,10 @@ pm_clk_err: } static const struct of_device_id lpass_gfm_clk_match_table[] = { + { + .compatible = "qcom,sm8250-lpass-aoncc", + .data = &aoncc_data, + }, { .compatible = "qcom,sm8250-lpass-audiocc", .data = &audiocc_data, -- GitLab From 25519d68344269f9dc58b5bc72f648248a1fafb9 Mon Sep 17 00:00:00 2001 From: Chester Lin Date: Fri, 30 Oct 2020 14:08:39 +0800 Subject: [PATCH 0088/1894] ima: generalize x86/EFI arch glue for other EFI architectures Move the x86 IMA arch code into security/integrity/ima/ima_efi.c, so that we will be able to wire it up for arm64 in a future patch. Co-developed-by: Chester Lin Signed-off-by: Chester Lin Acked-by: Mimi Zohar Signed-off-by: Ard Biesheuvel --- arch/x86/include/asm/efi.h | 3 ++ arch/x86/kernel/Makefile | 2 - security/integrity/ima/Makefile | 4 ++ .../integrity/ima/ima_efi.c | 45 +++++-------------- 4 files changed, 19 insertions(+), 35 deletions(-) rename arch/x86/kernel/ima_arch.c => security/integrity/ima/ima_efi.c (60%) diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 7673dc833232e..c98f78330b092 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -380,4 +380,7 @@ static inline void efi_fake_memmap_early(void) } #endif +#define arch_ima_efi_boot_mode \ + ({ extern struct boot_params boot_params; boot_params.secure_boot; }) + #endif /* _ASM_X86_EFI_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 68608bd892c0d..5eeb808eb0242 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -161,5 +161,3 @@ ifeq ($(CONFIG_X86_64),y) obj-$(CONFIG_MMCONF_FAM10H) += mmconf-fam10h_64.o obj-y += vsmp_64.o endif - -obj-$(CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT) += ima_arch.o diff --git a/security/integrity/ima/Makefile b/security/integrity/ima/Makefile index 67dabca670e23..2499f2485c04f 100644 --- a/security/integrity/ima/Makefile +++ b/security/integrity/ima/Makefile @@ -14,3 +14,7 @@ ima-$(CONFIG_HAVE_IMA_KEXEC) += ima_kexec.o ima-$(CONFIG_IMA_BLACKLIST_KEYRING) += ima_mok.o ima-$(CONFIG_IMA_MEASURE_ASYMMETRIC_KEYS) += ima_asymmetric_keys.o ima-$(CONFIG_IMA_QUEUE_EARLY_BOOT_KEYS) += ima_queue_keys.o + +ifeq ($(CONFIG_EFI),y) +ima-$(CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT) += ima_efi.o +endif diff --git a/arch/x86/kernel/ima_arch.c b/security/integrity/ima/ima_efi.c similarity index 60% rename from arch/x86/kernel/ima_arch.c rename to security/integrity/ima/ima_efi.c index 7dfb1e8089284..71786d01946f4 100644 --- a/arch/x86/kernel/ima_arch.c +++ b/security/integrity/ima/ima_efi.c @@ -5,50 +5,29 @@ #include #include #include +#include -extern struct boot_params boot_params; +#ifndef arch_ima_efi_boot_mode +#define arch_ima_efi_boot_mode efi_secureboot_mode_unset +#endif static enum efi_secureboot_mode get_sb_mode(void) { - efi_guid_t efi_variable_guid = EFI_GLOBAL_VARIABLE_GUID; - efi_status_t status; - unsigned long size; - u8 secboot, setupmode; - - size = sizeof(secboot); + enum efi_secureboot_mode mode; if (!efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE)) { pr_info("ima: secureboot mode unknown, no efi\n"); return efi_secureboot_mode_unknown; } - /* Get variable contents into buffer */ - status = efi.get_variable(L"SecureBoot", &efi_variable_guid, - NULL, &size, &secboot); - if (status == EFI_NOT_FOUND) { + mode = efi_get_secureboot_mode(efi.get_variable); + if (mode == efi_secureboot_mode_disabled) pr_info("ima: secureboot mode disabled\n"); - return efi_secureboot_mode_disabled; - } - - if (status != EFI_SUCCESS) { + else if (mode == efi_secureboot_mode_unknown) pr_info("ima: secureboot mode unknown\n"); - return efi_secureboot_mode_unknown; - } - - size = sizeof(setupmode); - status = efi.get_variable(L"SetupMode", &efi_variable_guid, - NULL, &size, &setupmode); - - if (status != EFI_SUCCESS) /* ignore unknown SetupMode */ - setupmode = 0; - - if (secboot == 0 || setupmode == 1) { - pr_info("ima: secureboot mode disabled\n"); - return efi_secureboot_mode_disabled; - } - - pr_info("ima: secureboot mode enabled\n"); - return efi_secureboot_mode_enabled; + else + pr_info("ima: secureboot mode enabled\n"); + return mode; } bool arch_ima_get_secureboot(void) @@ -57,7 +36,7 @@ bool arch_ima_get_secureboot(void) static bool initialized; if (!initialized && efi_enabled(EFI_BOOT)) { - sb_mode = boot_params.secure_boot; + sb_mode = arch_ima_efi_boot_mode; if (sb_mode == efi_secureboot_mode_unset) sb_mode = get_sb_mode(); -- GitLab From 2b076054e524a92e3a303de487dfb7cf85d8e149 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 9 Nov 2020 12:39:12 +0100 Subject: [PATCH 0089/1894] remove boolinit.cocci 0/1 for booleans is perfectly valid C. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Julia Lawall --- scripts/coccinelle/misc/boolinit.cocci | 195 ------------------------- 1 file changed, 195 deletions(-) delete mode 100644 scripts/coccinelle/misc/boolinit.cocci diff --git a/scripts/coccinelle/misc/boolinit.cocci b/scripts/coccinelle/misc/boolinit.cocci deleted file mode 100644 index fed6126e2b9d3..0000000000000 --- a/scripts/coccinelle/misc/boolinit.cocci +++ /dev/null @@ -1,195 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/// Bool initializations should use true and false. Bool tests don't need -/// comparisons. Based on contributions from Joe Perches, Rusty Russell -/// and Bruce W Allan. -/// -// Confidence: High -// Copyright: (C) 2012 Julia Lawall, INRIA/LIP6. -// Copyright: (C) 2012 Gilles Muller, INRIA/LiP6. -// URL: http://coccinelle.lip6.fr/ -// Options: --include-headers - -virtual patch -virtual context -virtual org -virtual report - -@boolok@ -symbol true,false; -@@ -( -true -| -false -) - -@depends on patch@ -bool t; -@@ - -( -- t == true -+ t -| -- true == t -+ t -| -- t != true -+ !t -| -- true != t -+ !t -| -- t == false -+ !t -| -- false == t -+ !t -| -- t != false -+ t -| -- false != t -+ t -) - -@depends on patch disable is_zero, isnt_zero@ -bool t; -@@ - -( -- t == 1 -+ t -| -- t != 1 -+ !t -| -- t == 0 -+ !t -| -- t != 0 -+ t -) - -@depends on patch && boolok@ -bool b; -@@ -( - b = -- 0 -+ false -| - b = -- 1 -+ true -) - -// --------------------------------------------------------------------- - -@r1 depends on !patch@ -bool t; -position p; -@@ - -( -* t@p == true -| -* true == t@p -| -* t@p != true -| -* true != t@p -| -* t@p == false -| -* false == t@p -| -* t@p != false -| -* false != t@p -) - -@r2 depends on !patch disable is_zero, isnt_zero@ -bool t; -position p; -@@ - -( -* t@p == 1 -| -* t@p != 1 -| -* t@p == 0 -| -* t@p != 0 -) - -@r3 depends on !patch && boolok@ -bool b; -position p1; -@@ -( -*b@p1 = 0 -| -*b@p1 = 1 -) - -@r4 depends on !patch@ -bool b; -position p2; -identifier i; -constant c != {0,1}; -@@ -( - b = i -| -*b@p2 = c -) - -@script:python depends on org@ -p << r1.p; -@@ - -cocci.print_main("WARNING: Comparison to bool",p) - -@script:python depends on org@ -p << r2.p; -@@ - -cocci.print_main("WARNING: Comparison of 0/1 to bool variable",p) - -@script:python depends on org@ -p1 << r3.p1; -@@ - -cocci.print_main("WARNING: Assignment of 0/1 to bool variable",p1) - -@script:python depends on org@ -p2 << r4.p2; -@@ - -cocci.print_main("ERROR: Assignment of non-0/1 constant to bool variable",p2) - -@script:python depends on report@ -p << r1.p; -@@ - -coccilib.report.print_report(p[0],"WARNING: Comparison to bool") - -@script:python depends on report@ -p << r2.p; -@@ - -coccilib.report.print_report(p[0],"WARNING: Comparison of 0/1 to bool variable") - -@script:python depends on report@ -p1 << r3.p1; -@@ - -coccilib.report.print_report(p1[0],"WARNING: Assignment of 0/1 to bool variable") - -@script:python depends on report@ -p2 << r4.p2; -@@ - -coccilib.report.print_report(p2[0],"ERROR: Assignment of non-0/1 constant to bool variable") -- GitLab From 1db9d9ded771389aae5760d20dd1bac113451b9c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 21 Oct 2020 20:48:02 +0100 Subject: [PATCH 0090/1894] KVM: arm64: Add kimg_hyp_va() helper KVM/arm64 is so far unable to deal with function pointers, as the compiler will generate the kernel's runtime VA, and not the linear mapping address, meaning that kern_hyp_va() will give the wrong result. We so far have been able to use PC-relative addressing, but that's not always easy to use, and prevents the implementation of things like the mapping of an index to a pointer. To allow this, provide a new helper that computes the required translation from the kernel image to the HYP VA space. Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_mmu.h | 18 ++++++++++++ arch/arm64/kvm/va_layout.c | 50 ++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 331394306ccee..608c3a83e740d 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -98,6 +98,24 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v) #define kern_hyp_va(v) ((typeof(v))(__kern_hyp_va((unsigned long)(v)))) +static __always_inline unsigned long __kimg_hyp_va(unsigned long v) +{ + unsigned long offset; + + asm volatile(ALTERNATIVE_CB("movz %0, #0\n" + "movk %0, #0, lsl #16\n" + "movk %0, #0, lsl #32\n" + "movk %0, #0, lsl #48\n", + kvm_update_kimg_phys_offset) + : "=r" (offset)); + + return __kern_hyp_va((v - offset) | PAGE_OFFSET); +} + +#define kimg_fn_hyp_va(v) ((typeof(*v))(__kimg_hyp_va((unsigned long)(v)))) + +#define kimg_fn_ptr(x) (typeof(x) **)(x) + /* * We currently support using a VM-specified IPA size. For backward * compatibility, the default IPA size is fixed to 40bits. diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c index e0404bcab019a..1d00d2cb93fd5 100644 --- a/arch/arm64/kvm/va_layout.c +++ b/arch/arm64/kvm/va_layout.c @@ -11,6 +11,7 @@ #include #include #include +#include /* * The LSB of the HYP VA tag @@ -201,3 +202,52 @@ void kvm_patch_vector_branch(struct alt_instr *alt, AARCH64_INSN_BRANCH_NOLINK); *updptr++ = cpu_to_le32(insn); } + +static void generate_mov_q(u64 val, __le32 *origptr, __le32 *updptr, int nr_inst) +{ + u32 insn, oinsn, rd; + + BUG_ON(nr_inst != 4); + + /* Compute target register */ + oinsn = le32_to_cpu(*origptr); + rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, oinsn); + + /* movz rd, #(val & 0xffff) */ + insn = aarch64_insn_gen_movewide(rd, + (u16)val, + 0, + AARCH64_INSN_VARIANT_64BIT, + AARCH64_INSN_MOVEWIDE_ZERO); + *updptr++ = cpu_to_le32(insn); + + /* movk rd, #((val >> 16) & 0xffff), lsl #16 */ + insn = aarch64_insn_gen_movewide(rd, + (u16)(val >> 16), + 16, + AARCH64_INSN_VARIANT_64BIT, + AARCH64_INSN_MOVEWIDE_KEEP); + *updptr++ = cpu_to_le32(insn); + + /* movk rd, #((val >> 32) & 0xffff), lsl #32 */ + insn = aarch64_insn_gen_movewide(rd, + (u16)(val >> 32), + 32, + AARCH64_INSN_VARIANT_64BIT, + AARCH64_INSN_MOVEWIDE_KEEP); + *updptr++ = cpu_to_le32(insn); + + /* movk rd, #((val >> 48) & 0xffff), lsl #48 */ + insn = aarch64_insn_gen_movewide(rd, + (u16)(val >> 48), + 48, + AARCH64_INSN_VARIANT_64BIT, + AARCH64_INSN_MOVEWIDE_KEEP); + *updptr++ = cpu_to_le32(insn); +} + +void kvm_update_kimg_phys_offset(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst) +{ + generate_mov_q(kimage_voffset + PHYS_OFFSET, origptr, updptr, nr_inst); +} -- GitLab From 7cd0aaafaadcaaf280887f8b478393a9fcfc69e3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 11 Oct 2020 18:17:38 +0100 Subject: [PATCH 0091/1894] KVM: arm64: Turn host HVC handling into a dispatch table Now that we can use function pointer, use a dispatch table to call the individual HVC handlers, leading to more maintainable code. Further improvements include helpers to declare the mapping of local variables to values passed in the host context. Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier --- arch/arm64/kernel/image-vars.h | 1 + arch/arm64/kvm/hyp/nvhe/hyp-main.c | 228 +++++++++++++++++------------ 2 files changed, 135 insertions(+), 94 deletions(-) diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index c615b285ff5b3..e8c194f8de887 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -64,6 +64,7 @@ __efistub__ctype = _ctype; /* Alternative callbacks for init-time patching of nVHE hyp code. */ KVM_NVHE_ALIAS(kvm_patch_vector_branch); KVM_NVHE_ALIAS(kvm_update_va_mask); +KVM_NVHE_ALIAS(kvm_update_kimg_phys_offset); /* Global kernel state accessed by nVHE hyp code. */ KVM_NVHE_ALIAS(kvm_vgic_global_state); diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index e2eafe2c93aff..c0543b2e760ea 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -12,106 +12,146 @@ #include #include -#include - -static void handle_host_hcall(unsigned long func_id, - struct kvm_cpu_context *host_ctxt) -{ - unsigned long ret = 0; - - switch (func_id) { - case KVM_HOST_SMCCC_FUNC(__kvm_vcpu_run): { - unsigned long r1 = host_ctxt->regs.regs[1]; - struct kvm_vcpu *vcpu = (struct kvm_vcpu *)r1; - - ret = __kvm_vcpu_run(kern_hyp_va(vcpu)); - break; - } - case KVM_HOST_SMCCC_FUNC(__kvm_flush_vm_context): - __kvm_flush_vm_context(); - break; - case KVM_HOST_SMCCC_FUNC(__kvm_tlb_flush_vmid_ipa): { - unsigned long r1 = host_ctxt->regs.regs[1]; - struct kvm_s2_mmu *mmu = (struct kvm_s2_mmu *)r1; - phys_addr_t ipa = host_ctxt->regs.regs[2]; - int level = host_ctxt->regs.regs[3]; - - __kvm_tlb_flush_vmid_ipa(kern_hyp_va(mmu), ipa, level); - break; - } - case KVM_HOST_SMCCC_FUNC(__kvm_tlb_flush_vmid): { - unsigned long r1 = host_ctxt->regs.regs[1]; - struct kvm_s2_mmu *mmu = (struct kvm_s2_mmu *)r1; - - __kvm_tlb_flush_vmid(kern_hyp_va(mmu)); - break; - } - case KVM_HOST_SMCCC_FUNC(__kvm_tlb_flush_local_vmid): { - unsigned long r1 = host_ctxt->regs.regs[1]; - struct kvm_s2_mmu *mmu = (struct kvm_s2_mmu *)r1; - - __kvm_tlb_flush_local_vmid(kern_hyp_va(mmu)); - break; - } - case KVM_HOST_SMCCC_FUNC(__kvm_timer_set_cntvoff): { - u64 cntvoff = host_ctxt->regs.regs[1]; - - __kvm_timer_set_cntvoff(cntvoff); - break; - } - case KVM_HOST_SMCCC_FUNC(__kvm_enable_ssbs): - __kvm_enable_ssbs(); - break; - case KVM_HOST_SMCCC_FUNC(__vgic_v3_get_ich_vtr_el2): - ret = __vgic_v3_get_ich_vtr_el2(); - break; - case KVM_HOST_SMCCC_FUNC(__vgic_v3_read_vmcr): - ret = __vgic_v3_read_vmcr(); - break; - case KVM_HOST_SMCCC_FUNC(__vgic_v3_write_vmcr): { - u32 vmcr = host_ctxt->regs.regs[1]; - - __vgic_v3_write_vmcr(vmcr); - break; - } - case KVM_HOST_SMCCC_FUNC(__vgic_v3_init_lrs): - __vgic_v3_init_lrs(); - break; - case KVM_HOST_SMCCC_FUNC(__kvm_get_mdcr_el2): - ret = __kvm_get_mdcr_el2(); - break; - case KVM_HOST_SMCCC_FUNC(__vgic_v3_save_aprs): { - unsigned long r1 = host_ctxt->regs.regs[1]; - struct vgic_v3_cpu_if *cpu_if = (struct vgic_v3_cpu_if *)r1; - - __vgic_v3_save_aprs(kern_hyp_va(cpu_if)); - break; - } - case KVM_HOST_SMCCC_FUNC(__vgic_v3_restore_aprs): { - unsigned long r1 = host_ctxt->regs.regs[1]; - struct vgic_v3_cpu_if *cpu_if = (struct vgic_v3_cpu_if *)r1; - - __vgic_v3_restore_aprs(kern_hyp_va(cpu_if)); - break; - } - default: - /* Invalid host HVC. */ - host_ctxt->regs.regs[0] = SMCCC_RET_NOT_SUPPORTED; - return; - } - - host_ctxt->regs.regs[0] = SMCCC_RET_SUCCESS; - host_ctxt->regs.regs[1] = ret; +#define cpu_reg(ctxt, r) (ctxt)->regs.regs[r] +#define DECLARE_REG(type, name, ctxt, reg) \ + type name = (type)cpu_reg(ctxt, (reg)) + +static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(struct kvm_vcpu *, vcpu, host_ctxt, 1); + + cpu_reg(host_ctxt, 1) = __kvm_vcpu_run(kern_hyp_va(vcpu)); +} + +static void handle___kvm_flush_vm_context(struct kvm_cpu_context *host_ctxt) +{ + __kvm_flush_vm_context(); +} + +static void handle___kvm_tlb_flush_vmid_ipa(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1); + DECLARE_REG(phys_addr_t, ipa, host_ctxt, 2); + DECLARE_REG(int, level, host_ctxt, 3); + + __kvm_tlb_flush_vmid_ipa(kern_hyp_va(mmu), ipa, level); +} + +static void handle___kvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1); + + __kvm_tlb_flush_vmid(kern_hyp_va(mmu)); +} + +static void handle___kvm_tlb_flush_local_vmid(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1); + + __kvm_tlb_flush_local_vmid(kern_hyp_va(mmu)); +} + +static void handle___kvm_timer_set_cntvoff(struct kvm_cpu_context *host_ctxt) +{ + __kvm_timer_set_cntvoff(cpu_reg(host_ctxt, 1)); +} + +static void handle___kvm_enable_ssbs(struct kvm_cpu_context *host_ctxt) +{ + __kvm_enable_ssbs(); +} + +static void handle___vgic_v3_get_ich_vtr_el2(struct kvm_cpu_context *host_ctxt) +{ + cpu_reg(host_ctxt, 1) = __vgic_v3_get_ich_vtr_el2(); +} + +static void handle___vgic_v3_read_vmcr(struct kvm_cpu_context *host_ctxt) +{ + cpu_reg(host_ctxt, 1) = __vgic_v3_read_vmcr(); +} + +static void handle___vgic_v3_write_vmcr(struct kvm_cpu_context *host_ctxt) +{ + __vgic_v3_write_vmcr(cpu_reg(host_ctxt, 1)); +} + +static void handle___vgic_v3_init_lrs(struct kvm_cpu_context *host_ctxt) +{ + __vgic_v3_init_lrs(); +} + +static void handle___kvm_get_mdcr_el2(struct kvm_cpu_context *host_ctxt) +{ + cpu_reg(host_ctxt, 1) = __kvm_get_mdcr_el2(); +} + +static void handle___vgic_v3_save_aprs(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1); + + __vgic_v3_save_aprs(kern_hyp_va(cpu_if)); +} + +static void handle___vgic_v3_restore_aprs(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1); + + __vgic_v3_restore_aprs(kern_hyp_va(cpu_if)); +} + +typedef void (*hcall_t)(struct kvm_cpu_context *); + +#define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = kimg_fn_ptr(handle_##x) + +static const hcall_t *host_hcall[] = { + HANDLE_FUNC(__kvm_vcpu_run), + HANDLE_FUNC(__kvm_flush_vm_context), + HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa), + HANDLE_FUNC(__kvm_tlb_flush_vmid), + HANDLE_FUNC(__kvm_tlb_flush_local_vmid), + HANDLE_FUNC(__kvm_timer_set_cntvoff), + HANDLE_FUNC(__kvm_enable_ssbs), + HANDLE_FUNC(__vgic_v3_get_ich_vtr_el2), + HANDLE_FUNC(__vgic_v3_read_vmcr), + HANDLE_FUNC(__vgic_v3_write_vmcr), + HANDLE_FUNC(__vgic_v3_init_lrs), + HANDLE_FUNC(__kvm_get_mdcr_el2), + HANDLE_FUNC(__vgic_v3_save_aprs), + HANDLE_FUNC(__vgic_v3_restore_aprs), +}; + +static void handle_host_hcall(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(unsigned long, id, host_ctxt, 0); + const hcall_t *kfn; + hcall_t hfn; + + id -= KVM_HOST_SMCCC_ID(0); + + if (unlikely(id >= ARRAY_SIZE(host_hcall))) + goto inval; + + kfn = host_hcall[id]; + if (unlikely(!kfn)) + goto inval; + + cpu_reg(host_ctxt, 0) = SMCCC_RET_SUCCESS; + + hfn = kimg_fn_hyp_va(kfn); + hfn(host_ctxt); + + return; +inval: + cpu_reg(host_ctxt, 0) = SMCCC_RET_NOT_SUPPORTED; } void handle_trap(struct kvm_cpu_context *host_ctxt) { u64 esr = read_sysreg_el2(SYS_ESR); - unsigned long func_id; - if (ESR_ELx_EC(esr) != ESR_ELx_EC_HVC64) + if (unlikely(ESR_ELx_EC(esr) != ESR_ELx_EC_HVC64)) hyp_panic(); - func_id = host_ctxt->regs.regs[0]; - handle_host_hcall(func_id, host_ctxt); + handle_host_hcall(host_ctxt); } -- GitLab From f2bd43f1c97f0ef6612cde87aa941248a91c59c6 Mon Sep 17 00:00:00 2001 From: Zou Wei Date: Tue, 3 Nov 2020 19:32:14 +0800 Subject: [PATCH 0092/1894] clk: imx: gate2: Remove unused variable ret This patch fixes below warning reported by coccicheck: ./clk-gate2.c:57:5-8: Unneeded variable: "ret". Return "0" on line 68 Reported-by: Hulk Robot Signed-off-by: Zou Wei Reviewed-by: Stephen Boyd Signed-off-by: Shawn Guo --- drivers/clk/imx/clk-gate2.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/clk/imx/clk-gate2.c b/drivers/clk/imx/clk-gate2.c index 480a184207d56..f16c4019f402e 100644 --- a/drivers/clk/imx/clk-gate2.c +++ b/drivers/clk/imx/clk-gate2.c @@ -54,7 +54,6 @@ static int clk_gate2_enable(struct clk_hw *hw) { struct clk_gate2 *gate = to_clk_gate2(hw); unsigned long flags; - int ret = 0; spin_lock_irqsave(gate->lock, flags); @@ -65,7 +64,7 @@ static int clk_gate2_enable(struct clk_hw *hw) out: spin_unlock_irqrestore(gate->lock, flags); - return ret; + return 0; } static void clk_gate2_disable(struct clk_hw *hw) -- GitLab From bdb08940236c2096ac60c99854ff8b8fdc4d8d02 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 3 Nov 2020 17:24:29 +0100 Subject: [PATCH 0093/1894] clk: imx8mm: drop of_match_ptr from of_device_id table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver can match only via the DT table so the table should be always used and the of_match_ptr does not have any sense (this also allows ACPI matching via PRP0001, even though it might be not relevant here). This fixes compile warning (!CONFIG_OF && !CONFIG_MODULES): drivers/clk/imx/clk-imx8mm.c:641:34: warning: ‘imx8mm_clk_of_match’ defined but not used [-Wunused-const-variable=] Signed-off-by: Krzysztof Kozlowski Acked-by: Stephen Boyd Signed-off-by: Shawn Guo --- drivers/clk/imx/clk-imx8mm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/imx/clk-imx8mm.c b/drivers/clk/imx/clk-imx8mm.c index 0de0be0cf5484..e27890b61fb6b 100644 --- a/drivers/clk/imx/clk-imx8mm.c +++ b/drivers/clk/imx/clk-imx8mm.c @@ -653,7 +653,7 @@ static struct platform_driver imx8mm_clk_driver = { * reloading the driver will crash or break devices. */ .suppress_bind_attrs = true, - .of_match_table = of_match_ptr(imx8mm_clk_of_match), + .of_match_table = imx8mm_clk_of_match, }, }; module_platform_driver(imx8mm_clk_driver); -- GitLab From 8f8a3230929f4ddcd3a5adb659e4b3cf52d9d38e Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 3 Nov 2020 17:24:30 +0100 Subject: [PATCH 0094/1894] clk: imx8mn: drop of_match_ptr from of_device_id table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver can match only via the DT table so the table should be always used and the of_match_ptr does not have any sense (this also allows ACPI matching via PRP0001, even though it might be not relevant here). This fixes compile warning (!CONFIG_OF && !CONFIG_MODULES): drivers/clk/imx/clk-imx8mn.c:592:34: warning: ‘imx8mn_clk_of_match’ defined but not used [-Wunused-const-variable=] Signed-off-by: Krzysztof Kozlowski Acked-by: Stephen Boyd Signed-off-by: Shawn Guo --- drivers/clk/imx/clk-imx8mn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/imx/clk-imx8mn.c b/drivers/clk/imx/clk-imx8mn.c index e984de543f0bc..18f81419f355d 100644 --- a/drivers/clk/imx/clk-imx8mn.c +++ b/drivers/clk/imx/clk-imx8mn.c @@ -604,7 +604,7 @@ static struct platform_driver imx8mn_clk_driver = { * reloading the driver will crash or break devices. */ .suppress_bind_attrs = true, - .of_match_table = of_match_ptr(imx8mn_clk_of_match), + .of_match_table = imx8mn_clk_of_match, }, }; module_platform_driver(imx8mn_clk_driver); -- GitLab From f32e42f09270a5298653ed6d8079fa7fddb6b393 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 3 Nov 2020 17:24:31 +0100 Subject: [PATCH 0095/1894] clk: imx8mp: drop of_match_ptr from of_device_id table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver can match only via the DT table so the table should be always used and the of_match_ptr does not have any sense (this also allows ACPI matching via PRP0001, even though it might be not relevant here). This fixes compile warning (!CONFIG_OF && !CONFIG_MODULES): drivers/clk/imx/clk-imx8mp.c:751:34: warning: ‘imx8mp_clk_of_match’ defined but not used [-Wunused-const-variable=] Reported-by: kernel test robot Signed-off-by: Krzysztof Kozlowski Acked-by: Stephen Boyd Signed-off-by: Shawn Guo --- drivers/clk/imx/clk-imx8mp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/imx/clk-imx8mp.c b/drivers/clk/imx/clk-imx8mp.c index 3cb2bc46eae0a..165bfe23fb341 100644 --- a/drivers/clk/imx/clk-imx8mp.c +++ b/drivers/clk/imx/clk-imx8mp.c @@ -763,7 +763,7 @@ static struct platform_driver imx8mp_clk_driver = { * reloading the driver will crash or break devices. */ .suppress_bind_attrs = true, - .of_match_table = of_match_ptr(imx8mp_clk_of_match), + .of_match_table = imx8mp_clk_of_match, }, }; module_platform_driver(imx8mp_clk_driver); -- GitLab From 00cb754ac62253c84ea969c8d0d48884111ad909 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 3 Nov 2020 17:24:32 +0100 Subject: [PATCH 0096/1894] clk: imx8mq: drop of_match_ptr from of_device_id table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver can match only via the DT table so the table should be always used and the of_match_ptr does not have any sense (this also allows ACPI matching via PRP0001, even though it might be not relevant here). This fixes compile warning (!CONFIG_OF && !CONFIG_MODULES): drivers/clk/imx/clk-imx8mq.c:626:34: warning: ‘imx8mq_clk_of_match’ defined but not used [-Wunused-const-variable=] Signed-off-by: Krzysztof Kozlowski Acked-by: Stephen Boyd Signed-off-by: Shawn Guo --- drivers/clk/imx/clk-imx8mq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/imx/clk-imx8mq.c b/drivers/clk/imx/clk-imx8mq.c index 8265d1d48af42..75186888ecae0 100644 --- a/drivers/clk/imx/clk-imx8mq.c +++ b/drivers/clk/imx/clk-imx8mq.c @@ -639,7 +639,7 @@ static struct platform_driver imx8mq_clk_driver = { * reloading the driver will crash or break devices. */ .suppress_bind_attrs = true, - .of_match_table = of_match_ptr(imx8mq_clk_of_match), + .of_match_table = imx8mq_clk_of_match, }, }; module_platform_driver(imx8mq_clk_driver); -- GitLab From 550b562a153f84282c60fc3cb0d98f4e5609f0b4 Mon Sep 17 00:00:00 2001 From: Zou Wei Date: Wed, 4 Nov 2020 19:19:31 +0800 Subject: [PATCH 0097/1894] clk: imx: scu: Make pd_np with static keyword Fix the following sparse warning: ./clk-scu.c:23:20: warning: symbol 'pd_np' was not declared. Should it be static? Reported-by: Hulk Robot Signed-off-by: Zou Wei Reviewed-by: Stephen Boyd Signed-off-by: Shawn Guo --- drivers/clk/imx/clk-scu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/imx/clk-scu.c b/drivers/clk/imx/clk-scu.c index d0243c52ccbcf..d10f60e48ece8 100644 --- a/drivers/clk/imx/clk-scu.c +++ b/drivers/clk/imx/clk-scu.c @@ -20,7 +20,7 @@ #define IMX_SIP_SET_CPUFREQ 0x00 static struct imx_sc_ipc *ccm_ipc_handle; -struct device_node *pd_np; +static struct device_node *pd_np; static struct platform_driver imx_clk_scu_driver; struct imx_scu_clk_node { -- GitLab From c22588c99635ac4dace0ce2d55c1e2dc4f13cb54 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 Oct 2020 11:56:50 +0100 Subject: [PATCH 0098/1894] KVM: arm64: Don't adjust PC on SError during SMC trap On SMC trap, the prefered return address is set to that of the SMC instruction itself. It is thus wrong to try and roll it back when an SError occurs while trapping on SMC. It is still necessary on HVC though, as HVC doesn't cause a trap, and sets ELR to returning *after* the HVC. It also became apparent that there is no 16bit encoding for an AArch32 HVC instruction, meaning that the displacement is always 4 bytes, no matter what the ISA is. Take this opportunity to simplify it. Acked-by: Mark Rutland Signed-off-by: Marc Zyngier --- arch/arm64/kvm/handle_exit.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 5d690d60ccad5..79a720657c47f 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -245,15 +245,15 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index) u8 esr_ec = ESR_ELx_EC(kvm_vcpu_get_esr(vcpu)); /* - * HVC/SMC already have an adjusted PC, which we need - * to correct in order to return to after having - * injected the SError. + * HVC already have an adjusted PC, which we need to + * correct in order to return to after having injected + * the SError. + * + * SMC, on the other hand, is *trapped*, meaning its + * preferred return address is the SMC itself. */ - if (esr_ec == ESR_ELx_EC_HVC32 || esr_ec == ESR_ELx_EC_HVC64 || - esr_ec == ESR_ELx_EC_SMC32 || esr_ec == ESR_ELx_EC_SMC64) { - u32 adj = kvm_vcpu_trap_il_is32bit(vcpu) ? 4 : 2; - *vcpu_pc(vcpu) -= adj; - } + if (esr_ec == ESR_ELx_EC_HVC32 || esr_ec == ESR_ELx_EC_HVC64) + *vcpu_pc(vcpu) -= 4; return 1; } -- GitLab From 6ddbc281e2aa21c5917e015a373958455f5eb3c1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 Oct 2020 11:14:38 +0100 Subject: [PATCH 0099/1894] KVM: arm64: Move kvm_vcpu_trap_il_is32bit into kvm_skip_instr32() There is no need to feed the result of kvm_vcpu_trap_il_is32bit() to kvm_skip_instr(), as only AArch32 has a variable length ISA, and this helper can equally be called from kvm_skip_instr32(), reducing the complexity at all the call sites. Acked-by: Mark Rutland Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_emulate.h | 8 ++++---- arch/arm64/kvm/handle_exit.c | 6 +++--- arch/arm64/kvm/hyp/aarch32.c | 4 ++-- arch/arm64/kvm/mmio.c | 2 +- arch/arm64/kvm/mmu.c | 2 +- arch/arm64/kvm/sys_regs.c | 2 +- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 5ef2669ccd6c3..0864f425547db 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -26,7 +26,7 @@ unsigned long vcpu_read_spsr32(const struct kvm_vcpu *vcpu); void vcpu_write_spsr32(struct kvm_vcpu *vcpu, unsigned long v); bool kvm_condition_valid32(const struct kvm_vcpu *vcpu); -void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr); +void kvm_skip_instr32(struct kvm_vcpu *vcpu); void kvm_inject_undefined(struct kvm_vcpu *vcpu); void kvm_inject_vabt(struct kvm_vcpu *vcpu); @@ -472,10 +472,10 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu, return data; /* Leave LE untouched */ } -static __always_inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr) +static __always_inline void kvm_skip_instr(struct kvm_vcpu *vcpu) { if (vcpu_mode_is_32bit(vcpu)) { - kvm_skip_instr32(vcpu, is_wide_instr); + kvm_skip_instr32(vcpu); } else { *vcpu_pc(vcpu) += 4; *vcpu_cpsr(vcpu) &= ~PSR_BTYPE_MASK; @@ -494,7 +494,7 @@ static __always_inline void __kvm_skip_instr(struct kvm_vcpu *vcpu) *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR); vcpu_gp_regs(vcpu)->pstate = read_sysreg_el2(SYS_SPSR); - kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + kvm_skip_instr(vcpu); write_sysreg_el2(vcpu_gp_regs(vcpu)->pstate, SYS_SPSR); write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR); diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 79a720657c47f..30bf8e22df54c 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -61,7 +61,7 @@ static int handle_smc(struct kvm_vcpu *vcpu) * otherwise return to the same address... */ vcpu_set_reg(vcpu, 0, ~0UL); - kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + kvm_skip_instr(vcpu); return 1; } @@ -100,7 +100,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu) kvm_clear_request(KVM_REQ_UNHALT, vcpu); } - kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + kvm_skip_instr(vcpu); return 1; } @@ -221,7 +221,7 @@ static int handle_trap_exceptions(struct kvm_vcpu *vcpu) * that fail their condition code check" */ if (!kvm_condition_valid(vcpu)) { - kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + kvm_skip_instr(vcpu); handled = 1; } else { exit_handle_fn exit_handler; diff --git a/arch/arm64/kvm/hyp/aarch32.c b/arch/arm64/kvm/hyp/aarch32.c index ae56d8a4b3828..f98cbe2626a1c 100644 --- a/arch/arm64/kvm/hyp/aarch32.c +++ b/arch/arm64/kvm/hyp/aarch32.c @@ -123,13 +123,13 @@ static void kvm_adjust_itstate(struct kvm_vcpu *vcpu) * kvm_skip_instr - skip a trapped instruction and proceed to the next * @vcpu: The vcpu pointer */ -void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr) +void kvm_skip_instr32(struct kvm_vcpu *vcpu) { u32 pc = *vcpu_pc(vcpu); bool is_thumb; is_thumb = !!(*vcpu_cpsr(vcpu) & PSR_AA32_T_BIT); - if (is_thumb && !is_wide_instr) + if (is_thumb && !kvm_vcpu_trap_il_is32bit(vcpu)) pc += 2; else pc += 4; diff --git a/arch/arm64/kvm/mmio.c b/arch/arm64/kvm/mmio.c index 6a2826f1bf5e9..7e8eb32ae7d2f 100644 --- a/arch/arm64/kvm/mmio.c +++ b/arch/arm64/kvm/mmio.c @@ -115,7 +115,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu) * The MMIO instruction is emulated and should not be re-executed * in the guest. */ - kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + kvm_skip_instr(vcpu); return 0; } diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 57972bdb213ab..0bec07cf8d06b 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1014,7 +1014,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) * cautious, and skip the instruction. */ if (kvm_is_error_hva(hva) && kvm_vcpu_dabt_is_cm(vcpu)) { - kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + kvm_skip_instr(vcpu); ret = 1; goto out_unlock; } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index fb12d3ef423ad..1232a814ca7f1 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2199,7 +2199,7 @@ static void perform_access(struct kvm_vcpu *vcpu, /* Skip instruction if instructed so */ if (likely(r->access(vcpu, params, r))) - kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + kvm_skip_instr(vcpu); } /* -- GitLab From cdb5e02ed133731f8a6676a389ed40ca303cab7c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 14 Oct 2020 09:29:27 +0100 Subject: [PATCH 0100/1894] KVM: arm64: Make kvm_skip_instr() and co private to HYP In an effort to remove the vcpu PC manipulations from EL1 on nVHE systems, move kvm_skip_instr() to be HYP-specific. EL1's intent to increment PC post emulation is now signalled via a flag in the vcpu structure. Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_emulate.h | 27 +---------- arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/kvm/handle_exit.c | 6 +-- arch/arm64/kvm/hyp/include/hyp/adjust_pc.h | 56 ++++++++++++++++++++++ arch/arm64/kvm/hyp/include/hyp/switch.h | 2 + arch/arm64/kvm/hyp/nvhe/switch.c | 3 ++ arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c | 2 + arch/arm64/kvm/hyp/vgic-v3-sr.c | 2 + arch/arm64/kvm/hyp/vhe/switch.c | 3 ++ arch/arm64/kvm/mmio.c | 2 +- arch/arm64/kvm/mmu.c | 2 +- arch/arm64/kvm/sys_regs.c | 2 +- 12 files changed, 77 insertions(+), 31 deletions(-) create mode 100644 arch/arm64/kvm/hyp/include/hyp/adjust_pc.h diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 0864f425547db..6d2b5d1aa7b3b 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -472,32 +472,9 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu, return data; /* Leave LE untouched */ } -static __always_inline void kvm_skip_instr(struct kvm_vcpu *vcpu) +static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu) { - if (vcpu_mode_is_32bit(vcpu)) { - kvm_skip_instr32(vcpu); - } else { - *vcpu_pc(vcpu) += 4; - *vcpu_cpsr(vcpu) &= ~PSR_BTYPE_MASK; - } - - /* advance the singlestep state machine */ - *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS; -} - -/* - * Skip an instruction which has been emulated at hyp while most guest sysregs - * are live. - */ -static __always_inline void __kvm_skip_instr(struct kvm_vcpu *vcpu) -{ - *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR); - vcpu_gp_regs(vcpu)->pstate = read_sysreg_el2(SYS_SPSR); - - kvm_skip_instr(vcpu); - - write_sysreg_el2(vcpu_gp_regs(vcpu)->pstate, SYS_SPSR); - write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR); + vcpu->arch.flags |= KVM_ARM64_INCREMENT_PC; } #endif /* __ARM64_KVM_EMULATE_H__ */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 781d029b8aa85..7991a1c132541 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -407,6 +407,7 @@ struct kvm_vcpu_arch { #define KVM_ARM64_GUEST_HAS_SVE (1 << 5) /* SVE exposed to guest */ #define KVM_ARM64_VCPU_SVE_FINALIZED (1 << 6) /* SVE config completed */ #define KVM_ARM64_GUEST_HAS_PTRAUTH (1 << 7) /* PTRAUTH exposed to guest */ +#define KVM_ARM64_INCREMENT_PC (1 << 8) /* Increment PC */ #define vcpu_has_sve(vcpu) (system_supports_sve() && \ ((vcpu)->arch.flags & KVM_ARM64_GUEST_HAS_SVE)) diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 30bf8e22df54c..d4e00a864ee6d 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -61,7 +61,7 @@ static int handle_smc(struct kvm_vcpu *vcpu) * otherwise return to the same address... */ vcpu_set_reg(vcpu, 0, ~0UL); - kvm_skip_instr(vcpu); + kvm_incr_pc(vcpu); return 1; } @@ -100,7 +100,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu) kvm_clear_request(KVM_REQ_UNHALT, vcpu); } - kvm_skip_instr(vcpu); + kvm_incr_pc(vcpu); return 1; } @@ -221,7 +221,7 @@ static int handle_trap_exceptions(struct kvm_vcpu *vcpu) * that fail their condition code check" */ if (!kvm_condition_valid(vcpu)) { - kvm_skip_instr(vcpu); + kvm_incr_pc(vcpu); handled = 1; } else { exit_handle_fn exit_handler; diff --git a/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h b/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h new file mode 100644 index 0000000000000..d3043b07e78e7 --- /dev/null +++ b/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Guest PC manipulation helpers + * + * Copyright (C) 2012,2013 - ARM Ltd + * Copyright (C) 2020 - Google LLC + * Author: Marc Zyngier + */ + +#ifndef __ARM64_KVM_HYP_ADJUST_PC_H__ +#define __ARM64_KVM_HYP_ADJUST_PC_H__ + +#include +#include + +static inline void kvm_skip_instr(struct kvm_vcpu *vcpu) +{ + if (vcpu_mode_is_32bit(vcpu)) { + kvm_skip_instr32(vcpu); + } else { + *vcpu_pc(vcpu) += 4; + *vcpu_cpsr(vcpu) &= ~PSR_BTYPE_MASK; + } + + /* advance the singlestep state machine */ + *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS; +} + +/* + * Skip an instruction which has been emulated at hyp while most guest sysregs + * are live. + */ +static inline void __kvm_skip_instr(struct kvm_vcpu *vcpu) +{ + *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR); + vcpu_gp_regs(vcpu)->pstate = read_sysreg_el2(SYS_SPSR); + + kvm_skip_instr(vcpu); + + write_sysreg_el2(vcpu_gp_regs(vcpu)->pstate, SYS_SPSR); + write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR); +} + +/* + * Adjust the guest PC on entry, depending on flags provided by EL1 + * for the purpose of emulation (MMIO, sysreg). + */ +static inline void __adjust_pc(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.flags & KVM_ARM64_INCREMENT_PC) { + kvm_skip_instr(vcpu); + vcpu->arch.flags &= ~KVM_ARM64_INCREMENT_PC; + } +} + +#endif diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 1f875a8f20c47..8b2328f62a074 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -7,6 +7,8 @@ #ifndef __ARM64_KVM_HYP_SWITCH_H__ #define __ARM64_KVM_HYP_SWITCH_H__ +#include + #include #include #include diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 8ae8160bc93ab..3e50ff35aa4f1 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -4,6 +4,7 @@ * Author: Marc Zyngier */ +#include #include #include @@ -189,6 +190,8 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) __sysreg_save_state_nvhe(host_ctxt); + __adjust_pc(vcpu); + /* * We must restore the 32-bit state before the sysregs, thanks * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72). diff --git a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c index bd1bab551d481..8f0585640241e 100644 --- a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c +++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c @@ -4,6 +4,8 @@ * Author: Marc Zyngier */ +#include + #include #include #include diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index 452f4cacd6743..80406f463c28f 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -4,6 +4,8 @@ * Author: Marc Zyngier */ +#include + #include #include #include diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 62546e20b2511..af8e940d0f035 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -4,6 +4,7 @@ * Author: Marc Zyngier */ +#include #include #include @@ -133,6 +134,8 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) __load_guest_stage2(vcpu->arch.hw_mmu); __activate_traps(vcpu); + __adjust_pc(vcpu); + sysreg_restore_guest_state_vhe(guest_ctxt); __debug_switch_to_guest(vcpu); diff --git a/arch/arm64/kvm/mmio.c b/arch/arm64/kvm/mmio.c index 7e8eb32ae7d2f..3e2d8ba11a027 100644 --- a/arch/arm64/kvm/mmio.c +++ b/arch/arm64/kvm/mmio.c @@ -115,7 +115,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu) * The MMIO instruction is emulated and should not be re-executed * in the guest. */ - kvm_skip_instr(vcpu); + kvm_incr_pc(vcpu); return 0; } diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 0bec07cf8d06b..2ecd0c5622a6a 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1014,7 +1014,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) * cautious, and skip the instruction. */ if (kvm_is_error_hva(hva) && kvm_vcpu_dabt_is_cm(vcpu)) { - kvm_skip_instr(vcpu); + kvm_incr_pc(vcpu); ret = 1; goto out_unlock; } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 1232a814ca7f1..6a6f06205ea79 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2199,7 +2199,7 @@ static void perform_access(struct kvm_vcpu *vcpu, /* Skip instruction if instructed so */ if (likely(r->access(vcpu, params, r))) - kvm_skip_instr(vcpu); + kvm_incr_pc(vcpu); } /* -- GitLab From defe21f49bc98b095300752aa1e19bb608f3e97d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 14 Oct 2020 12:12:45 +0100 Subject: [PATCH 0101/1894] KVM: arm64: Move PC rollback on SError to HYP Instead of handling the "PC rollback on SError during HVC" at EL1 (which requires disclosing PC to a potentially untrusted kernel), let's move this fixup to ... fixup_guest_exit(), which is where we do all fixups. Isn't that neat? Signed-off-by: Marc Zyngier --- arch/arm64/kvm/handle_exit.c | 17 ----------------- arch/arm64/kvm/hyp/include/hyp/switch.h | 15 +++++++++++++++ 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index d4e00a864ee6d..f79137ee4274a 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -241,23 +241,6 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index) { struct kvm_run *run = vcpu->run; - if (ARM_SERROR_PENDING(exception_index)) { - u8 esr_ec = ESR_ELx_EC(kvm_vcpu_get_esr(vcpu)); - - /* - * HVC already have an adjusted PC, which we need to - * correct in order to return to after having injected - * the SError. - * - * SMC, on the other hand, is *trapped*, meaning its - * preferred return address is the SMC itself. - */ - if (esr_ec == ESR_ELx_EC_HVC32 || esr_ec == ESR_ELx_EC_HVC64) - *vcpu_pc(vcpu) -= 4; - - return 1; - } - exception_index = ARM_EXCEPTION_CODE(exception_index); switch (exception_index) { diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 8b2328f62a074..84473574c2e7d 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -411,6 +411,21 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ) vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR); + if (ARM_SERROR_PENDING(*exit_code)) { + u8 esr_ec = kvm_vcpu_trap_get_class(vcpu); + + /* + * HVC already have an adjusted PC, which we need to + * correct in order to return to after having injected + * the SError. + * + * SMC, on the other hand, is *trapped*, meaning its + * preferred return address is the SMC itself. + */ + if (esr_ec == ESR_ELx_EC_HVC32 || esr_ec == ESR_ELx_EC_HVC64) + write_sysreg_el2(read_sysreg_el2(SYS_ELR) - 4, SYS_ELR); + } + /* * We're using the raw exception code in order to only process * the trap if no SError is pending. We will come back to the -- GitLab From 21c810017cef75435be8b8f1da2110c6d1fd887b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 14 Oct 2020 19:36:11 +0100 Subject: [PATCH 0102/1894] KVM: arm64: Move VHE direct sysreg accessors into kvm_host.h As we are about to need to access system registers from the HYP code based on their internal encoding, move the direct sysreg accessors to a common include file, with a VHE-specific guard. No functionnal change. Acked-by: Mark Rutland Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 91 +++++++++++++++++++++++++++++++ arch/arm64/kvm/sys_regs.c | 81 --------------------------- 2 files changed, 91 insertions(+), 81 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 7991a1c132541..0672b3db61219 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -439,6 +439,97 @@ struct kvm_vcpu_arch { u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg); void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg); +static inline bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val) +{ + /* + * *** VHE ONLY *** + * + * System registers listed in the switch are not saved on every + * exit from the guest but are only saved on vcpu_put. + * + * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but + * should never be listed below, because the guest cannot modify its + * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's + * thread when emulating cross-VCPU communication. + */ + if (!has_vhe()) + return false; + + switch (reg) { + case CSSELR_EL1: *val = read_sysreg_s(SYS_CSSELR_EL1); break; + case SCTLR_EL1: *val = read_sysreg_s(SYS_SCTLR_EL12); break; + case CPACR_EL1: *val = read_sysreg_s(SYS_CPACR_EL12); break; + case TTBR0_EL1: *val = read_sysreg_s(SYS_TTBR0_EL12); break; + case TTBR1_EL1: *val = read_sysreg_s(SYS_TTBR1_EL12); break; + case TCR_EL1: *val = read_sysreg_s(SYS_TCR_EL12); break; + case ESR_EL1: *val = read_sysreg_s(SYS_ESR_EL12); break; + case AFSR0_EL1: *val = read_sysreg_s(SYS_AFSR0_EL12); break; + case AFSR1_EL1: *val = read_sysreg_s(SYS_AFSR1_EL12); break; + case FAR_EL1: *val = read_sysreg_s(SYS_FAR_EL12); break; + case MAIR_EL1: *val = read_sysreg_s(SYS_MAIR_EL12); break; + case VBAR_EL1: *val = read_sysreg_s(SYS_VBAR_EL12); break; + case CONTEXTIDR_EL1: *val = read_sysreg_s(SYS_CONTEXTIDR_EL12);break; + case TPIDR_EL0: *val = read_sysreg_s(SYS_TPIDR_EL0); break; + case TPIDRRO_EL0: *val = read_sysreg_s(SYS_TPIDRRO_EL0); break; + case TPIDR_EL1: *val = read_sysreg_s(SYS_TPIDR_EL1); break; + case AMAIR_EL1: *val = read_sysreg_s(SYS_AMAIR_EL12); break; + case CNTKCTL_EL1: *val = read_sysreg_s(SYS_CNTKCTL_EL12); break; + case ELR_EL1: *val = read_sysreg_s(SYS_ELR_EL12); break; + case PAR_EL1: *val = read_sysreg_par(); break; + case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break; + case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break; + case DBGVCR32_EL2: *val = read_sysreg_s(SYS_DBGVCR32_EL2); break; + default: return false; + } + + return true; +} + +static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg) +{ + /* + * *** VHE ONLY *** + * + * System registers listed in the switch are not restored on every + * entry to the guest but are only restored on vcpu_load. + * + * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but + * should never be listed below, because the MPIDR should only be set + * once, before running the VCPU, and never changed later. + */ + if (!has_vhe()) + return false; + + switch (reg) { + case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); break; + case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); break; + case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); break; + case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); break; + case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); break; + case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); break; + case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); break; + case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); break; + case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); break; + case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); break; + case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); break; + case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); break; + case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12);break; + case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); break; + case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); break; + case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); break; + case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break; + case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break; + case ELR_EL1: write_sysreg_s(val, SYS_ELR_EL12); break; + case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break; + case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break; + case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break; + case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); break; + default: return false; + } + + return true; +} + /* * CP14 and CP15 live in the same array, as they are backed by the * same system registers. diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 6a6f06205ea79..26c7c25f8a6db 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -64,87 +64,6 @@ static bool write_to_read_only(struct kvm_vcpu *vcpu, return false; } -static bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val) -{ - /* - * System registers listed in the switch are not saved on every - * exit from the guest but are only saved on vcpu_put. - * - * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but - * should never be listed below, because the guest cannot modify its - * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's - * thread when emulating cross-VCPU communication. - */ - switch (reg) { - case CSSELR_EL1: *val = read_sysreg_s(SYS_CSSELR_EL1); break; - case SCTLR_EL1: *val = read_sysreg_s(SYS_SCTLR_EL12); break; - case CPACR_EL1: *val = read_sysreg_s(SYS_CPACR_EL12); break; - case TTBR0_EL1: *val = read_sysreg_s(SYS_TTBR0_EL12); break; - case TTBR1_EL1: *val = read_sysreg_s(SYS_TTBR1_EL12); break; - case TCR_EL1: *val = read_sysreg_s(SYS_TCR_EL12); break; - case ESR_EL1: *val = read_sysreg_s(SYS_ESR_EL12); break; - case AFSR0_EL1: *val = read_sysreg_s(SYS_AFSR0_EL12); break; - case AFSR1_EL1: *val = read_sysreg_s(SYS_AFSR1_EL12); break; - case FAR_EL1: *val = read_sysreg_s(SYS_FAR_EL12); break; - case MAIR_EL1: *val = read_sysreg_s(SYS_MAIR_EL12); break; - case VBAR_EL1: *val = read_sysreg_s(SYS_VBAR_EL12); break; - case CONTEXTIDR_EL1: *val = read_sysreg_s(SYS_CONTEXTIDR_EL12);break; - case TPIDR_EL0: *val = read_sysreg_s(SYS_TPIDR_EL0); break; - case TPIDRRO_EL0: *val = read_sysreg_s(SYS_TPIDRRO_EL0); break; - case TPIDR_EL1: *val = read_sysreg_s(SYS_TPIDR_EL1); break; - case AMAIR_EL1: *val = read_sysreg_s(SYS_AMAIR_EL12); break; - case CNTKCTL_EL1: *val = read_sysreg_s(SYS_CNTKCTL_EL12); break; - case ELR_EL1: *val = read_sysreg_s(SYS_ELR_EL12); break; - case PAR_EL1: *val = read_sysreg_par(); break; - case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break; - case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break; - case DBGVCR32_EL2: *val = read_sysreg_s(SYS_DBGVCR32_EL2); break; - default: return false; - } - - return true; -} - -static bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg) -{ - /* - * System registers listed in the switch are not restored on every - * entry to the guest but are only restored on vcpu_load. - * - * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but - * should never be listed below, because the MPIDR should only be set - * once, before running the VCPU, and never changed later. - */ - switch (reg) { - case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); break; - case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); break; - case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); break; - case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); break; - case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); break; - case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); break; - case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); break; - case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); break; - case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); break; - case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); break; - case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); break; - case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); break; - case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12);break; - case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); break; - case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); break; - case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); break; - case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break; - case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break; - case ELR_EL1: write_sysreg_s(val, SYS_ELR_EL12); break; - case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break; - case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break; - case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break; - case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); break; - default: return false; - } - - return true; -} - u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) { u64 val = 0x8badf00d8badf00d; -- GitLab From e650b64f1a56cbc700f0a2d2ab8d23155757e2f3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 14 Oct 2020 19:42:38 +0100 Subject: [PATCH 0103/1894] KVM: arm64: Add basic hooks for injecting exceptions from EL2 Add the basic infrastructure to describe injection of exceptions into a guest. So far, nothing uses this code path. Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 28 ++++++++++++++++++++-- arch/arm64/kvm/hyp/exception.c | 17 +++++++++++++ arch/arm64/kvm/hyp/include/hyp/adjust_pc.h | 10 ++++++-- arch/arm64/kvm/hyp/nvhe/Makefile | 2 +- arch/arm64/kvm/hyp/vhe/Makefile | 2 +- 5 files changed, 53 insertions(+), 6 deletions(-) create mode 100644 arch/arm64/kvm/hyp/exception.c diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 0672b3db61219..7a1faf917f3ca 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -407,9 +407,33 @@ struct kvm_vcpu_arch { #define KVM_ARM64_GUEST_HAS_SVE (1 << 5) /* SVE exposed to guest */ #define KVM_ARM64_VCPU_SVE_FINALIZED (1 << 6) /* SVE config completed */ #define KVM_ARM64_GUEST_HAS_PTRAUTH (1 << 7) /* PTRAUTH exposed to guest */ -#define KVM_ARM64_INCREMENT_PC (1 << 8) /* Increment PC */ +#define KVM_ARM64_PENDING_EXCEPTION (1 << 8) /* Exception pending */ +#define KVM_ARM64_EXCEPT_MASK (7 << 9) /* Target EL/MODE */ -#define vcpu_has_sve(vcpu) (system_supports_sve() && \ +/* + * When KVM_ARM64_PENDING_EXCEPTION is set, KVM_ARM64_EXCEPT_MASK can + * take the following values: + * + * For AArch32 EL1: + */ +#define KVM_ARM64_EXCEPT_AA32_UND (0 << 9) +#define KVM_ARM64_EXCEPT_AA32_IABT (1 << 9) +#define KVM_ARM64_EXCEPT_AA32_DABT (2 << 9) +/* For AArch64: */ +#define KVM_ARM64_EXCEPT_AA64_ELx_SYNC (0 << 9) +#define KVM_ARM64_EXCEPT_AA64_ELx_IRQ (1 << 9) +#define KVM_ARM64_EXCEPT_AA64_ELx_FIQ (2 << 9) +#define KVM_ARM64_EXCEPT_AA64_ELx_SERR (3 << 9) +#define KVM_ARM64_EXCEPT_AA64_EL1 (0 << 11) +#define KVM_ARM64_EXCEPT_AA64_EL2 (1 << 11) + +/* + * Overlaps with KVM_ARM64_EXCEPT_MASK on purpose so that it can't be + * set together with an exception... + */ +#define KVM_ARM64_INCREMENT_PC (1 << 9) /* Increment PC */ + +#define vcpu_has_sve(vcpu) (system_supports_sve() && \ ((vcpu)->arch.flags & KVM_ARM64_GUEST_HAS_SVE)) #ifdef CONFIG_ARM64_PTR_AUTH diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c new file mode 100644 index 0000000000000..6533a92708507 --- /dev/null +++ b/arch/arm64/kvm/hyp/exception.c @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Fault injection for both 32 and 64bit guests. + * + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * Based on arch/arm/kvm/emulate.c + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + */ + +#include + +void kvm_inject_exception(struct kvm_vcpu *vcpu) +{ +} diff --git a/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h b/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h index d3043b07e78e7..b1f60923a8feb 100644 --- a/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h +++ b/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h @@ -13,6 +13,8 @@ #include #include +void kvm_inject_exception(struct kvm_vcpu *vcpu); + static inline void kvm_skip_instr(struct kvm_vcpu *vcpu) { if (vcpu_mode_is_32bit(vcpu)) { @@ -43,11 +45,15 @@ static inline void __kvm_skip_instr(struct kvm_vcpu *vcpu) /* * Adjust the guest PC on entry, depending on flags provided by EL1 - * for the purpose of emulation (MMIO, sysreg). + * for the purpose of emulation (MMIO, sysreg) or exception injection. */ static inline void __adjust_pc(struct kvm_vcpu *vcpu) { - if (vcpu->arch.flags & KVM_ARM64_INCREMENT_PC) { + if (vcpu->arch.flags & KVM_ARM64_PENDING_EXCEPTION) { + kvm_inject_exception(vcpu); + vcpu->arch.flags &= ~(KVM_ARM64_PENDING_EXCEPTION | + KVM_ARM64_EXCEPT_MASK); + } else if (vcpu->arch.flags & KVM_ARM64_INCREMENT_PC) { kvm_skip_instr(vcpu); vcpu->arch.flags &= ~KVM_ARM64_INCREMENT_PC; } diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index ddde15fe85f2f..77b8c4e06f2f6 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -8,7 +8,7 @@ ccflags-y := -D__KVM_NVHE_HYPERVISOR__ obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o hyp-main.o obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ - ../fpsimd.o ../hyp-entry.o + ../fpsimd.o ../hyp-entry.o ../exception.o ## ## Build rules for compiling nVHE hyp code diff --git a/arch/arm64/kvm/hyp/vhe/Makefile b/arch/arm64/kvm/hyp/vhe/Makefile index 461e97c375cc7..96bec0ecf9dd9 100644 --- a/arch/arm64/kvm/hyp/vhe/Makefile +++ b/arch/arm64/kvm/hyp/vhe/Makefile @@ -8,4 +8,4 @@ ccflags-y := -D__KVM_VHE_HYPERVISOR__ obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ - ../fpsimd.o ../hyp-entry.o + ../fpsimd.o ../hyp-entry.o ../exception.o -- GitLab From bb666c472ca25efb38d1163131cc01546b3a653a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 14 Oct 2020 19:52:29 +0100 Subject: [PATCH 0104/1894] KVM: arm64: Inject AArch64 exceptions from HYP Move the AArch64 exception injection code from EL1 to HYP, leaving only the ESR_EL1 updates to EL1. In order to come with the differences between VHE and nVHE, two set of system register accessors are provided. SPSR, ELR, PC and PSTATE are now completely handled in the hypervisor. Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_emulate.h | 12 +++ arch/arm64/kvm/hyp/exception.c | 136 +++++++++++++++++++++++++++ arch/arm64/kvm/inject_fault.c | 114 ++-------------------- 3 files changed, 154 insertions(+), 108 deletions(-) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 6d2b5d1aa7b3b..736a342dadf7c 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -21,6 +21,18 @@ #include #include +#define CURRENT_EL_SP_EL0_VECTOR 0x0 +#define CURRENT_EL_SP_ELx_VECTOR 0x200 +#define LOWER_EL_AArch64_VECTOR 0x400 +#define LOWER_EL_AArch32_VECTOR 0x600 + +enum exception_type { + except_type_sync = 0, + except_type_irq = 0x80, + except_type_fiq = 0x100, + except_type_serror = 0x180, +}; + unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num); unsigned long vcpu_read_spsr32(const struct kvm_vcpu *vcpu); void vcpu_write_spsr32(struct kvm_vcpu *vcpu, unsigned long v); diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c index 6533a92708507..94d6d823424d7 100644 --- a/arch/arm64/kvm/hyp/exception.c +++ b/arch/arm64/kvm/hyp/exception.c @@ -11,7 +11,143 @@ */ #include +#include +#include + +#if !defined (__KVM_NVHE_HYPERVISOR__) && !defined (__KVM_VHE_HYPERVISOR__) +#error Hypervisor code only! +#endif + +static inline u64 __vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) +{ + u64 val; + + if (__vcpu_read_sys_reg_from_cpu(reg, &val)) + return val; + + return __vcpu_sys_reg(vcpu, reg); +} + +static inline void __vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) +{ + if (__vcpu_write_sys_reg_to_cpu(val, reg)) + return; + + __vcpu_sys_reg(vcpu, reg) = val; +} + +static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, u64 val) +{ + write_sysreg_el1(val, SYS_SPSR); +} + +/* + * This performs the exception entry at a given EL (@target_mode), stashing PC + * and PSTATE into ELR and SPSR respectively, and compute the new PC/PSTATE. + * The EL passed to this function *must* be a non-secure, privileged mode with + * bit 0 being set (PSTATE.SP == 1). + * + * When an exception is taken, most PSTATE fields are left unchanged in the + * handler. However, some are explicitly overridden (e.g. M[4:0]). Luckily all + * of the inherited bits have the same position in the AArch64/AArch32 SPSR_ELx + * layouts, so we don't need to shuffle these for exceptions from AArch32 EL0. + * + * For the SPSR_ELx layout for AArch64, see ARM DDI 0487E.a page C5-429. + * For the SPSR_ELx layout for AArch32, see ARM DDI 0487E.a page C5-426. + * + * Here we manipulate the fields in order of the AArch64 SPSR_ELx layout, from + * MSB to LSB. + */ +static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode, + enum exception_type type) +{ + unsigned long sctlr, vbar, old, new, mode; + u64 exc_offset; + + mode = *vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT); + + if (mode == target_mode) + exc_offset = CURRENT_EL_SP_ELx_VECTOR; + else if ((mode | PSR_MODE_THREAD_BIT) == target_mode) + exc_offset = CURRENT_EL_SP_EL0_VECTOR; + else if (!(mode & PSR_MODE32_BIT)) + exc_offset = LOWER_EL_AArch64_VECTOR; + else + exc_offset = LOWER_EL_AArch32_VECTOR; + + switch (target_mode) { + case PSR_MODE_EL1h: + vbar = __vcpu_read_sys_reg(vcpu, VBAR_EL1); + sctlr = __vcpu_read_sys_reg(vcpu, SCTLR_EL1); + __vcpu_write_sys_reg(vcpu, *vcpu_pc(vcpu), ELR_EL1); + break; + default: + /* Don't do that */ + BUG(); + } + + *vcpu_pc(vcpu) = vbar + exc_offset + type; + + old = *vcpu_cpsr(vcpu); + new = 0; + + new |= (old & PSR_N_BIT); + new |= (old & PSR_Z_BIT); + new |= (old & PSR_C_BIT); + new |= (old & PSR_V_BIT); + + // TODO: TCO (if/when ARMv8.5-MemTag is exposed to guests) + + new |= (old & PSR_DIT_BIT); + + // PSTATE.UAO is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, page D5-2579. + + // PSTATE.PAN is unchanged unless SCTLR_ELx.SPAN == 0b0 + // SCTLR_ELx.SPAN is RES1 when ARMv8.1-PAN is not implemented + // See ARM DDI 0487E.a, page D5-2578. + new |= (old & PSR_PAN_BIT); + if (!(sctlr & SCTLR_EL1_SPAN)) + new |= PSR_PAN_BIT; + + // PSTATE.SS is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, page D2-2452. + + // PSTATE.IL is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, page D1-2306. + + // PSTATE.SSBS is set to SCTLR_ELx.DSSBS upon any exception to AArch64 + // See ARM DDI 0487E.a, page D13-3258 + if (sctlr & SCTLR_ELx_DSSBS) + new |= PSR_SSBS_BIT; + + // PSTATE.BTYPE is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, pages D1-2293 to D1-2294. + + new |= PSR_D_BIT; + new |= PSR_A_BIT; + new |= PSR_I_BIT; + new |= PSR_F_BIT; + + new |= target_mode; + + *vcpu_cpsr(vcpu) = new; + __vcpu_write_spsr(vcpu, old); +} void kvm_inject_exception(struct kvm_vcpu *vcpu) { + switch (vcpu->arch.flags & KVM_ARM64_EXCEPT_MASK) { + case (KVM_ARM64_EXCEPT_AA64_ELx_SYNC | + KVM_ARM64_EXCEPT_AA64_EL1): + enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync); + break; + default: + /* + * Only EL1_SYNC makes sense so far, EL2_{SYNC,IRQ} + * will be implemented at some point. Everything + * else gets silently ignored. + */ + break; + } } diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index 34a96ab244fab..8862431f8e3b9 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -14,119 +14,15 @@ #include #include -#define CURRENT_EL_SP_EL0_VECTOR 0x0 -#define CURRENT_EL_SP_ELx_VECTOR 0x200 -#define LOWER_EL_AArch64_VECTOR 0x400 -#define LOWER_EL_AArch32_VECTOR 0x600 - -enum exception_type { - except_type_sync = 0, - except_type_irq = 0x80, - except_type_fiq = 0x100, - except_type_serror = 0x180, -}; - -/* - * This performs the exception entry at a given EL (@target_mode), stashing PC - * and PSTATE into ELR and SPSR respectively, and compute the new PC/PSTATE. - * The EL passed to this function *must* be a non-secure, privileged mode with - * bit 0 being set (PSTATE.SP == 1). - * - * When an exception is taken, most PSTATE fields are left unchanged in the - * handler. However, some are explicitly overridden (e.g. M[4:0]). Luckily all - * of the inherited bits have the same position in the AArch64/AArch32 SPSR_ELx - * layouts, so we don't need to shuffle these for exceptions from AArch32 EL0. - * - * For the SPSR_ELx layout for AArch64, see ARM DDI 0487E.a page C5-429. - * For the SPSR_ELx layout for AArch32, see ARM DDI 0487E.a page C5-426. - * - * Here we manipulate the fields in order of the AArch64 SPSR_ELx layout, from - * MSB to LSB. - */ -static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode, - enum exception_type type) -{ - unsigned long sctlr, vbar, old, new, mode; - u64 exc_offset; - - mode = *vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT); - - if (mode == target_mode) - exc_offset = CURRENT_EL_SP_ELx_VECTOR; - else if ((mode | PSR_MODE_THREAD_BIT) == target_mode) - exc_offset = CURRENT_EL_SP_EL0_VECTOR; - else if (!(mode & PSR_MODE32_BIT)) - exc_offset = LOWER_EL_AArch64_VECTOR; - else - exc_offset = LOWER_EL_AArch32_VECTOR; - - switch (target_mode) { - case PSR_MODE_EL1h: - vbar = vcpu_read_sys_reg(vcpu, VBAR_EL1); - sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1); - vcpu_write_sys_reg(vcpu, *vcpu_pc(vcpu), ELR_EL1); - break; - default: - /* Don't do that */ - BUG(); - } - - *vcpu_pc(vcpu) = vbar + exc_offset + type; - - old = *vcpu_cpsr(vcpu); - new = 0; - - new |= (old & PSR_N_BIT); - new |= (old & PSR_Z_BIT); - new |= (old & PSR_C_BIT); - new |= (old & PSR_V_BIT); - - // TODO: TCO (if/when ARMv8.5-MemTag is exposed to guests) - - new |= (old & PSR_DIT_BIT); - - // PSTATE.UAO is set to zero upon any exception to AArch64 - // See ARM DDI 0487E.a, page D5-2579. - - // PSTATE.PAN is unchanged unless SCTLR_ELx.SPAN == 0b0 - // SCTLR_ELx.SPAN is RES1 when ARMv8.1-PAN is not implemented - // See ARM DDI 0487E.a, page D5-2578. - new |= (old & PSR_PAN_BIT); - if (!(sctlr & SCTLR_EL1_SPAN)) - new |= PSR_PAN_BIT; - - // PSTATE.SS is set to zero upon any exception to AArch64 - // See ARM DDI 0487E.a, page D2-2452. - - // PSTATE.IL is set to zero upon any exception to AArch64 - // See ARM DDI 0487E.a, page D1-2306. - - // PSTATE.SSBS is set to SCTLR_ELx.DSSBS upon any exception to AArch64 - // See ARM DDI 0487E.a, page D13-3258 - if (sctlr & SCTLR_ELx_DSSBS) - new |= PSR_SSBS_BIT; - - // PSTATE.BTYPE is set to zero upon any exception to AArch64 - // See ARM DDI 0487E.a, pages D1-2293 to D1-2294. - - new |= PSR_D_BIT; - new |= PSR_A_BIT; - new |= PSR_I_BIT; - new |= PSR_F_BIT; - - new |= target_mode; - - *vcpu_cpsr(vcpu) = new; - vcpu_write_spsr(vcpu, old); -} - static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr) { unsigned long cpsr = *vcpu_cpsr(vcpu); bool is_aarch32 = vcpu_mode_is_32bit(vcpu); u32 esr = 0; - enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync); + vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA64_EL1 | + KVM_ARM64_EXCEPT_AA64_ELx_SYNC | + KVM_ARM64_PENDING_EXCEPTION); vcpu_write_sys_reg(vcpu, addr, FAR_EL1); @@ -156,7 +52,9 @@ static void inject_undef64(struct kvm_vcpu *vcpu) { u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT); - enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync); + vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA64_EL1 | + KVM_ARM64_EXCEPT_AA64_ELx_SYNC | + KVM_ARM64_PENDING_EXCEPTION); /* * Build an unknown exception, depending on the instruction -- GitLab From 41613b519ce78bfe1328b8bd693944e80fc8b6c3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 14 Oct 2020 19:53:49 +0100 Subject: [PATCH 0105/1894] KVM: arm64: Inject AArch32 exceptions from HYP Similarly to what has been done for AArch64, move the AArch32 exception injection to HYP. In order to not use the regmap selection code at EL2, simplify the code populating the target mode's LR register by useing the compatibility aliases for LR_abt and LR_und. We also introduce new accessors for SPSR_abt and SPSR_und, and move VBAR/SCTLR to using the AArch64 accessors (the use of the AArch32 names was an ARMv7 leftover). Acked-by: Mark Rutland Signed-off-by: Marc Zyngier --- arch/arm64/kvm/aarch32.c | 149 +----------------------- arch/arm64/kvm/hyp/exception.c | 200 +++++++++++++++++++++++++++++++-- 2 files changed, 195 insertions(+), 154 deletions(-) diff --git a/arch/arm64/kvm/aarch32.c b/arch/arm64/kvm/aarch32.c index 40a62a99fbf86..ad453b47c5174 100644 --- a/arch/arm64/kvm/aarch32.c +++ b/arch/arm64/kvm/aarch32.c @@ -19,20 +19,6 @@ #define DFSR_FSC_EXTABT_nLPAE 0x08 #define DFSR_LPAE BIT(9) -/* - * Table taken from ARMv8 ARM DDI0487B-B, table G1-10. - */ -static const u8 return_offsets[8][2] = { - [0] = { 0, 0 }, /* Reset, unused */ - [1] = { 4, 2 }, /* Undefined */ - [2] = { 0, 0 }, /* SVC, unused */ - [3] = { 4, 4 }, /* Prefetch abort */ - [4] = { 8, 8 }, /* Data abort */ - [5] = { 0, 0 }, /* HVC, unused */ - [6] = { 4, 4 }, /* IRQ, unused */ - [7] = { 4, 4 }, /* FIQ, unused */ -}; - static bool pre_fault_synchronize(struct kvm_vcpu *vcpu) { preempt_disable(); @@ -53,132 +39,10 @@ static void post_fault_synchronize(struct kvm_vcpu *vcpu, bool loaded) } } -/* - * When an exception is taken, most CPSR fields are left unchanged in the - * handler. However, some are explicitly overridden (e.g. M[4:0]). - * - * The SPSR/SPSR_ELx layouts differ, and the below is intended to work with - * either format. Note: SPSR.J bit doesn't exist in SPSR_ELx, but this bit was - * obsoleted by the ARMv7 virtualization extensions and is RES0. - * - * For the SPSR layout seen from AArch32, see: - * - ARM DDI 0406C.d, page B1-1148 - * - ARM DDI 0487E.a, page G8-6264 - * - * For the SPSR_ELx layout for AArch32 seen from AArch64, see: - * - ARM DDI 0487E.a, page C5-426 - * - * Here we manipulate the fields in order of the AArch32 SPSR_ELx layout, from - * MSB to LSB. - */ -static unsigned long get_except32_cpsr(struct kvm_vcpu *vcpu, u32 mode) -{ - u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR); - unsigned long old, new; - - old = *vcpu_cpsr(vcpu); - new = 0; - - new |= (old & PSR_AA32_N_BIT); - new |= (old & PSR_AA32_Z_BIT); - new |= (old & PSR_AA32_C_BIT); - new |= (old & PSR_AA32_V_BIT); - new |= (old & PSR_AA32_Q_BIT); - - // CPSR.IT[7:0] are set to zero upon any exception - // See ARM DDI 0487E.a, section G1.12.3 - // See ARM DDI 0406C.d, section B1.8.3 - - new |= (old & PSR_AA32_DIT_BIT); - - // CPSR.SSBS is set to SCTLR.DSSBS upon any exception - // See ARM DDI 0487E.a, page G8-6244 - if (sctlr & BIT(31)) - new |= PSR_AA32_SSBS_BIT; - - // CPSR.PAN is unchanged unless SCTLR.SPAN == 0b0 - // SCTLR.SPAN is RES1 when ARMv8.1-PAN is not implemented - // See ARM DDI 0487E.a, page G8-6246 - new |= (old & PSR_AA32_PAN_BIT); - if (!(sctlr & BIT(23))) - new |= PSR_AA32_PAN_BIT; - - // SS does not exist in AArch32, so ignore - - // CPSR.IL is set to zero upon any exception - // See ARM DDI 0487E.a, page G1-5527 - - new |= (old & PSR_AA32_GE_MASK); - - // CPSR.IT[7:0] are set to zero upon any exception - // See prior comment above - - // CPSR.E is set to SCTLR.EE upon any exception - // See ARM DDI 0487E.a, page G8-6245 - // See ARM DDI 0406C.d, page B4-1701 - if (sctlr & BIT(25)) - new |= PSR_AA32_E_BIT; - - // CPSR.A is unchanged upon an exception to Undefined, Supervisor - // CPSR.A is set upon an exception to other modes - // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 - // See ARM DDI 0406C.d, page B1-1182 - new |= (old & PSR_AA32_A_BIT); - if (mode != PSR_AA32_MODE_UND && mode != PSR_AA32_MODE_SVC) - new |= PSR_AA32_A_BIT; - - // CPSR.I is set upon any exception - // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 - // See ARM DDI 0406C.d, page B1-1182 - new |= PSR_AA32_I_BIT; - - // CPSR.F is set upon an exception to FIQ - // CPSR.F is unchanged upon an exception to other modes - // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 - // See ARM DDI 0406C.d, page B1-1182 - new |= (old & PSR_AA32_F_BIT); - if (mode == PSR_AA32_MODE_FIQ) - new |= PSR_AA32_F_BIT; - - // CPSR.T is set to SCTLR.TE upon any exception - // See ARM DDI 0487E.a, page G8-5514 - // See ARM DDI 0406C.d, page B1-1181 - if (sctlr & BIT(30)) - new |= PSR_AA32_T_BIT; - - new |= mode; - - return new; -} - -static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset) -{ - unsigned long spsr = *vcpu_cpsr(vcpu); - bool is_thumb = (spsr & PSR_AA32_T_BIT); - u32 return_offset = return_offsets[vect_offset >> 2][is_thumb]; - u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR); - - *vcpu_cpsr(vcpu) = get_except32_cpsr(vcpu, mode); - - /* Note: These now point to the banked copies */ - vcpu_write_spsr(vcpu, host_spsr_to_spsr32(spsr)); - *vcpu_reg32(vcpu, 14) = *vcpu_pc(vcpu) + return_offset; - - /* Branch to exception vector */ - if (sctlr & (1 << 13)) - vect_offset += 0xffff0000; - else /* always have security exceptions */ - vect_offset += vcpu_cp15(vcpu, c12_VBAR); - - *vcpu_pc(vcpu) = vect_offset; -} - void kvm_inject_undef32(struct kvm_vcpu *vcpu) { - bool loaded = pre_fault_synchronize(vcpu); - - prepare_fault32(vcpu, PSR_AA32_MODE_UND, 4); - post_fault_synchronize(vcpu, loaded); + vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_UND | + KVM_ARM64_PENDING_EXCEPTION); } /* @@ -188,7 +52,6 @@ void kvm_inject_undef32(struct kvm_vcpu *vcpu) static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, unsigned long addr) { - u32 vect_offset; u32 *far, *fsr; bool is_lpae; bool loaded; @@ -196,17 +59,17 @@ static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, loaded = pre_fault_synchronize(vcpu); if (is_pabt) { - vect_offset = 12; + vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_IABT | + KVM_ARM64_PENDING_EXCEPTION); far = &vcpu_cp15(vcpu, c6_IFAR); fsr = &vcpu_cp15(vcpu, c5_IFSR); } else { /* !iabt */ - vect_offset = 16; + vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_DABT | + KVM_ARM64_PENDING_EXCEPTION); far = &vcpu_cp15(vcpu, c6_DFAR); fsr = &vcpu_cp15(vcpu, c5_DFSR); } - prepare_fault32(vcpu, PSR_AA32_MODE_ABT, vect_offset); - *far = addr; /* Give the guest an IMPLEMENTATION DEFINED exception */ diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c index 94d6d823424d7..73629094f9030 100644 --- a/arch/arm64/kvm/hyp/exception.c +++ b/arch/arm64/kvm/hyp/exception.c @@ -41,6 +41,22 @@ static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, u64 val) write_sysreg_el1(val, SYS_SPSR); } +static void __vcpu_write_spsr_abt(struct kvm_vcpu *vcpu, u64 val) +{ + if (has_vhe()) + write_sysreg(val, spsr_abt); + else + vcpu->arch.ctxt.spsr_abt = val; +} + +static void __vcpu_write_spsr_und(struct kvm_vcpu *vcpu, u64 val) +{ + if (has_vhe()) + write_sysreg(val, spsr_und); + else + vcpu->arch.ctxt.spsr_und = val; +} + /* * This performs the exception entry at a given EL (@target_mode), stashing PC * and PSTATE into ELR and SPSR respectively, and compute the new PC/PSTATE. @@ -135,19 +151,181 @@ static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode, __vcpu_write_spsr(vcpu, old); } -void kvm_inject_exception(struct kvm_vcpu *vcpu) +/* + * When an exception is taken, most CPSR fields are left unchanged in the + * handler. However, some are explicitly overridden (e.g. M[4:0]). + * + * The SPSR/SPSR_ELx layouts differ, and the below is intended to work with + * either format. Note: SPSR.J bit doesn't exist in SPSR_ELx, but this bit was + * obsoleted by the ARMv7 virtualization extensions and is RES0. + * + * For the SPSR layout seen from AArch32, see: + * - ARM DDI 0406C.d, page B1-1148 + * - ARM DDI 0487E.a, page G8-6264 + * + * For the SPSR_ELx layout for AArch32 seen from AArch64, see: + * - ARM DDI 0487E.a, page C5-426 + * + * Here we manipulate the fields in order of the AArch32 SPSR_ELx layout, from + * MSB to LSB. + */ +static unsigned long get_except32_cpsr(struct kvm_vcpu *vcpu, u32 mode) +{ + u32 sctlr = __vcpu_read_sys_reg(vcpu, SCTLR_EL1); + unsigned long old, new; + + old = *vcpu_cpsr(vcpu); + new = 0; + + new |= (old & PSR_AA32_N_BIT); + new |= (old & PSR_AA32_Z_BIT); + new |= (old & PSR_AA32_C_BIT); + new |= (old & PSR_AA32_V_BIT); + new |= (old & PSR_AA32_Q_BIT); + + // CPSR.IT[7:0] are set to zero upon any exception + // See ARM DDI 0487E.a, section G1.12.3 + // See ARM DDI 0406C.d, section B1.8.3 + + new |= (old & PSR_AA32_DIT_BIT); + + // CPSR.SSBS is set to SCTLR.DSSBS upon any exception + // See ARM DDI 0487E.a, page G8-6244 + if (sctlr & BIT(31)) + new |= PSR_AA32_SSBS_BIT; + + // CPSR.PAN is unchanged unless SCTLR.SPAN == 0b0 + // SCTLR.SPAN is RES1 when ARMv8.1-PAN is not implemented + // See ARM DDI 0487E.a, page G8-6246 + new |= (old & PSR_AA32_PAN_BIT); + if (!(sctlr & BIT(23))) + new |= PSR_AA32_PAN_BIT; + + // SS does not exist in AArch32, so ignore + + // CPSR.IL is set to zero upon any exception + // See ARM DDI 0487E.a, page G1-5527 + + new |= (old & PSR_AA32_GE_MASK); + + // CPSR.IT[7:0] are set to zero upon any exception + // See prior comment above + + // CPSR.E is set to SCTLR.EE upon any exception + // See ARM DDI 0487E.a, page G8-6245 + // See ARM DDI 0406C.d, page B4-1701 + if (sctlr & BIT(25)) + new |= PSR_AA32_E_BIT; + + // CPSR.A is unchanged upon an exception to Undefined, Supervisor + // CPSR.A is set upon an exception to other modes + // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 + // See ARM DDI 0406C.d, page B1-1182 + new |= (old & PSR_AA32_A_BIT); + if (mode != PSR_AA32_MODE_UND && mode != PSR_AA32_MODE_SVC) + new |= PSR_AA32_A_BIT; + + // CPSR.I is set upon any exception + // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 + // See ARM DDI 0406C.d, page B1-1182 + new |= PSR_AA32_I_BIT; + + // CPSR.F is set upon an exception to FIQ + // CPSR.F is unchanged upon an exception to other modes + // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 + // See ARM DDI 0406C.d, page B1-1182 + new |= (old & PSR_AA32_F_BIT); + if (mode == PSR_AA32_MODE_FIQ) + new |= PSR_AA32_F_BIT; + + // CPSR.T is set to SCTLR.TE upon any exception + // See ARM DDI 0487E.a, page G8-5514 + // See ARM DDI 0406C.d, page B1-1181 + if (sctlr & BIT(30)) + new |= PSR_AA32_T_BIT; + + new |= mode; + + return new; +} + +/* + * Table taken from ARMv8 ARM DDI0487B-B, table G1-10. + */ +static const u8 return_offsets[8][2] = { + [0] = { 0, 0 }, /* Reset, unused */ + [1] = { 4, 2 }, /* Undefined */ + [2] = { 0, 0 }, /* SVC, unused */ + [3] = { 4, 4 }, /* Prefetch abort */ + [4] = { 8, 8 }, /* Data abort */ + [5] = { 0, 0 }, /* HVC, unused */ + [6] = { 4, 4 }, /* IRQ, unused */ + [7] = { 4, 4 }, /* FIQ, unused */ +}; + +static void enter_exception32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset) { - switch (vcpu->arch.flags & KVM_ARM64_EXCEPT_MASK) { - case (KVM_ARM64_EXCEPT_AA64_ELx_SYNC | - KVM_ARM64_EXCEPT_AA64_EL1): - enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync); + unsigned long spsr = *vcpu_cpsr(vcpu); + bool is_thumb = (spsr & PSR_AA32_T_BIT); + u32 sctlr = __vcpu_read_sys_reg(vcpu, SCTLR_EL1); + u32 return_address; + + *vcpu_cpsr(vcpu) = get_except32_cpsr(vcpu, mode); + return_address = *vcpu_pc(vcpu); + return_address += return_offsets[vect_offset >> 2][is_thumb]; + + /* KVM only enters the ABT and UND modes, so only deal with those */ + switch(mode) { + case PSR_AA32_MODE_ABT: + __vcpu_write_spsr_abt(vcpu, host_spsr_to_spsr32(spsr)); + vcpu_gp_regs(vcpu)->compat_lr_abt = return_address; break; - default: - /* - * Only EL1_SYNC makes sense so far, EL2_{SYNC,IRQ} - * will be implemented at some point. Everything - * else gets silently ignored. - */ + + case PSR_AA32_MODE_UND: + __vcpu_write_spsr_und(vcpu, host_spsr_to_spsr32(spsr)); + vcpu_gp_regs(vcpu)->compat_lr_und = return_address; break; } + + /* Branch to exception vector */ + if (sctlr & (1 << 13)) + vect_offset += 0xffff0000; + else /* always have security exceptions */ + vect_offset += __vcpu_read_sys_reg(vcpu, VBAR_EL1); + + *vcpu_pc(vcpu) = vect_offset; +} + +void kvm_inject_exception(struct kvm_vcpu *vcpu) +{ + if (vcpu_el1_is_32bit(vcpu)) { + switch (vcpu->arch.flags & KVM_ARM64_EXCEPT_MASK) { + case KVM_ARM64_EXCEPT_AA32_UND: + enter_exception32(vcpu, PSR_AA32_MODE_UND, 4); + break; + case KVM_ARM64_EXCEPT_AA32_IABT: + enter_exception32(vcpu, PSR_AA32_MODE_ABT, 12); + break; + case KVM_ARM64_EXCEPT_AA32_DABT: + enter_exception32(vcpu, PSR_AA32_MODE_ABT, 16); + break; + default: + /* Err... */ + break; + } + } else { + switch (vcpu->arch.flags & KVM_ARM64_EXCEPT_MASK) { + case (KVM_ARM64_EXCEPT_AA64_ELx_SYNC | + KVM_ARM64_EXCEPT_AA64_EL1): + enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync); + break; + default: + /* + * Only EL1_SYNC makes sense so far, EL2_{SYNC,IRQ} + * will be implemented at some point. Everything + * else gets silently ignored. + */ + break; + } + } } -- GitLab From 7d76b8a60350ff5e919daacd78ef3c2fe04735a2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 15 Oct 2020 09:24:40 +0100 Subject: [PATCH 0106/1894] KVM: arm64: Remove SPSR manipulation primitives The SPSR setting code is now completely unused, including that dealing with banked AArch32 SPSRs. Cleanup time. Acked-by: Mark Rutland Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_emulate.h | 26 -------- arch/arm64/kvm/regmap.c | 96 ---------------------------- 2 files changed, 122 deletions(-) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 736a342dadf7c..5d957d0e7b69e 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -34,8 +34,6 @@ enum exception_type { }; unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num); -unsigned long vcpu_read_spsr32(const struct kvm_vcpu *vcpu); -void vcpu_write_spsr32(struct kvm_vcpu *vcpu, unsigned long v); bool kvm_condition_valid32(const struct kvm_vcpu *vcpu); void kvm_skip_instr32(struct kvm_vcpu *vcpu); @@ -180,30 +178,6 @@ static __always_inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num, vcpu_gp_regs(vcpu)->regs[reg_num] = val; } -static inline unsigned long vcpu_read_spsr(const struct kvm_vcpu *vcpu) -{ - if (vcpu_mode_is_32bit(vcpu)) - return vcpu_read_spsr32(vcpu); - - if (vcpu->arch.sysregs_loaded_on_cpu) - return read_sysreg_el1(SYS_SPSR); - else - return __vcpu_sys_reg(vcpu, SPSR_EL1); -} - -static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v) -{ - if (vcpu_mode_is_32bit(vcpu)) { - vcpu_write_spsr32(vcpu, v); - return; - } - - if (vcpu->arch.sysregs_loaded_on_cpu) - write_sysreg_el1(v, SYS_SPSR); - else - __vcpu_sys_reg(vcpu, SPSR_EL1) = v; -} - /* * The layout of SPSR for an AArch32 state is different when observed from an * AArch64 SPSR_ELx or an AArch32 SPSR_*. This function generates the AArch32 diff --git a/arch/arm64/kvm/regmap.c b/arch/arm64/kvm/regmap.c index accc1d5fba615..ae7e290bb0177 100644 --- a/arch/arm64/kvm/regmap.c +++ b/arch/arm64/kvm/regmap.c @@ -126,99 +126,3 @@ unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num) return reg_array + vcpu_reg_offsets[mode][reg_num]; } - -/* - * Return the SPSR for the current mode of the virtual CPU. - */ -static int vcpu_spsr32_mode(const struct kvm_vcpu *vcpu) -{ - unsigned long mode = *vcpu_cpsr(vcpu) & PSR_AA32_MODE_MASK; - switch (mode) { - case PSR_AA32_MODE_SVC: return KVM_SPSR_SVC; - case PSR_AA32_MODE_ABT: return KVM_SPSR_ABT; - case PSR_AA32_MODE_UND: return KVM_SPSR_UND; - case PSR_AA32_MODE_IRQ: return KVM_SPSR_IRQ; - case PSR_AA32_MODE_FIQ: return KVM_SPSR_FIQ; - default: BUG(); - } -} - -unsigned long vcpu_read_spsr32(const struct kvm_vcpu *vcpu) -{ - int spsr_idx = vcpu_spsr32_mode(vcpu); - - if (!vcpu->arch.sysregs_loaded_on_cpu) { - switch (spsr_idx) { - case KVM_SPSR_SVC: - return __vcpu_sys_reg(vcpu, SPSR_EL1); - case KVM_SPSR_ABT: - return vcpu->arch.ctxt.spsr_abt; - case KVM_SPSR_UND: - return vcpu->arch.ctxt.spsr_und; - case KVM_SPSR_IRQ: - return vcpu->arch.ctxt.spsr_irq; - case KVM_SPSR_FIQ: - return vcpu->arch.ctxt.spsr_fiq; - } - } - - switch (spsr_idx) { - case KVM_SPSR_SVC: - return read_sysreg_el1(SYS_SPSR); - case KVM_SPSR_ABT: - return read_sysreg(spsr_abt); - case KVM_SPSR_UND: - return read_sysreg(spsr_und); - case KVM_SPSR_IRQ: - return read_sysreg(spsr_irq); - case KVM_SPSR_FIQ: - return read_sysreg(spsr_fiq); - default: - BUG(); - } -} - -void vcpu_write_spsr32(struct kvm_vcpu *vcpu, unsigned long v) -{ - int spsr_idx = vcpu_spsr32_mode(vcpu); - - if (!vcpu->arch.sysregs_loaded_on_cpu) { - switch (spsr_idx) { - case KVM_SPSR_SVC: - __vcpu_sys_reg(vcpu, SPSR_EL1) = v; - break; - case KVM_SPSR_ABT: - vcpu->arch.ctxt.spsr_abt = v; - break; - case KVM_SPSR_UND: - vcpu->arch.ctxt.spsr_und = v; - break; - case KVM_SPSR_IRQ: - vcpu->arch.ctxt.spsr_irq = v; - break; - case KVM_SPSR_FIQ: - vcpu->arch.ctxt.spsr_fiq = v; - break; - } - - return; - } - - switch (spsr_idx) { - case KVM_SPSR_SVC: - write_sysreg_el1(v, SYS_SPSR); - break; - case KVM_SPSR_ABT: - write_sysreg(v, spsr_abt); - break; - case KVM_SPSR_UND: - write_sysreg(v, spsr_und); - break; - case KVM_SPSR_IRQ: - write_sysreg(v, spsr_irq); - break; - case KVM_SPSR_FIQ: - write_sysreg(v, spsr_fiq); - break; - } -} -- GitLab From dcfba399325f919b25854ca17ef1535f5d754fe1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 15 Oct 2020 09:45:24 +0100 Subject: [PATCH 0107/1894] KVM: arm64: Consolidate exception injection Move the AArch32 exception injection code back into the inject_fault.c file, removing the need for a few non-static functions now that AArch32 host support is a thing of the past. Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_emulate.h | 3 - arch/arm64/kvm/Makefile | 2 +- arch/arm64/kvm/aarch32.c | 95 ---------------------------- arch/arm64/kvm/inject_fault.c | 75 +++++++++++++++++++++- 4 files changed, 73 insertions(+), 102 deletions(-) delete mode 100644 arch/arm64/kvm/aarch32.c diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 5d957d0e7b69e..3105bb73f539d 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -42,9 +42,6 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu); void kvm_inject_vabt(struct kvm_vcpu *vcpu); void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); -void kvm_inject_undef32(struct kvm_vcpu *vcpu); -void kvm_inject_dabt32(struct kvm_vcpu *vcpu, unsigned long addr); -void kvm_inject_pabt32(struct kvm_vcpu *vcpu, unsigned long addr); static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) { diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 1504c81fbf5de..9b32a89a25c84 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -16,7 +16,7 @@ kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \ inject_fault.o regmap.o va_layout.o handle_exit.o \ guest.o debug.o reset.o sys_regs.o \ vgic-sys-reg-v3.o fpsimd.o pmu.o \ - aarch32.o arch_timer.o \ + arch_timer.o \ vgic/vgic.o vgic/vgic-init.o \ vgic/vgic-irqfd.o vgic/vgic-v2.o \ vgic/vgic-v3.o vgic/vgic-v4.o \ diff --git a/arch/arm64/kvm/aarch32.c b/arch/arm64/kvm/aarch32.c deleted file mode 100644 index ad453b47c5174..0000000000000 --- a/arch/arm64/kvm/aarch32.c +++ /dev/null @@ -1,95 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * (not much of an) Emulation layer for 32bit guests. - * - * Copyright (C) 2012,2013 - ARM Ltd - * Author: Marc Zyngier - * - * based on arch/arm/kvm/emulate.c - * Copyright (C) 2012 - Virtual Open Systems and Columbia University - * Author: Christoffer Dall - */ - -#include -#include -#include -#include - -#define DFSR_FSC_EXTABT_LPAE 0x10 -#define DFSR_FSC_EXTABT_nLPAE 0x08 -#define DFSR_LPAE BIT(9) - -static bool pre_fault_synchronize(struct kvm_vcpu *vcpu) -{ - preempt_disable(); - if (vcpu->arch.sysregs_loaded_on_cpu) { - kvm_arch_vcpu_put(vcpu); - return true; - } - - preempt_enable(); - return false; -} - -static void post_fault_synchronize(struct kvm_vcpu *vcpu, bool loaded) -{ - if (loaded) { - kvm_arch_vcpu_load(vcpu, smp_processor_id()); - preempt_enable(); - } -} - -void kvm_inject_undef32(struct kvm_vcpu *vcpu) -{ - vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_UND | - KVM_ARM64_PENDING_EXCEPTION); -} - -/* - * Modelled after TakeDataAbortException() and TakePrefetchAbortException - * pseudocode. - */ -static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, - unsigned long addr) -{ - u32 *far, *fsr; - bool is_lpae; - bool loaded; - - loaded = pre_fault_synchronize(vcpu); - - if (is_pabt) { - vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_IABT | - KVM_ARM64_PENDING_EXCEPTION); - far = &vcpu_cp15(vcpu, c6_IFAR); - fsr = &vcpu_cp15(vcpu, c5_IFSR); - } else { /* !iabt */ - vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_DABT | - KVM_ARM64_PENDING_EXCEPTION); - far = &vcpu_cp15(vcpu, c6_DFAR); - fsr = &vcpu_cp15(vcpu, c5_DFSR); - } - - *far = addr; - - /* Give the guest an IMPLEMENTATION DEFINED exception */ - is_lpae = (vcpu_cp15(vcpu, c2_TTBCR) >> 31); - if (is_lpae) { - *fsr = DFSR_LPAE | DFSR_FSC_EXTABT_LPAE; - } else { - /* no need to shuffle FS[4] into DFSR[10] as its 0 */ - *fsr = DFSR_FSC_EXTABT_nLPAE; - } - - post_fault_synchronize(vcpu, loaded); -} - -void kvm_inject_dabt32(struct kvm_vcpu *vcpu, unsigned long addr) -{ - inject_abt32(vcpu, false, addr); -} - -void kvm_inject_pabt32(struct kvm_vcpu *vcpu, unsigned long addr) -{ - inject_abt32(vcpu, true, addr); -} diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index 8862431f8e3b9..e2a2e48ca371e 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -66,6 +66,75 @@ static void inject_undef64(struct kvm_vcpu *vcpu) vcpu_write_sys_reg(vcpu, esr, ESR_EL1); } +#define DFSR_FSC_EXTABT_LPAE 0x10 +#define DFSR_FSC_EXTABT_nLPAE 0x08 +#define DFSR_LPAE BIT(9) + +static bool pre_fault_synchronize(struct kvm_vcpu *vcpu) +{ + preempt_disable(); + if (vcpu->arch.sysregs_loaded_on_cpu) { + kvm_arch_vcpu_put(vcpu); + return true; + } + + preempt_enable(); + return false; +} + +static void post_fault_synchronize(struct kvm_vcpu *vcpu, bool loaded) +{ + if (loaded) { + kvm_arch_vcpu_load(vcpu, smp_processor_id()); + preempt_enable(); + } +} + +static void inject_undef32(struct kvm_vcpu *vcpu) +{ + vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_UND | + KVM_ARM64_PENDING_EXCEPTION); +} + +/* + * Modelled after TakeDataAbortException() and TakePrefetchAbortException + * pseudocode. + */ +static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, + unsigned long addr) +{ + u32 *far, *fsr; + bool is_lpae; + bool loaded; + + loaded = pre_fault_synchronize(vcpu); + + if (is_pabt) { + vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_IABT | + KVM_ARM64_PENDING_EXCEPTION); + far = &vcpu_cp15(vcpu, c6_IFAR); + fsr = &vcpu_cp15(vcpu, c5_IFSR); + } else { /* !iabt */ + vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_DABT | + KVM_ARM64_PENDING_EXCEPTION); + far = &vcpu_cp15(vcpu, c6_DFAR); + fsr = &vcpu_cp15(vcpu, c5_DFSR); + } + + *far = addr; + + /* Give the guest an IMPLEMENTATION DEFINED exception */ + is_lpae = (vcpu_cp15(vcpu, c2_TTBCR) >> 31); + if (is_lpae) { + *fsr = DFSR_LPAE | DFSR_FSC_EXTABT_LPAE; + } else { + /* no need to shuffle FS[4] into DFSR[10] as its 0 */ + *fsr = DFSR_FSC_EXTABT_nLPAE; + } + + post_fault_synchronize(vcpu, loaded); +} + /** * kvm_inject_dabt - inject a data abort into the guest * @vcpu: The VCPU to receive the data abort @@ -77,7 +146,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu) void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr) { if (vcpu_el1_is_32bit(vcpu)) - kvm_inject_dabt32(vcpu, addr); + inject_abt32(vcpu, false, addr); else inject_abt64(vcpu, false, addr); } @@ -93,7 +162,7 @@ void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr) void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr) { if (vcpu_el1_is_32bit(vcpu)) - kvm_inject_pabt32(vcpu, addr); + inject_abt32(vcpu, true, addr); else inject_abt64(vcpu, true, addr); } @@ -108,7 +177,7 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr) void kvm_inject_undefined(struct kvm_vcpu *vcpu) { if (vcpu_el1_is_32bit(vcpu)) - kvm_inject_undef32(vcpu); + inject_undef32(vcpu); else inject_undef64(vcpu); } -- GitLab From 90c1f934ed7141a6d4c202936d12faaeb405fb66 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 16 Oct 2020 18:41:24 +0100 Subject: [PATCH 0108/1894] KVM: arm64: Get rid of the AArch32 register mapping code The only use of the register mapping code was for the sake of the LR mapping, which we trivially solved in a previous patch. Get rid of the whole thing now. Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_emulate.h | 2 - arch/arm64/kvm/Makefile | 2 +- arch/arm64/kvm/guest.c | 28 +++++- arch/arm64/kvm/regmap.c | 128 --------------------------- 4 files changed, 26 insertions(+), 134 deletions(-) delete mode 100644 arch/arm64/kvm/regmap.c diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 3105bb73f539d..c8f550a535162 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -33,8 +33,6 @@ enum exception_type { except_type_serror = 0x180, }; -unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num); - bool kvm_condition_valid32(const struct kvm_vcpu *vcpu); void kvm_skip_instr32(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 9b32a89a25c84..60fd181df6243 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -13,7 +13,7 @@ obj-$(CONFIG_KVM) += hyp/ kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \ $(KVM)/vfio.o $(KVM)/irqchip.o \ arm.o mmu.o mmio.o psci.o perf.o hypercalls.o pvtime.o \ - inject_fault.o regmap.o va_layout.o handle_exit.o \ + inject_fault.o va_layout.o handle_exit.o \ guest.o debug.o reset.o sys_regs.o \ vgic-sys-reg-v3.o fpsimd.o pmu.o \ arch_timer.o \ diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index dfb5218137ca9..3f23f7478d2ad 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -252,10 +252,32 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) memcpy(addr, valp, KVM_REG_SIZE(reg->id)); if (*vcpu_cpsr(vcpu) & PSR_MODE32_BIT) { - int i; + int i, nr_reg; + + switch (*vcpu_cpsr(vcpu)) { + /* + * Either we are dealing with user mode, and only the + * first 15 registers (+ PC) must be narrowed to 32bit. + * AArch32 r0-r14 conveniently map to AArch64 x0-x14. + */ + case PSR_AA32_MODE_USR: + case PSR_AA32_MODE_SYS: + nr_reg = 15; + break; + + /* + * Otherwide, this is a priviledged mode, and *all* the + * registers must be narrowed to 32bit. + */ + default: + nr_reg = 31; + break; + } + + for (i = 0; i < nr_reg; i++) + vcpu_set_reg(vcpu, i, (u32)vcpu_get_reg(vcpu, i)); - for (i = 0; i < 16; i++) - *vcpu_reg32(vcpu, i) = (u32)*vcpu_reg32(vcpu, i); + *vcpu_pc(vcpu) = (u32)*vcpu_pc(vcpu); } out: return err; diff --git a/arch/arm64/kvm/regmap.c b/arch/arm64/kvm/regmap.c deleted file mode 100644 index ae7e290bb0177..0000000000000 --- a/arch/arm64/kvm/regmap.c +++ /dev/null @@ -1,128 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2012,2013 - ARM Ltd - * Author: Marc Zyngier - * - * Derived from arch/arm/kvm/emulate.c: - * Copyright (C) 2012 - Virtual Open Systems and Columbia University - * Author: Christoffer Dall - */ - -#include -#include -#include -#include - -#define VCPU_NR_MODES 6 -#define REG_OFFSET(_reg) \ - (offsetof(struct user_pt_regs, _reg) / sizeof(unsigned long)) - -#define USR_REG_OFFSET(R) REG_OFFSET(compat_usr(R)) - -static const unsigned long vcpu_reg_offsets[VCPU_NR_MODES][16] = { - /* USR Registers */ - { - USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), - USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), - USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), - USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), - USR_REG_OFFSET(12), USR_REG_OFFSET(13), USR_REG_OFFSET(14), - REG_OFFSET(pc) - }, - - /* FIQ Registers */ - { - USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), - USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), - USR_REG_OFFSET(6), USR_REG_OFFSET(7), - REG_OFFSET(compat_r8_fiq), /* r8 */ - REG_OFFSET(compat_r9_fiq), /* r9 */ - REG_OFFSET(compat_r10_fiq), /* r10 */ - REG_OFFSET(compat_r11_fiq), /* r11 */ - REG_OFFSET(compat_r12_fiq), /* r12 */ - REG_OFFSET(compat_sp_fiq), /* r13 */ - REG_OFFSET(compat_lr_fiq), /* r14 */ - REG_OFFSET(pc) - }, - - /* IRQ Registers */ - { - USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), - USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), - USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), - USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), - USR_REG_OFFSET(12), - REG_OFFSET(compat_sp_irq), /* r13 */ - REG_OFFSET(compat_lr_irq), /* r14 */ - REG_OFFSET(pc) - }, - - /* SVC Registers */ - { - USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), - USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), - USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), - USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), - USR_REG_OFFSET(12), - REG_OFFSET(compat_sp_svc), /* r13 */ - REG_OFFSET(compat_lr_svc), /* r14 */ - REG_OFFSET(pc) - }, - - /* ABT Registers */ - { - USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), - USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), - USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), - USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), - USR_REG_OFFSET(12), - REG_OFFSET(compat_sp_abt), /* r13 */ - REG_OFFSET(compat_lr_abt), /* r14 */ - REG_OFFSET(pc) - }, - - /* UND Registers */ - { - USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), - USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), - USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), - USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), - USR_REG_OFFSET(12), - REG_OFFSET(compat_sp_und), /* r13 */ - REG_OFFSET(compat_lr_und), /* r14 */ - REG_OFFSET(pc) - }, -}; - -/* - * Return a pointer to the register number valid in the current mode of - * the virtual CPU. - */ -unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num) -{ - unsigned long *reg_array = (unsigned long *)&vcpu->arch.ctxt.regs; - unsigned long mode = *vcpu_cpsr(vcpu) & PSR_AA32_MODE_MASK; - - switch (mode) { - case PSR_AA32_MODE_USR ... PSR_AA32_MODE_SVC: - mode &= ~PSR_MODE32_BIT; /* 0 ... 3 */ - break; - - case PSR_AA32_MODE_ABT: - mode = 4; - break; - - case PSR_AA32_MODE_UND: - mode = 5; - break; - - case PSR_AA32_MODE_SYS: - mode = 0; /* SYS maps to USR */ - break; - - default: - BUG(); - } - - return reg_array + vcpu_reg_offsets[mode][reg_num]; -} -- GitLab From ca4e514774930f30b66375a974b5edcbebaf0e7e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 10 Nov 2020 11:10:15 +0000 Subject: [PATCH 0109/1894] KVM: arm64: Introduce handling of AArch32 TTBCR2 traps ARMv8.2 introduced TTBCR2, which shares TCR_EL1 with TTBCR. Gracefully handle traps to this register when HCR_EL2.TVM is set. Cc: stable@vger.kernel.org Reported-by: James Morse Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/kvm/sys_regs.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 7a1faf917f3ca..803cb2427b541 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -212,6 +212,7 @@ enum vcpu_sysreg { #define c2_TTBR1 (TTBR1_EL1 * 2) /* Translation Table Base Register 1 */ #define c2_TTBR1_high (c2_TTBR1 + 1) /* TTBR1 top 32 bits */ #define c2_TTBCR (TCR_EL1 * 2) /* Translation Table Base Control R. */ +#define c2_TTBCR2 (c2_TTBCR + 1) /* Translation Table Base Control R. 2 */ #define c3_DACR (DACR32_EL2 * 2)/* Domain Access Control Register */ #define c5_DFSR (ESR_EL1 * 2) /* Data Fault Status Register */ #define c5_IFSR (IFSR32_EL2 * 2)/* Instruction Fault Status Register */ diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 26c7c25f8a6db..afdf18d694cb5 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1925,6 +1925,7 @@ static const struct sys_reg_desc cp15_regs[] = { { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 }, { Op1( 0), CRn( 2), CRm( 0), Op2( 2), access_vm_reg, NULL, c2_TTBCR }, + { Op1( 0), CRn( 2), CRm( 0), Op2( 3), access_vm_reg, NULL, c2_TTBCR2 }, { Op1( 0), CRn( 3), CRm( 0), Op2( 0), access_vm_reg, NULL, c3_DACR }, { Op1( 0), CRn( 5), CRm( 0), Op2( 0), access_vm_reg, NULL, c5_DFSR }, { Op1( 0), CRn( 5), CRm( 0), Op2( 1), access_vm_reg, NULL, c5_IFSR }, -- GitLab From 4ff3fc316d78daa2ed6de2f13616fb33a2926d8e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 27 Oct 2020 22:23:28 +0000 Subject: [PATCH 0110/1894] KVM: arm64: Move AArch32 exceptions over to AArch64 sysregs The use of the AArch32-specific accessors have always been a bit annoying on 64bit, and it is time for a change. Let's move the AArch32 exception injection over to the AArch64 encoding, which requires us to split the two halves of FAR_EL1 into DFAR and IFAR. This enables us to drop the preempt_disable() games on VHE, and to kill the last user of the vcpu_cp15() macro. Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 1 - arch/arm64/kvm/inject_fault.c | 62 ++++++++++--------------------- 2 files changed, 20 insertions(+), 43 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 803cb2427b541..c905c3fcaaa06 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -562,7 +562,6 @@ static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg) #define CPx_BIAS IS_ENABLED(CONFIG_CPU_BIG_ENDIAN) #define vcpu_cp14(v,r) ((v)->arch.ctxt.copro[(r) ^ CPx_BIAS]) -#define vcpu_cp15(v,r) ((v)->arch.ctxt.copro[(r) ^ CPx_BIAS]) struct kvm_vm_stat { ulong remote_tlb_flush; diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index e2a2e48ca371e..b47df73e98d78 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -69,26 +69,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu) #define DFSR_FSC_EXTABT_LPAE 0x10 #define DFSR_FSC_EXTABT_nLPAE 0x08 #define DFSR_LPAE BIT(9) - -static bool pre_fault_synchronize(struct kvm_vcpu *vcpu) -{ - preempt_disable(); - if (vcpu->arch.sysregs_loaded_on_cpu) { - kvm_arch_vcpu_put(vcpu); - return true; - } - - preempt_enable(); - return false; -} - -static void post_fault_synchronize(struct kvm_vcpu *vcpu, bool loaded) -{ - if (loaded) { - kvm_arch_vcpu_load(vcpu, smp_processor_id()); - preempt_enable(); - } -} +#define TTBCR_EAE BIT(31) static void inject_undef32(struct kvm_vcpu *vcpu) { @@ -100,39 +81,36 @@ static void inject_undef32(struct kvm_vcpu *vcpu) * Modelled after TakeDataAbortException() and TakePrefetchAbortException * pseudocode. */ -static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, - unsigned long addr) +static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, u32 addr) { - u32 *far, *fsr; - bool is_lpae; - bool loaded; + u64 far; + u32 fsr; + + /* Give the guest an IMPLEMENTATION DEFINED exception */ + if (vcpu_read_sys_reg(vcpu, TCR_EL1) & TTBCR_EAE) { + fsr = DFSR_LPAE | DFSR_FSC_EXTABT_LPAE; + } else { + /* no need to shuffle FS[4] into DFSR[10] as its 0 */ + fsr = DFSR_FSC_EXTABT_nLPAE; + } - loaded = pre_fault_synchronize(vcpu); + far = vcpu_read_sys_reg(vcpu, FAR_EL1); if (is_pabt) { vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_IABT | KVM_ARM64_PENDING_EXCEPTION); - far = &vcpu_cp15(vcpu, c6_IFAR); - fsr = &vcpu_cp15(vcpu, c5_IFSR); + far &= GENMASK(31, 0); + far |= (u64)addr << 32; + vcpu_write_sys_reg(vcpu, fsr, IFSR32_EL2); } else { /* !iabt */ vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_DABT | KVM_ARM64_PENDING_EXCEPTION); - far = &vcpu_cp15(vcpu, c6_DFAR); - fsr = &vcpu_cp15(vcpu, c5_DFSR); - } - - *far = addr; - - /* Give the guest an IMPLEMENTATION DEFINED exception */ - is_lpae = (vcpu_cp15(vcpu, c2_TTBCR) >> 31); - if (is_lpae) { - *fsr = DFSR_LPAE | DFSR_FSC_EXTABT_LPAE; - } else { - /* no need to shuffle FS[4] into DFSR[10] as its 0 */ - *fsr = DFSR_FSC_EXTABT_nLPAE; + far &= GENMASK(63, 32); + far |= addr; + vcpu_write_sys_reg(vcpu, fsr, ESR_EL1); } - post_fault_synchronize(vcpu, loaded); + vcpu_write_sys_reg(vcpu, far, FAR_EL1); } /** -- GitLab From 6ed6750f2b6d4a51f27615f3323d1850449299e3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 29 Oct 2020 17:09:12 +0000 Subject: [PATCH 0111/1894] KVM: arm64: Add AArch32 mapping annotation In order to deal with the few AArch32 system registers that map to only a particular half of their AArch64 counterpart (such as DFAR and IFAR being colocated in FAR_EL1), let's add an optional annotation to the sysreg descriptor structure, indicating whether a register maps to the upper or lower 32bits of a register. Nothing is using these annotation yet. Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index 5a6fc30f59894..259864c3c76bb 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -27,6 +27,12 @@ struct sys_reg_desc { /* Sysreg string for debug */ const char *name; + enum { + AA32_ZEROHIGH, + AA32_LO, + AA32_HI, + } aarch32_map; + /* MRS/MSR instruction which accesses it. */ u8 Op0; u8 Op1; @@ -153,6 +159,7 @@ const struct sys_reg_desc *find_reg_by_id(u64 id, const struct sys_reg_desc table[], unsigned int num); +#define AA32(_x) .aarch32_map = AA32_##_x #define Op0(_x) .Op0 = _x #define Op1(_x) .Op1 = _x #define CRn(_x) .CRn = _x -- GitLab From b1ea1d760d3331da19e33650bf8c09ce028a0a49 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 29 Oct 2020 17:14:20 +0000 Subject: [PATCH 0112/1894] KVM: arm64: Map AArch32 cp15 register to AArch64 sysregs Move all the cp15 registers over to their AArch64 counterpart. This requires the annotation of a few of them (such as the usual DFAR/IFAR vs FAR_EL1), and a new helper that generates mask/shift pairs for the various configurations. Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 114 ++++++++++++++++++++++---------------- 1 file changed, 66 insertions(+), 48 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index afdf18d694cb5..ab66101c855e5 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -128,6 +128,24 @@ static bool access_dcsw(struct kvm_vcpu *vcpu, return true; } +static void get_access_mask(const struct sys_reg_desc *r, u64 *mask, u64 *shift) +{ + switch (r->aarch32_map) { + case AA32_LO: + *mask = GENMASK_ULL(31, 0); + *shift = 0; + break; + case AA32_HI: + *mask = GENMASK_ULL(63, 32); + *shift = 32; + break; + default: + *mask = GENMASK_ULL(63, 0); + *shift = 0; + break; + } +} + /* * Generic accessor for VM registers. Only called as long as HCR_TVM * is set. If the guest enables the MMU, we stop trapping the VM @@ -138,26 +156,21 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { bool was_enabled = vcpu_has_cache_enabled(vcpu); - u64 val; - int reg = r->reg; + u64 val, mask, shift; BUG_ON(!p->is_write); - /* See the 32bit mapping in kvm_host.h */ - if (p->is_aarch32) - reg = r->reg / 2; + get_access_mask(r, &mask, &shift); - if (!p->is_aarch32 || !p->is_32bit) { - val = p->regval; + if (~mask) { + val = vcpu_read_sys_reg(vcpu, r->reg); + val &= ~mask; } else { - val = vcpu_read_sys_reg(vcpu, reg); - if (r->reg % 2) - val = (p->regval << 32) | (u64)lower_32_bits(val); - else - val = ((u64)upper_32_bits(val) << 32) | - lower_32_bits(p->regval); + val = 0; } - vcpu_write_sys_reg(vcpu, val, reg); + + val |= (p->regval & (mask >> shift)) << shift; + vcpu_write_sys_reg(vcpu, val, r->reg); kvm_toggle_cache(vcpu, was_enabled); return true; @@ -167,17 +180,13 @@ static bool access_actlr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { + u64 mask, shift; + if (p->is_write) return ignore_write(vcpu, p); - p->regval = vcpu_read_sys_reg(vcpu, ACTLR_EL1); - - if (p->is_aarch32) { - if (r->Op2 & 2) - p->regval = upper_32_bits(p->regval); - else - p->regval = lower_32_bits(p->regval); - } + get_access_mask(r, &mask, &shift); + p->regval = (vcpu_read_sys_reg(vcpu, r->reg) & mask) >> shift; return true; } @@ -1264,10 +1273,6 @@ static bool access_csselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, { int reg = r->reg; - /* See the 32bit mapping in kvm_host.h */ - if (p->is_aarch32) - reg = r->reg / 2; - if (p->is_write) vcpu_write_sys_reg(vcpu, p->regval, reg); else @@ -1919,20 +1924,29 @@ static const struct sys_reg_desc cp14_64_regs[] = { */ static const struct sys_reg_desc cp15_regs[] = { { Op1( 0), CRn( 0), CRm( 0), Op2( 1), access_ctr }, - { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, c1_SCTLR }, - { Op1( 0), CRn( 1), CRm( 0), Op2( 1), access_actlr }, - { Op1( 0), CRn( 1), CRm( 0), Op2( 3), access_actlr }, - { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, - { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 }, - { Op1( 0), CRn( 2), CRm( 0), Op2( 2), access_vm_reg, NULL, c2_TTBCR }, - { Op1( 0), CRn( 2), CRm( 0), Op2( 3), access_vm_reg, NULL, c2_TTBCR2 }, - { Op1( 0), CRn( 3), CRm( 0), Op2( 0), access_vm_reg, NULL, c3_DACR }, - { Op1( 0), CRn( 5), CRm( 0), Op2( 0), access_vm_reg, NULL, c5_DFSR }, - { Op1( 0), CRn( 5), CRm( 0), Op2( 1), access_vm_reg, NULL, c5_IFSR }, - { Op1( 0), CRn( 5), CRm( 1), Op2( 0), access_vm_reg, NULL, c5_ADFSR }, - { Op1( 0), CRn( 5), CRm( 1), Op2( 1), access_vm_reg, NULL, c5_AIFSR }, - { Op1( 0), CRn( 6), CRm( 0), Op2( 0), access_vm_reg, NULL, c6_DFAR }, - { Op1( 0), CRn( 6), CRm( 0), Op2( 2), access_vm_reg, NULL, c6_IFAR }, + { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, SCTLR_EL1 }, + /* ACTLR */ + { AA32(LO), Op1( 0), CRn( 1), CRm( 0), Op2( 1), access_actlr, NULL, ACTLR_EL1 }, + /* ACTLR2 */ + { AA32(HI), Op1( 0), CRn( 1), CRm( 0), Op2( 3), access_actlr, NULL, ACTLR_EL1 }, + { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, TTBR0_EL1 }, + { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, TTBR1_EL1 }, + /* TTBCR */ + { AA32(LO), Op1( 0), CRn( 2), CRm( 0), Op2( 2), access_vm_reg, NULL, TCR_EL1 }, + /* TTBCR2 */ + { AA32(HI), Op1( 0), CRn( 2), CRm( 0), Op2( 3), access_vm_reg, NULL, TCR_EL1 }, + { Op1( 0), CRn( 3), CRm( 0), Op2( 0), access_vm_reg, NULL, DACR32_EL2 }, + /* DFSR */ + { Op1( 0), CRn( 5), CRm( 0), Op2( 0), access_vm_reg, NULL, ESR_EL1 }, + { Op1( 0), CRn( 5), CRm( 0), Op2( 1), access_vm_reg, NULL, IFSR32_EL2 }, + /* ADFSR */ + { Op1( 0), CRn( 5), CRm( 1), Op2( 0), access_vm_reg, NULL, AFSR0_EL1 }, + /* AIFSR */ + { Op1( 0), CRn( 5), CRm( 1), Op2( 1), access_vm_reg, NULL, AFSR1_EL1 }, + /* DFAR */ + { AA32(LO), Op1( 0), CRn( 6), CRm( 0), Op2( 0), access_vm_reg, NULL, FAR_EL1 }, + /* IFAR */ + { AA32(HI), Op1( 0), CRn( 6), CRm( 0), Op2( 2), access_vm_reg, NULL, FAR_EL1 }, /* * DC{C,I,CI}SW operations: @@ -1958,15 +1972,19 @@ static const struct sys_reg_desc cp15_regs[] = { { Op1( 0), CRn( 9), CRm(14), Op2( 2), access_pminten }, { Op1( 0), CRn( 9), CRm(14), Op2( 3), access_pmovs }, - { Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, c10_PRRR }, - { Op1( 0), CRn(10), CRm( 2), Op2( 1), access_vm_reg, NULL, c10_NMRR }, - { Op1( 0), CRn(10), CRm( 3), Op2( 0), access_vm_reg, NULL, c10_AMAIR0 }, - { Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, c10_AMAIR1 }, + /* PRRR/MAIR0 */ + { AA32(LO), Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, MAIR_EL1 }, + /* NMRR/MAIR1 */ + { AA32(HI), Op1( 0), CRn(10), CRm( 2), Op2( 1), access_vm_reg, NULL, MAIR_EL1 }, + /* AMAIR0 */ + { AA32(LO), Op1( 0), CRn(10), CRm( 3), Op2( 0), access_vm_reg, NULL, AMAIR_EL1 }, + /* AMAIR1 */ + { AA32(HI), Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, AMAIR_EL1 }, /* ICC_SRE */ { Op1( 0), CRn(12), CRm(12), Op2( 5), access_gic_sre }, - { Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID }, + { Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, CONTEXTIDR_EL1 }, /* Arch Tmers */ { SYS_DESC(SYS_AARCH32_CNTP_TVAL), access_arch_timer }, @@ -2041,14 +2059,14 @@ static const struct sys_reg_desc cp15_regs[] = { { Op1(1), CRn( 0), CRm( 0), Op2(0), access_ccsidr }, { Op1(1), CRn( 0), CRm( 0), Op2(1), access_clidr }, - { Op1(2), CRn( 0), CRm( 0), Op2(0), access_csselr, NULL, c0_CSSELR }, + { Op1(2), CRn( 0), CRm( 0), Op2(0), access_csselr, NULL, CSSELR_EL1 }, }; static const struct sys_reg_desc cp15_64_regs[] = { - { Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, + { Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, TTBR0_EL1 }, { Op1( 0), CRn( 0), CRm( 9), Op2( 0), access_pmu_evcntr }, { Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI1R */ - { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 }, + { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, TTBR1_EL1 }, { Op1( 1), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_ASGI1R */ { Op1( 2), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI0R */ { SYS_DESC(SYS_AARCH32_CNTP_CVAL), access_arch_timer }, -- GitLab From 1da42c34d7c42fe2840bfe3de83cd0b5aa374859 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 29 Oct 2020 17:17:23 +0000 Subject: [PATCH 0113/1894] KVM: arm64: Map AArch32 cp14 register to AArch64 sysregs Similarly to what has been done on the cp15 front, repaint the debug registers to use their AArch64 counterparts. This results in some simplification as we can remove the 32bit-specific accessors. Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 8 --- arch/arm64/kvm/sys_regs.c | 109 ++++++++++-------------------- 2 files changed, 37 insertions(+), 80 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index c905c3fcaaa06..0d023e4f80a07 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -555,14 +555,6 @@ static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg) return true; } -/* - * CP14 and CP15 live in the same array, as they are backed by the - * same system registers. - */ -#define CPx_BIAS IS_ENABLED(CONFIG_CPU_BIG_ENDIAN) - -#define vcpu_cp14(v,r) ((v)->arch.ctxt.copro[(r) ^ CPx_BIAS]) - struct kvm_vm_stat { ulong remote_tlb_flush; }; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index ab66101c855e5..660ff6c18b2e1 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -366,26 +366,30 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu, */ static void reg_to_dbg(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *rd, u64 *dbg_reg) { - u64 val = p->regval; + u64 mask, shift, val; - if (p->is_32bit) { - val &= 0xffffffffUL; - val |= ((*dbg_reg >> 32) << 32); - } + get_access_mask(rd, &mask, &shift); + val = *dbg_reg; + val &= ~mask; + val |= (p->regval & (mask >> shift)) << shift; *dbg_reg = val; + vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY; } static void dbg_to_reg(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *rd, u64 *dbg_reg) { - p->regval = *dbg_reg; - if (p->is_32bit) - p->regval &= 0xffffffffUL; + u64 mask, shift; + + get_access_mask(rd, &mask, &shift); + p->regval = (*dbg_reg & mask) >> shift; } static bool trap_bvr(struct kvm_vcpu *vcpu, @@ -395,9 +399,9 @@ static bool trap_bvr(struct kvm_vcpu *vcpu, u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; if (p->is_write) - reg_to_dbg(vcpu, p, dbg_reg); + reg_to_dbg(vcpu, p, rd, dbg_reg); else - dbg_to_reg(vcpu, p, dbg_reg); + dbg_to_reg(vcpu, p, rd, dbg_reg); trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg); @@ -437,9 +441,9 @@ static bool trap_bcr(struct kvm_vcpu *vcpu, u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg]; if (p->is_write) - reg_to_dbg(vcpu, p, dbg_reg); + reg_to_dbg(vcpu, p, rd, dbg_reg); else - dbg_to_reg(vcpu, p, dbg_reg); + dbg_to_reg(vcpu, p, rd, dbg_reg); trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg); @@ -480,9 +484,9 @@ static bool trap_wvr(struct kvm_vcpu *vcpu, u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]; if (p->is_write) - reg_to_dbg(vcpu, p, dbg_reg); + reg_to_dbg(vcpu, p, rd, dbg_reg); else - dbg_to_reg(vcpu, p, dbg_reg); + dbg_to_reg(vcpu, p, rd, dbg_reg); trace_trap_reg(__func__, rd->reg, p->is_write, vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]); @@ -523,9 +527,9 @@ static bool trap_wcr(struct kvm_vcpu *vcpu, u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg]; if (p->is_write) - reg_to_dbg(vcpu, p, dbg_reg); + reg_to_dbg(vcpu, p, rd, dbg_reg); else - dbg_to_reg(vcpu, p, dbg_reg); + dbg_to_reg(vcpu, p, rd, dbg_reg); trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg); @@ -1744,66 +1748,27 @@ static bool trap_dbgidr(struct kvm_vcpu *vcpu, } } -static bool trap_debug32(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *r) -{ - if (p->is_write) { - vcpu_cp14(vcpu, r->reg) = p->regval; - vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY; - } else { - p->regval = vcpu_cp14(vcpu, r->reg); - } - - return true; -} - -/* AArch32 debug register mappings +/* + * AArch32 debug register mappings * * AArch32 DBGBVRn is mapped to DBGBVRn_EL1[31:0] * AArch32 DBGBXVRn is mapped to DBGBVRn_EL1[63:32] * - * All control registers and watchpoint value registers are mapped to - * the lower 32 bits of their AArch64 equivalents. We share the trap - * handlers with the above AArch64 code which checks what mode the - * system is in. + * None of the other registers share their location, so treat them as + * if they were 64bit. */ - -static bool trap_xvr(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *rd) -{ - u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; - - if (p->is_write) { - u64 val = *dbg_reg; - - val &= 0xffffffffUL; - val |= p->regval << 32; - *dbg_reg = val; - - vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY; - } else { - p->regval = *dbg_reg >> 32; - } - - trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg); - - return true; -} - -#define DBG_BCR_BVR_WCR_WVR(n) \ - /* DBGBVRn */ \ - { Op1( 0), CRn( 0), CRm((n)), Op2( 4), trap_bvr, NULL, n }, \ - /* DBGBCRn */ \ - { Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_bcr, NULL, n }, \ - /* DBGWVRn */ \ - { Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_wvr, NULL, n }, \ - /* DBGWCRn */ \ +#define DBG_BCR_BVR_WCR_WVR(n) \ + /* DBGBVRn */ \ + { AA32(LO), Op1( 0), CRn( 0), CRm((n)), Op2( 4), trap_bvr, NULL, n }, \ + /* DBGBCRn */ \ + { Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_bcr, NULL, n }, \ + /* DBGWVRn */ \ + { Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_wvr, NULL, n }, \ + /* DBGWCRn */ \ { Op1( 0), CRn( 0), CRm((n)), Op2( 7), trap_wcr, NULL, n } -#define DBGBXVR(n) \ - { Op1( 0), CRn( 1), CRm((n)), Op2( 1), trap_xvr, NULL, n } +#define DBGBXVR(n) \ + { AA32(HI), Op1( 0), CRn( 1), CRm((n)), Op2( 1), trap_bvr, NULL, n } /* * Trapped cp14 registers. We generally ignore most of the external @@ -1821,9 +1786,9 @@ static const struct sys_reg_desc cp14_regs[] = { { Op1( 0), CRn( 0), CRm( 1), Op2( 0), trap_raz_wi }, DBG_BCR_BVR_WCR_WVR(1), /* DBGDCCINT */ - { Op1( 0), CRn( 0), CRm( 2), Op2( 0), trap_debug32, NULL, cp14_DBGDCCINT }, + { Op1( 0), CRn( 0), CRm( 2), Op2( 0), trap_debug_regs, NULL, MDCCINT_EL1 }, /* DBGDSCRext */ - { Op1( 0), CRn( 0), CRm( 2), Op2( 2), trap_debug32, NULL, cp14_DBGDSCRext }, + { Op1( 0), CRn( 0), CRm( 2), Op2( 2), trap_debug_regs, NULL, MDSCR_EL1 }, DBG_BCR_BVR_WCR_WVR(2), /* DBGDTR[RT]Xint */ { Op1( 0), CRn( 0), CRm( 3), Op2( 0), trap_raz_wi }, @@ -1838,7 +1803,7 @@ static const struct sys_reg_desc cp14_regs[] = { { Op1( 0), CRn( 0), CRm( 6), Op2( 2), trap_raz_wi }, DBG_BCR_BVR_WCR_WVR(6), /* DBGVCR */ - { Op1( 0), CRn( 0), CRm( 7), Op2( 0), trap_debug32, NULL, cp14_DBGVCR }, + { Op1( 0), CRn( 0), CRm( 7), Op2( 0), trap_debug_regs, NULL, DBGVCR32_EL2 }, DBG_BCR_BVR_WCR_WVR(7), DBG_BCR_BVR_WCR_WVR(8), DBG_BCR_BVR_WCR_WVR(9), -- GitLab From 2d27fd784893a767ec4162afc6d8c86eec2d1bfe Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 29 Oct 2020 17:20:13 +0000 Subject: [PATCH 0114/1894] KVM: arm64: Drop is_32bit trap attribute The is_32bit attribute is now completely unused, drop it. Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 3 --- arch/arm64/kvm/sys_regs.h | 1 - arch/arm64/kvm/vgic-sys-reg-v3.c | 2 -- 3 files changed, 6 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 660ff6c18b2e1..a4726cdbe16fa 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2176,7 +2176,6 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu, int Rt2 = (esr >> 10) & 0x1f; params.is_aarch32 = true; - params.is_32bit = false; params.CRm = (esr >> 1) & 0xf; params.is_write = ((esr & 1) == 0); @@ -2227,7 +2226,6 @@ static int kvm_handle_cp_32(struct kvm_vcpu *vcpu, int Rt = kvm_vcpu_sys_get_rt(vcpu); params.is_aarch32 = true; - params.is_32bit = true; params.CRm = (esr >> 1) & 0xf; params.regval = vcpu_get_reg(vcpu, Rt); params.is_write = ((esr & 1) == 0); @@ -2322,7 +2320,6 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu) trace_kvm_handle_sys_reg(esr); params.is_aarch32 = false; - params.is_32bit = false; params.Op0 = (esr >> 20) & 3; params.Op1 = (esr >> 14) & 0x7; params.CRn = (esr >> 10) & 0xf; diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index 259864c3c76bb..8c4958d6b5ce6 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -20,7 +20,6 @@ struct sys_reg_params { u64 regval; bool is_write; bool is_aarch32; - bool is_32bit; /* Only valid if is_aarch32 is true */ }; struct sys_reg_desc { diff --git a/arch/arm64/kvm/vgic-sys-reg-v3.c b/arch/arm64/kvm/vgic-sys-reg-v3.c index 2f92bdcb11885..806d6701a7da3 100644 --- a/arch/arm64/kvm/vgic-sys-reg-v3.c +++ b/arch/arm64/kvm/vgic-sys-reg-v3.c @@ -269,7 +269,6 @@ int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, bool is_write, u64 id, params.regval = *reg; params.is_write = is_write; params.is_aarch32 = false; - params.is_32bit = false; if (find_reg_by_id(sysreg, ¶ms, gic_v3_icc_reg_descs, ARRAY_SIZE(gic_v3_icc_reg_descs))) @@ -289,7 +288,6 @@ int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu, bool is_write, u64 id, params.regval = *reg; params.is_write = is_write; params.is_aarch32 = false; - params.is_32bit = false; r = find_reg_by_id(sysreg, ¶ms, gic_v3_icc_reg_descs, ARRAY_SIZE(gic_v3_icc_reg_descs)); -- GitLab From 50f304532770c19a127b1e1b6769c0538abda58f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 29 Oct 2020 17:20:49 +0000 Subject: [PATCH 0115/1894] KVM: arm64: Drop is_aarch32 trap attribute is_aarch32 is only used once, and can be trivially replaced by testing Op0 instead. Drop it. Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 7 ++----- arch/arm64/kvm/sys_regs.h | 1 - arch/arm64/kvm/vgic-sys-reg-v3.c | 2 -- 3 files changed, 2 insertions(+), 8 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index a4726cdbe16fa..64fdfb64d7916 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -213,7 +213,7 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu, * equivalent to ICC_SGI0R_EL1, as there is no "alternative" secure * group. */ - if (p->is_aarch32) { + if (p->Op0 == 0) { /* AArch32 */ switch (p->Op1) { default: /* Keep GCC quiet */ case 0: /* ICC_SGI1R */ @@ -224,7 +224,7 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu, g1 = false; break; } - } else { + } else { /* AArch64 */ switch (p->Op2) { default: /* Keep GCC quiet */ case 5: /* ICC_SGI1R_EL1 */ @@ -2175,7 +2175,6 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu, int Rt = kvm_vcpu_sys_get_rt(vcpu); int Rt2 = (esr >> 10) & 0x1f; - params.is_aarch32 = true; params.CRm = (esr >> 1) & 0xf; params.is_write = ((esr & 1) == 0); @@ -2225,7 +2224,6 @@ static int kvm_handle_cp_32(struct kvm_vcpu *vcpu, u32 esr = kvm_vcpu_get_esr(vcpu); int Rt = kvm_vcpu_sys_get_rt(vcpu); - params.is_aarch32 = true; params.CRm = (esr >> 1) & 0xf; params.regval = vcpu_get_reg(vcpu, Rt); params.is_write = ((esr & 1) == 0); @@ -2319,7 +2317,6 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu) trace_kvm_handle_sys_reg(esr); - params.is_aarch32 = false; params.Op0 = (esr >> 20) & 3; params.Op1 = (esr >> 14) & 0x7; params.CRn = (esr >> 10) & 0xf; diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index 8c4958d6b5ce6..416153b593a60 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -19,7 +19,6 @@ struct sys_reg_params { u8 Op2; u64 regval; bool is_write; - bool is_aarch32; }; struct sys_reg_desc { diff --git a/arch/arm64/kvm/vgic-sys-reg-v3.c b/arch/arm64/kvm/vgic-sys-reg-v3.c index 806d6701a7da3..07d5271e9f052 100644 --- a/arch/arm64/kvm/vgic-sys-reg-v3.c +++ b/arch/arm64/kvm/vgic-sys-reg-v3.c @@ -268,7 +268,6 @@ int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, bool is_write, u64 id, params.regval = *reg; params.is_write = is_write; - params.is_aarch32 = false; if (find_reg_by_id(sysreg, ¶ms, gic_v3_icc_reg_descs, ARRAY_SIZE(gic_v3_icc_reg_descs))) @@ -287,7 +286,6 @@ int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu, bool is_write, u64 id, if (is_write) params.regval = *reg; params.is_write = is_write; - params.is_aarch32 = false; r = find_reg_by_id(sysreg, ¶ms, gic_v3_icc_reg_descs, ARRAY_SIZE(gic_v3_icc_reg_descs)); -- GitLab From 5f7e02aebdf0c8d255f3ff2df8595fd220e7d5ce Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 29 Oct 2020 17:21:37 +0000 Subject: [PATCH 0116/1894] KVM: arm64: Drop legacy copro shadow register Finally remove one of the biggest 32bit legacy: the copro shadow mapping. We won't missit. Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 48 +------------------------------ 1 file changed, 1 insertion(+), 47 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 0d023e4f80a07..c527f9567713f 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -201,49 +201,6 @@ enum vcpu_sysreg { NR_SYS_REGS /* Nothing after this line! */ }; -/* 32bit mapping */ -#define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */ -#define c0_CSSELR (CSSELR_EL1 * 2)/* Cache Size Selection Register */ -#define c1_SCTLR (SCTLR_EL1 * 2) /* System Control Register */ -#define c1_ACTLR (ACTLR_EL1 * 2) /* Auxiliary Control Register */ -#define c1_CPACR (CPACR_EL1 * 2) /* Coprocessor Access Control */ -#define c2_TTBR0 (TTBR0_EL1 * 2) /* Translation Table Base Register 0 */ -#define c2_TTBR0_high (c2_TTBR0 + 1) /* TTBR0 top 32 bits */ -#define c2_TTBR1 (TTBR1_EL1 * 2) /* Translation Table Base Register 1 */ -#define c2_TTBR1_high (c2_TTBR1 + 1) /* TTBR1 top 32 bits */ -#define c2_TTBCR (TCR_EL1 * 2) /* Translation Table Base Control R. */ -#define c2_TTBCR2 (c2_TTBCR + 1) /* Translation Table Base Control R. 2 */ -#define c3_DACR (DACR32_EL2 * 2)/* Domain Access Control Register */ -#define c5_DFSR (ESR_EL1 * 2) /* Data Fault Status Register */ -#define c5_IFSR (IFSR32_EL2 * 2)/* Instruction Fault Status Register */ -#define c5_ADFSR (AFSR0_EL1 * 2) /* Auxiliary Data Fault Status R */ -#define c5_AIFSR (AFSR1_EL1 * 2) /* Auxiliary Instr Fault Status R */ -#define c6_DFAR (FAR_EL1 * 2) /* Data Fault Address Register */ -#define c6_IFAR (c6_DFAR + 1) /* Instruction Fault Address Register */ -#define c7_PAR (PAR_EL1 * 2) /* Physical Address Register */ -#define c7_PAR_high (c7_PAR + 1) /* PAR top 32 bits */ -#define c10_PRRR (MAIR_EL1 * 2) /* Primary Region Remap Register */ -#define c10_NMRR (c10_PRRR + 1) /* Normal Memory Remap Register */ -#define c12_VBAR (VBAR_EL1 * 2) /* Vector Base Address Register */ -#define c13_CID (CONTEXTIDR_EL1 * 2) /* Context ID Register */ -#define c13_TID_URW (TPIDR_EL0 * 2) /* Thread ID, User R/W */ -#define c13_TID_URO (TPIDRRO_EL0 * 2)/* Thread ID, User R/O */ -#define c13_TID_PRIV (TPIDR_EL1 * 2) /* Thread ID, Privileged */ -#define c10_AMAIR0 (AMAIR_EL1 * 2) /* Aux Memory Attr Indirection Reg */ -#define c10_AMAIR1 (c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */ -#define c14_CNTKCTL (CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */ - -#define cp14_DBGDSCRext (MDSCR_EL1 * 2) -#define cp14_DBGBCR0 (DBGBCR0_EL1 * 2) -#define cp14_DBGBVR0 (DBGBVR0_EL1 * 2) -#define cp14_DBGBXVR0 (cp14_DBGBVR0 + 1) -#define cp14_DBGWCR0 (DBGWCR0_EL1 * 2) -#define cp14_DBGWVR0 (DBGWVR0_EL1 * 2) -#define cp14_DBGDCCINT (MDCCINT_EL1 * 2) -#define cp14_DBGVCR (DBGVCR32_EL2 * 2) - -#define NR_COPRO_REGS (NR_SYS_REGS * 2) - struct kvm_cpu_context { struct user_pt_regs regs; /* sp = sp_el0 */ @@ -254,10 +211,7 @@ struct kvm_cpu_context { struct user_fpsimd_state fp_regs; - union { - u64 sys_regs[NR_SYS_REGS]; - u32 copro[NR_COPRO_REGS]; - }; + u64 sys_regs[NR_SYS_REGS]; struct kvm_vcpu *__hyp_running_vcpu; }; -- GitLab From 6ac4a5ac50d1d25a61aa00e660eebb21a2ff9b96 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 2 Nov 2020 18:11:16 +0000 Subject: [PATCH 0117/1894] KVM: arm64: Drop kvm_coproc.h kvm_coproc.h used to serve as a compatibility layer for the files shared between the 32 and 64 bit ports. Another one bites the dust... Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_coproc.h | 38 ----------------------------- arch/arm64/include/asm/kvm_host.h | 17 +++++++++++++ arch/arm64/kvm/arm.c | 3 +-- arch/arm64/kvm/guest.c | 1 - arch/arm64/kvm/handle_exit.c | 1 - arch/arm64/kvm/reset.c | 1 - arch/arm64/kvm/sys_regs.c | 1 - 7 files changed, 18 insertions(+), 44 deletions(-) delete mode 100644 arch/arm64/include/asm/kvm_coproc.h diff --git a/arch/arm64/include/asm/kvm_coproc.h b/arch/arm64/include/asm/kvm_coproc.h deleted file mode 100644 index d6bb40122fdbe..0000000000000 --- a/arch/arm64/include/asm/kvm_coproc.h +++ /dev/null @@ -1,38 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012,2013 - ARM Ltd - * Author: Marc Zyngier - * - * Derived from arch/arm/include/asm/kvm_coproc.h - * Copyright (C) 2012 Rusty Russell IBM Corporation - */ - -#ifndef __ARM64_KVM_COPROC_H__ -#define __ARM64_KVM_COPROC_H__ - -#include - -void kvm_reset_sys_regs(struct kvm_vcpu *vcpu); - -struct kvm_sys_reg_table { - const struct sys_reg_desc *table; - size_t num; -}; - -int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu); -int kvm_handle_cp14_32(struct kvm_vcpu *vcpu); -int kvm_handle_cp14_64(struct kvm_vcpu *vcpu); -int kvm_handle_cp15_32(struct kvm_vcpu *vcpu); -int kvm_handle_cp15_64(struct kvm_vcpu *vcpu); -int kvm_handle_sys_reg(struct kvm_vcpu *vcpu); - -#define kvm_coproc_table_init kvm_sys_reg_table_init -void kvm_sys_reg_table_init(void); - -struct kvm_one_reg; -int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices); -int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); -int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); -unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu); - -#endif /* __ARM64_KVM_COPROC_H__ */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index c527f9567713f..709f892f7a142 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -533,6 +533,12 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); + +unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu); +int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices); +int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); +int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); + int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu, struct kvm_vcpu_events *events); @@ -595,6 +601,17 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot); int handle_exit(struct kvm_vcpu *vcpu, int exception_index); void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index); +int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu); +int kvm_handle_cp14_32(struct kvm_vcpu *vcpu); +int kvm_handle_cp14_64(struct kvm_vcpu *vcpu); +int kvm_handle_cp15_32(struct kvm_vcpu *vcpu); +int kvm_handle_cp15_64(struct kvm_vcpu *vcpu); +int kvm_handle_sys_reg(struct kvm_vcpu *vcpu); + +void kvm_reset_sys_regs(struct kvm_vcpu *vcpu); + +void kvm_sys_reg_table_init(void); + /* MMIO helpers */ void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 5750ec34960e9..9d69d2bf6943f 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -35,7 +35,6 @@ #include #include #include -#include #include #include @@ -1525,7 +1524,7 @@ static int init_subsystems(void) goto out; kvm_perf_init(); - kvm_coproc_table_init(); + kvm_sys_reg_table_init(); out: on_each_cpu(_kvm_arch_hardware_disable, NULL, 1); diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 3f23f7478d2ad..9bbd30e62799a 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include "trace.h" diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index f79137ee4274a..cebe39f3b1b64 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index f32490229a4c7..74ce92a4988c1 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 64fdfb64d7916..d2e1d745f0676 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include -- GitLab From f9f16dfbe76e63ba9aec68055c08242b09be297e Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 6 Nov 2020 17:48:45 +0800 Subject: [PATCH 0118/1894] perf mem: Search event name with more flexible path The perf tool searches a memory event name under the folder '/sys/devices/cpu/events/', this leads to the limitation for the selection of a memory profiling event which must be under this folder. Thus it's impossible to use any other event as memory event which is not under this specific folder, e.g. Arm SPE hardware event is not located in '/sys/devices/cpu/events/' so it cannot be enabled for memory profiling. This patch changes to search folder from '/sys/devices/cpu/events/' to '/sys/devices', so it give flexibility to find events which can be used for memory profiling. Signed-off-by: Leo Yan Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201106094853.21082-2-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/mem-events.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index ea0af0bc4314a..35c8d175a9d29 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -18,8 +18,8 @@ unsigned int perf_mem_events__loads_ldlat = 30; #define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s } struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = { - E("ldlat-loads", "cpu/mem-loads,ldlat=%u/P", "mem-loads"), - E("ldlat-stores", "cpu/mem-stores/P", "mem-stores"), + E("ldlat-loads", "cpu/mem-loads,ldlat=%u/P", "cpu/events/mem-loads"), + E("ldlat-stores", "cpu/mem-stores/P", "cpu/events/mem-stores"), }; #undef E @@ -93,7 +93,7 @@ int perf_mem_events__init(void) struct perf_mem_event *e = &perf_mem_events[j]; struct stat st; - scnprintf(path, PATH_MAX, "%s/devices/cpu/events/%s", + scnprintf(path, PATH_MAX, "%s/devices/%s", mnt, e->sysfs_name); if (!stat(path, &st)) -- GitLab From eaf6aaeec5fa301c0eb8ae92962909b15d075e5f Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 6 Nov 2020 17:48:46 +0800 Subject: [PATCH 0119/1894] perf mem: Introduce weak function perf_mem_events__ptr() Different architectures might use different event or different event parameters for memory profiling, this patch introduces a weak perf_mem_events__ptr() function which allows to return back a architecture specific memory event. Since the variable 'perf_mem_events' can be only accessed by the perf_mem_events__ptr() function, mark the variable as 'static', this allows the architectures to define its own memory event array. Signed-off-by: Leo Yan Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201106094853.21082-3-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 18 ++++++++++++------ tools/perf/builtin-mem.c | 21 ++++++++++++++------- tools/perf/util/mem-events.c | 26 +++++++++++++++++++------- tools/perf/util/mem-events.h | 2 +- 4 files changed, 46 insertions(+), 21 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index d5bea5d3cd51a..4d1a08e382338 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2867,6 +2867,7 @@ static int perf_c2c__record(int argc, const char **argv) int ret; bool all_user = false, all_kernel = false; bool event_set = false; + struct perf_mem_event *e; struct option options[] = { OPT_CALLBACK('e', "event", &event_set, "event", "event selector. Use 'perf c2c record -e list' to list available events", @@ -2894,11 +2895,15 @@ static int perf_c2c__record(int argc, const char **argv) rec_argv[i++] = "record"; if (!event_set) { - perf_mem_events[PERF_MEM_EVENTS__LOAD].record = true; - perf_mem_events[PERF_MEM_EVENTS__STORE].record = true; + e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD); + e->record = true; + + e = perf_mem_events__ptr(PERF_MEM_EVENTS__STORE); + e->record = true; } - if (perf_mem_events[PERF_MEM_EVENTS__LOAD].record) + e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD); + if (e->record) rec_argv[i++] = "-W"; rec_argv[i++] = "-d"; @@ -2906,12 +2911,13 @@ static int perf_c2c__record(int argc, const char **argv) rec_argv[i++] = "--sample-cpu"; for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { - if (!perf_mem_events[j].record) + e = perf_mem_events__ptr(j); + if (!e->record) continue; - if (!perf_mem_events[j].supported) { + if (!e->supported) { pr_err("failed: event '%s' not supported\n", - perf_mem_events[j].name); + perf_mem_events__name(j)); free(rec_argv); return -1; } diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 3523279af6aff..9a7df8d012960 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -64,6 +64,7 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) const char **rec_argv; int ret; bool all_user = false, all_kernel = false; + struct perf_mem_event *e; struct option options[] = { OPT_CALLBACK('e', "event", &mem, "event", "event selector. use 'perf mem record -e list' to list available events", @@ -86,13 +87,18 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) rec_argv[i++] = "record"; - if (mem->operation & MEM_OPERATION_LOAD) - perf_mem_events[PERF_MEM_EVENTS__LOAD].record = true; + if (mem->operation & MEM_OPERATION_LOAD) { + e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD); + e->record = true; + } - if (mem->operation & MEM_OPERATION_STORE) - perf_mem_events[PERF_MEM_EVENTS__STORE].record = true; + if (mem->operation & MEM_OPERATION_STORE) { + e = perf_mem_events__ptr(PERF_MEM_EVENTS__STORE); + e->record = true; + } - if (perf_mem_events[PERF_MEM_EVENTS__LOAD].record) + e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD); + if (e->record) rec_argv[i++] = "-W"; rec_argv[i++] = "-d"; @@ -101,10 +107,11 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) rec_argv[i++] = "--phys-data"; for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { - if (!perf_mem_events[j].record) + e = perf_mem_events__ptr(j); + if (!e->record) continue; - if (!perf_mem_events[j].supported) { + if (!e->supported) { pr_err("failed: event '%s' not supported\n", perf_mem_events__name(j)); free(rec_argv); diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index 35c8d175a9d29..7a5a0d699e270 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -17,7 +17,7 @@ unsigned int perf_mem_events__loads_ldlat = 30; #define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s } -struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = { +static struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = { E("ldlat-loads", "cpu/mem-loads,ldlat=%u/P", "cpu/events/mem-loads"), E("ldlat-stores", "cpu/mem-stores/P", "cpu/events/mem-stores"), }; @@ -28,19 +28,31 @@ struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = { static char mem_loads_name[100]; static bool mem_loads_name__init; +struct perf_mem_event * __weak perf_mem_events__ptr(int i) +{ + if (i >= PERF_MEM_EVENTS__MAX) + return NULL; + + return &perf_mem_events[i]; +} + char * __weak perf_mem_events__name(int i) { + struct perf_mem_event *e = perf_mem_events__ptr(i); + + if (!e) + return NULL; + if (i == PERF_MEM_EVENTS__LOAD) { if (!mem_loads_name__init) { mem_loads_name__init = true; scnprintf(mem_loads_name, sizeof(mem_loads_name), - perf_mem_events[i].name, - perf_mem_events__loads_ldlat); + e->name, perf_mem_events__loads_ldlat); } return mem_loads_name; } - return (char *)perf_mem_events[i].name; + return (char *)e->name; } int perf_mem_events__parse(const char *str) @@ -61,7 +73,7 @@ int perf_mem_events__parse(const char *str) while (tok) { for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { - struct perf_mem_event *e = &perf_mem_events[j]; + struct perf_mem_event *e = perf_mem_events__ptr(j); if (strstr(e->tag, tok)) e->record = found = true; @@ -90,7 +102,7 @@ int perf_mem_events__init(void) for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { char path[PATH_MAX]; - struct perf_mem_event *e = &perf_mem_events[j]; + struct perf_mem_event *e = perf_mem_events__ptr(j); struct stat st; scnprintf(path, PATH_MAX, "%s/devices/%s", @@ -108,7 +120,7 @@ void perf_mem_events__list(void) int j; for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { - struct perf_mem_event *e = &perf_mem_events[j]; + struct perf_mem_event *e = perf_mem_events__ptr(j); fprintf(stderr, "%-13s%-*s%s\n", e->tag, diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h index 904dad34f7f7e..726a9c8103e40 100644 --- a/tools/perf/util/mem-events.h +++ b/tools/perf/util/mem-events.h @@ -31,13 +31,13 @@ enum { PERF_MEM_EVENTS__MAX, }; -extern struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX]; extern unsigned int perf_mem_events__loads_ldlat; int perf_mem_events__parse(const char *str); int perf_mem_events__init(void); char *perf_mem_events__name(int i); +struct perf_mem_event *perf_mem_events__ptr(int i); void perf_mem_events__list(void); -- GitLab From 4ba2452cd88f39da68a6dc05fcc95e8977fd6403 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 6 Nov 2020 17:48:47 +0800 Subject: [PATCH 0120/1894] perf mem: Support new memory event PERF_MEM_EVENTS__LOAD_STORE On the architectures with perf memory profiling, two types of hardware events have been supported: load and store; if want to profile memory for both load and store operations, the tool will use these two events at the same time, the usage is: # perf mem record -t load,store -- uname But this cannot be applied for AUX tracing event, the same PMU event can be used to only trace memory load, or only memory store, or trace for both memory load and store. This patch introduces a new event PERF_MEM_EVENTS__LOAD_STORE, which is used to support the event which can record both memory load and store operations. When user specifies memory operation type as 'load,store', or doesn't set type so use 'load,store' as default, if the arch supports the event PERF_MEM_EVENTS__LOAD_STORE, the tool will convert the required operations to this single event; otherwise, if the arch doesn't support PERF_MEM_EVENTS__LOAD_STORE, the tool rolls back to enable both events PERF_MEM_EVENTS__LOAD and PERF_MEM_EVENTS__STORE, which keeps the same behaviour with before. Signed-off-by: Leo Yan Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201106094853.21082-4-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-mem.c | 24 ++++++++++++++++++------ tools/perf/util/mem-events.c | 13 ++++++++++++- tools/perf/util/mem-events.h | 1 + 3 files changed, 31 insertions(+), 7 deletions(-) diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 9a7df8d012960..21ebe0f47e644 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -87,14 +87,26 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) rec_argv[i++] = "record"; - if (mem->operation & MEM_OPERATION_LOAD) { - e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD); - e->record = true; - } + e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD_STORE); - if (mem->operation & MEM_OPERATION_STORE) { - e = perf_mem_events__ptr(PERF_MEM_EVENTS__STORE); + /* + * The load and store operations are required, use the event + * PERF_MEM_EVENTS__LOAD_STORE if it is supported. + */ + if (e->tag && + (mem->operation & MEM_OPERATION_LOAD) && + (mem->operation & MEM_OPERATION_STORE)) { e->record = true; + } else { + if (mem->operation & MEM_OPERATION_LOAD) { + e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD); + e->record = true; + } + + if (mem->operation & MEM_OPERATION_STORE) { + e = perf_mem_events__ptr(PERF_MEM_EVENTS__STORE); + e->record = true; + } } e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD); diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index 7a5a0d699e270..19007e463b8ab 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -20,6 +20,7 @@ unsigned int perf_mem_events__loads_ldlat = 30; static struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = { E("ldlat-loads", "cpu/mem-loads,ldlat=%u/P", "cpu/events/mem-loads"), E("ldlat-stores", "cpu/mem-stores/P", "cpu/events/mem-stores"), + E(NULL, NULL, NULL), }; #undef E @@ -75,6 +76,9 @@ int perf_mem_events__parse(const char *str) for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { struct perf_mem_event *e = perf_mem_events__ptr(j); + if (!e->tag) + continue; + if (strstr(e->tag, tok)) e->record = found = true; } @@ -105,6 +109,13 @@ int perf_mem_events__init(void) struct perf_mem_event *e = perf_mem_events__ptr(j); struct stat st; + /* + * If the event entry isn't valid, skip initialization + * and "e->supported" will keep false. + */ + if (!e->tag) + continue; + scnprintf(path, PATH_MAX, "%s/devices/%s", mnt, e->sysfs_name); @@ -123,7 +134,7 @@ void perf_mem_events__list(void) struct perf_mem_event *e = perf_mem_events__ptr(j); fprintf(stderr, "%-13s%-*s%s\n", - e->tag, + e->tag ?: "", verbose > 0 ? 25 : 0, verbose > 0 ? perf_mem_events__name(j) : "", e->supported ? ": available" : ""); diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h index 726a9c8103e40..5ef178278909b 100644 --- a/tools/perf/util/mem-events.h +++ b/tools/perf/util/mem-events.h @@ -28,6 +28,7 @@ struct mem_info { enum { PERF_MEM_EVENTS__LOAD, PERF_MEM_EVENTS__STORE, + PERF_MEM_EVENTS__LOAD_STORE, PERF_MEM_EVENTS__MAX, }; -- GitLab From 8b8173b45a7a9709cc2597548469708a8efbd0d9 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 6 Nov 2020 17:48:48 +0800 Subject: [PATCH 0121/1894] perf c2c: Support memory event PERF_MEM_EVENTS__LOAD_STORE When user doesn't specify event name, perf c2c tool enables both the load and store events, and this leads to failure for opening the duplicate PMU device for AUX trace. After the memory event PERF_MEM_EVENTS__LOAD_STORE is introduced, when the user doesn't specify event name, this patch converts the required operation to PERF_MEM_EVENTS__LOAD_STORE if the arch supports it. Otherwise, the tool still rolls back to enable events PERF_MEM_EVENTS__LOAD and PERF_MEM_EVENTS__STORE. Signed-off-by: Leo Yan Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201106094853.21082-5-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 4d1a08e382338..98ae33eac6cc1 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2895,11 +2895,20 @@ static int perf_c2c__record(int argc, const char **argv) rec_argv[i++] = "record"; if (!event_set) { - e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD); - e->record = true; + e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD_STORE); + /* + * The load and store operations are required, use the event + * PERF_MEM_EVENTS__LOAD_STORE if it is supported. + */ + if (e->tag) { + e->record = true; + } else { + e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD); + e->record = true; - e = perf_mem_events__ptr(PERF_MEM_EVENTS__STORE); - e->record = true; + e = perf_mem_events__ptr(PERF_MEM_EVENTS__STORE); + e->record = true; + } } e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD); -- GitLab From 436cce00710a3f234ab6b735b5980256e773d388 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 6 Nov 2020 17:48:49 +0800 Subject: [PATCH 0122/1894] perf mem: Only initialize memory event for recording It's needless to initialize memory events for reporting, this patch moves memory event initialization for only recording. Furthermore, the change allows to parse perf data on cross platforms, e.g. perf tool can report result properly even the machine doesn't support the memory events. Signed-off-by: Leo Yan Acked-by: Ian Rogers Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201106094853.21082-6-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-mem.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 21ebe0f47e644..72ce4b8fbb0f4 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -77,6 +77,11 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) OPT_END() }; + if (perf_mem_events__init()) { + pr_err("failed: memory events not supported\n"); + return -1; + } + argc = parse_options(argc, argv, options, record_mem_usage, PARSE_OPT_KEEP_UNKNOWN); @@ -441,11 +446,6 @@ int cmd_mem(int argc, const char **argv) NULL }; - if (perf_mem_events__init()) { - pr_err("failed: memory events not supported\n"); - return -1; - } - argc = parse_options_subcommand(argc, argv, mem_options, mem_subcommands, mem_usage, PARSE_OPT_KEEP_UNKNOWN); -- GitLab From 014a771c7867fda5b40a95e1c7bc1aa5ac704c91 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 6 Nov 2020 17:48:50 +0800 Subject: [PATCH 0123/1894] perf auxtrace: Add itrace option '-M' for memory events This patch is to add itrace option '-M' to synthesize memory event. Signed-off-by: Leo Yan Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201106094853.21082-7-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/itrace.txt | 1 + tools/perf/util/auxtrace.c | 4 ++++ tools/perf/util/auxtrace.h | 2 ++ 3 files changed, 7 insertions(+) diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt index d3740c8f399ba..079cdfabb3520 100644 --- a/tools/perf/Documentation/itrace.txt +++ b/tools/perf/Documentation/itrace.txt @@ -11,6 +11,7 @@ d create a debug log f synthesize first level cache events m synthesize last level cache events + M synthesize memory events t synthesize TLB events a synthesize remote access events g synthesize a call chain (use with i or x) diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 42a85c86421d7..62e7f6c5f8b5d 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -1333,6 +1333,7 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts, synth_opts->flc = true; synth_opts->llc = true; synth_opts->tlb = true; + synth_opts->mem = true; synth_opts->remote_access = true; if (no_sample) { @@ -1554,6 +1555,9 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, case 'a': synth_opts->remote_access = true; break; + case 'M': + synth_opts->mem = true; + break; case 'q': synth_opts->quick += 1; break; diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 951d2d14cf249..7e5c9e1552bd4 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -88,6 +88,7 @@ enum itrace_period_type { * @llc: whether to synthesize last level cache events * @tlb: whether to synthesize TLB events * @remote_access: whether to synthesize remote access events + * @mem: whether to synthesize memory events * @callchain_sz: maximum callchain size * @last_branch_sz: branch context size * @period: 'instructions' events period @@ -126,6 +127,7 @@ struct itrace_synth_opts { bool llc; bool tlb; bool remote_access; + bool mem; unsigned int callchain_sz; unsigned int last_branch_sz; unsigned long long period; -- GitLab From 13e5df1e3f1ba1a90944362bc57690ea1369b3b7 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 6 Nov 2020 17:48:51 +0800 Subject: [PATCH 0124/1894] perf mem: Support AUX trace The 'perf mem' tool doesn't support AUX trace data so it cannot receive the hardware tracing data. On arm64, although it doesn't support PMU events for memory load and store, ARM SPE is a good candidate for memory profiling, the hardware tracer can record memory accessing operations with affiliated information (e.g. physical address and virtual address for accessing, cache levels, TLB walking, latency, etc). To allow "perf mem" tool to support AUX trace, this patch adds the AUX callbacks for session structure; make itrace memory event as default for "perf mem", this tells the AUX trace decoder to synthesize memory samples. Signed-off-by: Leo Yan Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201106094853.21082-8-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-mem.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 72ce4b8fbb0f4..fdfbff7592f43 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -7,6 +7,7 @@ #include "perf.h" #include +#include "util/auxtrace.h" #include "util/trace-event.h" #include "util/tool.h" #include "util/session.h" @@ -255,6 +256,12 @@ static int process_sample_event(struct perf_tool *tool, static int report_raw_events(struct perf_mem *mem) { + struct itrace_synth_opts itrace_synth_opts = { + .set = true, + .mem = true, /* Only enable memory event */ + .default_no_sample = true, + }; + struct perf_data data = { .path = input_name, .mode = PERF_DATA_MODE_READ, @@ -267,6 +274,8 @@ static int report_raw_events(struct perf_mem *mem) if (IS_ERR(session)) return PTR_ERR(session); + session->itrace_synth_opts = &itrace_synth_opts; + if (mem->cpu_list) { ret = perf_session__cpu_bitmap(session, mem->cpu_list, mem->cpu_bitmap); @@ -410,8 +419,12 @@ int cmd_mem(int argc, const char **argv) .comm = perf_event__process_comm, .lost = perf_event__process_lost, .fork = perf_event__process_fork, + .attr = perf_event__process_attr, .build_id = perf_event__process_build_id, .namespaces = perf_event__process_namespaces, + .auxtrace_info = perf_event__process_auxtrace_info, + .auxtrace = perf_event__process_auxtrace, + .auxtrace_error = perf_event__process_auxtrace_error, .ordered_events = true, }, .input_name = "perf.data", -- GitLab From c825f7885178f994a2a00ca02016940d94aaed6e Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 6 Nov 2020 17:48:52 +0800 Subject: [PATCH 0125/1894] perf c2c: Support AUX trace This patch adds the AUX callbacks in session structure, so support AUX trace for "perf c2c" tool; make itrace memory event as default for "perf c2c", this tells the AUX trace decoder to synthesize samples and can be used for statistics. Signed-off-by: Leo Yan Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201106094853.21082-9-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 98ae33eac6cc1..c5babeaa3b389 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -369,6 +369,10 @@ static struct perf_c2c c2c = { .exit = perf_event__process_exit, .fork = perf_event__process_fork, .lost = perf_event__process_lost, + .attr = perf_event__process_attr, + .auxtrace_info = perf_event__process_auxtrace_info, + .auxtrace = perf_event__process_auxtrace, + .auxtrace_error = perf_event__process_auxtrace_error, .ordered_events = true, .ordering_requires_timestamps = true, }, @@ -2678,6 +2682,12 @@ static int setup_coalesce(const char *coalesce, bool no_source) static int perf_c2c__report(int argc, const char **argv) { + struct itrace_synth_opts itrace_synth_opts = { + .set = true, + .mem = true, /* Only enable memory event */ + .default_no_sample = true, + }; + struct perf_session *session; struct ui_progress prog; struct perf_data data = { @@ -2757,6 +2767,8 @@ static int perf_c2c__report(int argc, const char **argv) goto out; } + session->itrace_synth_opts = &itrace_synth_opts; + err = setup_nodes(session); if (err) { pr_err("Failed setup nodes\n"); -- GitLab From 40714c58630aaaf1eb3acc431fe206a6b36a03d6 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 6 Nov 2020 17:48:53 +0800 Subject: [PATCH 0126/1894] perf mem: Support ARM SPE events This patch adds ARM SPE events for perf memory profiling: 'spe-load': event for only recording memory load ops; 'spe-store': event for only recording memory store ops; 'spe-ldst': event for recording memory load and store ops. Signed-off-by: Leo Yan Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201106094853.21082-10-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm64/util/Build | 2 +- tools/perf/arch/arm64/util/mem-events.c | 37 +++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 tools/perf/arch/arm64/util/mem-events.c diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build index 8d2b9bcfffcaf..ead2f2275eee7 100644 --- a/tools/perf/arch/arm64/util/Build +++ b/tools/perf/arch/arm64/util/Build @@ -10,4 +10,4 @@ perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o perf-$(CONFIG_AUXTRACE) += ../../arm/util/pmu.o \ ../../arm/util/auxtrace.o \ ../../arm/util/cs-etm.o \ - arm-spe.o + arm-spe.o mem-events.o diff --git a/tools/perf/arch/arm64/util/mem-events.c b/tools/perf/arch/arm64/util/mem-events.c new file mode 100644 index 0000000000000..2a24973726712 --- /dev/null +++ b/tools/perf/arch/arm64/util/mem-events.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "map_symbol.h" +#include "mem-events.h" + +#define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s } + +static struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = { + E("spe-load", "arm_spe_0/ts_enable=1,load_filter=1,store_filter=0,min_latency=%u/", "arm_spe_0"), + E("spe-store", "arm_spe_0/ts_enable=1,load_filter=0,store_filter=1/", "arm_spe_0"), + E("spe-ldst", "arm_spe_0/ts_enable=1,load_filter=1,store_filter=1,min_latency=%u/", "arm_spe_0"), +}; + +static char mem_ev_name[100]; + +struct perf_mem_event *perf_mem_events__ptr(int i) +{ + if (i >= PERF_MEM_EVENTS__MAX) + return NULL; + + return &perf_mem_events[i]; +} + +char *perf_mem_events__name(int i) +{ + struct perf_mem_event *e = perf_mem_events__ptr(i); + + if (i >= PERF_MEM_EVENTS__MAX) + return NULL; + + if (i == PERF_MEM_EVENTS__LOAD || i == PERF_MEM_EVENTS__LOAD_STORE) + scnprintf(mem_ev_name, sizeof(mem_ev_name), + e->name, perf_mem_events__loads_ldlat); + else /* PERF_MEM_EVENTS__STORE */ + scnprintf(mem_ev_name, sizeof(mem_ev_name), e->name); + + return mem_ev_name; +} -- GitLab From c185f1cde46653cd0a7a1eaf461d16c462870781 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 11 Nov 2020 15:11:28 +0800 Subject: [PATCH 0127/1894] perf arm-spe: Include bitops.h for BIT() macro Include header linux/bitops.h, directly use its BIT() macro and remove the self defined macros. Committer notes: Use BIT_ULL() instead of BIT to build on 32-bit arches as mentioned in review by Andre Przywara . I noticed the build failure when crossbuilding to arm32 from x86_64. Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Link: https://lore.kernel.org/r/20201111071149.815-2-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/arm-spe-decoder/arm-spe-decoder.c | 5 +---- tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 7 +++---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c index 93e063f22be50..cc18a1e8c212b 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -21,10 +22,6 @@ #include "arm-spe-decoder.h" -#ifndef BIT -#define BIT(n) (1UL << (n)) -#endif - static u64 arm_spe_calc_ip(int index, u64 payload) { u8 *addr = (u8 *)&payload; diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index b94001b756c79..5f65a3a70c577 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -8,13 +8,12 @@ #include #include #include +#include #include "arm-spe-pkt-decoder.h" -#define BIT(n) (1ULL << (n)) - -#define NS_FLAG BIT(63) -#define EL_FLAG (BIT(62) | BIT(61)) +#define NS_FLAG BIT_ULL(63) +#define EL_FLAG (BIT_ULL(62) | BIT_ULL(61)) #define SPE_HEADER0_PAD 0x0 #define SPE_HEADER0_END 0x1 -- GitLab From 903b659436b706928934ff5ef59d591267e5ce1a Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 11 Nov 2020 15:11:29 +0800 Subject: [PATCH 0128/1894] perf arm-spe: Fix a typo in comment Fix a typo: s/iff/if. Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Link: https://lore.kernel.org/r/20201111071149.815-3-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index 5f65a3a70c577..12a96585da949 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -142,7 +142,7 @@ static int arm_spe_get_events(const unsigned char *buf, size_t len, /* we use index to identify Events with a less number of * comparisons in arm_spe_pkt_desc(): E.g., the LLC-ACCESS, - * LLC-REFILL, and REMOTE-ACCESS events are identified iff + * LLC-REFILL, and REMOTE-ACCESS events are identified if * index > 1. */ packet->index = ret - 1; -- GitLab From b2ded2e2e2764e502fc025f615210434f1eaa2a9 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 11 Nov 2020 15:11:30 +0800 Subject: [PATCH 0129/1894] perf arm-spe: Refactor payload size calculation This patch defines macro to extract "sz" field from header, and renames the function payloadlen() to arm_spe_payload_len(). Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Link: https://lore.kernel.org/r/20201111071149.815-4-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- .../util/arm-spe-decoder/arm-spe-pkt-decoder.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index 12a96585da949..a8eb7be189ecd 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -69,22 +69,22 @@ const char *arm_spe_pkt_name(enum arm_spe_pkt_type type) return arm_spe_packet_name[type]; } -/* return ARM SPE payload size from its encoding, - * which is in bits 5:4 of the byte. - * 00 : byte - * 01 : halfword (2) - * 10 : word (4) - * 11 : doubleword (8) +/* + * Extracts the field "sz" from header bits and converts to bytes: + * 00 : byte (1) + * 01 : halfword (2) + * 10 : word (4) + * 11 : doubleword (8) */ -static int payloadlen(unsigned char byte) +static unsigned int arm_spe_payload_len(unsigned char hdr) { - return 1 << ((byte & 0x30) >> 4); + return 1U << ((hdr & GENMASK_ULL(5, 4)) >> 4); } static int arm_spe_get_payload(const unsigned char *buf, size_t len, struct arm_spe_pkt *packet) { - size_t payload_len = payloadlen(buf[0]); + size_t payload_len = arm_spe_payload_len(buf[0]); if (len < 1 + payload_len) return ARM_SPE_NEED_MORE_BYTES; -- GitLab From b65577baf482909225c79d8a6bad44d2a62751f4 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 11 Nov 2020 15:11:31 +0800 Subject: [PATCH 0130/1894] perf arm-spe: Refactor arm_spe_get_events() In function arm_spe_get_events(), the event packet's 'index' is assigned as payload length, but the flow is not directive: it firstly gets the packet length from the return value of arm_spe_get_payload(), the value includes header length (1) and payload length: int ret = arm_spe_get_payload(buf, len, packet); and then reduces header length from packet length, so finally get the payload length: packet->index = ret - 1; To simplify the code, this patch directly assigns payload length to event packet's index; and at the end it calls arm_spe_get_payload() to return the payload value. Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Link: https://lore.kernel.org/r/20201111071149.815-5-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index a8eb7be189ecd..57904da89db1f 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -136,8 +136,6 @@ static int arm_spe_get_timestamp(const unsigned char *buf, size_t len, static int arm_spe_get_events(const unsigned char *buf, size_t len, struct arm_spe_pkt *packet) { - int ret = arm_spe_get_payload(buf, len, packet); - packet->type = ARM_SPE_EVENTS; /* we use index to identify Events with a less number of @@ -145,9 +143,9 @@ static int arm_spe_get_events(const unsigned char *buf, size_t len, * LLC-REFILL, and REMOTE-ACCESS events are identified if * index > 1. */ - packet->index = ret - 1; + packet->index = arm_spe_payload_len(buf[0]); - return ret; + return arm_spe_get_payload(buf, len, packet); } static int arm_spe_get_data_source(const unsigned char *buf, size_t len, -- GitLab From 0a04244cabc5560ce1e08555e8712a4cd20ab6ce Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 11 Nov 2020 15:11:32 +0800 Subject: [PATCH 0131/1894] perf arm-spe: Fix packet length handling When processing address packet and counter packet, if the packet contains extended header, it misses to account the extra one byte for header length calculation, thus returns the wrong packet length. To correct the packet length calculation, one possible fixing is simply to plus extra 1 for extended header, but will spread some duplicate code in the flows for processing address packet and counter packet. Alternatively, we can refine the function arm_spe_get_payload() to not only support short header and allow it to support extended header, and rely on it for the packet length calculation. So this patch refactors function arm_spe_get_payload() with a new argument 'ext_hdr' for support extended header; the packet processing flows can invoke this function to unify the packet length calculation. Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Link: https://lore.kernel.org/r/20201111071149.815-6-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- .../arm-spe-decoder/arm-spe-pkt-decoder.c | 34 +++++++------------ 1 file changed, 12 insertions(+), 22 deletions(-) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index 57904da89db1f..671a4763fb47b 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -82,14 +82,15 @@ static unsigned int arm_spe_payload_len(unsigned char hdr) } static int arm_spe_get_payload(const unsigned char *buf, size_t len, + unsigned char ext_hdr, struct arm_spe_pkt *packet) { - size_t payload_len = arm_spe_payload_len(buf[0]); + size_t payload_len = arm_spe_payload_len(buf[ext_hdr]); - if (len < 1 + payload_len) + if (len < 1 + ext_hdr + payload_len) return ARM_SPE_NEED_MORE_BYTES; - buf++; + buf += 1 + ext_hdr; switch (payload_len) { case 1: packet->payload = *(uint8_t *)buf; break; @@ -99,7 +100,7 @@ static int arm_spe_get_payload(const unsigned char *buf, size_t len, default: return ARM_SPE_BAD_PACKET; } - return 1 + payload_len; + return 1 + ext_hdr + payload_len; } static int arm_spe_get_pad(struct arm_spe_pkt *packet) @@ -130,7 +131,7 @@ static int arm_spe_get_timestamp(const unsigned char *buf, size_t len, struct arm_spe_pkt *packet) { packet->type = ARM_SPE_TIMESTAMP; - return arm_spe_get_payload(buf, len, packet); + return arm_spe_get_payload(buf, len, 0, packet); } static int arm_spe_get_events(const unsigned char *buf, size_t len, @@ -145,14 +146,14 @@ static int arm_spe_get_events(const unsigned char *buf, size_t len, */ packet->index = arm_spe_payload_len(buf[0]); - return arm_spe_get_payload(buf, len, packet); + return arm_spe_get_payload(buf, len, 0, packet); } static int arm_spe_get_data_source(const unsigned char *buf, size_t len, struct arm_spe_pkt *packet) { packet->type = ARM_SPE_DATA_SOURCE; - return arm_spe_get_payload(buf, len, packet); + return arm_spe_get_payload(buf, len, 0, packet); } static int arm_spe_get_context(const unsigned char *buf, size_t len, @@ -160,8 +161,7 @@ static int arm_spe_get_context(const unsigned char *buf, size_t len, { packet->type = ARM_SPE_CONTEXT; packet->index = buf[0] & 0x3; - - return arm_spe_get_payload(buf, len, packet); + return arm_spe_get_payload(buf, len, 0, packet); } static int arm_spe_get_op_type(const unsigned char *buf, size_t len, @@ -169,41 +169,31 @@ static int arm_spe_get_op_type(const unsigned char *buf, size_t len, { packet->type = ARM_SPE_OP_TYPE; packet->index = buf[0] & 0x3; - return arm_spe_get_payload(buf, len, packet); + return arm_spe_get_payload(buf, len, 0, packet); } static int arm_spe_get_counter(const unsigned char *buf, size_t len, const unsigned char ext_hdr, struct arm_spe_pkt *packet) { - if (len < 2) - return ARM_SPE_NEED_MORE_BYTES; - packet->type = ARM_SPE_COUNTER; if (ext_hdr) packet->index = ((buf[0] & 0x3) << 3) | (buf[1] & 0x7); else packet->index = buf[0] & 0x7; - packet->payload = le16_to_cpu(*(uint16_t *)(buf + 1)); - - return 1 + ext_hdr + 2; + return arm_spe_get_payload(buf, len, ext_hdr, packet); } static int arm_spe_get_addr(const unsigned char *buf, size_t len, const unsigned char ext_hdr, struct arm_spe_pkt *packet) { - if (len < 8) - return ARM_SPE_NEED_MORE_BYTES; - packet->type = ARM_SPE_ADDRESS; if (ext_hdr) packet->index = ((buf[0] & 0x3) << 3) | (buf[1] & 0x7); else packet->index = buf[0] & 0x7; - memcpy_le64(&packet->payload, buf + 1, 8); - - return 1 + ext_hdr + 8; + return arm_spe_get_payload(buf, len, ext_hdr, packet); } static int arm_spe_do_get_packet(const unsigned char *buf, size_t len, -- GitLab From 735e8d93dc2b107f7891a9c2b1c4cfbea1fcbbbc Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 6 Nov 2020 21:46:11 +0100 Subject: [PATCH 0132/1894] ARM: 9022/1: Change arch/arm/lib/mem*.S to use WEAK instead of .weak Commit d6d51a96c7d6 ("ARM: 9014/2: Replace string mem* functions for KASan") add .weak directives to memcpy/memmove/memset to avoid collision with KASAN interceptors. This does not work with LLVM's integrated assembler (the assembly snippet `.weak memcpy ... .globl memcpy` produces a STB_GLOBAL memcpy while GNU as produces a STB_WEAK memcpy). LLVM 12 (since https://reviews.llvm.org/D90108) will error on such an overridden symbol binding. Use the appropriate WEAK macro instead. Link: https://github.com/ClangBuiltLinux/linux/issues/1190 -- Fixes: d6d51a96c7d6 ("ARM: 9014/2: Replace string mem* functions for KASan") Reported-by: Nick Desaulniers Signed-off-by: Fangrui Song Reviewed-by: Nick Desaulniers Tested-by: Nick Desaulniers Signed-off-by: Russell King --- arch/arm/lib/memcpy.S | 3 +-- arch/arm/lib/memmove.S | 3 +-- arch/arm/lib/memset.S | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S index ad4625d16e117..e4caf48c089f2 100644 --- a/arch/arm/lib/memcpy.S +++ b/arch/arm/lib/memcpy.S @@ -58,10 +58,9 @@ /* Prototype: void *memcpy(void *dest, const void *src, size_t n); */ -.weak memcpy ENTRY(__memcpy) ENTRY(mmiocpy) -ENTRY(memcpy) +WEAK(memcpy) #include "copy_template.S" diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S index fd123ea5a5a4a..6fecc12a1f51a 100644 --- a/arch/arm/lib/memmove.S +++ b/arch/arm/lib/memmove.S @@ -24,9 +24,8 @@ * occurring in the opposite direction. */ -.weak memmove ENTRY(__memmove) -ENTRY(memmove) +WEAK(memmove) UNWIND( .fnstart ) subs ip, r0, r1 diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S index 0e7ff0423f50b..9817cb258c1a5 100644 --- a/arch/arm/lib/memset.S +++ b/arch/arm/lib/memset.S @@ -13,10 +13,9 @@ .text .align 5 -.weak memset ENTRY(__memset) ENTRY(mmioset) -ENTRY(memset) +WEAK(memset) UNWIND( .fnstart ) ands r3, r0, #3 @ 1 unaligned? mov ip, r0 @ preserve r0 as return value -- GitLab From df8eda0f1f58e2419875046f57c27c4d72378575 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 10 Nov 2020 16:59:30 +0100 Subject: [PATCH 0133/1894] ARM: 9023/1: Spelling s/mmeory/memory/ Fix a misspelling of the word "memory". Signed-off-by: Geert Uytterhoeven Signed-off-by: Russell King --- arch/arm/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index d32f7652a5bf4..f9de7658d9b3a 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -1136,7 +1136,7 @@ void __init setup_arch(char **cmdline_p) efi_init(); /* * Make sure the calculation for lowmem/highmem is set appropriately - * before reserving/allocating any mmeory + * before reserving/allocating any memory */ adjust_lowmem_bounds(); arm_memblock_init(mdesc); -- GitLab From 730b5764ea8526e48bdb85a24ed96d62de435940 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 10 Nov 2020 16:58:41 +0100 Subject: [PATCH 0134/1894] ARM: 9024/1: Drop useless cast of "u64" to "long long" As "u64" is equivalent to "unsigned long long", there is no need to cast a "u64" parameter for printing it using the "0x%08llx" format specifier. Signed-off-by: Geert Uytterhoeven Signed-off-by: Russell King --- arch/arm/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index f9de7658d9b3a..1a5edf562e85e 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -765,7 +765,7 @@ int __init arm_add_memory(u64 start, u64 size) #ifndef CONFIG_PHYS_ADDR_T_64BIT if (aligned_start > ULONG_MAX) { pr_crit("Ignoring memory at 0x%08llx outside 32-bit physical address space\n", - (long long)start); + start); return -EINVAL; } -- GitLab From 853c1a789f5fe8e783586a5c2dcc2ad1b57ac20f Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 30 Oct 2020 16:25:23 -0700 Subject: [PATCH 0135/1894] platform/chrome: Don't treat RTC events as wakeup sources The EC sends an RTC host event when the RTC fires, but we don't need to treat that as a wakeup event here. The RTC class already properly handles activating and deactivating a wakeup source in rtc_update_irq() by calling pm_stay_awake() at the start of processing and pm_relax() once all expired RTC timers have been processed. This reduces one wakeup increment but not much else. I noticed this while debugging RTC wakeups and how they always incremented the wakeup count by two instead of one because this is duplicated. Signed-off-by: Stephen Boyd Signed-off-by: Enric Balletbo i Serra Cc: Guenter Roeck Cc: Alessandro Zummo Cc: Alexandre Belloni Cc: Link: https://lore.kernel.org/r/20201030232523.2654478-1-swboyd@chromium.org --- drivers/platform/chrome/cros_ec_proto.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c index 0ecee8b8773d0..7c92a6e22d75d 100644 --- a/drivers/platform/chrome/cros_ec_proto.c +++ b/drivers/platform/chrome/cros_ec_proto.c @@ -742,12 +742,16 @@ int cros_ec_get_next_event(struct cros_ec_device *ec_dev, * Sensor events need to be parsed by the sensor sub-device. * Defer them, and don't report the wakeup here. */ - if (event_type == EC_MKBP_EVENT_SENSOR_FIFO) - *wake_event = false; - /* Masked host-events should not count as wake events. */ - else if (host_event && - !(host_event & ec_dev->host_event_wake_mask)) + if (event_type == EC_MKBP_EVENT_SENSOR_FIFO) { *wake_event = false; + } else if (host_event) { + /* rtc_update_irq() already handles wakeup events. */ + if (host_event & EC_HOST_EVENT_MASK(EC_HOST_EVENT_RTC)) + *wake_event = false; + /* Masked host-events should not count as wake events. */ + if (!(host_event & ec_dev->host_event_wake_mask)) + *wake_event = false; + } } return ret; -- GitLab From 0498710be002b35bcb43895c4133a4c4bbfd837e Mon Sep 17 00:00:00 2001 From: Prashant Malani Date: Thu, 29 Oct 2020 15:27:30 -0700 Subject: [PATCH 0136/1894] platform/chrome: cros_ec_typec: Relocate set_port_params_v*() functions Move the cros_typec_set_port_params_v0/v1() functions closer to the place where they are called, cros_typec_port_update(). While we are performing the relocation, also move cros_typec_get_mux_info() closer to its call-site. No functional changes are introduced by this commit. Signed-off-by: Prashant Malani Signed-off-by: Enric Balletbo i Serra Link: https://lore.kernel.org/r/20201029222738.482366-2-pmalani@chromium.org --- drivers/platform/chrome/cros_ec_typec.c | 136 ++++++++++++------------ 1 file changed, 68 insertions(+), 68 deletions(-) diff --git a/drivers/platform/chrome/cros_ec_typec.c b/drivers/platform/chrome/cros_ec_typec.c index 31be31161350e..49083e21317df 100644 --- a/drivers/platform/chrome/cros_ec_typec.c +++ b/drivers/platform/chrome/cros_ec_typec.c @@ -329,74 +329,6 @@ static int cros_typec_ec_command(struct cros_typec_data *typec, return ret; } -static void cros_typec_set_port_params_v0(struct cros_typec_data *typec, - int port_num, struct ec_response_usb_pd_control *resp) -{ - struct typec_port *port = typec->ports[port_num]->port; - enum typec_orientation polarity; - - if (!resp->enabled) - polarity = TYPEC_ORIENTATION_NONE; - else if (!resp->polarity) - polarity = TYPEC_ORIENTATION_NORMAL; - else - polarity = TYPEC_ORIENTATION_REVERSE; - - typec_set_pwr_role(port, resp->role ? TYPEC_SOURCE : TYPEC_SINK); - typec_set_orientation(port, polarity); -} - -static void cros_typec_set_port_params_v1(struct cros_typec_data *typec, - int port_num, struct ec_response_usb_pd_control_v1 *resp) -{ - struct typec_port *port = typec->ports[port_num]->port; - enum typec_orientation polarity; - bool pd_en; - int ret; - - if (!(resp->enabled & PD_CTRL_RESP_ENABLED_CONNECTED)) - polarity = TYPEC_ORIENTATION_NONE; - else if (!resp->polarity) - polarity = TYPEC_ORIENTATION_NORMAL; - else - polarity = TYPEC_ORIENTATION_REVERSE; - typec_set_orientation(port, polarity); - typec_set_data_role(port, resp->role & PD_CTRL_RESP_ROLE_DATA ? - TYPEC_HOST : TYPEC_DEVICE); - typec_set_pwr_role(port, resp->role & PD_CTRL_RESP_ROLE_POWER ? - TYPEC_SOURCE : TYPEC_SINK); - typec_set_vconn_role(port, resp->role & PD_CTRL_RESP_ROLE_VCONN ? - TYPEC_SOURCE : TYPEC_SINK); - - /* Register/remove partners when a connect/disconnect occurs. */ - if (resp->enabled & PD_CTRL_RESP_ENABLED_CONNECTED) { - if (typec->ports[port_num]->partner) - return; - - pd_en = resp->enabled & PD_CTRL_RESP_ENABLED_PD_CAPABLE; - ret = cros_typec_add_partner(typec, port_num, pd_en); - if (ret) - dev_warn(typec->dev, - "Failed to register partner on port: %d\n", - port_num); - } else { - if (!typec->ports[port_num]->partner) - return; - cros_typec_remove_partner(typec, port_num); - } -} - -static int cros_typec_get_mux_info(struct cros_typec_data *typec, int port_num, - struct ec_response_usb_pd_mux_info *resp) -{ - struct ec_params_usb_pd_mux_info req = { - .port = port_num, - }; - - return cros_typec_ec_command(typec, 0, EC_CMD_USB_PD_MUX_INFO, &req, - sizeof(req), resp, sizeof(*resp)); -} - static int cros_typec_usb_safe_state(struct cros_typec_port *port) { port->state.mode = TYPEC_STATE_SAFE; @@ -573,6 +505,74 @@ static int cros_typec_configure_mux(struct cros_typec_data *typec, int port_num, return ret; } +static void cros_typec_set_port_params_v0(struct cros_typec_data *typec, + int port_num, struct ec_response_usb_pd_control *resp) +{ + struct typec_port *port = typec->ports[port_num]->port; + enum typec_orientation polarity; + + if (!resp->enabled) + polarity = TYPEC_ORIENTATION_NONE; + else if (!resp->polarity) + polarity = TYPEC_ORIENTATION_NORMAL; + else + polarity = TYPEC_ORIENTATION_REVERSE; + + typec_set_pwr_role(port, resp->role ? TYPEC_SOURCE : TYPEC_SINK); + typec_set_orientation(port, polarity); +} + +static void cros_typec_set_port_params_v1(struct cros_typec_data *typec, + int port_num, struct ec_response_usb_pd_control_v1 *resp) +{ + struct typec_port *port = typec->ports[port_num]->port; + enum typec_orientation polarity; + bool pd_en; + int ret; + + if (!(resp->enabled & PD_CTRL_RESP_ENABLED_CONNECTED)) + polarity = TYPEC_ORIENTATION_NONE; + else if (!resp->polarity) + polarity = TYPEC_ORIENTATION_NORMAL; + else + polarity = TYPEC_ORIENTATION_REVERSE; + typec_set_orientation(port, polarity); + typec_set_data_role(port, resp->role & PD_CTRL_RESP_ROLE_DATA ? + TYPEC_HOST : TYPEC_DEVICE); + typec_set_pwr_role(port, resp->role & PD_CTRL_RESP_ROLE_POWER ? + TYPEC_SOURCE : TYPEC_SINK); + typec_set_vconn_role(port, resp->role & PD_CTRL_RESP_ROLE_VCONN ? + TYPEC_SOURCE : TYPEC_SINK); + + /* Register/remove partners when a connect/disconnect occurs. */ + if (resp->enabled & PD_CTRL_RESP_ENABLED_CONNECTED) { + if (typec->ports[port_num]->partner) + return; + + pd_en = resp->enabled & PD_CTRL_RESP_ENABLED_PD_CAPABLE; + ret = cros_typec_add_partner(typec, port_num, pd_en); + if (ret) + dev_warn(typec->dev, + "Failed to register partner on port: %d\n", + port_num); + } else { + if (!typec->ports[port_num]->partner) + return; + cros_typec_remove_partner(typec, port_num); + } +} + +static int cros_typec_get_mux_info(struct cros_typec_data *typec, int port_num, + struct ec_response_usb_pd_mux_info *resp) +{ + struct ec_params_usb_pd_mux_info req = { + .port = port_num, + }; + + return cros_typec_ec_command(typec, 0, EC_CMD_USB_PD_MUX_INFO, &req, + sizeof(req), resp, sizeof(*resp)); +} + static int cros_typec_port_update(struct cros_typec_data *typec, int port_num) { struct ec_params_usb_pd_control req; -- GitLab From 7ab5a673f4ce65875c76e9812d2e6da063b87fb7 Mon Sep 17 00:00:00 2001 From: Prashant Malani Date: Thu, 29 Oct 2020 15:27:32 -0700 Subject: [PATCH 0137/1894] platform/chrome: cros_ec_typec: Fix remove partner logic The cros_unregister_ports() function can be called in situations where the partner has not been registered yet, and so its related data structures would not have been initialized. Calling cros_typec_remove_partner() in such a situation can lead to null pointer dereferences. So, only call cros_typec_remove_partner() if there is a valid registered partner pointer. Signed-off-by: Prashant Malani Signed-off-by: Enric Balletbo i Serra Link: https://lore.kernel.org/r/20201029222738.482366-3-pmalani@chromium.org --- drivers/platform/chrome/cros_ec_typec.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/platform/chrome/cros_ec_typec.c b/drivers/platform/chrome/cros_ec_typec.c index 49083e21317df..2665d81259104 100644 --- a/drivers/platform/chrome/cros_ec_typec.c +++ b/drivers/platform/chrome/cros_ec_typec.c @@ -190,7 +190,10 @@ static void cros_unregister_ports(struct cros_typec_data *typec) for (i = 0; i < typec->num_ports; i++) { if (!typec->ports[i]) continue; - cros_typec_remove_partner(typec, i); + + if (typec->ports[i]->partner) + cros_typec_remove_partner(typec, i); + usb_role_switch_put(typec->ports[i]->role_sw); typec_switch_put(typec->ports[i]->ori_sw); typec_mux_put(typec->ports[i]->mux); -- GitLab From 514acf1cefd020eb21d7c180050a8d66b723d2d8 Mon Sep 17 00:00:00 2001 From: Prashant Malani Date: Thu, 29 Oct 2020 15:27:34 -0700 Subject: [PATCH 0138/1894] platform/chrome: cros_ec_typec: Clear partner identity on device removal The partner identity struct isn't reset when a partner is removed, meaning a subsequent partner can inherit an old partner's identity VDOs before discovery is complete. So, clear that struct when a partner removal is detected. Signed-off-by: Prashant Malani Signed-off-by: Enric Balletbo i Serra Link: https://lore.kernel.org/r/20201029222738.482366-4-pmalani@chromium.org --- drivers/platform/chrome/cros_ec_typec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/chrome/cros_ec_typec.c b/drivers/platform/chrome/cros_ec_typec.c index 2665d81259104..faef56bcb9c54 100644 --- a/drivers/platform/chrome/cros_ec_typec.c +++ b/drivers/platform/chrome/cros_ec_typec.c @@ -181,6 +181,7 @@ static void cros_typec_remove_partner(struct cros_typec_data *typec, typec_unregister_partner(port->partner); port->partner = NULL; + memset(&port->p_identity, 0, sizeof(port->p_identity)); } static void cros_unregister_ports(struct cros_typec_data *typec) -- GitLab From cd2c40ff90b0e385c18f881ab5e17f7137864223 Mon Sep 17 00:00:00 2001 From: Prashant Malani Date: Thu, 29 Oct 2020 15:27:36 -0700 Subject: [PATCH 0139/1894] platform/chrome: cros_ec: Import Type C host commands Import the EC_CMD_TYPEC_STATUS and EC_CMD_TYPEC_DISCOVERY Chrome OS EC host commands from the EC code base [1]. These commands can be used by the application processor to query Power Delivery (PD) discovery information concerning connected Type C peripherals. Also add the EC_FEATURE_TYPEC_CMD feature flag, which is used to determine whether these commands are supported by the EC. [1]: https://chromium.googlesource.com/chromiumos/platform/ec/+/refs/heads/master/include/ec_commands.h Signed-off-by: Prashant Malani Signed-off-by: Enric Balletbo i Serra Link: https://lore.kernel.org/r/20201029222738.482366-5-pmalani@chromium.org --- .../linux/platform_data/cros_ec_commands.h | 155 ++++++++++++++++++ 1 file changed, 155 insertions(+) diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index 1fcfe9e63cb9b..7f54fdcdd8cb5 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -1284,6 +1284,8 @@ enum ec_feature_code { EC_FEATURE_SCP = 39, /* The MCU is an Integrated Sensor Hub */ EC_FEATURE_ISH = 40, + /* New TCPMv2 TYPEC_ prefaced commands supported */ + EC_FEATURE_TYPEC_CMD = 41, }; #define EC_FEATURE_MASK_0(event_code) BIT(event_code % 32) @@ -5528,6 +5530,159 @@ struct ec_response_regulator_get_voltage { uint32_t voltage_mv; } __ec_align4; +/* + * Gather all discovery information for the given port and partner type. + * + * Note that if discovery has not yet completed, only the currently completed + * responses will be filled in. If the discovery data structures are changed + * in the process of the command running, BUSY will be returned. + * + * VDO field sizes are set to the maximum possible number of VDOs a VDM may + * contain, while the number of SVIDs here is selected to fit within the PROTO2 + * maximum parameter size. + */ +#define EC_CMD_TYPEC_DISCOVERY 0x0131 + +enum typec_partner_type { + TYPEC_PARTNER_SOP = 0, + TYPEC_PARTNER_SOP_PRIME = 1, +}; + +struct ec_params_typec_discovery { + uint8_t port; + uint8_t partner_type; /* enum typec_partner_type */ +} __ec_align1; + +struct svid_mode_info { + uint16_t svid; + uint16_t mode_count; /* Number of modes partner sent */ + uint32_t mode_vdo[6]; /* Max VDOs allowed after VDM header is 6 */ +}; + +struct ec_response_typec_discovery { + uint8_t identity_count; /* Number of identity VDOs partner sent */ + uint8_t svid_count; /* Number of SVIDs partner sent */ + uint16_t reserved; + uint32_t discovery_vdo[6]; /* Max VDOs allowed after VDM header is 6 */ + struct svid_mode_info svids[0]; +} __ec_align1; + +/* + * Gather all status information for a port. + * + * Note: this covers many of the return fields from the deprecated + * EC_CMD_USB_PD_CONTROL command, except those that are redundant with the + * discovery data. The "enum pd_cc_states" is defined with the deprecated + * EC_CMD_USB_PD_CONTROL command. + * + * This also combines in the EC_CMD_USB_PD_MUX_INFO flags. + */ +#define EC_CMD_TYPEC_STATUS 0x0133 + +/* + * Power role. + * + * Note this is also used for PD header creation, and values align to those in + * the Power Delivery Specification Revision 3.0 (See + * 6.2.1.1.4 Port Power Role). + */ +enum pd_power_role { + PD_ROLE_SINK = 0, + PD_ROLE_SOURCE = 1 +}; + +/* + * Data role. + * + * Note this is also used for PD header creation, and the first two values + * align to those in the Power Delivery Specification Revision 3.0 (See + * 6.2.1.1.6 Port Data Role). + */ +enum pd_data_role { + PD_ROLE_UFP = 0, + PD_ROLE_DFP = 1, + PD_ROLE_DISCONNECTED = 2, +}; + +enum pd_vconn_role { + PD_ROLE_VCONN_OFF = 0, + PD_ROLE_VCONN_SRC = 1, +}; + +/* + * Note: BIT(0) may be used to determine whether the polarity is CC1 or CC2, + * regardless of whether a debug accessory is connected. + */ +enum tcpc_cc_polarity { + /* + * _CCx: is used to indicate the polarity while not connected to + * a Debug Accessory. Only one CC line will assert a resistor and + * the other will be open. + */ + POLARITY_CC1 = 0, + POLARITY_CC2 = 1, + + /* + * _CCx_DTS is used to indicate the polarity while connected to a + * SRC Debug Accessory. Assert resistors on both lines. + */ + POLARITY_CC1_DTS = 2, + POLARITY_CC2_DTS = 3, + + /* + * The current TCPC code relies on these specific POLARITY values. + * Adding in a check to verify if the list grows for any reason + * that this will give a hint that other places need to be + * adjusted. + */ + POLARITY_COUNT +}; + +#define PD_STATUS_EVENT_SOP_DISC_DONE BIT(0) +#define PD_STATUS_EVENT_SOP_PRIME_DISC_DONE BIT(1) + +struct ec_params_typec_status { + uint8_t port; +} __ec_align1; + +struct ec_response_typec_status { + uint8_t pd_enabled; /* PD communication enabled - bool */ + uint8_t dev_connected; /* Device connected - bool */ + uint8_t sop_connected; /* Device is SOP PD capable - bool */ + uint8_t source_cap_count; /* Number of Source Cap PDOs */ + + uint8_t power_role; /* enum pd_power_role */ + uint8_t data_role; /* enum pd_data_role */ + uint8_t vconn_role; /* enum pd_vconn_role */ + uint8_t sink_cap_count; /* Number of Sink Cap PDOs */ + + uint8_t polarity; /* enum tcpc_cc_polarity */ + uint8_t cc_state; /* enum pd_cc_states */ + uint8_t dp_pin; /* DP pin mode (MODE_DP_IN_[A-E]) */ + uint8_t mux_state; /* USB_PD_MUX* - encoded mux state */ + + char tc_state[32]; /* TC state name */ + + uint32_t events; /* PD_STATUS_EVENT bitmask */ + + /* + * BCD PD revisions for partners + * + * The format has the PD major reversion in the upper nibble, and PD + * minor version in the next nibble. Following two nibbles are + * currently 0. + * ex. PD 3.2 would map to 0x3200 + * + * PD major/minor will be 0 if no PD device is connected. + */ + uint16_t sop_revision; + uint16_t sop_prime_revision; + + uint32_t source_cap_pdos[7]; /* Max 7 PDOs can be present */ + + uint32_t sink_cap_pdos[7]; /* Max 7 PDOs can be present */ +} __ec_align1; + /*****************************************************************************/ /* The command range 0x200-0x2FF is reserved for Rotor. */ -- GitLab From 80f8cef60d79f23c02e546ba3de2fce84d5e8bdb Mon Sep 17 00:00:00 2001 From: Prashant Malani Date: Thu, 29 Oct 2020 15:27:38 -0700 Subject: [PATCH 0140/1894] platform/chrome: cros_ec_typec: Introduce TYPEC_STATUS Make a call to the newly introduced EC_CMD_TYPEC_STATUS command. Currently we just check to see if the SOP (port-partner) discovery was done and emit a debug level print for it. Subsequent patches will retrieve and parse the discovery data and fill out the Type C connector class data structures. Also check the EC_FEATURE_TYPEC_CMD feature flag at probe, and only call the new TYPEC_STATUS command if the feature flag is supported. Reported-by: kernel test robot Signed-off-by: Prashant Malani Signed-off-by: Enric Balletbo i Serra Link: https://lore.kernel.org/r/20201029222738.482366-6-pmalani@chromium.org --- drivers/platform/chrome/cros_ec_typec.c | 52 +++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/drivers/platform/chrome/cros_ec_typec.c b/drivers/platform/chrome/cros_ec_typec.c index faef56bcb9c54..f578d0bfbe5a7 100644 --- a/drivers/platform/chrome/cros_ec_typec.c +++ b/drivers/platform/chrome/cros_ec_typec.c @@ -48,6 +48,9 @@ struct cros_typec_port { /* Port alt modes. */ struct typec_altmode p_altmode[CROS_EC_ALTMODE_MAX]; + + /* Flag indicating that PD discovery data parsing is completed. */ + bool disc_done; }; /* Platform-specific data for the Chrome OS EC Type C controller. */ @@ -60,6 +63,7 @@ struct cros_typec_data { struct cros_typec_port *ports[EC_USB_PD_MAX_PORTS]; struct notifier_block nb; struct work_struct port_work; + bool typec_cmd_supported; }; static int cros_typec_parse_port_props(struct typec_capability *cap, @@ -182,6 +186,7 @@ static void cros_typec_remove_partner(struct cros_typec_data *typec, typec_unregister_partner(port->partner); port->partner = NULL; memset(&port->p_identity, 0, sizeof(port->p_identity)); + port->disc_done = false; } static void cros_unregister_ports(struct cros_typec_data *typec) @@ -577,6 +582,31 @@ static int cros_typec_get_mux_info(struct cros_typec_data *typec, int port_num, sizeof(req), resp, sizeof(*resp)); } +static void cros_typec_handle_status(struct cros_typec_data *typec, int port_num) +{ + struct ec_response_typec_status resp; + struct ec_params_typec_status req = { + .port = port_num, + }; + int ret; + + ret = cros_typec_ec_command(typec, 0, EC_CMD_TYPEC_STATUS, &req, sizeof(req), + &resp, sizeof(resp)); + if (ret < 0) { + dev_warn(typec->dev, "EC_CMD_TYPEC_STATUS failed for port: %d\n", port_num); + return; + } + + if (typec->ports[port_num]->disc_done) + return; + + /* Handle any events appropriately. */ + if (resp.events & PD_STATUS_EVENT_SOP_DISC_DONE) { + dev_dbg(typec->dev, "SOP Discovery done for port: %d\n", port_num); + typec->ports[port_num]->disc_done = true; + } +} + static int cros_typec_port_update(struct cros_typec_data *typec, int port_num) { struct ec_params_usb_pd_control req; @@ -613,6 +643,9 @@ static int cros_typec_port_update(struct cros_typec_data *typec, int port_num) cros_typec_set_port_params_v0(typec, port_num, (struct ec_response_usb_pd_control *) &resp); + if (typec->typec_cmd_supported) + cros_typec_handle_status(typec, port_num); + /* Update the switches if they exist, according to requested state */ ret = cros_typec_get_mux_info(typec, port_num, &mux_resp); if (ret < 0) { @@ -661,6 +694,23 @@ static int cros_typec_get_cmd_version(struct cros_typec_data *typec) return 0; } +/* Check the EC feature flags to see if TYPEC_* commands are supported. */ +static int cros_typec_cmds_supported(struct cros_typec_data *typec) +{ + struct ec_response_get_features resp = {}; + int ret; + + ret = cros_typec_ec_command(typec, 0, EC_CMD_GET_FEATURES, NULL, 0, + &resp, sizeof(resp)); + if (ret < 0) { + dev_warn(typec->dev, + "Failed to get features, assuming typec commands unsupported.\n"); + return 0; + } + + return resp.flags[EC_FEATURE_TYPEC_CMD / 32] & EC_FEATURE_MASK_1(EC_FEATURE_TYPEC_CMD); +} + static void cros_typec_port_work(struct work_struct *work) { struct cros_typec_data *typec = container_of(work, struct cros_typec_data, port_work); @@ -720,6 +770,8 @@ static int cros_typec_probe(struct platform_device *pdev) return ret; } + typec->typec_cmd_supported = !!cros_typec_cmds_supported(typec); + ret = cros_typec_ec_command(typec, 0, EC_CMD_USB_PD_PORTS, NULL, 0, &resp, sizeof(resp)); if (ret < 0) -- GitLab From f6f668118918f533676e51f3214f5a104562b59c Mon Sep 17 00:00:00 2001 From: Prashant Malani Date: Thu, 29 Oct 2020 15:27:40 -0700 Subject: [PATCH 0141/1894] platform/chrome: cros_ec_typec: Parse partner PD ID VDOs Use EC_CMD_TYPE_DISCOVERY to retrieve and store the discovery data for the port partner. With that data, update the PD Identity VDO values for the partner, which were earlier not initialized. Signed-off-by: Prashant Malani Signed-off-by: Enric Balletbo i Serra Acked-by: Heikki Krogerus Cc: Heikki Krogerus Link: https://lore.kernel.org/r/20201029222738.482366-7-pmalani@chromium.org --- drivers/platform/chrome/cros_ec_typec.c | 60 ++++++++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) diff --git a/drivers/platform/chrome/cros_ec_typec.c b/drivers/platform/chrome/cros_ec_typec.c index f578d0bfbe5a7..f14550dac6140 100644 --- a/drivers/platform/chrome/cros_ec_typec.c +++ b/drivers/platform/chrome/cros_ec_typec.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -51,6 +52,7 @@ struct cros_typec_port { /* Flag indicating that PD discovery data parsing is completed. */ bool disc_done; + struct ec_response_typec_discovery *sop_disc; }; /* Platform-specific data for the Chrome OS EC Type C controller. */ @@ -298,6 +300,12 @@ static int cros_typec_init_ports(struct cros_typec_data *typec) port_num); cros_typec_register_port_altmodes(typec, port_num); + + cros_port->sop_disc = devm_kzalloc(dev, EC_PROTO2_MAX_RESPONSE_SIZE, GFP_KERNEL); + if (!cros_port->sop_disc) { + ret = -ENOMEM; + goto unregister_ports; + } } return 0; @@ -582,6 +590,51 @@ static int cros_typec_get_mux_info(struct cros_typec_data *typec, int port_num, sizeof(req), resp, sizeof(*resp)); } +static int cros_typec_handle_sop_disc(struct cros_typec_data *typec, int port_num) +{ + struct cros_typec_port *port = typec->ports[port_num]; + struct ec_response_typec_discovery *sop_disc = port->sop_disc; + struct ec_params_typec_discovery req = { + .port = port_num, + .partner_type = TYPEC_PARTNER_SOP, + }; + int ret = 0; + int i; + + if (!port->partner) { + dev_err(typec->dev, + "SOP Discovery received without partner registered, port: %d\n", + port_num); + ret = -EINVAL; + goto disc_exit; + } + + memset(sop_disc, 0, EC_PROTO2_MAX_RESPONSE_SIZE); + ret = cros_typec_ec_command(typec, 0, EC_CMD_TYPEC_DISCOVERY, &req, sizeof(req), + sop_disc, EC_PROTO2_MAX_RESPONSE_SIZE); + if (ret < 0) { + dev_err(typec->dev, "Failed to get SOP discovery data for port: %d\n", port_num); + goto disc_exit; + } + + /* First, update the PD identity VDOs for the partner. */ + if (sop_disc->identity_count > 0) + port->p_identity.id_header = sop_disc->discovery_vdo[0]; + if (sop_disc->identity_count > 1) + port->p_identity.cert_stat = sop_disc->discovery_vdo[1]; + if (sop_disc->identity_count > 2) + port->p_identity.product = sop_disc->discovery_vdo[2]; + + /* Copy the remaining identity VDOs till a maximum of 6. */ + for (i = 3; i < sop_disc->identity_count && i < VDO_MAX_OBJECTS; i++) + port->p_identity.vdo[i - 3] = sop_disc->discovery_vdo[i]; + + ret = typec_partner_set_identity(port->partner); + +disc_exit: + return ret; +} + static void cros_typec_handle_status(struct cros_typec_data *typec, int port_num) { struct ec_response_typec_status resp; @@ -602,7 +655,12 @@ static void cros_typec_handle_status(struct cros_typec_data *typec, int port_num /* Handle any events appropriately. */ if (resp.events & PD_STATUS_EVENT_SOP_DISC_DONE) { - dev_dbg(typec->dev, "SOP Discovery done for port: %d\n", port_num); + ret = cros_typec_handle_sop_disc(typec, port_num); + if (ret < 0) { + dev_err(typec->dev, "Couldn't parse SOP Disc data, port: %d\n", port_num); + return; + } + typec->ports[port_num]->disc_done = true; } } -- GitLab From de0f49487db3667f5204dcec6d3482c9bd1a0a30 Mon Sep 17 00:00:00 2001 From: Prashant Malani Date: Thu, 29 Oct 2020 15:27:42 -0700 Subject: [PATCH 0142/1894] platform/chrome: cros_ec_typec: Register partner altmodes Use the discovery data from the Chrome EC to register parter altmodes with the Type C Connector Class framework. Also introduce a node struct to keep track of the list of registered alt modes. Signed-off-by: Prashant Malani Signed-off-by: Enric Balletbo i Serra Acked-by: Heikki Krogerus Cc: Heikki Krogerus Link: https://lore.kernel.org/r/20201029222738.482366-8-pmalani@chromium.org --- drivers/platform/chrome/cros_ec_typec.c | 77 +++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/drivers/platform/chrome/cros_ec_typec.c b/drivers/platform/chrome/cros_ec_typec.c index f14550dac6140..ce031a10eb1b3 100644 --- a/drivers/platform/chrome/cros_ec_typec.c +++ b/drivers/platform/chrome/cros_ec_typec.c @@ -7,6 +7,7 @@ */ #include +#include #include #include #include @@ -31,6 +32,12 @@ enum { CROS_EC_ALTMODE_MAX, }; +/* Container for altmode pointer nodes. */ +struct cros_typec_altmode_node { + struct typec_altmode *amode; + struct list_head list; +}; + /* Per port data. */ struct cros_typec_port { struct typec_port *port; @@ -53,6 +60,7 @@ struct cros_typec_port { /* Flag indicating that PD discovery data parsing is completed. */ bool disc_done; struct ec_response_typec_discovery *sop_disc; + struct list_head partner_mode_list; }; /* Platform-specific data for the Chrome OS EC Type C controller. */ @@ -172,11 +180,25 @@ static int cros_typec_add_partner(struct cros_typec_data *typec, int port_num, return ret; } +static void cros_typec_unregister_altmodes(struct cros_typec_data *typec, int port_num) +{ + struct cros_typec_port *port = typec->ports[port_num]; + struct cros_typec_altmode_node *node, *tmp; + + list_for_each_entry_safe(node, tmp, &port->partner_mode_list, list) { + list_del(&node->list); + typec_unregister_altmode(node->amode); + devm_kfree(typec->dev, node); + } +} + static void cros_typec_remove_partner(struct cros_typec_data *typec, int port_num) { struct cros_typec_port *port = typec->ports[port_num]; + cros_typec_unregister_altmodes(typec, port_num); + port->state.alt = NULL; port->state.mode = TYPEC_STATE_USB; port->state.data = NULL; @@ -306,6 +328,8 @@ static int cros_typec_init_ports(struct cros_typec_data *typec) ret = -ENOMEM; goto unregister_ports; } + + INIT_LIST_HEAD(&cros_port->partner_mode_list); } return 0; @@ -590,6 +614,49 @@ static int cros_typec_get_mux_info(struct cros_typec_data *typec, int port_num, sizeof(req), resp, sizeof(*resp)); } +static int cros_typec_register_altmodes(struct cros_typec_data *typec, int port_num) +{ + struct cros_typec_port *port = typec->ports[port_num]; + struct ec_response_typec_discovery *sop_disc = port->sop_disc; + struct cros_typec_altmode_node *node; + struct typec_altmode_desc desc; + struct typec_altmode *amode; + int ret = 0; + int i, j; + + for (i = 0; i < sop_disc->svid_count; i++) { + for (j = 0; j < sop_disc->svids[i].mode_count; j++) { + memset(&desc, 0, sizeof(desc)); + desc.svid = sop_disc->svids[i].svid; + desc.mode = j; + desc.vdo = sop_disc->svids[i].mode_vdo[j]; + + amode = typec_partner_register_altmode(port->partner, &desc); + if (IS_ERR(amode)) { + ret = PTR_ERR(amode); + goto err_cleanup; + } + + /* If no memory is available we should unregister and exit. */ + node = devm_kzalloc(typec->dev, sizeof(*node), GFP_KERNEL); + if (!node) { + ret = -ENOMEM; + typec_unregister_altmode(amode); + goto err_cleanup; + } + + node->amode = amode; + list_add_tail(&node->list, &port->partner_mode_list); + } + } + + return 0; + +err_cleanup: + cros_typec_unregister_altmodes(typec, port_num); + return ret; +} + static int cros_typec_handle_sop_disc(struct cros_typec_data *typec, int port_num) { struct cros_typec_port *port = typec->ports[port_num]; @@ -630,6 +697,16 @@ static int cros_typec_handle_sop_disc(struct cros_typec_data *typec, int port_nu port->p_identity.vdo[i - 3] = sop_disc->discovery_vdo[i]; ret = typec_partner_set_identity(port->partner); + if (ret < 0) { + dev_err(typec->dev, "Failed to update partner PD identity, port: %d\n", port_num); + goto disc_exit; + } + + ret = cros_typec_register_altmodes(typec, port_num); + if (ret < 0) { + dev_err(typec->dev, "Failed to register partner altmodes, port: %d\n", port_num); + goto disc_exit; + } disc_exit: return ret; -- GitLab From 65cdb4a214c0015c19fc1876896746c05396f45d Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Fri, 13 Nov 2020 16:58:13 +0800 Subject: [PATCH 0143/1894] configfs: fix kernel-doc markup issue Add explanation for 'frag' parameter to avoid kernel-doc issue: fs/configfs/dir.c:277: warning: Function parameter or member 'frag' not described in 'configfs_create_dir' Signed-off-by: Alex Shi Cc: Joel Becker Cc: Christoph Hellwig Cc: linux-kernel@vger.kernel.org Signed-off-by: Christoph Hellwig --- fs/configfs/dir.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index b0983e2a4e2c7..b839dd1b459f4 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -267,6 +267,7 @@ static void configfs_remove_dirent(struct dentry *dentry) * configfs_create_dir - create a directory for an config_item. * @item: config_itemwe're creating directory for. * @dentry: config_item's dentry. + * @frag: config_item's fragment. * * Note: user-created entries won't be allowed under this new directory * until it is validated by configfs_dir_set_ready() -- GitLab From e4accab4e0b069bd4ddc1ac769a77b989bb9ed1c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 13 Nov 2020 13:14:19 +0300 Subject: [PATCH 0144/1894] clk: qcom: lpass-sc7180: Clean up on error in lpass_sc7180_init() Clean up the first driver if the second driver can't be registered. Fixes: 4ee9fe3e292b ("clk: qcom: lpass-sc7180: Disentangle the two clock devices") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/20201113101419.GC168908@mwanda Reviewed-by: Douglas Anderson Signed-off-by: Stephen Boyd --- drivers/clk/qcom/lpasscorecc-sc7180.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/clk/qcom/lpasscorecc-sc7180.c b/drivers/clk/qcom/lpasscorecc-sc7180.c index 1a3925badd7c4..9081649f476f5 100644 --- a/drivers/clk/qcom/lpasscorecc-sc7180.c +++ b/drivers/clk/qcom/lpasscorecc-sc7180.c @@ -491,7 +491,13 @@ static int __init lpass_sc7180_init(void) if (ret) return ret; - return platform_driver_register(&lpass_hm_sc7180_driver); + ret = platform_driver_register(&lpass_hm_sc7180_driver); + if (ret) { + platform_driver_unregister(&lpass_core_cc_sc7180_driver); + return ret; + } + + return 0; } subsys_initcall(lpass_sc7180_init); -- GitLab From 8d4025943e13010d753935e37ad085fca4906e6c Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Sat, 14 Nov 2020 09:44:08 -0800 Subject: [PATCH 0145/1894] clk: qcom: camcc-sc7180: Use runtime PM ops instead of clk ones Let's call pm_runtime_get() here instead of calling the PM clk APIs directly. This avoids a compilation problem on CONFIG_PM=n where the pm_clk_runtime_{resume,suspend}() functions don't exist and covers the intent, i.e. enable the clks for this device so we can program PLL settings. Reported-by: Randy Dunlap Reported-by: Geert Uytterhoeven Cc: Nathan Chancellor Cc: Stephen Rothwell Cc: Taniya Das Cc: "Rafael J. Wysocki" Acked-by: Randy Dunlap # build-tested Fixes: 15d09e830bbc ("clk: qcom: camcc: Add camera clock controller driver for SC7180") Signed-off-by: Stephen Boyd Link: https://lore.kernel.org/r/20201114174408.579047-1-sboyd@kernel.org --- drivers/clk/qcom/camcc-sc7180.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/clk/qcom/camcc-sc7180.c b/drivers/clk/qcom/camcc-sc7180.c index f51bf5b6decc6..dbac5651ab855 100644 --- a/drivers/clk/qcom/camcc-sc7180.c +++ b/drivers/clk/qcom/camcc-sc7180.c @@ -1669,16 +1669,14 @@ static int cam_cc_sc7180_probe(struct platform_device *pdev) goto disable_pm_runtime; } - ret = pm_clk_runtime_resume(&pdev->dev); - if (ret < 0) { - dev_err(&pdev->dev, "pm runtime resume failed\n"); + ret = pm_runtime_get(&pdev->dev); + if (ret) goto destroy_pm_clk; - } regmap = qcom_cc_map(pdev, &cam_cc_sc7180_desc); if (IS_ERR(regmap)) { ret = PTR_ERR(regmap); - pm_clk_runtime_suspend(&pdev->dev); + pm_runtime_put(&pdev->dev); goto destroy_pm_clk; } @@ -1688,9 +1686,7 @@ static int cam_cc_sc7180_probe(struct platform_device *pdev) clk_fabia_pll_configure(&cam_cc_pll3, regmap, &cam_cc_pll3_config); ret = qcom_cc_really_probe(pdev, &cam_cc_sc7180_desc, regmap); - - pm_clk_runtime_suspend(&pdev->dev); - + pm_runtime_put(&pdev->dev); if (ret < 0) { dev_err(&pdev->dev, "Failed to register CAM CC clocks\n"); goto destroy_pm_clk; -- GitLab From 6a17849703581f11b499da8e1d6a2941b9e738dd Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Fri, 6 Nov 2020 10:48:20 +0100 Subject: [PATCH 0146/1894] clk: remove unneeded dead-store initialization make clang-analyzer on x86_64 defconfig caught my attention with: drivers/clk/clk.c:423:19: warning: Value stored to 'parent' during its initialization is never read [clang-analyzer-deadcode.DeadStores] struct clk_core *parent = ERR_PTR(-ENOENT); ^ Commit fc0c209c147f ("clk: Allow parents to be specified without string names") introduced clk_core_fill_parent_index() with this unneeded dead-store initialization. So, simply remove this unneeded dead-store initialization to make clang-analyzer happy. As compilers will detect this unneeded assignment and optimize this anyway, the resulting object code is identical before and after this change. No functional change. No change to object code. Signed-off-by: Lukas Bulwahn Link: https://lore.kernel.org/r/20201106094820.30167-1-lukas.bulwahn@gmail.com Reviewed-by: Nathan Chancellor Signed-off-by: Stephen Boyd --- drivers/clk/clk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index f83dac54ed853..ba35bf35bcd36 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -420,7 +420,7 @@ static struct clk_core *clk_core_get(struct clk_core *core, u8 p_index) static void clk_core_fill_parent_index(struct clk_core *core, u8 index) { struct clk_parent_map *entry = &core->parents[index]; - struct clk_core *parent = ERR_PTR(-ENOENT); + struct clk_core *parent; if (entry->hw) { parent = entry->hw->core; -- GitLab From e5a4b9b99e5b70a41578e78d30349315772add1b Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Wed, 21 Oct 2020 18:21:45 +0200 Subject: [PATCH 0147/1894] clk: avoid devm_clk_release name clash In clk-devres.c, devm_clk_release() is used to call clk_put() memory managed clock. In clk.c the same name, in a different scope is used to call clk_unregister(). As it stands, it is not really a problem but it does not readability, especially if we need to call clk_put() on managed clock in clk.c Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20201021162147.563655-2-jbrunet@baylibre.com Tested-by: Kevin Hilman Signed-off-by: Stephen Boyd --- drivers/clk/clk.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index f83dac54ed853..f70dc0ef1cddb 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -4068,12 +4068,12 @@ void clk_hw_unregister(struct clk_hw *hw) } EXPORT_SYMBOL_GPL(clk_hw_unregister); -static void devm_clk_release(struct device *dev, void *res) +static void devm_clk_unregister_cb(struct device *dev, void *res) { clk_unregister(*(struct clk **)res); } -static void devm_clk_hw_release(struct device *dev, void *res) +static void devm_clk_hw_unregister_cb(struct device *dev, void *res) { clk_hw_unregister(*(struct clk_hw **)res); } @@ -4093,7 +4093,7 @@ struct clk *devm_clk_register(struct device *dev, struct clk_hw *hw) struct clk *clk; struct clk **clkp; - clkp = devres_alloc(devm_clk_release, sizeof(*clkp), GFP_KERNEL); + clkp = devres_alloc(devm_clk_unregister_cb, sizeof(*clkp), GFP_KERNEL); if (!clkp) return ERR_PTR(-ENOMEM); @@ -4123,7 +4123,7 @@ int devm_clk_hw_register(struct device *dev, struct clk_hw *hw) struct clk_hw **hwp; int ret; - hwp = devres_alloc(devm_clk_hw_release, sizeof(*hwp), GFP_KERNEL); + hwp = devres_alloc(devm_clk_hw_unregister_cb, sizeof(*hwp), GFP_KERNEL); if (!hwp) return -ENOMEM; @@ -4167,7 +4167,7 @@ static int devm_clk_hw_match(struct device *dev, void *res, void *data) */ void devm_clk_unregister(struct device *dev, struct clk *clk) { - WARN_ON(devres_release(dev, devm_clk_release, devm_clk_match, clk)); + WARN_ON(devres_release(dev, devm_clk_unregister_cb, devm_clk_match, clk)); } EXPORT_SYMBOL_GPL(devm_clk_unregister); @@ -4182,7 +4182,7 @@ EXPORT_SYMBOL_GPL(devm_clk_unregister); */ void devm_clk_hw_unregister(struct device *dev, struct clk_hw *hw) { - WARN_ON(devres_release(dev, devm_clk_hw_release, devm_clk_hw_match, + WARN_ON(devres_release(dev, devm_clk_hw_unregister_cb, devm_clk_hw_match, hw)); } EXPORT_SYMBOL_GPL(devm_clk_hw_unregister); -- GitLab From 30d6f8c15d2cd877c1f3d47d8a1064649ebe58e2 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Wed, 21 Oct 2020 18:21:46 +0200 Subject: [PATCH 0148/1894] clk: add api to get clk consumer from clk_hw clk_register() is deprecated. Using 'clk' member of struct clk_hw is discouraged. With this constraint, it is difficult for driver to register clocks using the clk_hw API and then use the clock with the consumer API This adds a simple helper, clk_hw_get_clk(), to get a struct clk from a struct clk_hw. Like other clk_get() variant, each call to this helper must be balanced with a call to clk_put(). To make life easier on the consumers, a memory managed version is provided as well. Cc: Martin Blumenstingl Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20201021162147.563655-3-jbrunet@baylibre.com Tested-by: Kevin Hilman [sboyd@kernel.org: Fix kernel-doc] Signed-off-by: Stephen Boyd --- drivers/clk/clk.c | 61 ++++++++++++++++++++++++++++++++++++ include/linux/clk-provider.h | 5 +++ 2 files changed, 66 insertions(+) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index f70dc0ef1cddb..48931f442de8f 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -3667,6 +3667,24 @@ struct clk *clk_hw_create_clk(struct device *dev, struct clk_hw *hw, return clk; } +/** + * clk_hw_get_clk - get clk consumer given an clk_hw + * @hw: clk_hw associated with the clk being consumed + * @con_id: connection ID string on device + * + * Returns: new clk consumer + * This is the function to be used by providers which need + * to get a consumer clk and act on the clock element + * Calls to this function must be balanced with calls clk_put() + */ +struct clk *clk_hw_get_clk(struct clk_hw *hw, const char *con_id) +{ + struct device *dev = hw->core->dev; + + return clk_hw_create_clk(dev, hw, dev_name(dev), con_id); +} +EXPORT_SYMBOL(clk_hw_get_clk); + static int clk_cpy_name(const char **dst_p, const char *src, bool must_exist) { const char *dst; @@ -4187,6 +4205,49 @@ void devm_clk_hw_unregister(struct device *dev, struct clk_hw *hw) } EXPORT_SYMBOL_GPL(devm_clk_hw_unregister); +static void devm_clk_release(struct device *dev, void *res) +{ + clk_put(*(struct clk **)res); +} + +/** + * devm_clk_hw_get_clk - resource managed clk_hw_get_clk() + * @dev: device that is registering this clock + * @hw: clk_hw associated with the clk being consumed + * @con_id: connection ID string on device + * + * Managed clk_hw_get_clk(). Clocks got with this function are + * automatically clk_put() on driver detach. See clk_put() + * for more information. + */ +struct clk *devm_clk_hw_get_clk(struct device *dev, struct clk_hw *hw, + const char *con_id) +{ + struct clk *clk; + struct clk **clkp; + + /* This should not happen because it would mean we have drivers + * passing around clk_hw pointers instead of having the caller use + * proper clk_get() style APIs + */ + WARN_ON_ONCE(dev != hw->core->dev); + + clkp = devres_alloc(devm_clk_release, sizeof(*clkp), GFP_KERNEL); + if (!clkp) + return ERR_PTR(-ENOMEM); + + clk = clk_hw_get_clk(hw, con_id); + if (!IS_ERR(clk)) { + *clkp = clk; + devres_add(dev, clkp); + } else { + devres_free(clkp); + } + + return clk; +} +EXPORT_SYMBOL_GPL(devm_clk_hw_get_clk); + /* * clkdev helpers */ diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 03a5de5f99f4a..86b707520ec0d 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -1088,6 +1088,11 @@ static inline struct clk_hw *__clk_get_hw(struct clk *clk) return (struct clk_hw *)clk; } #endif + +struct clk *clk_hw_get_clk(struct clk_hw *hw, const char *con_id); +struct clk *devm_clk_hw_get_clk(struct device *dev, struct clk_hw *hw, + const char *con_id); + unsigned int clk_hw_get_num_parents(const struct clk_hw *hw); struct clk_hw *clk_hw_get_parent(const struct clk_hw *hw); struct clk_hw *clk_hw_get_parent_by_index(const struct clk_hw *hw, -- GitLab From 8e677e7f0aa3b01c501a9a48a04a34173380ccfd Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Wed, 21 Oct 2020 18:21:47 +0200 Subject: [PATCH 0149/1894] clk: meson: g12: drop use of __clk_lookup() g12 clock controller used __clk_lookup() to get struct clk from a struct clk_hw. This type of hack is no longer required as CCF now provides the necessary functions to get this. Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20201021162147.563655-4-jbrunet@baylibre.com Tested-by: Kevin Hilman Signed-off-by: Stephen Boyd --- drivers/clk/meson/g12a.c | 68 +++++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 36 deletions(-) diff --git a/drivers/clk/meson/g12a.c b/drivers/clk/meson/g12a.c index b814d44917a5d..235dcf72e34a6 100644 --- a/drivers/clk/meson/g12a.c +++ b/drivers/clk/meson/g12a.c @@ -5156,10 +5156,11 @@ static const struct reg_sequence g12a_init_regs[] = { { .reg = HHI_MPLL_CNTL0, .def = 0x00000543 }, }; -static int meson_g12a_dvfs_setup_common(struct platform_device *pdev, +#define DVFS_CON_ID "dvfs" + +static int meson_g12a_dvfs_setup_common(struct device *dev, struct clk_hw **hws) { - const char *notifier_clk_name; struct clk *notifier_clk; struct clk_hw *xtal; int ret; @@ -5168,21 +5169,21 @@ static int meson_g12a_dvfs_setup_common(struct platform_device *pdev, /* Setup clock notifier for cpu_clk_postmux0 */ g12a_cpu_clk_postmux0_nb_data.xtal = xtal; - notifier_clk_name = clk_hw_get_name(&g12a_cpu_clk_postmux0.hw); - notifier_clk = __clk_lookup(notifier_clk_name); + notifier_clk = devm_clk_hw_get_clk(dev, &g12a_cpu_clk_postmux0.hw, + DVFS_CON_ID); ret = clk_notifier_register(notifier_clk, &g12a_cpu_clk_postmux0_nb_data.nb); if (ret) { - dev_err(&pdev->dev, "failed to register the cpu_clk_postmux0 notifier\n"); + dev_err(dev, "failed to register the cpu_clk_postmux0 notifier\n"); return ret; } /* Setup clock notifier for cpu_clk_dyn mux */ - notifier_clk_name = clk_hw_get_name(&g12a_cpu_clk_dyn.hw); - notifier_clk = __clk_lookup(notifier_clk_name); + notifier_clk = devm_clk_hw_get_clk(dev, &g12a_cpu_clk_dyn.hw, + DVFS_CON_ID); ret = clk_notifier_register(notifier_clk, &g12a_cpu_clk_mux_nb); if (ret) { - dev_err(&pdev->dev, "failed to register the cpu_clk_dyn notifier\n"); + dev_err(dev, "failed to register the cpu_clk_dyn notifier\n"); return ret; } @@ -5192,33 +5193,33 @@ static int meson_g12a_dvfs_setup_common(struct platform_device *pdev, static int meson_g12b_dvfs_setup(struct platform_device *pdev) { struct clk_hw **hws = g12b_hw_onecell_data.hws; - const char *notifier_clk_name; + struct device *dev = &pdev->dev; struct clk *notifier_clk; struct clk_hw *xtal; int ret; - ret = meson_g12a_dvfs_setup_common(pdev, hws); + ret = meson_g12a_dvfs_setup_common(dev, hws); if (ret) return ret; xtal = clk_hw_get_parent_by_index(hws[CLKID_CPU_CLK_DYN1_SEL], 0); /* Setup clock notifier for cpu_clk mux */ - notifier_clk_name = clk_hw_get_name(&g12b_cpu_clk.hw); - notifier_clk = __clk_lookup(notifier_clk_name); + notifier_clk = devm_clk_hw_get_clk(dev, &g12b_cpu_clk.hw, + DVFS_CON_ID); ret = clk_notifier_register(notifier_clk, &g12a_cpu_clk_mux_nb); if (ret) { - dev_err(&pdev->dev, "failed to register the cpu_clk notifier\n"); + dev_err(dev, "failed to register the cpu_clk notifier\n"); return ret; } /* Setup clock notifier for sys1_pll */ - notifier_clk_name = clk_hw_get_name(&g12b_sys1_pll.hw); - notifier_clk = __clk_lookup(notifier_clk_name); + notifier_clk = devm_clk_hw_get_clk(dev, &g12b_sys1_pll.hw, + DVFS_CON_ID); ret = clk_notifier_register(notifier_clk, &g12b_cpu_clk_sys1_pll_nb_data.nb); if (ret) { - dev_err(&pdev->dev, "failed to register the sys1_pll notifier\n"); + dev_err(dev, "failed to register the sys1_pll notifier\n"); return ret; } @@ -5226,40 +5227,37 @@ static int meson_g12b_dvfs_setup(struct platform_device *pdev) /* Setup clock notifier for cpub_clk_postmux0 */ g12b_cpub_clk_postmux0_nb_data.xtal = xtal; - notifier_clk_name = clk_hw_get_name(&g12b_cpub_clk_postmux0.hw); - notifier_clk = __clk_lookup(notifier_clk_name); + notifier_clk = devm_clk_hw_get_clk(dev, &g12b_cpub_clk_postmux0.hw, + DVFS_CON_ID); ret = clk_notifier_register(notifier_clk, &g12b_cpub_clk_postmux0_nb_data.nb); if (ret) { - dev_err(&pdev->dev, "failed to register the cpub_clk_postmux0 notifier\n"); + dev_err(dev, "failed to register the cpub_clk_postmux0 notifier\n"); return ret; } /* Setup clock notifier for cpub_clk_dyn mux */ - notifier_clk_name = clk_hw_get_name(&g12b_cpub_clk_dyn.hw); - notifier_clk = __clk_lookup(notifier_clk_name); + notifier_clk = devm_clk_hw_get_clk(dev, &g12b_cpub_clk_dyn.hw, "dvfs"); ret = clk_notifier_register(notifier_clk, &g12a_cpu_clk_mux_nb); if (ret) { - dev_err(&pdev->dev, "failed to register the cpub_clk_dyn notifier\n"); + dev_err(dev, "failed to register the cpub_clk_dyn notifier\n"); return ret; } /* Setup clock notifier for cpub_clk mux */ - notifier_clk_name = clk_hw_get_name(&g12b_cpub_clk.hw); - notifier_clk = __clk_lookup(notifier_clk_name); + notifier_clk = devm_clk_hw_get_clk(dev, &g12b_cpub_clk.hw, DVFS_CON_ID); ret = clk_notifier_register(notifier_clk, &g12a_cpu_clk_mux_nb); if (ret) { - dev_err(&pdev->dev, "failed to register the cpub_clk notifier\n"); + dev_err(dev, "failed to register the cpub_clk notifier\n"); return ret; } /* Setup clock notifier for sys_pll */ - notifier_clk_name = clk_hw_get_name(&g12a_sys_pll.hw); - notifier_clk = __clk_lookup(notifier_clk_name); + notifier_clk = devm_clk_hw_get_clk(dev, &g12a_sys_pll.hw, DVFS_CON_ID); ret = clk_notifier_register(notifier_clk, &g12b_cpub_clk_sys_pll_nb_data.nb); if (ret) { - dev_err(&pdev->dev, "failed to register the sys_pll notifier\n"); + dev_err(dev, "failed to register the sys_pll notifier\n"); return ret; } @@ -5269,29 +5267,27 @@ static int meson_g12b_dvfs_setup(struct platform_device *pdev) static int meson_g12a_dvfs_setup(struct platform_device *pdev) { struct clk_hw **hws = g12a_hw_onecell_data.hws; - const char *notifier_clk_name; + struct device *dev = &pdev->dev; struct clk *notifier_clk; int ret; - ret = meson_g12a_dvfs_setup_common(pdev, hws); + ret = meson_g12a_dvfs_setup_common(dev, hws); if (ret) return ret; /* Setup clock notifier for cpu_clk mux */ - notifier_clk_name = clk_hw_get_name(&g12a_cpu_clk.hw); - notifier_clk = __clk_lookup(notifier_clk_name); + notifier_clk = devm_clk_hw_get_clk(dev, &g12a_cpu_clk.hw, DVFS_CON_ID); ret = clk_notifier_register(notifier_clk, &g12a_cpu_clk_mux_nb); if (ret) { - dev_err(&pdev->dev, "failed to register the cpu_clk notifier\n"); + dev_err(dev, "failed to register the cpu_clk notifier\n"); return ret; } /* Setup clock notifier for sys_pll */ - notifier_clk_name = clk_hw_get_name(&g12a_sys_pll.hw); - notifier_clk = __clk_lookup(notifier_clk_name); + notifier_clk = devm_clk_hw_get_clk(dev, &g12a_sys_pll.hw, DVFS_CON_ID); ret = clk_notifier_register(notifier_clk, &g12a_sys_pll_nb_data.nb); if (ret) { - dev_err(&pdev->dev, "failed to register the sys_pll notifier\n"); + dev_err(dev, "failed to register the sys_pll notifier\n"); return ret; } -- GitLab From 6d30d50d037dfa092f9d5d1fffa348ab4abb7163 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Wed, 21 Oct 2020 18:38:46 +0200 Subject: [PATCH 0150/1894] clk: add devm variant of clk_notifier_register Add a memory managed variant of clk_notifier_register() to make life easier on clock consumers using notifiers Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20201021163847.595189-2-jbrunet@baylibre.com Signed-off-by: Stephen Boyd --- drivers/clk/clk.c | 36 ++++++++++++++++++++++++++++++++++++ include/linux/clk.h | 10 ++++++++++ 2 files changed, 46 insertions(+) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 48931f442de8f..6cf59e3c31b46 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -4395,6 +4395,42 @@ int clk_notifier_unregister(struct clk *clk, struct notifier_block *nb) } EXPORT_SYMBOL_GPL(clk_notifier_unregister); +struct clk_notifier_devres { + struct clk *clk; + struct notifier_block *nb; +}; + +static void devm_clk_notifier_release(struct device *dev, void *res) +{ + struct clk_notifier_devres *devres = res; + + clk_notifier_unregister(devres->clk, devres->nb); +} + +int devm_clk_notifier_register(struct device *dev, struct clk *clk, + struct notifier_block *nb) +{ + struct clk_notifier_devres *devres; + int ret; + + devres = devres_alloc(devm_clk_notifier_release, + sizeof(*devres), GFP_KERNEL); + + if (!devres) + return -ENOMEM; + + ret = clk_notifier_register(clk, nb); + if (!ret) { + devres->clk = clk; + devres->nb = nb; + } else { + devres_free(devres); + } + + return ret; +} +EXPORT_SYMBOL_GPL(devm_clk_notifier_register); + #ifdef CONFIG_OF static void clk_core_reparent_orphans(void) { diff --git a/include/linux/clk.h b/include/linux/clk.h index 7fd6a1febcf4f..f53afdf8198bc 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -109,6 +109,16 @@ int clk_notifier_register(struct clk *clk, struct notifier_block *nb); */ int clk_notifier_unregister(struct clk *clk, struct notifier_block *nb); +/** + * devm_clk_notifier_register - register a managed rate-change notifier callback + * @dev: device for clock "consumer" + * @clk: clock whose rate we are interested in + * @nb: notifier block with callback function pointer + * + * Returns 0 on success, -EERROR otherwise + */ +int devm_clk_notifier_register(struct device *dev, struct clk *clk, struct notifier_block *nb); + /** * clk_get_accuracy - obtain the clock accuracy in ppb (parts per billion) * for a clock source. -- GitLab From e6fb7aee486c7fbd4d94f4894feaa6f0424c1740 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Wed, 21 Oct 2020 18:38:47 +0200 Subject: [PATCH 0151/1894] clk: meson: g12: use devm variant to register notifiers Until now, nothing was done to unregister the dvfs clock notifiers of the Amlogic g12 SoC family. This is not great but this driver was not really expected to be unloaded. With the ongoing effort to build everything as module for this platform, this needs to be cleanly handled. Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20201021163847.595189-3-jbrunet@baylibre.com Signed-off-by: Stephen Boyd --- drivers/clk/meson/g12a.c | 34 ++++++++++++++++++++-------------- include/linux/clk.h | 10 +++++++++- 2 files changed, 29 insertions(+), 15 deletions(-) diff --git a/drivers/clk/meson/g12a.c b/drivers/clk/meson/g12a.c index 235dcf72e34a6..108e4491b1e25 100644 --- a/drivers/clk/meson/g12a.c +++ b/drivers/clk/meson/g12a.c @@ -5171,8 +5171,8 @@ static int meson_g12a_dvfs_setup_common(struct device *dev, g12a_cpu_clk_postmux0_nb_data.xtal = xtal; notifier_clk = devm_clk_hw_get_clk(dev, &g12a_cpu_clk_postmux0.hw, DVFS_CON_ID); - ret = clk_notifier_register(notifier_clk, - &g12a_cpu_clk_postmux0_nb_data.nb); + ret = devm_clk_notifier_register(dev, notifier_clk, + &g12a_cpu_clk_postmux0_nb_data.nb); if (ret) { dev_err(dev, "failed to register the cpu_clk_postmux0 notifier\n"); return ret; @@ -5181,7 +5181,8 @@ static int meson_g12a_dvfs_setup_common(struct device *dev, /* Setup clock notifier for cpu_clk_dyn mux */ notifier_clk = devm_clk_hw_get_clk(dev, &g12a_cpu_clk_dyn.hw, DVFS_CON_ID); - ret = clk_notifier_register(notifier_clk, &g12a_cpu_clk_mux_nb); + ret = devm_clk_notifier_register(dev, notifier_clk, + &g12a_cpu_clk_mux_nb); if (ret) { dev_err(dev, "failed to register the cpu_clk_dyn notifier\n"); return ret; @@ -5207,7 +5208,8 @@ static int meson_g12b_dvfs_setup(struct platform_device *pdev) /* Setup clock notifier for cpu_clk mux */ notifier_clk = devm_clk_hw_get_clk(dev, &g12b_cpu_clk.hw, DVFS_CON_ID); - ret = clk_notifier_register(notifier_clk, &g12a_cpu_clk_mux_nb); + ret = devm_clk_notifier_register(dev, notifier_clk, + &g12a_cpu_clk_mux_nb); if (ret) { dev_err(dev, "failed to register the cpu_clk notifier\n"); return ret; @@ -5216,8 +5218,8 @@ static int meson_g12b_dvfs_setup(struct platform_device *pdev) /* Setup clock notifier for sys1_pll */ notifier_clk = devm_clk_hw_get_clk(dev, &g12b_sys1_pll.hw, DVFS_CON_ID); - ret = clk_notifier_register(notifier_clk, - &g12b_cpu_clk_sys1_pll_nb_data.nb); + ret = devm_clk_notifier_register(dev, notifier_clk, + &g12b_cpu_clk_sys1_pll_nb_data.nb); if (ret) { dev_err(dev, "failed to register the sys1_pll notifier\n"); return ret; @@ -5229,8 +5231,8 @@ static int meson_g12b_dvfs_setup(struct platform_device *pdev) g12b_cpub_clk_postmux0_nb_data.xtal = xtal; notifier_clk = devm_clk_hw_get_clk(dev, &g12b_cpub_clk_postmux0.hw, DVFS_CON_ID); - ret = clk_notifier_register(notifier_clk, - &g12b_cpub_clk_postmux0_nb_data.nb); + ret = devm_clk_notifier_register(dev, notifier_clk, + &g12b_cpub_clk_postmux0_nb_data.nb); if (ret) { dev_err(dev, "failed to register the cpub_clk_postmux0 notifier\n"); return ret; @@ -5238,7 +5240,8 @@ static int meson_g12b_dvfs_setup(struct platform_device *pdev) /* Setup clock notifier for cpub_clk_dyn mux */ notifier_clk = devm_clk_hw_get_clk(dev, &g12b_cpub_clk_dyn.hw, "dvfs"); - ret = clk_notifier_register(notifier_clk, &g12a_cpu_clk_mux_nb); + ret = devm_clk_notifier_register(dev, notifier_clk, + &g12a_cpu_clk_mux_nb); if (ret) { dev_err(dev, "failed to register the cpub_clk_dyn notifier\n"); return ret; @@ -5246,7 +5249,8 @@ static int meson_g12b_dvfs_setup(struct platform_device *pdev) /* Setup clock notifier for cpub_clk mux */ notifier_clk = devm_clk_hw_get_clk(dev, &g12b_cpub_clk.hw, DVFS_CON_ID); - ret = clk_notifier_register(notifier_clk, &g12a_cpu_clk_mux_nb); + ret = devm_clk_notifier_register(dev, notifier_clk, + &g12a_cpu_clk_mux_nb); if (ret) { dev_err(dev, "failed to register the cpub_clk notifier\n"); return ret; @@ -5254,8 +5258,8 @@ static int meson_g12b_dvfs_setup(struct platform_device *pdev) /* Setup clock notifier for sys_pll */ notifier_clk = devm_clk_hw_get_clk(dev, &g12a_sys_pll.hw, DVFS_CON_ID); - ret = clk_notifier_register(notifier_clk, - &g12b_cpub_clk_sys_pll_nb_data.nb); + ret = devm_clk_notifier_register(dev, notifier_clk, + &g12b_cpub_clk_sys_pll_nb_data.nb); if (ret) { dev_err(dev, "failed to register the sys_pll notifier\n"); return ret; @@ -5277,7 +5281,8 @@ static int meson_g12a_dvfs_setup(struct platform_device *pdev) /* Setup clock notifier for cpu_clk mux */ notifier_clk = devm_clk_hw_get_clk(dev, &g12a_cpu_clk.hw, DVFS_CON_ID); - ret = clk_notifier_register(notifier_clk, &g12a_cpu_clk_mux_nb); + ret = devm_clk_notifier_register(dev, notifier_clk, + &g12a_cpu_clk_mux_nb); if (ret) { dev_err(dev, "failed to register the cpu_clk notifier\n"); return ret; @@ -5285,7 +5290,8 @@ static int meson_g12a_dvfs_setup(struct platform_device *pdev) /* Setup clock notifier for sys_pll */ notifier_clk = devm_clk_hw_get_clk(dev, &g12a_sys_pll.hw, DVFS_CON_ID); - ret = clk_notifier_register(notifier_clk, &g12a_sys_pll_nb_data.nb); + ret = devm_clk_notifier_register(dev, notifier_clk, + &g12a_sys_pll_nb_data.nb); if (ret) { dev_err(dev, "failed to register the sys_pll notifier\n"); return ret; diff --git a/include/linux/clk.h b/include/linux/clk.h index f53afdf8198bc..4ac766dc3dafc 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -117,7 +117,8 @@ int clk_notifier_unregister(struct clk *clk, struct notifier_block *nb); * * Returns 0 on success, -EERROR otherwise */ -int devm_clk_notifier_register(struct device *dev, struct clk *clk, struct notifier_block *nb); +int devm_clk_notifier_register(struct device *dev, struct clk *clk, + struct notifier_block *nb); /** * clk_get_accuracy - obtain the clock accuracy in ppb (parts per billion) @@ -196,6 +197,13 @@ static inline int clk_notifier_unregister(struct clk *clk, return -ENOTSUPP; } +static inline int devm_clk_notifier_register(struct device *dev, + struct clk *clk, + struct notifier_block *nb) +{ + return -ENOTSUPP; +} + static inline long clk_get_accuracy(struct clk *clk) { return -ENOTSUPP; -- GitLab From 3105c7c91feb176f8b918ebe13abd520f7651834 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 23 Oct 2020 16:19:25 +0300 Subject: [PATCH 0152/1894] clk: qcom: dispcc-sm8250: handle MMCX power domain On SM8250 MMCX power domain is required to access MMDS_GDSC registers. This power domain is expressed as mmcx-supply regulator property. Use this regulator as MDSS_GDSC supply. Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20201023131925.334864-6-dmitry.baryshkov@linaro.org Signed-off-by: Stephen Boyd --- drivers/clk/qcom/dispcc-sm8250.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/qcom/dispcc-sm8250.c b/drivers/clk/qcom/dispcc-sm8250.c index 07a98d3f882d0..588575e1169d5 100644 --- a/drivers/clk/qcom/dispcc-sm8250.c +++ b/drivers/clk/qcom/dispcc-sm8250.c @@ -963,6 +963,7 @@ static struct gdsc mdss_gdsc = { }, .pwrsts = PWRSTS_OFF_ON, .flags = HW_CTRL, + .supply = "mmcx", }; static struct clk_regmap *disp_cc_sm8250_clocks[] = { -- GitLab From 825156a5eeded9bcb55e9c36d4b4b72bf20bcba6 Mon Sep 17 00:00:00 2001 From: Kaixu Xia Date: Fri, 6 Nov 2020 15:30:54 +0800 Subject: [PATCH 0153/1894] rtc: sc27xx: Remove unnecessary conversion to bool Here we could use the '!=' expression to fix the following coccicheck warning: ./drivers/rtc/rtc-sc27xx.c:566:50-55: WARNING: conversion to bool not needed here Reported-by: Tosk Robot Signed-off-by: Kaixu Xia Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/1604647854-876-1-git-send-email-kaixuxia@tencent.com --- drivers/rtc/rtc-sc27xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-sc27xx.c b/drivers/rtc/rtc-sc27xx.c index 36810dd40cd35..6e65f68ea86dc 100644 --- a/drivers/rtc/rtc-sc27xx.c +++ b/drivers/rtc/rtc-sc27xx.c @@ -563,7 +563,7 @@ static int sprd_rtc_check_power_down(struct sprd_rtc *rtc) * means the RTC has been powered down, so the RTC time values are * invalid. */ - rtc->valid = val == SPRD_RTC_POWER_RESET_VALUE ? false : true; + rtc->valid = val != SPRD_RTC_POWER_RESET_VALUE; return 0; } -- GitLab From a48c6224ae07bed02893c58073ca2942acb5c3d5 Mon Sep 17 00:00:00 2001 From: Kaixu Xia Date: Fri, 6 Nov 2020 16:00:37 +0800 Subject: [PATCH 0154/1894] rtc: da9063: Simplify bool comparison Fix the following coccicheck warning: ./drivers/rtc/rtc-da9063.c:246:5-18: WARNING: Comparison to bool Reported-by: Tosk Robot Signed-off-by: Kaixu Xia Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/1604649637-1014-1-git-send-email-kaixuxia@tencent.com --- drivers/rtc/rtc-da9063.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-da9063.c b/drivers/rtc/rtc-da9063.c index 046b1d4c3daea..6f0a3a7111356 100644 --- a/drivers/rtc/rtc-da9063.c +++ b/drivers/rtc/rtc-da9063.c @@ -243,7 +243,7 @@ static int da9063_rtc_read_time(struct device *dev, struct rtc_time *tm) al_secs = rtc_tm_to_time64(&rtc->alarm_time); /* handle the rtc synchronisation delay */ - if (rtc->rtc_sync == true && al_secs - tm_secs == 1) + if (rtc->rtc_sync && al_secs - tm_secs == 1) memcpy(tm, &rtc->alarm_time, sizeof(struct rtc_time)); else rtc->rtc_sync = false; -- GitLab From c56ac7a0f468ceb38d24db41f4446d98ab94da2d Mon Sep 17 00:00:00 2001 From: Guillaume Tucker Date: Fri, 6 Nov 2020 09:06:31 +0000 Subject: [PATCH 0155/1894] rtc: hym8563: enable wakeup when applicable Enable wakeup in the hym8563 driver if the IRQ was successfully requested or if wakeup-source is set in the devicetree. As per the description of device_init_wakeup(), it should be enabled for "devices that everyone expects to be wakeup sources". One would expect this to be the case with a real-time clock. Tested on rk3288-rock2-square, which has an IRQ configured for the RTC. As a result, wakeup was enabled during driver initialisation. Fixes: dcaf03849352 ("rtc: add hym8563 rtc-driver") Reported-by: kernelci.org bot Signed-off-by: Guillaume Tucker Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/1ea023e2ba50a4dab6e39be93d7de3146af71a60.1604653374.git.guillaume.tucker@collabora.com --- drivers/rtc/rtc-hym8563.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-hym8563.c b/drivers/rtc/rtc-hym8563.c index 0fb79c4afb463..24e0095be0582 100644 --- a/drivers/rtc/rtc-hym8563.c +++ b/drivers/rtc/rtc-hym8563.c @@ -527,8 +527,6 @@ static int hym8563_probe(struct i2c_client *client, hym8563->client = client; i2c_set_clientdata(client, hym8563); - device_set_wakeup_capable(&client->dev, true); - ret = hym8563_init_device(client); if (ret) { dev_err(&client->dev, "could not init device, %d\n", ret); @@ -547,6 +545,11 @@ static int hym8563_probe(struct i2c_client *client, } } + if (client->irq > 0 || + device_property_read_bool(&client->dev, "wakeup-source")) { + device_init_wakeup(&client->dev, true); + } + /* check state of calendar information */ ret = i2c_smbus_read_byte_data(client, HYM8563_SEC); if (ret < 0) -- GitLab From bc06cfc1c41e3b60b159132e5bba4c059a2e7f83 Mon Sep 17 00:00:00 2001 From: Tian Tao Date: Tue, 10 Nov 2020 17:35:47 +0800 Subject: [PATCH 0156/1894] rtc: cpcap: Fix missing IRQF_ONESHOT as only threaded handler Coccinelle noticed: drivers/rtc/rtc-cpcap.c:271:7-32: ERROR: Threaded IRQ with no primary handler requested without IRQF_ONESHOT drivers/rtc/rtc-cpcap.c:287:7-32: ERROR: Threaded IRQ with no primary handler requested without IRQF_ONESHOT Signed-off-by: Tian Tao Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/1605000947-32882-1-git-send-email-tiantao6@hisilicon.com --- drivers/rtc/rtc-cpcap.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-cpcap.c b/drivers/rtc/rtc-cpcap.c index 800667d73a6fb..38d576b0c4faa 100644 --- a/drivers/rtc/rtc-cpcap.c +++ b/drivers/rtc/rtc-cpcap.c @@ -269,7 +269,8 @@ static int cpcap_rtc_probe(struct platform_device *pdev) rtc->alarm_irq = platform_get_irq(pdev, 0); err = devm_request_threaded_irq(dev, rtc->alarm_irq, NULL, - cpcap_rtc_alarm_irq, IRQF_TRIGGER_NONE, + cpcap_rtc_alarm_irq, + IRQF_TRIGGER_NONE | IRQF_ONESHOT, "rtc_alarm", rtc); if (err) { dev_err(dev, "Could not request alarm irq: %d\n", err); @@ -285,7 +286,8 @@ static int cpcap_rtc_probe(struct platform_device *pdev) */ rtc->update_irq = platform_get_irq(pdev, 1); err = devm_request_threaded_irq(dev, rtc->update_irq, NULL, - cpcap_rtc_update_irq, IRQF_TRIGGER_NONE, + cpcap_rtc_update_irq, + IRQF_TRIGGER_NONE | IRQF_ONESHOT, "rtc_1hz", rtc); if (err) { dev_err(dev, "Could not request update irq: %d\n", err); -- GitLab From 1eab0fea2514b269e384c117f5b5772b882761f0 Mon Sep 17 00:00:00 2001 From: Zheng Liang Date: Thu, 12 Nov 2020 17:31:39 +0800 Subject: [PATCH 0157/1894] rtc: pl031: fix resource leak in pl031_probe When devm_rtc_allocate_device is failed in pl031_probe, it should release mem regions with device. Reported-by: Hulk Robot Signed-off-by: Zheng Liang Signed-off-by: Alexandre Belloni Acked-by: Linus Walleij Link: https://lore.kernel.org/r/20201112093139.32566-1-zhengliang6@huawei.com --- drivers/rtc/rtc-pl031.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-pl031.c b/drivers/rtc/rtc-pl031.c index c6b89273feba8..d4b2ab7861266 100644 --- a/drivers/rtc/rtc-pl031.c +++ b/drivers/rtc/rtc-pl031.c @@ -361,8 +361,10 @@ static int pl031_probe(struct amba_device *adev, const struct amba_id *id) device_init_wakeup(&adev->dev, true); ldata->rtc = devm_rtc_allocate_device(&adev->dev); - if (IS_ERR(ldata->rtc)) - return PTR_ERR(ldata->rtc); + if (IS_ERR(ldata->rtc)) { + ret = PTR_ERR(ldata->rtc); + goto out; + } ldata->rtc->ops = ops; ldata->rtc->range_min = vendor->range_min; -- GitLab From 910d002d84df21da61cadba92dd510ece5e46312 Mon Sep 17 00:00:00 2001 From: Xu Wang Date: Fri, 13 Nov 2020 07:45:38 +0000 Subject: [PATCH 0158/1894] rtc: brcmstb-waketimer: Remove redundant null check before clk_disable_unprepare Because clk_disable_unprepare() already checked NULL clock parameter, so the additional check is unnecessary, just remove it. Signed-off-by: Xu Wang Signed-off-by: Alexandre Belloni Acked-by: Florian Fainelli Link: https://lore.kernel.org/r/20201113074538.65028-1-vulab@iscas.ac.cn --- drivers/rtc/rtc-brcmstb-waketimer.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-brcmstb-waketimer.c b/drivers/rtc/rtc-brcmstb-waketimer.c index 4fee57c512802..375a9987a1d68 100644 --- a/drivers/rtc/rtc-brcmstb-waketimer.c +++ b/drivers/rtc/rtc-brcmstb-waketimer.c @@ -264,8 +264,7 @@ err_notifier: unregister_reboot_notifier(&timer->reboot_notifier); err_clk: - if (timer->clk) - clk_disable_unprepare(timer->clk); + clk_disable_unprepare(timer->clk); return ret; } -- GitLab From 081e2500df50c7f330b9346794c6759ea7f8fb81 Mon Sep 17 00:00:00 2001 From: Xu Wang Date: Fri, 13 Nov 2020 08:03:05 +0000 Subject: [PATCH 0159/1894] rtc: snvs: Remove NULL pointer check before clk_* Because clk_* already checked NULL clock parameter, so the additional checks are unnecessary, just remove them. Signed-off-by: Xu Wang Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201113080305.65961-1-vulab@iscas.ac.cn --- drivers/rtc/rtc-snvs.c | 67 +++++++++++++++--------------------------- 1 file changed, 24 insertions(+), 43 deletions(-) diff --git a/drivers/rtc/rtc-snvs.c b/drivers/rtc/rtc-snvs.c index 0263d996b8a86..a7d39a49b7486 100644 --- a/drivers/rtc/rtc-snvs.c +++ b/drivers/rtc/rtc-snvs.c @@ -151,17 +151,14 @@ static int snvs_rtc_read_time(struct device *dev, struct rtc_time *tm) unsigned long time; int ret; - if (data->clk) { - ret = clk_enable(data->clk); - if (ret) - return ret; - } + ret = clk_enable(data->clk); + if (ret) + return ret; time = rtc_read_lp_counter(data); rtc_time64_to_tm(time, tm); - if (data->clk) - clk_disable(data->clk); + clk_disable(data->clk); return 0; } @@ -172,11 +169,9 @@ static int snvs_rtc_set_time(struct device *dev, struct rtc_time *tm) unsigned long time = rtc_tm_to_time64(tm); int ret; - if (data->clk) { - ret = clk_enable(data->clk); - if (ret) - return ret; - } + ret = clk_enable(data->clk); + if (ret) + return ret; /* Disable RTC first */ ret = snvs_rtc_enable(data, false); @@ -190,8 +185,7 @@ static int snvs_rtc_set_time(struct device *dev, struct rtc_time *tm) /* Enable RTC again */ ret = snvs_rtc_enable(data, true); - if (data->clk) - clk_disable(data->clk); + clk_disable(data->clk); return ret; } @@ -202,11 +196,9 @@ static int snvs_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) u32 lptar, lpsr; int ret; - if (data->clk) { - ret = clk_enable(data->clk); - if (ret) - return ret; - } + ret = clk_enable(data->clk); + if (ret) + return ret; regmap_read(data->regmap, data->offset + SNVS_LPTAR, &lptar); rtc_time64_to_tm(lptar, &alrm->time); @@ -214,8 +206,7 @@ static int snvs_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) regmap_read(data->regmap, data->offset + SNVS_LPSR, &lpsr); alrm->pending = (lpsr & SNVS_LPSR_LPTA) ? 1 : 0; - if (data->clk) - clk_disable(data->clk); + clk_disable(data->clk); return 0; } @@ -225,11 +216,9 @@ static int snvs_rtc_alarm_irq_enable(struct device *dev, unsigned int enable) struct snvs_rtc_data *data = dev_get_drvdata(dev); int ret; - if (data->clk) { - ret = clk_enable(data->clk); - if (ret) - return ret; - } + ret = clk_enable(data->clk); + if (ret) + return ret; regmap_update_bits(data->regmap, data->offset + SNVS_LPCR, (SNVS_LPCR_LPTA_EN | SNVS_LPCR_LPWUI_EN), @@ -237,8 +226,7 @@ static int snvs_rtc_alarm_irq_enable(struct device *dev, unsigned int enable) ret = rtc_write_sync_lp(data); - if (data->clk) - clk_disable(data->clk); + clk_disable(data->clk); return ret; } @@ -249,11 +237,9 @@ static int snvs_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) unsigned long time = rtc_tm_to_time64(&alrm->time); int ret; - if (data->clk) { - ret = clk_enable(data->clk); - if (ret) - return ret; - } + ret = clk_enable(data->clk); + if (ret) + return ret; regmap_update_bits(data->regmap, data->offset + SNVS_LPCR, SNVS_LPCR_LPTA_EN, 0); ret = rtc_write_sync_lp(data); @@ -264,8 +250,7 @@ static int snvs_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) /* Clear alarm interrupt status bit */ regmap_write(data->regmap, data->offset + SNVS_LPSR, SNVS_LPSR_LPTA); - if (data->clk) - clk_disable(data->clk); + clk_disable(data->clk); return snvs_rtc_alarm_irq_enable(dev, alrm->enabled); } @@ -285,8 +270,7 @@ static irqreturn_t snvs_rtc_irq_handler(int irq, void *dev_id) u32 lpsr; u32 events = 0; - if (data->clk) - clk_enable(data->clk); + clk_enable(data->clk); regmap_read(data->regmap, data->offset + SNVS_LPSR, &lpsr); @@ -302,8 +286,7 @@ static irqreturn_t snvs_rtc_irq_handler(int irq, void *dev_id) /* clear interrupt status */ regmap_write(data->regmap, data->offset + SNVS_LPSR, lpsr); - if (data->clk) - clk_disable(data->clk); + clk_disable(data->clk); return events ? IRQ_HANDLED : IRQ_NONE; } @@ -316,8 +299,7 @@ static const struct regmap_config snvs_rtc_config = { static void snvs_rtc_action(void *data) { - if (data) - clk_disable_unprepare(data); + clk_disable_unprepare(data); } static int snvs_rtc_probe(struct platform_device *pdev) @@ -412,8 +394,7 @@ static int __maybe_unused snvs_rtc_suspend_noirq(struct device *dev) { struct snvs_rtc_data *data = dev_get_drvdata(dev); - if (data->clk) - clk_disable(data->clk); + clk_disable(data->clk); return 0; } -- GitLab From 5022cfc112328e7fd489f5e3d41b7f352322880c Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Sat, 14 Nov 2020 21:09:20 +0800 Subject: [PATCH 0160/1894] rtc: goldfish: Remove GOLDFISH dependency Goldfish platform is covered with dust. However the goldfish-rtc had been used as virtualized RTC in QEMU for RISC-V virt hw and MIPS loongson3-virt hw, thus we can drop other parts of goldfish but leave goldfish-rtc here. Signed-off-by: Jiaxun Yang Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201114130921.651882-2-jiaxun.yang@flygoat.com --- drivers/rtc/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 65ad9d0b47ab1..f784b52381b1a 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -1935,7 +1935,6 @@ config RTC_DRV_HID_SENSOR_TIME config RTC_DRV_GOLDFISH tristate "Goldfish Real Time Clock" depends on OF && HAS_IOMEM - depends on GOLDFISH || COMPILE_TEST help Say yes to enable RTC driver for the Goldfish based virtual platform. -- GitLab From 9844484eac2bff09ba3fcdebcf5a41d94df6b6c1 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Sat, 14 Nov 2020 21:09:21 +0800 Subject: [PATCH 0161/1894] MAINTAINERS: Set myself as Goldfish RTC maintainer While Goldfish platform is dusted, the RTC driver remains valuable for us. I'm volunteering to maintain goldfish RTC driver onward. Signed-off-by: Jiaxun Yang Signed-off-by: Alexandre Belloni Cc: Miodrag Dinic Link: https://lore.kernel.org/r/20201114130921.651882-3-jiaxun.yang@flygoat.com --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index e73636b75f29d..b576544264e6e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1168,7 +1168,7 @@ F: Documentation/devicetree/bindings/interrupt-controller/google,goldfish-pic.tx F: drivers/irqchip/irq-goldfish-pic.c ANDROID GOLDFISH RTC DRIVER -M: Miodrag Dinic +M: Jiaxun Yang S: Supported F: Documentation/devicetree/bindings/rtc/google,goldfish-rtc.txt F: drivers/rtc/rtc-goldfish.c -- GitLab From 767fbb7102c69bedb8dca5a877c4eae4bbf8cf9b Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Sun, 8 Nov 2020 23:37:10 +0100 Subject: [PATCH 0162/1894] rtc: rv3032: fix nvram nvmem priv pointer The nvmem priv pointer is set to rv3032 but the rv3032_nvram_write and rv3032_nvram_read expect the regmap pointer. Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201108223710.1574331-1-alexandre.belloni@bootlin.com --- drivers/rtc/rtc-rv3032.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-rv3032.c b/drivers/rtc/rtc-rv3032.c index 3e67f71f42614..14e931d6f9c6a 100644 --- a/drivers/rtc/rtc-rv3032.c +++ b/drivers/rtc/rtc-rv3032.c @@ -889,7 +889,7 @@ static int rv3032_probe(struct i2c_client *client) if (ret) return ret; - nvmem_cfg.priv = rv3032; + nvmem_cfg.priv = rv3032->regmap; rtc_nvmem_register(rv3032->rtc, &nvmem_cfg); eeprom_cfg.priv = rv3032; rtc_nvmem_register(rv3032->rtc, &eeprom_cfg); -- GitLab From d3a9e4146a6f79f19430bca3f2a4d6ebaaffe36b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 6 Oct 2020 18:44:12 -0700 Subject: [PATCH 0163/1894] KVM: VMX: Drop guest CPUID check for VMXE in vmx_set_cr4() Drop vmx_set_cr4()'s somewhat hidden guest_cpuid_has() check on VMXE now that common x86 handles the check by incorporating VMXE into the CR4 reserved bits, i.e. in cr4_guest_rsvd_bits. This fixes a bug where KVM incorrectly rejects KVM_SET_SREGS with CR4.VMXE=1 if it's executed before KVM_SET_CPUID{,2}. Fixes: 5e1746d6205d ("KVM: nVMX: Allow setting the VMXE bit in CR4") Reported-by: Stas Sergeev Signed-off-by: Sean Christopherson Message-Id: <20201007014417.29276-2-sean.j.christopherson@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 47b8357b97517..7ebf08db67474 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -3129,9 +3129,10 @@ int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) * must first be able to turn on cr4.VMXE (see handle_vmon()). * So basically the check on whether to allow nested VMX * is here. We operate under the default treatment of SMM, - * so VMX cannot be enabled under SMM. + * so VMX cannot be enabled under SMM. Note, guest CPUID is + * intentionally ignored, it's handled by cr4_guest_rsvd_bits. */ - if (!nested_vmx_allowed(vcpu) || is_smm(vcpu)) + if (!nested || is_smm(vcpu)) return 1; } -- GitLab From a447e38a7fadb2e554c3942dda183e55cccd5df0 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 6 Oct 2020 18:44:13 -0700 Subject: [PATCH 0164/1894] KVM: VMX: Drop explicit 'nested' check from vmx_set_cr4() Drop vmx_set_cr4()'s explicit check on the 'nested' module param now that common x86 handles the check by incorporating VMXE into the CR4 reserved bits, via kvm_cpu_caps. X86_FEATURE_VMX is set in kvm_cpu_caps (by vmx_set_cpu_caps()), if and only if 'nested' is true. No functional change intended. Signed-off-by: Sean Christopherson Message-Id: <20201007014417.29276-3-sean.j.christopherson@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 7ebf08db67474..d9a4526c730c8 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -3123,18 +3123,13 @@ int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) } } - if (cr4 & X86_CR4_VMXE) { - /* - * To use VMXON (and later other VMX instructions), a guest - * must first be able to turn on cr4.VMXE (see handle_vmon()). - * So basically the check on whether to allow nested VMX - * is here. We operate under the default treatment of SMM, - * so VMX cannot be enabled under SMM. Note, guest CPUID is - * intentionally ignored, it's handled by cr4_guest_rsvd_bits. - */ - if (!nested || is_smm(vcpu)) - return 1; - } + /* + * We operate under the default treatment of SMM, so VMX cannot be + * enabled under SMM. Note, whether or not VMXE is allowed at all is + * handled by kvm_valid_cr4(). + */ + if ((cr4 & X86_CR4_VMXE) && is_smm(vcpu)) + return 1; if (vmx->nested.vmxon && !nested_cr4_valid(vcpu, cr4)) return 1; -- GitLab From 311a06593b9a3944a63ed176b95cb8d857f7c83b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 6 Oct 2020 18:44:14 -0700 Subject: [PATCH 0165/1894] KVM: SVM: Drop VMXE check from svm_set_cr4() Drop svm_set_cr4()'s explicit check CR4.VMXE now that common x86 handles the check by incorporating VMXE into the CR4 reserved bits, via kvm_cpu_caps. SVM obviously does not set X86_FEATURE_VMX. No functional change intended. Signed-off-by: Sean Christopherson Message-Id: <20201007014417.29276-4-sean.j.christopherson@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 1e81cfebd4914..e584da54738c6 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1687,9 +1687,6 @@ int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) unsigned long host_cr4_mce = cr4_read_shadow() & X86_CR4_MCE; unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4; - if (cr4 & X86_CR4_VMXE) - return 1; - if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE)) svm_flush_tlb(vcpu); -- GitLab From c2fe3cd4604ac87c587db05d41843d667dc43815 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 6 Oct 2020 18:44:15 -0700 Subject: [PATCH 0166/1894] KVM: x86: Move vendor CR4 validity check to dedicated kvm_x86_ops hook Split out VMX's checks on CR4.VMXE to a dedicated hook, .is_valid_cr4(), and invoke the new hook from kvm_valid_cr4(). This fixes an issue where KVM_SET_SREGS would return success while failing to actually set CR4. Fixing the issue by explicitly checking kvm_x86_ops.set_cr4()'s return in __set_sregs() is not a viable option as KVM has already stuffed a variety of vCPU state. Note, kvm_valid_cr4() and is_valid_cr4() have different return types and inverted semantics. This will be remedied in a future patch. Fixes: 5e1746d6205d ("KVM: nVMX: Allow setting the VMXE bit in CR4") Signed-off-by: Sean Christopherson Message-Id: <20201007014417.29276-5-sean.j.christopherson@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 3 ++- arch/x86/kvm/svm/svm.c | 9 +++++++-- arch/x86/kvm/svm/svm.h | 2 +- arch/x86/kvm/vmx/nested.c | 2 +- arch/x86/kvm/vmx/vmx.c | 31 ++++++++++++++++++------------- arch/x86/kvm/vmx/vmx.h | 2 +- arch/x86/kvm/x86.c | 6 ++++-- 7 files changed, 34 insertions(+), 21 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 324ddd7fd0aa4..5bb0a2bbd9c40 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1115,7 +1115,8 @@ struct kvm_x86_ops { struct kvm_segment *var, int seg); void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); - int (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); + bool (*is_valid_cr4)(struct kvm_vcpu *vcpu, unsigned long cr0); + void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); int (*set_efer)(struct kvm_vcpu *vcpu, u64 efer); void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index e584da54738c6..8c858f80f3992 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1682,7 +1682,12 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) update_cr0_intercept(svm); } -int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) +static bool svm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) +{ + return true; +} + +void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { unsigned long host_cr4_mce = cr4_read_shadow() & X86_CR4_MCE; unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4; @@ -1696,7 +1701,6 @@ int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) cr4 |= host_cr4_mce; to_svm(vcpu)->vmcb->save.cr4 = cr4; vmcb_mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR); - return 0; } static void svm_set_segment(struct kvm_vcpu *vcpu, @@ -4212,6 +4216,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .get_cpl = svm_get_cpl, .get_cs_db_l_bits = kvm_get_cs_db_l_bits, .set_cr0 = svm_set_cr0, + .is_valid_cr4 = svm_is_valid_cr4, .set_cr4 = svm_set_cr4, .set_efer = svm_set_efer, .get_idt = svm_get_idt, diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 1d853fe4c778b..eb6fbafbe36c2 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -358,7 +358,7 @@ void svm_vcpu_free_msrpm(u32 *msrpm); int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer); void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); -int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); +void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); void svm_flush_tlb(struct kvm_vcpu *vcpu); void disable_nmi_singlestep(struct vcpu_svm *svm); bool svm_smi_blocked(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 89af692deb7ef..efebd849b3c9b 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4814,7 +4814,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu) /* * The Intel VMX Instruction Reference lists a bunch of bits that are * prerequisite to running VMXON, most notably cr4.VMXE must be set to - * 1 (see vmx_set_cr4() for when we allow the guest to set this). + * 1 (see vmx_is_valid_cr4() for when we allow the guest to set this). * Otherwise, we should fail with #UD. But most faulting conditions * have already been checked by hardware, prior to the VM-exit for * VMXON. We do test guest cr4.VMXE because processor CR4 always has diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index d9a4526c730c8..3327964a1e2eb 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -3095,7 +3095,23 @@ static void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long pgd, vmcs_writel(GUEST_CR3, guest_cr3); } -int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) +static bool vmx_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) +{ + /* + * We operate under the default treatment of SMM, so VMX cannot be + * enabled under SMM. Note, whether or not VMXE is allowed at all is + * handled by kvm_valid_cr4(). + */ + if ((cr4 & X86_CR4_VMXE) && is_smm(vcpu)) + return false; + + if (to_vmx(vcpu)->nested.vmxon && !nested_cr4_valid(vcpu, cr4)) + return false; + + return true; +} + +void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { struct vcpu_vmx *vmx = to_vmx(vcpu); /* @@ -3123,17 +3139,6 @@ int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) } } - /* - * We operate under the default treatment of SMM, so VMX cannot be - * enabled under SMM. Note, whether or not VMXE is allowed at all is - * handled by kvm_valid_cr4(). - */ - if ((cr4 & X86_CR4_VMXE) && is_smm(vcpu)) - return 1; - - if (vmx->nested.vmxon && !nested_cr4_valid(vcpu, cr4)) - return 1; - vcpu->arch.cr4 = cr4; kvm_register_mark_available(vcpu, VCPU_EXREG_CR4); @@ -3164,7 +3169,6 @@ int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) vmcs_writel(CR4_READ_SHADOW, cr4); vmcs_writel(GUEST_CR4, hw_cr4); - return 0; } void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) @@ -7612,6 +7616,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .get_cpl = vmx_get_cpl, .get_cs_db_l_bits = vmx_get_cs_db_l_bits, .set_cr0 = vmx_set_cr0, + .is_valid_cr4 = vmx_is_valid_cr4, .set_cr4 = vmx_set_cr4, .set_efer = vmx_set_efer, .get_idt = vmx_get_idt, diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index f6f66e5c65104..9d3a557949ac2 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -321,7 +321,7 @@ u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu); void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask); int vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer); void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); -int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); +void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); void set_cr4_guest_host_mask(struct vcpu_vmx *vmx); void ept_save_pdptrs(struct kvm_vcpu *vcpu); void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 078a39d489fe1..8c8205cb57bca 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -972,6 +972,9 @@ int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) if (cr4 & vcpu->arch.cr4_guest_rsvd_bits) return -EINVAL; + if (!kvm_x86_ops.is_valid_cr4(vcpu, cr4)) + return -EINVAL; + return 0; } EXPORT_SYMBOL_GPL(kvm_valid_cr4); @@ -1006,8 +1009,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) return 1; } - if (kvm_x86_ops.set_cr4(vcpu, cr4)) - return 1; + kvm_x86_ops.set_cr4(vcpu, cr4); if (((cr4 ^ old_cr4) & mmu_role_bits) || (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE))) -- GitLab From ee69c92bac61f4379e97f40b259a1c1257e5987f Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 6 Oct 2020 18:44:16 -0700 Subject: [PATCH 0167/1894] KVM: x86: Return bool instead of int for CR4 and SREGS validity checks Rework the common CR4 and SREGS checks to return a bool instead of an int, i.e. true/false instead of 0/-EINVAL, and add "is" to the name to clarify the polarity of the return value (which is effectively inverted by this change). No functional changed intended. Signed-off-by: Sean Christopherson Message-Id: <20201007014417.29276-6-sean.j.christopherson@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 2 +- arch/x86/kvm/vmx/vmx.c | 2 +- arch/x86/kvm/x86.c | 28 ++++++++++++---------------- arch/x86/kvm/x86.h | 2 +- 4 files changed, 15 insertions(+), 19 deletions(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 9e4c226dbf7d9..b0f37183e8f55 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -254,7 +254,7 @@ static bool nested_vmcb_checks(struct vcpu_svm *svm, struct vmcb *vmcb12) (vmcb12->save.cr3 & MSR_CR3_LONG_MBZ_MASK)) return false; } - if (kvm_valid_cr4(&svm->vcpu, vmcb12->save.cr4)) + if (!kvm_is_valid_cr4(&svm->vcpu, vmcb12->save.cr4)) return false; return nested_vmcb_check_controls(&vmcb12->control); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 3327964a1e2eb..39ff7600b0aff 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -3100,7 +3100,7 @@ static bool vmx_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) /* * We operate under the default treatment of SMM, so VMX cannot be * enabled under SMM. Note, whether or not VMXE is allowed at all is - * handled by kvm_valid_cr4(). + * handled by kvm_is_valid_cr4(). */ if ((cr4 & X86_CR4_VMXE) && is_smm(vcpu)) return false; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8c8205cb57bca..2db86702cac49 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -964,20 +964,17 @@ int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) } EXPORT_SYMBOL_GPL(kvm_set_xcr); -int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) +bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { if (cr4 & cr4_reserved_bits) - return -EINVAL; + return false; if (cr4 & vcpu->arch.cr4_guest_rsvd_bits) - return -EINVAL; - - if (!kvm_x86_ops.is_valid_cr4(vcpu, cr4)) - return -EINVAL; + return false; - return 0; + return kvm_x86_ops.is_valid_cr4(vcpu, cr4); } -EXPORT_SYMBOL_GPL(kvm_valid_cr4); +EXPORT_SYMBOL_GPL(kvm_is_valid_cr4); int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { @@ -986,7 +983,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) X86_CR4_SMEP; unsigned long mmu_role_bits = pdptr_bits | X86_CR4_SMAP | X86_CR4_PKE; - if (kvm_valid_cr4(vcpu, cr4)) + if (!kvm_is_valid_cr4(vcpu, cr4)) return 1; if (is_long_mode(vcpu)) { @@ -9535,7 +9532,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, } EXPORT_SYMBOL_GPL(kvm_task_switch); -static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +static bool kvm_is_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) { /* @@ -9543,19 +9540,18 @@ static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) * 64-bit mode (though maybe in a 32-bit code segment). * CR4.PAE and EFER.LMA must be set. */ - if (!(sregs->cr4 & X86_CR4_PAE) - || !(sregs->efer & EFER_LMA)) - return -EINVAL; + if (!(sregs->cr4 & X86_CR4_PAE) || !(sregs->efer & EFER_LMA)) + return false; } else { /* * Not in 64-bit mode: EFER.LMA is clear and the code * segment cannot be 64-bit. */ if (sregs->efer & EFER_LMA || sregs->cs.l) - return -EINVAL; + return false; } - return kvm_valid_cr4(vcpu, sregs->cr4); + return kvm_is_valid_cr4(vcpu, sregs->cr4); } static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) @@ -9567,7 +9563,7 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) struct desc_ptr dt; int ret = -EINVAL; - if (kvm_valid_sregs(vcpu, sregs)) + if (!kvm_is_valid_sregs(vcpu, sregs)) goto out; apic_base_msr.data = sregs->apic_base; diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index e7ca622a468f5..764c967a19933 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -369,7 +369,7 @@ static inline bool kvm_dr6_valid(u64 data) void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu); void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu); int kvm_spec_ctrl_test_value(u64 value); -int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); +bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu); int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r, struct x86_exception *e); -- GitLab From 7a873e4555679a0e749422db071c142b57f80be9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 6 Oct 2020 18:44:17 -0700 Subject: [PATCH 0168/1894] KVM: selftests: Verify supported CR4 bits can be set before KVM_SET_CPUID2 Extend the KVM_SET_SREGS test to verify that all supported CR4 bits, as enumerated by KVM, can be set before KVM_SET_CPUID2, i.e. without first defining the vCPU model. KVM is supposed to skip guest CPUID checks when host userspace is stuffing guest state. Check the inverse as well, i.e. that KVM rejects KVM_SET_REGS if CR4 has one or more unsupported bits set. Signed-off-by: Sean Christopherson Message-Id: <20201007014417.29276-7-sean.j.christopherson@intel.com> Signed-off-by: Paolo Bonzini --- .../selftests/kvm/include/x86_64/processor.h | 17 ++++ .../selftests/kvm/include/x86_64/vmx.h | 4 - .../selftests/kvm/x86_64/set_sregs_test.c | 92 ++++++++++++++++++- 3 files changed, 108 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 8e61340b39111..90cd5984751b7 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -27,6 +27,7 @@ #define X86_CR4_OSFXSR (1ul << 9) #define X86_CR4_OSXMMEXCPT (1ul << 10) #define X86_CR4_UMIP (1ul << 11) +#define X86_CR4_LA57 (1ul << 12) #define X86_CR4_VMXE (1ul << 13) #define X86_CR4_SMXE (1ul << 14) #define X86_CR4_FSGSBASE (1ul << 16) @@ -36,6 +37,22 @@ #define X86_CR4_SMAP (1ul << 21) #define X86_CR4_PKE (1ul << 22) +/* CPUID.1.ECX */ +#define CPUID_VMX (1ul << 5) +#define CPUID_SMX (1ul << 6) +#define CPUID_PCID (1ul << 17) +#define CPUID_XSAVE (1ul << 26) + +/* CPUID.7.EBX */ +#define CPUID_FSGSBASE (1ul << 0) +#define CPUID_SMEP (1ul << 7) +#define CPUID_SMAP (1ul << 20) + +/* CPUID.7.ECX */ +#define CPUID_UMIP (1ul << 2) +#define CPUID_PKU (1ul << 3) +#define CPUID_LA57 (1ul << 16) + #define UNEXPECTED_VECTOR_PORT 0xfff0u /* General Registers in 64-Bit Mode */ diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h index e78d7e26ba611..65eb1079a161e 100644 --- a/tools/testing/selftests/kvm/include/x86_64/vmx.h +++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h @@ -11,10 +11,6 @@ #include #include "processor.h" -#define CPUID_VMX_BIT 5 - -#define CPUID_VMX (1 << 5) - /* * Definitions of Primary Processor-Based VM-Execution Controls. */ diff --git a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c index 9f7656184f31e..318be0bf77ab6 100644 --- a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c +++ b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c @@ -24,16 +24,106 @@ #define VCPU_ID 5 +static void test_cr4_feature_bit(struct kvm_vm *vm, struct kvm_sregs *orig, + uint64_t feature_bit) +{ + struct kvm_sregs sregs; + int rc; + + /* Skip the sub-test, the feature is supported. */ + if (orig->cr4 & feature_bit) + return; + + memcpy(&sregs, orig, sizeof(sregs)); + sregs.cr4 |= feature_bit; + + rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs); + TEST_ASSERT(rc, "KVM allowed unsupported CR4 bit (0x%lx)", feature_bit); + + /* Sanity check that KVM didn't change anything. */ + vcpu_sregs_get(vm, VCPU_ID, &sregs); + TEST_ASSERT(!memcmp(&sregs, orig, sizeof(sregs)), "KVM modified sregs"); +} + +static uint64_t calc_cr4_feature_bits(struct kvm_vm *vm) +{ + struct kvm_cpuid_entry2 *cpuid_1, *cpuid_7; + uint64_t cr4; + + cpuid_1 = kvm_get_supported_cpuid_entry(1); + cpuid_7 = kvm_get_supported_cpuid_entry(7); + + cr4 = X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE | + X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE | X86_CR4_PGE | + X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT; + if (cpuid_7->ecx & CPUID_UMIP) + cr4 |= X86_CR4_UMIP; + if (cpuid_7->ecx & CPUID_LA57) + cr4 |= X86_CR4_LA57; + if (cpuid_1->ecx & CPUID_VMX) + cr4 |= X86_CR4_VMXE; + if (cpuid_1->ecx & CPUID_SMX) + cr4 |= X86_CR4_SMXE; + if (cpuid_7->ebx & CPUID_FSGSBASE) + cr4 |= X86_CR4_FSGSBASE; + if (cpuid_1->ecx & CPUID_PCID) + cr4 |= X86_CR4_PCIDE; + if (cpuid_1->ecx & CPUID_XSAVE) + cr4 |= X86_CR4_OSXSAVE; + if (cpuid_7->ebx & CPUID_SMEP) + cr4 |= X86_CR4_SMEP; + if (cpuid_7->ebx & CPUID_SMAP) + cr4 |= X86_CR4_SMAP; + if (cpuid_7->ecx & CPUID_PKU) + cr4 |= X86_CR4_PKE; + + return cr4; +} + int main(int argc, char *argv[]) { struct kvm_sregs sregs; struct kvm_vm *vm; + uint64_t cr4; int rc; /* Tell stdout not to buffer its content */ setbuf(stdout, NULL); - /* Create VM */ + /* + * Create a dummy VM, specifically to avoid doing KVM_SET_CPUID2, and + * use it to verify all supported CR4 bits can be set prior to defining + * the vCPU model, i.e. without doing KVM_SET_CPUID2. + */ + vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR); + vm_vcpu_add(vm, VCPU_ID); + + vcpu_sregs_get(vm, VCPU_ID, &sregs); + + sregs.cr4 |= calc_cr4_feature_bits(vm); + cr4 = sregs.cr4; + + rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs); + TEST_ASSERT(!rc, "Failed to set supported CR4 bits (0x%lx)", cr4); + + vcpu_sregs_get(vm, VCPU_ID, &sregs); + TEST_ASSERT(sregs.cr4 == cr4, "sregs.CR4 (0x%llx) != CR4 (0x%lx)", + sregs.cr4, cr4); + + /* Verify all unsupported features are rejected by KVM. */ + test_cr4_feature_bit(vm, &sregs, X86_CR4_UMIP); + test_cr4_feature_bit(vm, &sregs, X86_CR4_LA57); + test_cr4_feature_bit(vm, &sregs, X86_CR4_VMXE); + test_cr4_feature_bit(vm, &sregs, X86_CR4_SMXE); + test_cr4_feature_bit(vm, &sregs, X86_CR4_FSGSBASE); + test_cr4_feature_bit(vm, &sregs, X86_CR4_PCIDE); + test_cr4_feature_bit(vm, &sregs, X86_CR4_OSXSAVE); + test_cr4_feature_bit(vm, &sregs, X86_CR4_SMEP); + test_cr4_feature_bit(vm, &sregs, X86_CR4_SMAP); + test_cr4_feature_bit(vm, &sregs, X86_CR4_PKE); + kvm_vm_free(vm); + + /* Create a "real" VM and verify APIC_BASE can be set. */ vm = vm_create_default(VCPU_ID, 0, NULL); vcpu_sregs_get(vm, VCPU_ID, &sregs); -- GitLab From 1c96dcceaeb3a99aaf0d548eef2223e0b02a7e40 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 5 Nov 2020 11:20:49 -0500 Subject: [PATCH 0169/1894] KVM: x86: fix apic_accept_events vs check_nested_events vmx_apic_init_signal_blocked is buggy in that it returns true even in VMX non-root mode. In non-root mode, however, INITs are not latched, they just cause a vmexit. Previously, KVM was waiting for them to be processed when kvm_apic_accept_events and in the meanwhile it ate the SIPIs that the processor received. However, in order to implement the wait-for-SIPI activity state, KVM will have to process KVM_APIC_SIPI in vmx_check_nested_events, and it will not be possible anymore to disregard SIPIs in non-root mode as the code is currently doing. By calling kvm_x86_ops.nested_ops->check_events, we can force a vmexit (with the side-effect of latching INITs) before incorrectly injecting an INIT or SIPI in a guest, and therefore vmx_apic_init_signal_blocked can do the right thing. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 30 ++++++++++++++++++++++++++---- arch/x86/kvm/vmx/vmx.c | 2 +- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 105e7859d1f23..e3ee597ff5404 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2843,14 +2843,35 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; u8 sipi_vector; + int r; unsigned long pe; - if (!lapic_in_kernel(vcpu) || !apic->pending_events) + if (!lapic_in_kernel(vcpu)) + return; + + /* + * Read pending events before calling the check_events + * callback. + */ + pe = smp_load_acquire(&apic->pending_events); + if (!pe) return; + if (is_guest_mode(vcpu)) { + r = kvm_x86_ops.nested_ops->check_events(vcpu); + if (r < 0) + return; + /* + * If an event has happened and caused a vmexit, + * we know INITs are latched and therefore + * we will not incorrectly deliver an APIC + * event instead of a vmexit. + */ + } + /* * INITs are latched while CPU is in specific states - * (SMM, VMX non-root mode, SVM with GIF=0). + * (SMM, VMX root mode, SVM with GIF=0). * Because a CPU cannot be in these states immediately * after it has processed an INIT signal (and thus in * KVM_MP_STATE_INIT_RECEIVED state), just eat SIPIs @@ -2858,13 +2879,13 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu) */ if (kvm_vcpu_latch_init(vcpu)) { WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED); - if (test_bit(KVM_APIC_SIPI, &apic->pending_events)) + if (test_bit(KVM_APIC_SIPI, &pe)) clear_bit(KVM_APIC_SIPI, &apic->pending_events); return; } - pe = xchg(&apic->pending_events, 0); if (test_bit(KVM_APIC_INIT, &pe)) { + clear_bit(KVM_APIC_INIT, &apic->pending_events); kvm_vcpu_reset(vcpu, true); if (kvm_vcpu_is_bsp(apic->vcpu)) vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; @@ -2873,6 +2894,7 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu) } if (test_bit(KVM_APIC_SIPI, &pe) && vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { + clear_bit(KVM_APIC_SIPI, &apic->pending_events); /* evaluate pending_events before reading the vector */ smp_rmb(); sipi_vector = apic->sipi_vector; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 39ff7600b0aff..fd70bdf638441 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7558,7 +7558,7 @@ static void enable_smi_window(struct kvm_vcpu *vcpu) static bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu) { - return to_vmx(vcpu)->nested.vmxon; + return to_vmx(vcpu)->nested.vmxon && !is_guest_mode(vcpu); } static void vmx_migrate_timers(struct kvm_vcpu *vcpu) -- GitLab From bf0cd88ce363a2de3684baaa48d3f194acdc516c Mon Sep 17 00:00:00 2001 From: Yadong Qi Date: Fri, 6 Nov 2020 14:51:22 +0800 Subject: [PATCH 0170/1894] KVM: x86: emulate wait-for-SIPI and SIPI-VMExit Background: We have a lightweight HV, it needs INIT-VMExit and SIPI-VMExit to wake-up APs for guests since it do not monitor the Local APIC. But currently virtual wait-for-SIPI(WFS) state is not supported in nVMX, so when running on top of KVM, the L1 HV cannot receive the INIT-VMExit and SIPI-VMExit which cause the L2 guest cannot wake up the APs. According to Intel SDM Chapter 25.2 Other Causes of VM Exits, SIPIs cause VM exits when a logical processor is in wait-for-SIPI state. In this patch: 1. introduce SIPI exit reason, 2. introduce wait-for-SIPI state for nVMX, 3. advertise wait-for-SIPI support to guest. When L1 hypervisor is not monitoring Local APIC, L0 need to emulate INIT-VMExit and SIPI-VMExit to L1 to emulate INIT-SIPI-SIPI for L2. L2 LAPIC write would be traped by L0 Hypervisor(KVM), L0 should emulate the INIT/SIPI vmexit to L1 hypervisor to set proper state for L2's vcpu state. Handle procdure: Source vCPU: L2 write LAPIC.ICR(INIT). L0 trap LAPIC.ICR write(INIT): inject a latched INIT event to target vCPU. Target vCPU: L0 emulate an INIT VMExit to L1 if is guest mode. L1 set guest VMCS, guest_activity_state=WAIT_SIPI, vmresume. L0 set vcpu.mp_state to INIT_RECEIVED if (vmcs12.guest_activity_state == WAIT_SIPI). Source vCPU: L2 write LAPIC.ICR(SIPI). L0 trap LAPIC.ICR write(INIT): inject a latched SIPI event to traget vCPU. Target vCPU: L0 emulate an SIPI VMExit to L1 if (vcpu.mp_state == INIT_RECEIVED). L1 set CS:IP, guest_activity_state=ACTIVE, vmresume. L0 resume to L2. L2 start-up. Signed-off-by: Yadong Qi Message-Id: <20200922052343.84388-1-yadong.qi@intel.com> Signed-off-by: Paolo Bonzini Message-Id: <20201106065122.403183-1-yadong.qi@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/vmx.h | 1 + arch/x86/include/uapi/asm/vmx.h | 2 ++ arch/x86/kvm/vmx/nested.c | 55 ++++++++++++++++++++++++--------- 3 files changed, 44 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index f8ba5289ecb01..38ca445a84295 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -113,6 +113,7 @@ #define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f #define VMX_MISC_SAVE_EFER_LMA 0x00000020 #define VMX_MISC_ACTIVITY_HLT 0x00000040 +#define VMX_MISC_ACTIVITY_WAIT_SIPI 0x00000100 #define VMX_MISC_ZERO_LEN_INS 0x40000000 #define VMX_MISC_MSR_LIST_MULTIPLIER 512 diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index b8ff9e8ac0d51..ada955c5ebb60 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -32,6 +32,7 @@ #define EXIT_REASON_EXTERNAL_INTERRUPT 1 #define EXIT_REASON_TRIPLE_FAULT 2 #define EXIT_REASON_INIT_SIGNAL 3 +#define EXIT_REASON_SIPI_SIGNAL 4 #define EXIT_REASON_INTERRUPT_WINDOW 7 #define EXIT_REASON_NMI_WINDOW 8 @@ -94,6 +95,7 @@ { EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \ { EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \ { EXIT_REASON_INIT_SIGNAL, "INIT_SIGNAL" }, \ + { EXIT_REASON_SIPI_SIGNAL, "SIPI_SIGNAL" }, \ { EXIT_REASON_INTERRUPT_WINDOW, "INTERRUPT_WINDOW" }, \ { EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \ { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \ diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index efebd849b3c9b..e2f26564a12de 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2952,7 +2952,8 @@ static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu, static int nested_check_guest_non_reg_state(struct vmcs12 *vmcs12) { if (CC(vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE && - vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT)) + vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT && + vmcs12->guest_activity_state != GUEST_ACTIVITY_WAIT_SIPI)) return -EINVAL; return 0; @@ -3559,19 +3560,29 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) */ nested_cache_shadow_vmcs12(vcpu, vmcs12); - /* - * If we're entering a halted L2 vcpu and the L2 vcpu won't be - * awakened by event injection or by an NMI-window VM-exit or - * by an interrupt-window VM-exit, halt the vcpu. - */ - if ((vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) && - !(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) && - !(vmcs12->cpu_based_vm_exec_control & CPU_BASED_NMI_WINDOW_EXITING) && - !((vmcs12->cpu_based_vm_exec_control & CPU_BASED_INTR_WINDOW_EXITING) && - (vmcs12->guest_rflags & X86_EFLAGS_IF))) { + switch (vmcs12->guest_activity_state) { + case GUEST_ACTIVITY_HLT: + /* + * If we're entering a halted L2 vcpu and the L2 vcpu won't be + * awakened by event injection or by an NMI-window VM-exit or + * by an interrupt-window VM-exit, halt the vcpu. + */ + if (!(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) && + !nested_cpu_has(vmcs12, CPU_BASED_NMI_WINDOW_EXITING) && + !(nested_cpu_has(vmcs12, CPU_BASED_INTR_WINDOW_EXITING) && + (vmcs12->guest_rflags & X86_EFLAGS_IF))) { + vmx->nested.nested_run_pending = 0; + return kvm_vcpu_halt(vcpu); + } + break; + case GUEST_ACTIVITY_WAIT_SIPI: vmx->nested.nested_run_pending = 0; - return kvm_vcpu_halt(vcpu); + vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; + break; + default: + break; } + return 1; vmentry_failed: @@ -3797,7 +3808,20 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu) return -EBUSY; nested_vmx_update_pending_dbg(vcpu); clear_bit(KVM_APIC_INIT, &apic->pending_events); - nested_vmx_vmexit(vcpu, EXIT_REASON_INIT_SIGNAL, 0, 0); + if (vcpu->arch.mp_state != KVM_MP_STATE_INIT_RECEIVED) + nested_vmx_vmexit(vcpu, EXIT_REASON_INIT_SIGNAL, 0, 0); + return 0; + } + + if (lapic_in_kernel(vcpu) && + test_bit(KVM_APIC_SIPI, &apic->pending_events)) { + if (block_nested_events) + return -EBUSY; + + clear_bit(KVM_APIC_SIPI, &apic->pending_events); + if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) + nested_vmx_vmexit(vcpu, EXIT_REASON_SIPI_SIGNAL, 0, + apic->sipi_vector & 0xFFUL); return 0; } @@ -4036,6 +4060,8 @@ static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) vmcs12->guest_activity_state = GUEST_ACTIVITY_HLT; + else if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) + vmcs12->guest_activity_state = GUEST_ACTIVITY_WAIT_SIPI; else vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE; @@ -6483,7 +6509,8 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps) msrs->misc_low |= MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS | VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE | - VMX_MISC_ACTIVITY_HLT; + VMX_MISC_ACTIVITY_HLT | + VMX_MISC_ACTIVITY_WAIT_SIPI; msrs->misc_high = 0; /* -- GitLab From c4d51a52c67a1e3a0fa3006e5ec21cdc07649cd6 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 27 Oct 2020 14:39:43 +0000 Subject: [PATCH 0171/1894] sched/wait: Add add_wait_queue_priority() This allows an exclusive wait_queue_entry to be added at the head of the queue, instead of the tail as normal. Thus, it gets to consume events first without allowing non-exclusive waiters to be woken at all. The (first) intended use is for KVM IRQFD, which currently has inconsistent behaviour depending on whether posted interrupts are available or not. If they are, KVM will bypass the eventfd completely and deliver interrupts directly to the appropriate vCPU. If not, events are delivered through the eventfd and userspace will receive them when polling on the eventfd. By using add_wait_queue_priority(), KVM will be able to consistently consume events within the kernel without accidentally exposing them to userspace when they're supposed to be bypassed. This, in turn, means that userspace doesn't have to jump through hoops to avoid listening on the erroneously noisy eventfd and injecting duplicate interrupts. Signed-off-by: David Woodhouse Message-Id: <20201027143944.648769-2-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- include/linux/wait.h | 12 +++++++++++- kernel/sched/wait.c | 17 ++++++++++++++++- 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/include/linux/wait.h b/include/linux/wait.h index 27fb99cfeb026..fe10e8570a522 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -22,6 +22,7 @@ int default_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int #define WQ_FLAG_BOOKMARK 0x04 #define WQ_FLAG_CUSTOM 0x08 #define WQ_FLAG_DONE 0x10 +#define WQ_FLAG_PRIORITY 0x20 /* * A single wait-queue entry structure: @@ -164,11 +165,20 @@ static inline bool wq_has_sleeper(struct wait_queue_head *wq_head) extern void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); extern void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); +extern void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); extern void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); static inline void __add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) { - list_add(&wq_entry->entry, &wq_head->head); + struct list_head *head = &wq_head->head; + struct wait_queue_entry *wq; + + list_for_each_entry(wq, &wq_head->head, entry) { + if (!(wq->flags & WQ_FLAG_PRIORITY)) + break; + head = &wq->entry; + } + list_add(&wq_entry->entry, head); } /* diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 01f5d3020589d..183cc6ae68a68 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -37,6 +37,17 @@ void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue } EXPORT_SYMBOL(add_wait_queue_exclusive); +void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) +{ + unsigned long flags; + + wq_entry->flags |= WQ_FLAG_EXCLUSIVE | WQ_FLAG_PRIORITY; + spin_lock_irqsave(&wq_head->lock, flags); + __add_wait_queue(wq_head, wq_entry); + spin_unlock_irqrestore(&wq_head->lock, flags); +} +EXPORT_SYMBOL_GPL(add_wait_queue_priority); + void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) { unsigned long flags; @@ -57,7 +68,11 @@ EXPORT_SYMBOL(remove_wait_queue); /* * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just * wake everything up. If it's an exclusive wakeup (nr_exclusive == small +ve - * number) then we wake all the non-exclusive tasks and one exclusive task. + * number) then we wake that number of exclusive tasks, and potentially all + * the non-exclusive tasks. Normally, exclusive tasks will be at the end of + * the list and any non-exclusive tasks will be woken first. A priority task + * may be at the head of the list, and can consume the event without any other + * tasks being woken. * * There are circumstances in which we can try to wake a task which has already * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns -- GitLab From e8dbf19508a112d125190df77ee0464b7ba56192 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 26 Oct 2020 17:53:25 +0000 Subject: [PATCH 0172/1894] kvm/eventfd: Use priority waitqueue to catch events before userspace As far as I can tell, when we use posted interrupts we silently cut off the events from userspace, if it's listening on the same eventfd that feeds the irqfd. I like that behaviour. Let's do it all the time, even without posted interrupts. It makes it much easier to handle IRQ remapping invalidation without having to constantly add/remove the fd from the userspace poll set. We can just leave userspace polling on it, and the bypass will... well... bypass it. Signed-off-by: David Woodhouse Message-Id: <20201026175325.585623-2-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- virt/kvm/eventfd.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index c2323c27a28b5..efa8a5ae7a95f 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -191,6 +191,7 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) struct kvm *kvm = irqfd->kvm; unsigned seq; int idx; + int ret = 0; if (flags & EPOLLIN) { idx = srcu_read_lock(&kvm->irq_srcu); @@ -204,6 +205,7 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) false) == -EWOULDBLOCK) schedule_work(&irqfd->inject); srcu_read_unlock(&kvm->irq_srcu, idx); + ret = 1; } if (flags & EPOLLHUP) { @@ -227,7 +229,7 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) spin_unlock_irqrestore(&kvm->irqfds.lock, iflags); } - return 0; + return ret; } static void @@ -236,7 +238,7 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, { struct kvm_kernel_irqfd *irqfd = container_of(pt, struct kvm_kernel_irqfd, pt); - add_wait_queue(wqh, &irqfd->wait); + add_wait_queue_priority(wqh, &irqfd->wait); } /* Must be called under irqfds.lock */ -- GitLab From 28f1326710555bbe666f64452d08f2d7dd657cae Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 27 Oct 2020 13:55:21 +0000 Subject: [PATCH 0173/1894] eventfd: Export eventfd_ctx_do_read() Where events are consumed in the kernel, for example by KVM's irqfd_wakeup() and VFIO's virqfd_wakeup(), they currently lack a mechanism to drain the eventfd's counter. Since the wait queue is already locked while the wakeup functions are invoked, all they really need to do is call eventfd_ctx_do_read(). Add a check for the lock, and export it for them. Signed-off-by: David Woodhouse Message-Id: <20201027135523.646811-2-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- fs/eventfd.c | 5 ++++- include/linux/eventfd.h | 6 ++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/eventfd.c b/fs/eventfd.c index df466ef81dddf..e265b6dd4f345 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -182,11 +182,14 @@ static __poll_t eventfd_poll(struct file *file, poll_table *wait) return events; } -static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt) +void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt) { + lockdep_assert_held(&ctx->wqh.lock); + *cnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count; ctx->count -= *cnt; } +EXPORT_SYMBOL_GPL(eventfd_ctx_do_read); /** * eventfd_ctx_remove_wait_queue - Read the current counter and removes wait queue. diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h index dc4fd8a6644dd..fa0a524baed0a 100644 --- a/include/linux/eventfd.h +++ b/include/linux/eventfd.h @@ -41,6 +41,7 @@ struct eventfd_ctx *eventfd_ctx_fileget(struct file *file); __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n); int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait, __u64 *cnt); +void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt); DECLARE_PER_CPU(int, eventfd_wake_count); @@ -82,6 +83,11 @@ static inline bool eventfd_signal_count(void) return false; } +static inline void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt) +{ + +} + #endif #endif /* _LINUX_EVENTFD_H */ -- GitLab From b1b397aeef8177f4f7bd91a0d5fa708f4752a499 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 27 Oct 2020 13:55:22 +0000 Subject: [PATCH 0174/1894] vfio/virqfd: Drain events from eventfd in virqfd_wakeup() Don't allow the events to accumulate in the eventfd counter, drain them as they are handled. Signed-off-by: David Woodhouse Message-Id: <20201027135523.646811-3-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini Acked-by: Alex Williamson --- drivers/vfio/virqfd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/vfio/virqfd.c b/drivers/vfio/virqfd.c index 997cb5d0a657c..414e98d82b02e 100644 --- a/drivers/vfio/virqfd.c +++ b/drivers/vfio/virqfd.c @@ -46,6 +46,9 @@ static int virqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void __poll_t flags = key_to_poll(key); if (flags & EPOLLIN) { + u64 cnt; + eventfd_ctx_do_read(virqfd->eventfd, &cnt); + /* An event has been signaled, call function */ if ((!virqfd->handler || virqfd->handler(virqfd->opaque, virqfd->data)) && -- GitLab From b59e00dd8cda75fc8303c9f6847ac720b10664e3 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 27 Oct 2020 13:55:23 +0000 Subject: [PATCH 0175/1894] kvm/eventfd: Drain events from eventfd in irqfd_wakeup() Don't allow the events to accumulate in the eventfd counter, drain them as they are handled. Signed-off-by: David Woodhouse Message-Id: <20201027135523.646811-4-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- virt/kvm/eventfd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index efa8a5ae7a95f..e996989cd580e 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -194,6 +194,9 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) int ret = 0; if (flags & EPOLLIN) { + u64 cnt; + eventfd_ctx_do_read(irqfd->eventfd, &cnt); + idx = srcu_read_lock(&kvm->irq_srcu); do { seq = read_seqcount_begin(&irqfd->irq_entry_sc); -- GitLab From c21d54f0307ff42a346294899107b570b98c47b5 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 29 Sep 2020 17:09:43 +0200 Subject: [PATCH 0176/1894] KVM: x86: hyper-v: allow KVM_GET_SUPPORTED_HV_CPUID as a system ioctl KVM_GET_SUPPORTED_HV_CPUID is a vCPU ioctl but its output is now independent from vCPU and in some cases VMMs may want to use it as a system ioctl instead. In particular, QEMU doesn CPU feature expansion before any vCPU gets created so KVM_GET_SUPPORTED_HV_CPUID can't be used. Convert KVM_GET_SUPPORTED_HV_CPUID to 'dual' system/vCPU ioctl with the same meaning. Signed-off-by: Vitaly Kuznetsov Message-Id: <20200929150944.1235688-2-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 16 ++++++++---- arch/x86/kvm/hyperv.c | 6 ++--- arch/x86/kvm/hyperv.h | 4 +-- arch/x86/kvm/vmx/evmcs.c | 3 +-- arch/x86/kvm/x86.c | 45 ++++++++++++++++++++-------------- include/uapi/linux/kvm.h | 3 ++- 6 files changed, 46 insertions(+), 31 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index e00a66d723728..81d54fe76a2d5 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -4455,9 +4455,9 @@ that KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 is present. 4.118 KVM_GET_SUPPORTED_HV_CPUID -------------------------------- -:Capability: KVM_CAP_HYPERV_CPUID +:Capability: KVM_CAP_HYPERV_CPUID (vcpu), KVM_CAP_SYS_HYPERV_CPUID (system) :Architectures: x86 -:Type: vcpu ioctl +:Type: system ioctl, vcpu ioctl :Parameters: struct kvm_cpuid2 (in/out) :Returns: 0 on success, -1 on error @@ -4502,9 +4502,6 @@ Currently, the following list of CPUID leaves are returned: - HYPERV_CPUID_SYNDBG_INTERFACE - HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES -HYPERV_CPUID_NESTED_FEATURES leaf is only exposed when Enlightened VMCS was -enabled on the corresponding vCPU (KVM_CAP_HYPERV_ENLIGHTENED_VMCS). - Userspace invokes KVM_GET_SUPPORTED_HV_CPUID by passing a kvm_cpuid2 structure with the 'nent' field indicating the number of entries in the variable-size array 'entries'. If the number of entries is too low to describe all Hyper-V @@ -4515,6 +4512,15 @@ number of valid entries in the 'entries' array, which is then filled. 'index' and 'flags' fields in 'struct kvm_cpuid_entry2' are currently reserved, userspace should not expect to get any particular value there. +Note, vcpu version of KVM_GET_SUPPORTED_HV_CPUID is currently deprecated. Unlike +system ioctl which exposes all supported feature bits unconditionally, vcpu +version has the following quirks: +- HYPERV_CPUID_NESTED_FEATURES leaf and HV_X64_ENLIGHTENED_VMCS_RECOMMENDED + feature bit are only exposed when Enlightened VMCS was previously enabled + on the corresponding vCPU (KVM_CAP_HYPERV_ENLIGHTENED_VMCS). +- HV_STIMER_DIRECT_MODE_AVAILABLE bit is only exposed with in-kernel LAPIC. + (presumes KVM_CREATE_IRQCHIP has already been called). + 4.119 KVM_ARM_VCPU_FINALIZE --------------------------- diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 5c7c4060b45cb..922c69dcca4d9 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1951,8 +1951,8 @@ int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args) return kvm_hv_eventfd_assign(kvm, args->conn_id, args->fd); } -int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, - struct kvm_cpuid_entry2 __user *entries) +int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, + struct kvm_cpuid_entry2 __user *entries) { uint16_t evmcs_ver = 0; struct kvm_cpuid_entry2 cpuid_entries[] = { @@ -2037,7 +2037,7 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, * Direct Synthetic timers only make sense with in-kernel * LAPIC */ - if (lapic_in_kernel(vcpu)) + if (!vcpu || lapic_in_kernel(vcpu)) ent->edx |= HV_STIMER_DIRECT_MODE_AVAILABLE; break; diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index e68c6c2e96490..6d7def2b0aade 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -126,7 +126,7 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm, void kvm_hv_init_vm(struct kvm *kvm); void kvm_hv_destroy_vm(struct kvm *kvm); int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args); -int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, - struct kvm_cpuid_entry2 __user *entries); +int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, + struct kvm_cpuid_entry2 __user *entries); #endif diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c index f3199bb02f22d..41f24661af04d 100644 --- a/arch/x86/kvm/vmx/evmcs.c +++ b/arch/x86/kvm/vmx/evmcs.c @@ -326,7 +326,6 @@ bool nested_enlightened_vmentry(struct kvm_vcpu *vcpu, u64 *evmcs_gpa) uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu) { - struct vcpu_vmx *vmx = to_vmx(vcpu); /* * vmcs_version represents the range of supported Enlightened VMCS * versions: lower 8 bits is the minimal version, higher 8 bits is the @@ -334,7 +333,7 @@ uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu) * KVM_EVMCS_VERSION. */ if (kvm_cpu_cap_get(X86_FEATURE_VMX) && - vmx->nested.enlightened_vmcs_enabled) + (!vcpu || to_vmx(vcpu)->nested.enlightened_vmcs_enabled)) return (KVM_EVMCS_VERSION << 8) | 1; return 0; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2db86702cac49..773cb52cb7758 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3677,6 +3677,27 @@ static inline bool kvm_can_mwait_in_guest(void) boot_cpu_has(X86_FEATURE_ARAT); } +static int kvm_ioctl_get_supported_hv_cpuid(struct kvm_vcpu *vcpu, + struct kvm_cpuid2 __user *cpuid_arg) +{ + struct kvm_cpuid2 cpuid; + int r; + + r = -EFAULT; + if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid))) + return r; + + r = kvm_get_hv_cpuid(vcpu, &cpuid, cpuid_arg->entries); + if (r) + return r; + + r = -EFAULT; + if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid))) + return r; + + return 0; +} + int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r = 0; @@ -3713,6 +3734,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_HYPERV_TLBFLUSH: case KVM_CAP_HYPERV_SEND_IPI: case KVM_CAP_HYPERV_CPUID: + case KVM_CAP_SYS_HYPERV_CPUID: case KVM_CAP_PCI_SEGMENT: case KVM_CAP_DEBUGREGS: case KVM_CAP_X86_ROBUST_SINGLESTEP: @@ -3898,6 +3920,9 @@ long kvm_arch_dev_ioctl(struct file *filp, case KVM_GET_MSRS: r = msr_io(NULL, argp, do_get_msr_feature, 1); break; + case KVM_GET_SUPPORTED_HV_CPUID: + r = kvm_ioctl_get_supported_hv_cpuid(NULL, argp); + break; default: r = -EINVAL; break; @@ -4974,25 +4999,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, srcu_read_unlock(&vcpu->kvm->srcu, idx); break; } - case KVM_GET_SUPPORTED_HV_CPUID: { - struct kvm_cpuid2 __user *cpuid_arg = argp; - struct kvm_cpuid2 cpuid; - - r = -EFAULT; - if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid))) - goto out; - - r = kvm_vcpu_ioctl_get_hv_cpuid(vcpu, &cpuid, - cpuid_arg->entries); - if (r) - goto out; - - r = -EFAULT; - if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid))) - goto out; - r = 0; + case KVM_GET_SUPPORTED_HV_CPUID: + r = kvm_ioctl_get_supported_hv_cpuid(vcpu, argp); break; - } default: r = -EINVAL; } diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index ca41220b40b8b..204afbe1240e3 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1053,6 +1053,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_X86_USER_SPACE_MSR 188 #define KVM_CAP_X86_MSR_FILTER 189 #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190 +#define KVM_CAP_SYS_HYPERV_CPUID 191 #ifdef KVM_CAP_IRQ_ROUTING @@ -1511,7 +1512,7 @@ struct kvm_enc_region { /* Available with KVM_CAP_MANUAL_DIRTY_LOG_PROTECT_2 */ #define KVM_CLEAR_DIRTY_LOG _IOWR(KVMIO, 0xc0, struct kvm_clear_dirty_log) -/* Available with KVM_CAP_HYPERV_CPUID */ +/* Available with KVM_CAP_HYPERV_CPUID (vcpu) / KVM_CAP_SYS_HYPERV_CPUID (system) */ #define KVM_GET_SUPPORTED_HV_CPUID _IOWR(KVMIO, 0xc1, struct kvm_cpuid2) /* Available with KVM_CAP_ARM_SVE */ -- GitLab From 8b460692fee46a47cebd66d70df88dc9aa6d6b8b Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 29 Sep 2020 17:09:44 +0200 Subject: [PATCH 0177/1894] KVM: selftests: test KVM_GET_SUPPORTED_HV_CPUID as a system ioctl KVM_GET_SUPPORTED_HV_CPUID is now supported as both vCPU and VM ioctl, test that. Signed-off-by: Vitaly Kuznetsov Message-Id: <20200929150944.1235688-3-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- .../testing/selftests/kvm/include/kvm_util.h | 2 + tools/testing/selftests/kvm/lib/kvm_util.c | 26 ++++++ .../selftests/kvm/x86_64/hyperv_cpuid.c | 87 +++++++++++-------- 3 files changed, 77 insertions(+), 38 deletions(-) diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 7d29aa7869596..b2c1364c0402f 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -114,6 +114,8 @@ void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl, int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl, void *arg); void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg); +void kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg); +int _kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg); void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags); void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa); void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 126c6727a6b09..ef2114e1b7ad7 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -1582,6 +1582,32 @@ void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg) cmd, ret, errno, strerror(errno)); } +/* + * KVM system ioctl + * + * Input Args: + * vm - Virtual Machine + * cmd - Ioctl number + * arg - Argument to pass to the ioctl + * + * Return: None + * + * Issues an arbitrary ioctl on a KVM fd. + */ +void kvm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg) +{ + int ret; + + ret = ioctl(vm->kvm_fd, cmd, arg); + TEST_ASSERT(ret == 0, "KVM ioctl %lu failed, rc: %i errno: %i (%s)", + cmd, ret, errno, strerror(errno)); +} + +int _kvm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg) +{ + return ioctl(vm->kvm_fd, cmd, arg); +} + /* * VM Dump * diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c index 745b708c2d3bb..88a595b7fbdd0 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c @@ -46,19 +46,19 @@ static bool smt_possible(void) } static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries, - bool evmcs_enabled) + bool evmcs_expected) { int i; int nent = 9; u32 test_val; - if (evmcs_enabled) + if (evmcs_expected) nent += 1; /* 0x4000000A */ TEST_ASSERT(hv_cpuid_entries->nent == nent, "KVM_GET_SUPPORTED_HV_CPUID should return %d entries" " with evmcs=%d (returned %d)", - nent, evmcs_enabled, hv_cpuid_entries->nent); + nent, evmcs_expected, hv_cpuid_entries->nent); for (i = 0; i < hv_cpuid_entries->nent; i++) { struct kvm_cpuid_entry2 *entry = &hv_cpuid_entries->entries[i]; @@ -68,7 +68,7 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries, "function %x is our of supported range", entry->function); - TEST_ASSERT(evmcs_enabled || (entry->function != 0x4000000A), + TEST_ASSERT(evmcs_expected || (entry->function != 0x4000000A), "0x4000000A leaf should not be reported"); TEST_ASSERT(entry->index == 0, @@ -87,7 +87,7 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries, TEST_ASSERT(entry->eax == test_val, "Wrong max leaf report in 0x40000000.EAX: %x" " (evmcs=%d)", - entry->eax, evmcs_enabled + entry->eax, evmcs_expected ); break; case 0x40000004: @@ -110,20 +110,23 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries, } -void test_hv_cpuid_e2big(struct kvm_vm *vm) +void test_hv_cpuid_e2big(struct kvm_vm *vm, bool system) { static struct kvm_cpuid2 cpuid = {.nent = 0}; int ret; - ret = _vcpu_ioctl(vm, VCPU_ID, KVM_GET_SUPPORTED_HV_CPUID, &cpuid); + if (!system) + ret = _vcpu_ioctl(vm, VCPU_ID, KVM_GET_SUPPORTED_HV_CPUID, &cpuid); + else + ret = _kvm_ioctl(vm, KVM_GET_SUPPORTED_HV_CPUID, &cpuid); TEST_ASSERT(ret == -1 && errno == E2BIG, - "KVM_GET_SUPPORTED_HV_CPUID didn't fail with -E2BIG when" - " it should have: %d %d", ret, errno); + "%s KVM_GET_SUPPORTED_HV_CPUID didn't fail with -E2BIG when" + " it should have: %d %d", system ? "KVM" : "vCPU", ret, errno); } -struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(struct kvm_vm *vm) +struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(struct kvm_vm *vm, bool system) { int nent = 20; /* should be enough */ static struct kvm_cpuid2 *cpuid; @@ -137,7 +140,10 @@ struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(struct kvm_vm *vm) cpuid->nent = nent; - vcpu_ioctl(vm, VCPU_ID, KVM_GET_SUPPORTED_HV_CPUID, cpuid); + if (!system) + vcpu_ioctl(vm, VCPU_ID, KVM_GET_SUPPORTED_HV_CPUID, cpuid); + else + kvm_ioctl(vm, KVM_GET_SUPPORTED_HV_CPUID, cpuid); return cpuid; } @@ -146,45 +152,50 @@ struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(struct kvm_vm *vm) int main(int argc, char *argv[]) { struct kvm_vm *vm; - int rv, stage; struct kvm_cpuid2 *hv_cpuid_entries; - bool evmcs_enabled; /* Tell stdout not to buffer its content */ setbuf(stdout, NULL); - rv = kvm_check_cap(KVM_CAP_HYPERV_CPUID); - if (!rv) { + if (!kvm_check_cap(KVM_CAP_HYPERV_CPUID)) { print_skip("KVM_CAP_HYPERV_CPUID not supported"); exit(KSFT_SKIP); } - for (stage = 0; stage < 3; stage++) { - evmcs_enabled = false; + vm = vm_create_default(VCPU_ID, 0, guest_code); - vm = vm_create_default(VCPU_ID, 0, guest_code); - switch (stage) { - case 0: - test_hv_cpuid_e2big(vm); - continue; - case 1: - break; - case 2: - if (!nested_vmx_supported() || - !kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) { - print_skip("Enlightened VMCS is unsupported"); - continue; - } - vcpu_enable_evmcs(vm, VCPU_ID); - evmcs_enabled = true; - break; - } + /* Test vCPU ioctl version */ + test_hv_cpuid_e2big(vm, false); + + hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm, false); + test_hv_cpuid(hv_cpuid_entries, false); + free(hv_cpuid_entries); - hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm); - test_hv_cpuid(hv_cpuid_entries, evmcs_enabled); - free(hv_cpuid_entries); - kvm_vm_free(vm); + if (!nested_vmx_supported() || + !kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) { + print_skip("Enlightened VMCS is unsupported"); + goto do_sys; } + vcpu_enable_evmcs(vm, VCPU_ID); + hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm, false); + test_hv_cpuid(hv_cpuid_entries, true); + free(hv_cpuid_entries); + +do_sys: + /* Test system ioctl version */ + if (!kvm_check_cap(KVM_CAP_SYS_HYPERV_CPUID)) { + print_skip("KVM_CAP_SYS_HYPERV_CPUID not supported"); + goto out; + } + + test_hv_cpuid_e2big(vm, true); + + hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm, true); + test_hv_cpuid(hv_cpuid_entries, nested_vmx_supported()); + free(hv_cpuid_entries); + +out: + kvm_vm_free(vm); return 0; } -- GitLab From ff5a983cbb3746d371de2cc95ea7dcfd982b4084 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 30 Sep 2020 21:20:33 -0400 Subject: [PATCH 0178/1894] KVM: X86: Don't track dirty for KVM_SET_[TSS_ADDR|IDENTITY_MAP_ADDR] Originally, we have three code paths that can dirty a page without vcpu context for X86: - init_rmode_identity_map - init_rmode_tss - kvmgt_rw_gpa init_rmode_identity_map and init_rmode_tss will be setup on destination VM no matter what (and the guest cannot even see them), so it does not make sense to track them at all. To do this, allow __x86_set_memory_region() to return the userspace address that just allocated to the caller. Then in both of the functions we directly write to the userspace address instead of calling kvm_write_*() APIs. Another trivial change is that we don't need to explicitly clear the identity page table root in init_rmode_identity_map() because no matter what we'll write to the whole page with 4M huge page entries. Suggested-by: Paolo Bonzini Reviewed-by: Sean Christopherson Signed-off-by: Peter Xu Message-Id: <20201001012044.5151-4-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 3 +- arch/x86/kvm/svm/avic.c | 9 ++-- arch/x86/kvm/vmx/vmx.c | 88 ++++++++++++++++----------------- arch/x86/kvm/x86.c | 37 +++++++++++--- 4 files changed, 81 insertions(+), 56 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 5bb0a2bbd9c40..69e94aa716e94 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1696,7 +1696,8 @@ void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu); int kvm_is_in_guest(void); -int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size); +void __user *__x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, + u32 size); bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu); bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 8c550999ace0c..0ef84d57b72ea 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -233,7 +233,8 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu, */ static int avic_update_access_page(struct kvm *kvm, bool activate) { - int ret = 0; + void __user *ret; + int r = 0; mutex_lock(&kvm->slots_lock); /* @@ -249,13 +250,15 @@ static int avic_update_access_page(struct kvm *kvm, bool activate) APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, APIC_DEFAULT_PHYS_BASE, activate ? PAGE_SIZE : 0); - if (ret) + if (IS_ERR(ret)) { + r = PTR_ERR(ret); goto out; + } kvm->arch.apic_access_page_done = activate; out: mutex_unlock(&kvm->slots_lock); - return ret; + return r; } static int avic_init_backing_page(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index fd70bdf638441..46b32aa43811d 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -3515,42 +3515,33 @@ bool __vmx_guest_state_valid(struct kvm_vcpu *vcpu) return true; } -static int init_rmode_tss(struct kvm *kvm) +static int init_rmode_tss(struct kvm *kvm, void __user *ua) { - gfn_t fn; - u16 data = 0; - int idx, r; + const void *zero_page = (const void *) __va(page_to_phys(ZERO_PAGE(0))); + u16 data; + int i; + + for (i = 0; i < 3; i++) { + if (__copy_to_user(ua + PAGE_SIZE * i, zero_page, PAGE_SIZE)) + return -EFAULT; + } - idx = srcu_read_lock(&kvm->srcu); - fn = to_kvm_vmx(kvm)->tss_addr >> PAGE_SHIFT; - r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); - if (r < 0) - goto out; data = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE; - r = kvm_write_guest_page(kvm, fn++, &data, - TSS_IOPB_BASE_OFFSET, sizeof(u16)); - if (r < 0) - goto out; - r = kvm_clear_guest_page(kvm, fn++, 0, PAGE_SIZE); - if (r < 0) - goto out; - r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); - if (r < 0) - goto out; + if (__copy_to_user(ua + TSS_IOPB_BASE_OFFSET, &data, sizeof(u16))) + return -EFAULT; + data = ~0; - r = kvm_write_guest_page(kvm, fn, &data, - RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1, - sizeof(u8)); -out: - srcu_read_unlock(&kvm->srcu, idx); - return r; + if (__copy_to_user(ua + RMODE_TSS_SIZE - 1, &data, sizeof(u8))) + return -EFAULT; + + return 0; } static int init_rmode_identity_map(struct kvm *kvm) { struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm); int i, r = 0; - kvm_pfn_t identity_map_pfn; + void __user *uaddr; u32 tmp; /* Protect kvm_vmx->ept_identity_pagetable_done. */ @@ -3561,24 +3552,24 @@ static int init_rmode_identity_map(struct kvm *kvm) if (!kvm_vmx->ept_identity_map_addr) kvm_vmx->ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR; - identity_map_pfn = kvm_vmx->ept_identity_map_addr >> PAGE_SHIFT; - r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, - kvm_vmx->ept_identity_map_addr, PAGE_SIZE); - if (r < 0) + uaddr = __x86_set_memory_region(kvm, + IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, + kvm_vmx->ept_identity_map_addr, + PAGE_SIZE); + if (IS_ERR(uaddr)) { + r = PTR_ERR(uaddr); goto out; + } - r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE); - if (r < 0) - goto out; /* Set up identity-mapping pagetable for EPT in real mode */ for (i = 0; i < PT32_ENT_PER_PAGE; i++) { tmp = (i << 22) + (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE); - r = kvm_write_guest_page(kvm, identity_map_pfn, - &tmp, i * sizeof(tmp), sizeof(tmp)); - if (r < 0) + if (__copy_to_user(uaddr + i * sizeof(tmp), &tmp, sizeof(tmp))) { + r = -EFAULT; goto out; + } } kvm_vmx->ept_identity_pagetable_done = true; @@ -3605,19 +3596,22 @@ static void seg_setup(int seg) static int alloc_apic_access_page(struct kvm *kvm) { struct page *page; - int r = 0; + void __user *hva; + int ret = 0; mutex_lock(&kvm->slots_lock); if (kvm->arch.apic_access_page_done) goto out; - r = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, - APIC_DEFAULT_PHYS_BASE, PAGE_SIZE); - if (r) + hva = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, + APIC_DEFAULT_PHYS_BASE, PAGE_SIZE); + if (IS_ERR(hva)) { + ret = PTR_ERR(hva); goto out; + } page = gfn_to_page(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); if (is_error_page(page)) { - r = -EFAULT; + ret = -EFAULT; goto out; } @@ -3629,7 +3623,7 @@ static int alloc_apic_access_page(struct kvm *kvm) kvm->arch.apic_access_page_done = true; out: mutex_unlock(&kvm->slots_lock); - return r; + return ret; } int allocate_vpid(void) @@ -4638,7 +4632,7 @@ static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection) static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) { - int ret; + void __user *ret; if (enable_unrestricted_guest) return 0; @@ -4648,10 +4642,12 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) PAGE_SIZE * 3); mutex_unlock(&kvm->slots_lock); - if (ret) - return ret; + if (IS_ERR(ret)) + return PTR_ERR(ret); + to_kvm_vmx(kvm)->tss_addr = addr; - return init_rmode_tss(kvm); + + return init_rmode_tss(kvm, ret); } static int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 773cb52cb7758..b4ac726526f8a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10354,7 +10354,32 @@ void kvm_arch_sync_events(struct kvm *kvm) kvm_free_pit(kvm); } -int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size) +#define ERR_PTR_USR(e) ((void __user *)ERR_PTR(e)) + +/** + * __x86_set_memory_region: Setup KVM internal memory slot + * + * @kvm: the kvm pointer to the VM. + * @id: the slot ID to setup. + * @gpa: the GPA to install the slot (unused when @size == 0). + * @size: the size of the slot. Set to zero to uninstall a slot. + * + * This function helps to setup a KVM internal memory slot. Specify + * @size > 0 to install a new slot, while @size == 0 to uninstall a + * slot. The return code can be one of the following: + * + * HVA: on success (uninstall will return a bogus HVA) + * -errno: on error + * + * The caller should always use IS_ERR() to check the return value + * before use. Note, the KVM internal memory slots are guaranteed to + * remain valid and unchanged until the VM is destroyed, i.e., the + * GPA->HVA translation will not change. However, the HVA is a user + * address, i.e. its accessibility is not guaranteed, and must be + * accessed via __copy_{to,from}_user(). + */ +void __user * __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, + u32 size) { int i, r; unsigned long hva, old_npages; @@ -10363,12 +10388,12 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size) /* Called with kvm->slots_lock held. */ if (WARN_ON(id >= KVM_MEM_SLOTS_NUM)) - return -EINVAL; + return ERR_PTR_USR(-EINVAL); slot = id_to_memslot(slots, id); if (size) { if (slot && slot->npages) - return -EEXIST; + return ERR_PTR_USR(-EEXIST); /* * MAP_SHARED to prevent internal slot pages from being moved @@ -10377,7 +10402,7 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size) hva = vm_mmap(NULL, 0, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, 0); if (IS_ERR((void *)hva)) - return PTR_ERR((void *)hva); + return (void __user *)hva; } else { if (!slot || !slot->npages) return 0; @@ -10396,13 +10421,13 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size) m.memory_size = size; r = __kvm_set_memory_region(kvm, &m); if (r < 0) - return r; + return ERR_PTR_USR(r); } if (!size) vm_munmap(hva, old_npages * PAGE_SIZE); - return 0; + return (void __user *)hva; } EXPORT_SYMBOL_GPL(__x86_set_memory_region); -- GitLab From 2f5414423ef577e9e8bdb227f32d0abdd34e4274 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 6 Nov 2020 05:25:09 -0500 Subject: [PATCH 0179/1894] KVM: remove kvm_clear_guest_page kvm_clear_guest_page is not used anymore after "KVM: X86: Don't track dirty for KVM_SET_[TSS_ADDR|IDENTITY_MAP_ADDR]", except from kvm_clear_guest. We can just inline it in its sole user. Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 - virt/kvm/kvm_main.c | 11 ++--------- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 7f2e2a09ebbd9..66a4324f329d6 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -792,7 +792,6 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, offset_in_page(__gpa), v); \ }) -int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len); int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len); struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 2541a17ff1c45..1c75145798616 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2616,23 +2616,16 @@ int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, } EXPORT_SYMBOL_GPL(kvm_read_guest_cached); -int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len) -{ - const void *zero_page = (const void *) __va(page_to_phys(ZERO_PAGE(0))); - - return kvm_write_guest_page(kvm, gfn, zero_page, offset, len); -} -EXPORT_SYMBOL_GPL(kvm_clear_guest_page); - int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len) { + const void *zero_page = (const void *) __va(page_to_phys(ZERO_PAGE(0))); gfn_t gfn = gpa >> PAGE_SHIFT; int seg; int offset = offset_in_page(gpa); int ret; while ((seg = next_segment(len, offset)) != 0) { - ret = kvm_clear_guest_page(kvm, gfn, offset, seg); + ret = kvm_write_guest_page(kvm, gfn, zero_page, offset, len); if (ret < 0) return ret; offset = 0; -- GitLab From 28bd726aa404c0da8fd6852fe69bb4538a103b71 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 30 Sep 2020 21:20:34 -0400 Subject: [PATCH 0180/1894] KVM: Pass in kvm pointer into mark_page_dirty_in_slot() The context will be needed to implement the kvm dirty ring. Signed-off-by: Peter Xu Message-Id: <20201001012044.5151-5-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 +- virt/kvm/kvm_main.c | 30 +++++++++++++++++------------- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 66a4324f329d6..ca7c1459a8e30 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -797,7 +797,7 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); bool kvm_vcpu_is_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn); unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn); -void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot, gfn_t gfn); +void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot, gfn_t gfn); void mark_page_dirty(struct kvm *kvm, gfn_t gfn); struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 1c75145798616..68598fdba2268 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2196,7 +2196,8 @@ int kvm_vcpu_map(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map) } EXPORT_SYMBOL_GPL(kvm_vcpu_map); -static void __kvm_unmap_gfn(struct kvm_memory_slot *memslot, +static void __kvm_unmap_gfn(struct kvm *kvm, + struct kvm_memory_slot *memslot, struct kvm_host_map *map, struct gfn_to_pfn_cache *cache, bool dirty, bool atomic) @@ -2221,7 +2222,7 @@ static void __kvm_unmap_gfn(struct kvm_memory_slot *memslot, #endif if (dirty) - mark_page_dirty_in_slot(memslot, map->gfn); + mark_page_dirty_in_slot(kvm, memslot, map->gfn); if (cache) cache->dirty |= dirty; @@ -2235,7 +2236,7 @@ static void __kvm_unmap_gfn(struct kvm_memory_slot *memslot, int kvm_unmap_gfn(struct kvm_vcpu *vcpu, struct kvm_host_map *map, struct gfn_to_pfn_cache *cache, bool dirty, bool atomic) { - __kvm_unmap_gfn(gfn_to_memslot(vcpu->kvm, map->gfn), map, + __kvm_unmap_gfn(vcpu->kvm, gfn_to_memslot(vcpu->kvm, map->gfn), map, cache, dirty, atomic); return 0; } @@ -2243,8 +2244,8 @@ EXPORT_SYMBOL_GPL(kvm_unmap_gfn); void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty) { - __kvm_unmap_gfn(kvm_vcpu_gfn_to_memslot(vcpu, map->gfn), map, NULL, - dirty, false); + __kvm_unmap_gfn(vcpu->kvm, kvm_vcpu_gfn_to_memslot(vcpu, map->gfn), + map, NULL, dirty, false); } EXPORT_SYMBOL_GPL(kvm_vcpu_unmap); @@ -2418,7 +2419,8 @@ int kvm_vcpu_read_guest_atomic(struct kvm_vcpu *vcpu, gpa_t gpa, } EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_atomic); -static int __kvm_write_guest_page(struct kvm_memory_slot *memslot, gfn_t gfn, +static int __kvm_write_guest_page(struct kvm *kvm, + struct kvm_memory_slot *memslot, gfn_t gfn, const void *data, int offset, int len) { int r; @@ -2430,7 +2432,7 @@ static int __kvm_write_guest_page(struct kvm_memory_slot *memslot, gfn_t gfn, r = __copy_to_user((void __user *)addr + offset, data, len); if (r) return -EFAULT; - mark_page_dirty_in_slot(memslot, gfn); + mark_page_dirty_in_slot(kvm, memslot, gfn); return 0; } @@ -2439,7 +2441,7 @@ int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, { struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); - return __kvm_write_guest_page(slot, gfn, data, offset, len); + return __kvm_write_guest_page(kvm, slot, gfn, data, offset, len); } EXPORT_SYMBOL_GPL(kvm_write_guest_page); @@ -2448,7 +2450,7 @@ int kvm_vcpu_write_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, { struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); - return __kvm_write_guest_page(slot, gfn, data, offset, len); + return __kvm_write_guest_page(vcpu->kvm, slot, gfn, data, offset, len); } EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_page); @@ -2567,7 +2569,7 @@ int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, r = __copy_to_user((void __user *)ghc->hva + offset, data, len); if (r) return -EFAULT; - mark_page_dirty_in_slot(ghc->memslot, gpa >> PAGE_SHIFT); + mark_page_dirty_in_slot(kvm, ghc->memslot, gpa >> PAGE_SHIFT); return 0; } @@ -2636,7 +2638,9 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len) } EXPORT_SYMBOL_GPL(kvm_clear_guest); -void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot, gfn_t gfn) +void mark_page_dirty_in_slot(struct kvm *kvm, + struct kvm_memory_slot *memslot, + gfn_t gfn) { if (memslot && memslot->dirty_bitmap) { unsigned long rel_gfn = gfn - memslot->base_gfn; @@ -2651,7 +2655,7 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn) struct kvm_memory_slot *memslot; memslot = gfn_to_memslot(kvm, gfn); - mark_page_dirty_in_slot(memslot, gfn); + mark_page_dirty_in_slot(kvm, memslot, gfn); } EXPORT_SYMBOL_GPL(mark_page_dirty); @@ -2660,7 +2664,7 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn) struct kvm_memory_slot *memslot; memslot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); - mark_page_dirty_in_slot(memslot, gfn); + mark_page_dirty_in_slot(vcpu->kvm, memslot, gfn); } EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty); -- GitLab From fb04a1eddb1a65b6588a021bdc132270d5ae48bb Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 30 Sep 2020 21:22:22 -0400 Subject: [PATCH 0181/1894] KVM: X86: Implement ring-based dirty memory tracking This patch is heavily based on previous work from Lei Cao and Paolo Bonzini . [1] KVM currently uses large bitmaps to track dirty memory. These bitmaps are copied to userspace when userspace queries KVM for its dirty page information. The use of bitmaps is mostly sufficient for live migration, as large parts of memory are be dirtied from one log-dirty pass to another. However, in a checkpointing system, the number of dirty pages is small and in fact it is often bounded---the VM is paused when it has dirtied a pre-defined number of pages. Traversing a large, sparsely populated bitmap to find set bits is time-consuming, as is copying the bitmap to user-space. A similar issue will be there for live migration when the guest memory is huge while the page dirty procedure is trivial. In that case for each dirty sync we need to pull the whole dirty bitmap to userspace and analyse every bit even if it's mostly zeros. The preferred data structure for above scenarios is a dense list of guest frame numbers (GFN). This patch series stores the dirty list in kernel memory that can be memory mapped into userspace to allow speedy harvesting. This patch enables dirty ring for X86 only. However it should be easily extended to other archs as well. [1] https://patchwork.kernel.org/patch/10471409/ Signed-off-by: Lei Cao Signed-off-by: Paolo Bonzini Signed-off-by: Peter Xu Message-Id: <20201001012222.5767-1-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 93 +++++++++++++++ arch/x86/include/asm/kvm_host.h | 3 + arch/x86/include/uapi/asm/kvm.h | 1 + arch/x86/kvm/Makefile | 3 +- arch/x86/kvm/mmu/mmu.c | 8 ++ arch/x86/kvm/mmu/tdp_mmu.c | 2 +- arch/x86/kvm/vmx/vmx.c | 7 ++ arch/x86/kvm/x86.c | 9 ++ include/linux/kvm_dirty_ring.h | 103 +++++++++++++++++ include/linux/kvm_host.h | 13 +++ include/trace/events/kvm.h | 63 +++++++++++ include/uapi/linux/kvm.h | 53 +++++++++ virt/kvm/dirty_ring.c | 194 ++++++++++++++++++++++++++++++++ virt/kvm/kvm_main.c | 113 ++++++++++++++++++- 14 files changed, 662 insertions(+), 3 deletions(-) create mode 100644 include/linux/kvm_dirty_ring.h create mode 100644 virt/kvm/dirty_ring.c diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 81d54fe76a2d5..e264ebc35e27a 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -262,6 +262,18 @@ The KVM_RUN ioctl (cf.) communicates with userspace via a shared memory region. This ioctl returns the size of that region. See the KVM_RUN documentation for details. +Besides the size of the KVM_RUN communication region, other areas of +the VCPU file descriptor can be mmap-ed, including: + +- if KVM_CAP_COALESCED_MMIO is available, a page at + KVM_COALESCED_MMIO_PAGE_OFFSET * PAGE_SIZE; for historical reasons, + this page is included in the result of KVM_GET_VCPU_MMAP_SIZE. + KVM_CAP_COALESCED_MMIO is not documented yet. + +- if KVM_CAP_DIRTY_LOG_RING is available, a number of pages at + KVM_DIRTY_LOG_PAGE_OFFSET * PAGE_SIZE. For more information on + KVM_CAP_DIRTY_LOG_RING, see section 8.3. + 4.6 KVM_SET_MEMORY_REGION ------------------------- @@ -6396,3 +6408,84 @@ When enabled, KVM will disable paravirtual features provided to the guest according to the bits in the KVM_CPUID_FEATURES CPUID leaf (0x40000001). Otherwise, a guest may use the paravirtual features regardless of what has actually been exposed through the CPUID leaf. + + +8.29 KVM_CAP_DIRTY_LOG_RING +--------------------------- + +:Architectures: x86 +:Parameters: args[0] - size of the dirty log ring + +KVM is capable of tracking dirty memory using ring buffers that are +mmaped into userspace; there is one dirty ring per vcpu. + +The dirty ring is available to userspace as an array of +``struct kvm_dirty_gfn``. Each dirty entry it's defined as:: + + struct kvm_dirty_gfn { + __u32 flags; + __u32 slot; /* as_id | slot_id */ + __u64 offset; + }; + +The following values are defined for the flags field to define the +current state of the entry:: + + #define KVM_DIRTY_GFN_F_DIRTY BIT(0) + #define KVM_DIRTY_GFN_F_RESET BIT(1) + #define KVM_DIRTY_GFN_F_MASK 0x3 + +Userspace should call KVM_ENABLE_CAP ioctl right after KVM_CREATE_VM +ioctl to enable this capability for the new guest and set the size of +the rings. Enabling the capability is only allowed before creating any +vCPU, and the size of the ring must be a power of two. The larger the +ring buffer, the less likely the ring is full and the VM is forced to +exit to userspace. The optimal size depends on the workload, but it is +recommended that it be at least 64 KiB (4096 entries). + +Just like for dirty page bitmaps, the buffer tracks writes to +all user memory regions for which the KVM_MEM_LOG_DIRTY_PAGES flag was +set in KVM_SET_USER_MEMORY_REGION. Once a memory region is registered +with the flag set, userspace can start harvesting dirty pages from the +ring buffer. + +An entry in the ring buffer can be unused (flag bits ``00``), +dirty (flag bits ``01``) or harvested (flag bits ``1X``). The +state machine for the entry is as follows:: + + dirtied harvested reset + 00 -----------> 01 -------------> 1X -------+ + ^ | + | | + +------------------------------------------+ + +To harvest the dirty pages, userspace accesses the mmaped ring buffer +to read the dirty GFNs. If the flags has the DIRTY bit set (at this stage +the RESET bit must be cleared), then it means this GFN is a dirty GFN. +The userspace should harvest this GFN and mark the flags from state +``01b`` to ``1Xb`` (bit 0 will be ignored by KVM, but bit 1 must be set +to show that this GFN is harvested and waiting for a reset), and move +on to the next GFN. The userspace should continue to do this until the +flags of a GFN have the DIRTY bit cleared, meaning that it has harvested +all the dirty GFNs that were available. + +It's not necessary for userspace to harvest the all dirty GFNs at once. +However it must collect the dirty GFNs in sequence, i.e., the userspace +program cannot skip one dirty GFN to collect the one next to it. + +After processing one or more entries in the ring buffer, userspace +calls the VM ioctl KVM_RESET_DIRTY_RINGS to notify the kernel about +it, so that the kernel will reprotect those collected GFNs. +Therefore, the ioctl must be called *before* reading the content of +the dirty pages. + +The dirty ring can get full. When it happens, the KVM_RUN of the +vcpu will return with exit reason KVM_EXIT_DIRTY_LOG_FULL. + +The dirty ring interface has a major difference comparing to the +KVM_GET_DIRTY_LOG interface in that, when reading the dirty ring from +userspace, it's still possible that the kernel has not yet flushed the +processor's dirty page buffers into the kernel buffer (with dirty bitmaps, the +flushing is done by the KVM_GET_DIRTY_LOG ioctl). To achieve that, one +needs to kick the vcpu out of KVM_RUN using a signal. The resulting +vmexit ensures that all dirty GFNs are flushed to the dirty rings. diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 69e94aa716e94..f002cdb13a0b1 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1232,6 +1232,7 @@ struct kvm_x86_ops { void (*enable_log_dirty_pt_masked)(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t offset, unsigned long mask); + int (*cpu_dirty_log_size)(void); /* pmu operations of sub-arch */ const struct kvm_pmu_ops *pmu_ops; @@ -1744,4 +1745,6 @@ static inline int kvm_cpu_get_apicid(int mps_cpu) #define GET_SMSTATE(type, buf, offset) \ (*(type *)((buf) + (offset) - 0x7e00)) +int kvm_cpu_dirty_log_size(void); + #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 89e5f3d1bba86..8e76d3701db3f 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -12,6 +12,7 @@ #define KVM_PIO_PAGE_OFFSET 1 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2 +#define KVM_DIRTY_LOG_PAGE_OFFSET 64 #define DE_VECTOR 0 #define DB_VECTOR 1 diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index b804444e16d47..4bd14ab013232 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -10,7 +10,8 @@ endif KVM := ../../../virt/kvm kvm-y += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \ - $(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o + $(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o \ + $(KVM)/dirty_ring.o kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o kvm-y += x86.o emulate.o i8259.o irq.o lapic.o \ diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 5bb1939b65d8d..12e5cfe0995e0 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1289,6 +1289,14 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask); } +int kvm_cpu_dirty_log_size(void) +{ + if (kvm_x86_ops.cpu_dirty_log_size) + return kvm_x86_ops.cpu_dirty_log_size(); + + return 0; +} + bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, struct kvm_memory_slot *slot, u64 gfn) { diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index ff28a5c6abd63..cffa51c6049ee 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -185,7 +185,7 @@ static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn, if ((!is_writable_pte(old_spte) || pfn_changed) && is_writable_pte(new_spte)) { slot = __gfn_to_memslot(__kvm_memslots(kvm, as_id), gfn); - mark_page_dirty_in_slot(slot, gfn); + mark_page_dirty_in_slot(kvm, slot, gfn); } } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 46b32aa43811d..2b6d538454a61 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7583,6 +7583,11 @@ static bool vmx_check_apicv_inhibit_reasons(ulong bit) return supported & BIT(bit); } +static int vmx_cpu_dirty_log_size(void) +{ + return enable_pml ? PML_ENTITY_NUM : 0; +} + static struct kvm_x86_ops vmx_x86_ops __initdata = { .hardware_unsetup = hardware_unsetup, @@ -7712,6 +7717,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .migrate_timers = vmx_migrate_timers, .msr_filter_changed = vmx_msr_filter_changed, + .cpu_dirty_log_size = vmx_cpu_dirty_log_size, }; static __init int hardware_setup(void) @@ -7829,6 +7835,7 @@ static __init int hardware_setup(void) vmx_x86_ops.slot_disable_log_dirty = NULL; vmx_x86_ops.flush_log_dirty = NULL; vmx_x86_ops.enable_log_dirty_pt_masked = NULL; + vmx_x86_ops.cpu_dirty_log_size = NULL; } if (!cpu_has_vmx_preemption_timer()) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b4ac726526f8a..6c704a597b7ca 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8754,6 +8754,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) bool req_immediate_exit = false; + /* Forbid vmenter if vcpu dirty ring is soft-full */ + if (unlikely(vcpu->kvm->dirty_ring_size && + kvm_dirty_ring_soft_full(&vcpu->dirty_ring))) { + vcpu->run->exit_reason = KVM_EXIT_DIRTY_RING_FULL; + trace_kvm_dirty_ring_exit(vcpu); + r = 0; + goto out; + } + if (kvm_request_pending(vcpu)) { if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) { if (unlikely(!kvm_x86_ops.nested_ops->get_nested_state_pages(vcpu))) { diff --git a/include/linux/kvm_dirty_ring.h b/include/linux/kvm_dirty_ring.h new file mode 100644 index 0000000000000..120e5e90fa1d6 --- /dev/null +++ b/include/linux/kvm_dirty_ring.h @@ -0,0 +1,103 @@ +#ifndef KVM_DIRTY_RING_H +#define KVM_DIRTY_RING_H + +#include + +/** + * kvm_dirty_ring: KVM internal dirty ring structure + * + * @dirty_index: free running counter that points to the next slot in + * dirty_ring->dirty_gfns, where a new dirty page should go + * @reset_index: free running counter that points to the next dirty page + * in dirty_ring->dirty_gfns for which dirty trap needs to + * be reenabled + * @size: size of the compact list, dirty_ring->dirty_gfns + * @soft_limit: when the number of dirty pages in the list reaches this + * limit, vcpu that owns this ring should exit to userspace + * to allow userspace to harvest all the dirty pages + * @dirty_gfns: the array to keep the dirty gfns + * @index: index of this dirty ring + */ +struct kvm_dirty_ring { + u32 dirty_index; + u32 reset_index; + u32 size; + u32 soft_limit; + struct kvm_dirty_gfn *dirty_gfns; + int index; +}; + +#if (KVM_DIRTY_LOG_PAGE_OFFSET == 0) +/* + * If KVM_DIRTY_LOG_PAGE_OFFSET not defined, kvm_dirty_ring.o should + * not be included as well, so define these nop functions for the arch. + */ +static inline u32 kvm_dirty_ring_get_rsvd_entries(void) +{ + return 0; +} + +static inline int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, + int index, u32 size) +{ + return 0; +} + +static inline struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm) +{ + return NULL; +} + +static inline int kvm_dirty_ring_reset(struct kvm *kvm, + struct kvm_dirty_ring *ring) +{ + return 0; +} + +static inline void kvm_dirty_ring_push(struct kvm_dirty_ring *ring, + u32 slot, u64 offset) +{ +} + +static inline struct page *kvm_dirty_ring_get_page(struct kvm_dirty_ring *ring, + u32 offset) +{ + return NULL; +} + +static inline void kvm_dirty_ring_free(struct kvm_dirty_ring *ring) +{ +} + +static inline bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring) +{ + return true; +} + +#else /* KVM_DIRTY_LOG_PAGE_OFFSET == 0 */ + +u32 kvm_dirty_ring_get_rsvd_entries(void); +int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, int index, u32 size); +struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm); + +/* + * called with kvm->slots_lock held, returns the number of + * processed pages. + */ +int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring); + +/* + * returns =0: successfully pushed + * <0: unable to push, need to wait + */ +void kvm_dirty_ring_push(struct kvm_dirty_ring *ring, u32 slot, u64 offset); + +/* for use in vm_operations_struct */ +struct page *kvm_dirty_ring_get_page(struct kvm_dirty_ring *ring, u32 offset); + +void kvm_dirty_ring_free(struct kvm_dirty_ring *ring); +bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring); + +#endif /* KVM_DIRTY_LOG_PAGE_OFFSET == 0 */ + +#endif /* KVM_DIRTY_RING_H */ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ca7c1459a8e30..864b156391c88 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -34,6 +34,7 @@ #include #include +#include #ifndef KVM_MAX_VCPU_ID #define KVM_MAX_VCPU_ID KVM_MAX_VCPUS @@ -319,6 +320,7 @@ struct kvm_vcpu { bool preempted; bool ready; struct kvm_vcpu_arch arch; + struct kvm_dirty_ring dirty_ring; }; static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu) @@ -505,6 +507,7 @@ struct kvm { struct srcu_struct irq_srcu; pid_t userspace_pid; unsigned int max_halt_poll_ns; + u32 dirty_ring_size; }; #define kvm_err(fmt, ...) \ @@ -1477,4 +1480,14 @@ static inline void kvm_handle_signal_exit(struct kvm_vcpu *vcpu) } #endif /* CONFIG_KVM_XFER_TO_GUEST_WORK */ +/* + * This defines how many reserved entries we want to keep before we + * kick the vcpu to the userspace to avoid dirty ring full. This + * value can be tuned to higher if e.g. PML is enabled on the host. + */ +#define KVM_DIRTY_RING_RSVD_ENTRIES 64 + +/* Max number of entries allowed for each kvm dirty ring */ +#define KVM_DIRTY_RING_MAX_ENTRIES 65536 + #endif diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 26cfb0fa8e7ee..49d7d0fe29f6f 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -399,6 +399,69 @@ TRACE_EVENT(kvm_halt_poll_ns, #define trace_kvm_halt_poll_ns_shrink(vcpu_id, new, old) \ trace_kvm_halt_poll_ns(false, vcpu_id, new, old) +TRACE_EVENT(kvm_dirty_ring_push, + TP_PROTO(struct kvm_dirty_ring *ring, u32 slot, u64 offset), + TP_ARGS(ring, slot, offset), + + TP_STRUCT__entry( + __field(int, index) + __field(u32, dirty_index) + __field(u32, reset_index) + __field(u32, slot) + __field(u64, offset) + ), + + TP_fast_assign( + __entry->index = ring->index; + __entry->dirty_index = ring->dirty_index; + __entry->reset_index = ring->reset_index; + __entry->slot = slot; + __entry->offset = offset; + ), + + TP_printk("ring %d: dirty 0x%x reset 0x%x " + "slot %u offset 0x%llx (used %u)", + __entry->index, __entry->dirty_index, + __entry->reset_index, __entry->slot, __entry->offset, + __entry->dirty_index - __entry->reset_index) +); + +TRACE_EVENT(kvm_dirty_ring_reset, + TP_PROTO(struct kvm_dirty_ring *ring), + TP_ARGS(ring), + + TP_STRUCT__entry( + __field(int, index) + __field(u32, dirty_index) + __field(u32, reset_index) + ), + + TP_fast_assign( + __entry->index = ring->index; + __entry->dirty_index = ring->dirty_index; + __entry->reset_index = ring->reset_index; + ), + + TP_printk("ring %d: dirty 0x%x reset 0x%x (used %u)", + __entry->index, __entry->dirty_index, __entry->reset_index, + __entry->dirty_index - __entry->reset_index) +); + +TRACE_EVENT(kvm_dirty_ring_exit, + TP_PROTO(struct kvm_vcpu *vcpu), + TP_ARGS(vcpu), + + TP_STRUCT__entry( + __field(int, vcpu_id) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; + ), + + TP_printk("vcpu %d", __entry->vcpu_id) +); + #endif /* _TRACE_KVM_MAIN_H */ /* This part must be outside protection */ diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 204afbe1240e3..886802b8ffba3 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -250,6 +250,7 @@ struct kvm_hyperv_exit { #define KVM_EXIT_ARM_NISV 28 #define KVM_EXIT_X86_RDMSR 29 #define KVM_EXIT_X86_WRMSR 30 +#define KVM_EXIT_DIRTY_RING_FULL 31 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -1054,6 +1055,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_X86_MSR_FILTER 189 #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190 #define KVM_CAP_SYS_HYPERV_CPUID 191 +#define KVM_CAP_DIRTY_LOG_RING 192 #ifdef KVM_CAP_IRQ_ROUTING @@ -1558,6 +1560,9 @@ struct kvm_pv_cmd { /* Available with KVM_CAP_X86_MSR_FILTER */ #define KVM_X86_SET_MSR_FILTER _IOW(KVMIO, 0xc6, struct kvm_msr_filter) +/* Available with KVM_CAP_DIRTY_LOG_RING */ +#define KVM_RESET_DIRTY_RINGS _IO(KVMIO, 0xc7) + /* Secure Encrypted Virtualization command */ enum sev_cmd_id { /* Guest initialization commands */ @@ -1711,4 +1716,52 @@ struct kvm_hyperv_eventfd { #define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (1 << 0) #define KVM_DIRTY_LOG_INITIALLY_SET (1 << 1) +/* + * Arch needs to define the macro after implementing the dirty ring + * feature. KVM_DIRTY_LOG_PAGE_OFFSET should be defined as the + * starting page offset of the dirty ring structures. + */ +#ifndef KVM_DIRTY_LOG_PAGE_OFFSET +#define KVM_DIRTY_LOG_PAGE_OFFSET 0 +#endif + +/* + * KVM dirty GFN flags, defined as: + * + * |---------------+---------------+--------------| + * | bit 1 (reset) | bit 0 (dirty) | Status | + * |---------------+---------------+--------------| + * | 0 | 0 | Invalid GFN | + * | 0 | 1 | Dirty GFN | + * | 1 | X | GFN to reset | + * |---------------+---------------+--------------| + * + * Lifecycle of a dirty GFN goes like: + * + * dirtied harvested reset + * 00 -----------> 01 -------------> 1X -------+ + * ^ | + * | | + * +------------------------------------------+ + * + * The userspace program is only responsible for the 01->1X state + * conversion after harvesting an entry. Also, it must not skip any + * dirty bits, so that dirty bits are always harvested in sequence. + */ +#define KVM_DIRTY_GFN_F_DIRTY BIT(0) +#define KVM_DIRTY_GFN_F_RESET BIT(1) +#define KVM_DIRTY_GFN_F_MASK 0x3 + +/* + * KVM dirty rings should be mapped at KVM_DIRTY_LOG_PAGE_OFFSET of + * per-vcpu mmaped regions as an array of struct kvm_dirty_gfn. The + * size of the gfn buffer is decided by the first argument when + * enabling KVM_CAP_DIRTY_LOG_RING. + */ +struct kvm_dirty_gfn { + __u32 flags; + __u32 slot; + __u64 offset; +}; + #endif /* __LINUX_KVM_H */ diff --git a/virt/kvm/dirty_ring.c b/virt/kvm/dirty_ring.c new file mode 100644 index 0000000000000..9d01299563ee6 --- /dev/null +++ b/virt/kvm/dirty_ring.c @@ -0,0 +1,194 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * KVM dirty ring implementation + * + * Copyright 2019 Red Hat, Inc. + */ +#include +#include +#include +#include +#include + +int __weak kvm_cpu_dirty_log_size(void) +{ + return 0; +} + +u32 kvm_dirty_ring_get_rsvd_entries(void) +{ + return KVM_DIRTY_RING_RSVD_ENTRIES + kvm_cpu_dirty_log_size(); +} + +static u32 kvm_dirty_ring_used(struct kvm_dirty_ring *ring) +{ + return READ_ONCE(ring->dirty_index) - READ_ONCE(ring->reset_index); +} + +bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring) +{ + return kvm_dirty_ring_used(ring) >= ring->soft_limit; +} + +static bool kvm_dirty_ring_full(struct kvm_dirty_ring *ring) +{ + return kvm_dirty_ring_used(ring) >= ring->size; +} + +struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm) +{ + struct kvm_vcpu *vcpu = kvm_get_running_vcpu(); + + WARN_ON_ONCE(vcpu->kvm != kvm); + + return &vcpu->dirty_ring; +} + +static void kvm_reset_dirty_gfn(struct kvm *kvm, u32 slot, u64 offset, u64 mask) +{ + struct kvm_memory_slot *memslot; + int as_id, id; + + as_id = slot >> 16; + id = (u16)slot; + + if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS) + return; + + memslot = id_to_memslot(__kvm_memslots(kvm, as_id), id); + + if (!memslot || (offset + __fls(mask)) >= memslot->npages) + return; + + spin_lock(&kvm->mmu_lock); + kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, offset, mask); + spin_unlock(&kvm->mmu_lock); +} + +int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, int index, u32 size) +{ + ring->dirty_gfns = vmalloc(size); + if (!ring->dirty_gfns) + return -ENOMEM; + memset(ring->dirty_gfns, 0, size); + + ring->size = size / sizeof(struct kvm_dirty_gfn); + ring->soft_limit = ring->size - kvm_dirty_ring_get_rsvd_entries(); + ring->dirty_index = 0; + ring->reset_index = 0; + ring->index = index; + + return 0; +} + +static inline void kvm_dirty_gfn_set_invalid(struct kvm_dirty_gfn *gfn) +{ + gfn->flags = 0; +} + +static inline void kvm_dirty_gfn_set_dirtied(struct kvm_dirty_gfn *gfn) +{ + gfn->flags = KVM_DIRTY_GFN_F_DIRTY; +} + +static inline bool kvm_dirty_gfn_invalid(struct kvm_dirty_gfn *gfn) +{ + return gfn->flags == 0; +} + +static inline bool kvm_dirty_gfn_harvested(struct kvm_dirty_gfn *gfn) +{ + return gfn->flags & KVM_DIRTY_GFN_F_RESET; +} + +int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring) +{ + u32 cur_slot, next_slot; + u64 cur_offset, next_offset; + unsigned long mask; + int count = 0; + struct kvm_dirty_gfn *entry; + bool first_round = true; + + /* This is only needed to make compilers happy */ + cur_slot = cur_offset = mask = 0; + + while (true) { + entry = &ring->dirty_gfns[ring->reset_index & (ring->size - 1)]; + + if (!kvm_dirty_gfn_harvested(entry)) + break; + + next_slot = READ_ONCE(entry->slot); + next_offset = READ_ONCE(entry->offset); + + /* Update the flags to reflect that this GFN is reset */ + kvm_dirty_gfn_set_invalid(entry); + + ring->reset_index++; + count++; + /* + * Try to coalesce the reset operations when the guest is + * scanning pages in the same slot. + */ + if (!first_round && next_slot == cur_slot) { + s64 delta = next_offset - cur_offset; + + if (delta >= 0 && delta < BITS_PER_LONG) { + mask |= 1ull << delta; + continue; + } + + /* Backwards visit, careful about overflows! */ + if (delta > -BITS_PER_LONG && delta < 0 && + (mask << -delta >> -delta) == mask) { + cur_offset = next_offset; + mask = (mask << -delta) | 1; + continue; + } + } + kvm_reset_dirty_gfn(kvm, cur_slot, cur_offset, mask); + cur_slot = next_slot; + cur_offset = next_offset; + mask = 1; + first_round = false; + } + + kvm_reset_dirty_gfn(kvm, cur_slot, cur_offset, mask); + + trace_kvm_dirty_ring_reset(ring); + + return count; +} + +void kvm_dirty_ring_push(struct kvm_dirty_ring *ring, u32 slot, u64 offset) +{ + struct kvm_dirty_gfn *entry; + + /* It should never get full */ + WARN_ON_ONCE(kvm_dirty_ring_full(ring)); + + entry = &ring->dirty_gfns[ring->dirty_index & (ring->size - 1)]; + + entry->slot = slot; + entry->offset = offset; + /* + * Make sure the data is filled in before we publish this to + * the userspace program. There's no paired kernel-side reader. + */ + smp_wmb(); + kvm_dirty_gfn_set_dirtied(entry); + ring->dirty_index++; + trace_kvm_dirty_ring_push(ring, slot, offset); +} + +struct page *kvm_dirty_ring_get_page(struct kvm_dirty_ring *ring, u32 offset) +{ + return vmalloc_to_page((void *)ring->dirty_gfns + offset * PAGE_SIZE); +} + +void kvm_dirty_ring_free(struct kvm_dirty_ring *ring) +{ + vfree(ring->dirty_gfns); + ring->dirty_gfns = NULL; +} diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 68598fdba2268..78ef414512bff 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -63,6 +63,8 @@ #define CREATE_TRACE_POINTS #include +#include + /* Worst case buffer size needed for holding an integer. */ #define ITOA_MAX_LEN 12 @@ -415,6 +417,7 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) void kvm_vcpu_destroy(struct kvm_vcpu *vcpu) { + kvm_dirty_ring_free(&vcpu->dirty_ring); kvm_arch_vcpu_destroy(vcpu); /* @@ -2644,8 +2647,13 @@ void mark_page_dirty_in_slot(struct kvm *kvm, { if (memslot && memslot->dirty_bitmap) { unsigned long rel_gfn = gfn - memslot->base_gfn; + u32 slot = (memslot->as_id << 16) | memslot->id; - set_bit_le(rel_gfn, memslot->dirty_bitmap); + if (kvm->dirty_ring_size) + kvm_dirty_ring_push(kvm_dirty_ring_get(kvm), + slot, rel_gfn); + else + set_bit_le(rel_gfn, memslot->dirty_bitmap); } } EXPORT_SYMBOL_GPL(mark_page_dirty_in_slot); @@ -3005,6 +3013,17 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode) } EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin); +static bool kvm_page_in_dirty_ring(struct kvm *kvm, unsigned long pgoff) +{ +#if KVM_DIRTY_LOG_PAGE_OFFSET > 0 + return (pgoff >= KVM_DIRTY_LOG_PAGE_OFFSET) && + (pgoff < KVM_DIRTY_LOG_PAGE_OFFSET + + kvm->dirty_ring_size / PAGE_SIZE); +#else + return false; +#endif +} + static vm_fault_t kvm_vcpu_fault(struct vm_fault *vmf) { struct kvm_vcpu *vcpu = vmf->vma->vm_file->private_data; @@ -3020,6 +3039,10 @@ static vm_fault_t kvm_vcpu_fault(struct vm_fault *vmf) else if (vmf->pgoff == KVM_COALESCED_MMIO_PAGE_OFFSET) page = virt_to_page(vcpu->kvm->coalesced_mmio_ring); #endif + else if (kvm_page_in_dirty_ring(vcpu->kvm, vmf->pgoff)) + page = kvm_dirty_ring_get_page( + &vcpu->dirty_ring, + vmf->pgoff - KVM_DIRTY_LOG_PAGE_OFFSET); else return kvm_arch_vcpu_fault(vcpu, vmf); get_page(page); @@ -3033,6 +3056,14 @@ static const struct vm_operations_struct kvm_vcpu_vm_ops = { static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma) { + struct kvm_vcpu *vcpu = file->private_data; + unsigned long pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + + if ((kvm_page_in_dirty_ring(vcpu->kvm, vma->vm_pgoff) || + kvm_page_in_dirty_ring(vcpu->kvm, vma->vm_pgoff + pages - 1)) && + ((vma->vm_flags & VM_EXEC) || !(vma->vm_flags & VM_SHARED))) + return -EINVAL; + vma->vm_ops = &kvm_vcpu_vm_ops; return 0; } @@ -3126,6 +3157,13 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) if (r) goto vcpu_free_run_page; + if (kvm->dirty_ring_size) { + r = kvm_dirty_ring_alloc(&vcpu->dirty_ring, + id, kvm->dirty_ring_size); + if (r) + goto arch_vcpu_destroy; + } + mutex_lock(&kvm->lock); if (kvm_get_vcpu_by_id(kvm, id)) { r = -EEXIST; @@ -3159,6 +3197,8 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) unlock_vcpu_destroy: mutex_unlock(&kvm->lock); + kvm_dirty_ring_free(&vcpu->dirty_ring); +arch_vcpu_destroy: kvm_arch_vcpu_destroy(vcpu); vcpu_free_run_page: free_page((unsigned long)vcpu->run); @@ -3631,12 +3671,78 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) #endif case KVM_CAP_NR_MEMSLOTS: return KVM_USER_MEM_SLOTS; + case KVM_CAP_DIRTY_LOG_RING: +#if KVM_DIRTY_LOG_PAGE_OFFSET > 0 + return KVM_DIRTY_RING_MAX_ENTRIES * sizeof(struct kvm_dirty_gfn); +#else + return 0; +#endif default: break; } return kvm_vm_ioctl_check_extension(kvm, arg); } +static int kvm_vm_ioctl_enable_dirty_log_ring(struct kvm *kvm, u32 size) +{ + int r; + + if (!KVM_DIRTY_LOG_PAGE_OFFSET) + return -EINVAL; + + /* the size should be power of 2 */ + if (!size || (size & (size - 1))) + return -EINVAL; + + /* Should be bigger to keep the reserved entries, or a page */ + if (size < kvm_dirty_ring_get_rsvd_entries() * + sizeof(struct kvm_dirty_gfn) || size < PAGE_SIZE) + return -EINVAL; + + if (size > KVM_DIRTY_RING_MAX_ENTRIES * + sizeof(struct kvm_dirty_gfn)) + return -E2BIG; + + /* We only allow it to set once */ + if (kvm->dirty_ring_size) + return -EINVAL; + + mutex_lock(&kvm->lock); + + if (kvm->created_vcpus) { + /* We don't allow to change this value after vcpu created */ + r = -EINVAL; + } else { + kvm->dirty_ring_size = size; + r = 0; + } + + mutex_unlock(&kvm->lock); + return r; +} + +static int kvm_vm_ioctl_reset_dirty_pages(struct kvm *kvm) +{ + int i; + struct kvm_vcpu *vcpu; + int cleared = 0; + + if (!kvm->dirty_ring_size) + return -EINVAL; + + mutex_lock(&kvm->slots_lock); + + kvm_for_each_vcpu(i, vcpu, kvm) + cleared += kvm_dirty_ring_reset(vcpu->kvm, &vcpu->dirty_ring); + + mutex_unlock(&kvm->slots_lock); + + if (cleared) + kvm_flush_remote_tlbs(kvm); + + return cleared; +} + int __attribute__((weak)) kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) { @@ -3667,6 +3773,8 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm, kvm->max_halt_poll_ns = cap->args[0]; return 0; } + case KVM_CAP_DIRTY_LOG_RING: + return kvm_vm_ioctl_enable_dirty_log_ring(kvm, cap->args[0]); default: return kvm_vm_ioctl_enable_cap(kvm, cap); } @@ -3851,6 +3959,9 @@ static long kvm_vm_ioctl(struct file *filp, case KVM_CHECK_EXTENSION: r = kvm_vm_ioctl_check_extension_generic(kvm, arg); break; + case KVM_RESET_DIRTY_RINGS: + r = kvm_vm_ioctl_reset_dirty_pages(kvm); + break; default: r = kvm_arch_vm_ioctl(filp, ioctl, arg); } -- GitLab From b2cc64c4f3829c25b618f23f472a493668d9cb80 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 30 Sep 2020 21:22:24 -0400 Subject: [PATCH 0182/1894] KVM: Make dirty ring exclusive to dirty bitmap log There's no good reason to use both the dirty bitmap logging and the new dirty ring buffer to track dirty bits. We should be able to even support both of them at the same time, but it could complicate things which could actually help little. Let's simply make it the rule before we enable dirty ring on any arch, that we don't allow these two interfaces to be used together. The big world switch would be KVM_CAP_DIRTY_LOG_RING capability enablement. That's where we'll switch from the default dirty logging way to the dirty ring way. As long as kvm->dirty_ring_size is setup correctly, we'll once and for all switch to the dirty ring buffer mode for the current virtual machine. Signed-off-by: Peter Xu Message-Id: <20201001012224.5818-1-peterx@redhat.com> [Change errno from EINVAL to ENXIO. - Paolo] Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 7 +++++++ virt/kvm/kvm_main.c | 12 ++++++++++++ 2 files changed, 19 insertions(+) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index e264ebc35e27a..70254eaa5229f 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -6489,3 +6489,10 @@ processor's dirty page buffers into the kernel buffer (with dirty bitmaps, the flushing is done by the KVM_GET_DIRTY_LOG ioctl). To achieve that, one needs to kick the vcpu out of KVM_RUN using a signal. The resulting vmexit ensures that all dirty GFNs are flushed to the dirty rings. + +NOTE: the capability KVM_CAP_DIRTY_LOG_RING and the corresponding +ioctl KVM_RESET_DIRTY_RINGS are mutual exclusive to the existing ioctls +KVM_GET_DIRTY_LOG and KVM_CLEAR_DIRTY_LOG. After enabling +KVM_CAP_DIRTY_LOG_RING with an acceptable dirty ring size, the virtual +machine will switch to ring-buffer dirty page tracking and further +KVM_GET_DIRTY_LOG or KVM_CLEAR_DIRTY_LOG ioctls will fail. diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 78ef414512bff..110aa5cc0c934 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1426,6 +1426,10 @@ int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log, unsigned long n; unsigned long any = 0; + /* Dirty ring tracking is exclusive to dirty log tracking */ + if (kvm->dirty_ring_size) + return -ENXIO; + *memslot = NULL; *is_dirty = 0; @@ -1487,6 +1491,10 @@ static int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log) unsigned long *dirty_bitmap_buffer; bool flush; + /* Dirty ring tracking is exclusive to dirty log tracking */ + if (kvm->dirty_ring_size) + return -ENXIO; + as_id = log->slot >> 16; id = (u16)log->slot; if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS) @@ -1595,6 +1603,10 @@ static int kvm_clear_dirty_log_protect(struct kvm *kvm, unsigned long *dirty_bitmap_buffer; bool flush; + /* Dirty ring tracking is exclusive to dirty log tracking */ + if (kvm->dirty_ring_size) + return -ENXIO; + as_id = log->slot >> 16; id = (u16)log->slot; if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS) -- GitLab From 044c59c409b7fd753707dc437890e94d2b0bd819 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 30 Sep 2020 21:22:26 -0400 Subject: [PATCH 0183/1894] KVM: Don't allocate dirty bitmap if dirty ring is enabled Because kvm dirty rings and kvm dirty log is used in an exclusive way, Let's avoid creating the dirty_bitmap when kvm dirty ring is enabled. At the meantime, since the dirty_bitmap will be conditionally created now, we can't use it as a sign of "whether this memory slot enabled dirty tracking". Change users like that to check against the kvm memory slot flags. Note that there still can be chances where the kvm memory slot got its dirty_bitmap allocated, _if_ the memory slots are created before enabling of the dirty rings and at the same time with the dirty tracking capability enabled, they'll still with the dirty_bitmap. However it should not hurt much (e.g., the bitmaps will always be freed if they are there), and the real users normally won't trigger this because dirty bit tracking flag should in most cases only be applied to kvm slots only before migration starts, that should be far latter than kvm initializes (VM starts). Signed-off-by: Peter Xu Message-Id: <20201001012226.5868-1-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 2 +- include/linux/kvm_host.h | 5 +++++ virt/kvm/kvm_main.c | 4 ++-- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 12e5cfe0995e0..5dfe0ede0e812 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -820,7 +820,7 @@ gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn, slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); if (!slot || slot->flags & KVM_MEMSLOT_INVALID) return NULL; - if (no_dirty_log && slot->dirty_bitmap) + if (no_dirty_log && kvm_slot_dirty_track_enabled(slot)) return NULL; return slot; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 864b156391c88..f3b1013fb22cf 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -351,6 +351,11 @@ struct kvm_memory_slot { u16 as_id; }; +static inline bool kvm_slot_dirty_track_enabled(struct kvm_memory_slot *slot) +{ + return slot->flags & KVM_MEM_LOG_DIRTY_PAGES; +} + static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memslot) { return ALIGN(memslot->npages, BITS_PER_LONG) / 8; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 110aa5cc0c934..3abcb2ce5b7db 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1365,7 +1365,7 @@ int __kvm_set_memory_region(struct kvm *kvm, /* Allocate/free page dirty bitmap as needed */ if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES)) new.dirty_bitmap = NULL; - else if (!new.dirty_bitmap) { + else if (!new.dirty_bitmap && !kvm->dirty_ring_size) { r = kvm_alloc_dirty_bitmap(&new); if (r) return r; @@ -2657,7 +2657,7 @@ void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot, gfn_t gfn) { - if (memslot && memslot->dirty_bitmap) { + if (memslot && kvm_slot_dirty_track_enabled(memslot)) { unsigned long rel_gfn = gfn - memslot->base_gfn; u32 slot = (memslot->as_id << 16) | memslot->id; -- GitLab From 60f644fb519831edff38c79755f7970c475e2ece Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 30 Sep 2020 21:22:35 -0400 Subject: [PATCH 0184/1894] KVM: selftests: Introduce after_vcpu_run hook for dirty log test Provide a hook for the checks after vcpu_run() completes. Preparation for the dirty ring test because we'll need to take care of another exit reason. Reviewed-by: Andrew Jones Signed-off-by: Peter Xu Message-Id: <20201001012235.6063-1-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/dirty_log_test.c | 36 +++++++++++++------- 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 54da9cc20db48..9215b26af10c5 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -178,6 +178,15 @@ static void clear_log_collect_dirty_pages(struct kvm_vm *vm, int slot, kvm_vm_clear_dirty_log(vm, slot, bitmap, 0, num_pages); } +static void default_after_vcpu_run(struct kvm_vm *vm) +{ + struct kvm_run *run = vcpu_state(vm, VCPU_ID); + + TEST_ASSERT(get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC, + "Invalid guest sync status: exit_reason=%s\n", + exit_reason_str(run->exit_reason)); +} + struct log_mode { const char *name; /* Return true if this mode is supported, otherwise false */ @@ -187,16 +196,20 @@ struct log_mode { /* Hook to collect the dirty pages into the bitmap provided */ void (*collect_dirty_pages) (struct kvm_vm *vm, int slot, void *bitmap, uint32_t num_pages); + /* Hook to call when after each vcpu run */ + void (*after_vcpu_run)(struct kvm_vm *vm); } log_modes[LOG_MODE_NUM] = { { .name = "dirty-log", .collect_dirty_pages = dirty_log_collect_dirty_pages, + .after_vcpu_run = default_after_vcpu_run, }, { .name = "clear-log", .supported = clear_log_supported, .create_vm_done = clear_log_create_vm_done, .collect_dirty_pages = clear_log_collect_dirty_pages, + .after_vcpu_run = default_after_vcpu_run, }, }; @@ -247,6 +260,14 @@ static void log_mode_collect_dirty_pages(struct kvm_vm *vm, int slot, mode->collect_dirty_pages(vm, slot, bitmap, num_pages); } +static void log_mode_after_vcpu_run(struct kvm_vm *vm) +{ + struct log_mode *mode = &log_modes[host_log_mode]; + + if (mode->after_vcpu_run) + mode->after_vcpu_run(vm); +} + static void generate_random_array(uint64_t *guest_array, uint64_t size) { uint64_t i; @@ -261,25 +282,16 @@ static void *vcpu_worker(void *data) struct kvm_vm *vm = data; uint64_t *guest_array; uint64_t pages_count = 0; - struct kvm_run *run; - - run = vcpu_state(vm, VCPU_ID); guest_array = addr_gva2hva(vm, (vm_vaddr_t)random_array); - generate_random_array(guest_array, TEST_PAGES_PER_LOOP); while (!READ_ONCE(host_quit)) { + generate_random_array(guest_array, TEST_PAGES_PER_LOOP); + pages_count += TEST_PAGES_PER_LOOP; /* Let the guest dirty the random pages */ ret = _vcpu_run(vm, VCPU_ID); TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); - if (get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC) { - pages_count += TEST_PAGES_PER_LOOP; - generate_random_array(guest_array, TEST_PAGES_PER_LOOP); - } else { - TEST_FAIL("Invalid guest sync status: " - "exit_reason=%s\n", - exit_reason_str(run->exit_reason)); - } + log_mode_after_vcpu_run(vm); } pr_info("Dirtied %"PRIu64" pages\n", pages_count); -- GitLab From 84292e565951cecfe2718e43905a6103c9e8ac29 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 30 Sep 2020 21:22:37 -0400 Subject: [PATCH 0185/1894] KVM: selftests: Add dirty ring buffer test Add the initial dirty ring buffer test. The current test implements the userspace dirty ring collection, by only reaping the dirty ring when the ring is full. So it's still running synchronously like this: vcpu main thread 1. vcpu dirties pages 2. vcpu gets dirty ring full (userspace exit) 3. main thread waits until full (so hardware buffers flushed) 4. main thread collects 5. main thread continues vcpu 6. vcpu continues, goes back to 1 We can't directly collects dirty bits during vcpu execution because otherwise we can't guarantee the hardware dirty bits were flushed when we collect and we're very strict on the dirty bits so otherwise we can fail the future verify procedure. A follow up patch will make this test to support async just like the existing dirty log test, by adding a vcpu kick mechanism. Signed-off-by: Peter Xu Message-Id: <20201001012237.6111-1-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/dirty_log_test.c | 254 +++++++++++++++++- .../testing/selftests/kvm/include/kvm_util.h | 3 + tools/testing/selftests/kvm/lib/kvm_util.c | 72 ++++- .../selftests/kvm/lib/kvm_util_internal.h | 4 + 4 files changed, 320 insertions(+), 13 deletions(-) diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 9215b26af10c5..a7da4d9471e1a 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -12,8 +12,13 @@ #include #include #include +#include +#include +#include +#include #include #include +#include #include "test_util.h" #include "kvm_util.h" @@ -57,6 +62,8 @@ # define test_and_clear_bit_le test_and_clear_bit #endif +#define TEST_DIRTY_RING_COUNT 1024 + /* * Guest/Host shared variables. Ensure addr_gva2hva() and/or * sync_global_to/from_guest() are used when accessing from @@ -128,6 +135,25 @@ static uint64_t host_dirty_count; static uint64_t host_clear_count; static uint64_t host_track_next_count; +/* Whether dirty ring reset is requested, or finished */ +static sem_t dirty_ring_vcpu_stop; +static sem_t dirty_ring_vcpu_cont; +/* + * This is only used for verifying the dirty pages. Dirty ring has a very + * tricky case when the ring just got full, kvm will do userspace exit due to + * ring full. When that happens, the very last PFN is set but actually the + * data is not changed (the guest WRITE is not really applied yet), because + * we found that the dirty ring is full, refused to continue the vcpu, and + * recorded the dirty gfn with the old contents. + * + * For this specific case, it's safe to skip checking this pfn for this + * bit, because it's a redundant bit, and when the write happens later the bit + * will be set again. We use this variable to always keep track of the latest + * dirty gfn we've collected, so that if a mismatch of data found later in the + * verifying process, we let it pass. + */ +static uint64_t dirty_ring_last_page; + enum log_mode_t { /* Only use KVM_GET_DIRTY_LOG for logging */ LOG_MODE_DIRTY_LOG = 0, @@ -135,6 +161,9 @@ enum log_mode_t { /* Use both KVM_[GET|CLEAR]_DIRTY_LOG for logging */ LOG_MODE_CLEAR_LOG = 1, + /* Use dirty ring for logging */ + LOG_MODE_DIRTY_RING = 2, + LOG_MODE_NUM, /* Run all supported modes */ @@ -145,6 +174,20 @@ enum log_mode_t { static enum log_mode_t host_log_mode_option = LOG_MODE_ALL; /* Logging mode for current run */ static enum log_mode_t host_log_mode; +static pthread_t vcpu_thread; + +/* + * In our test we do signal tricks, let's use a better version of + * sem_wait to avoid signal interrupts + */ +static void sem_wait_until(sem_t *sem) +{ + int ret; + + do + ret = sem_wait(sem); + while (ret == -1 && errno == EINTR); +} static bool clear_log_supported(void) { @@ -178,15 +221,131 @@ static void clear_log_collect_dirty_pages(struct kvm_vm *vm, int slot, kvm_vm_clear_dirty_log(vm, slot, bitmap, 0, num_pages); } -static void default_after_vcpu_run(struct kvm_vm *vm) +static void default_after_vcpu_run(struct kvm_vm *vm, int ret, int err) { struct kvm_run *run = vcpu_state(vm, VCPU_ID); + TEST_ASSERT(ret == 0 || (ret == -1 && err == EINTR), + "vcpu run failed: errno=%d", err); + TEST_ASSERT(get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC, "Invalid guest sync status: exit_reason=%s\n", exit_reason_str(run->exit_reason)); } +static bool dirty_ring_supported(void) +{ + return kvm_check_cap(KVM_CAP_DIRTY_LOG_RING); +} + +static void dirty_ring_create_vm_done(struct kvm_vm *vm) +{ + /* + * Switch to dirty ring mode after VM creation but before any + * of the vcpu creation. + */ + vm_enable_dirty_ring(vm, TEST_DIRTY_RING_COUNT * + sizeof(struct kvm_dirty_gfn)); +} + +static inline bool dirty_gfn_is_dirtied(struct kvm_dirty_gfn *gfn) +{ + return gfn->flags == KVM_DIRTY_GFN_F_DIRTY; +} + +static inline void dirty_gfn_set_collected(struct kvm_dirty_gfn *gfn) +{ + gfn->flags = KVM_DIRTY_GFN_F_RESET; +} + +static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns, + int slot, void *bitmap, + uint32_t num_pages, uint32_t *fetch_index) +{ + struct kvm_dirty_gfn *cur; + uint32_t count = 0; + + while (true) { + cur = &dirty_gfns[*fetch_index % TEST_DIRTY_RING_COUNT]; + if (!dirty_gfn_is_dirtied(cur)) + break; + TEST_ASSERT(cur->slot == slot, "Slot number didn't match: " + "%u != %u", cur->slot, slot); + TEST_ASSERT(cur->offset < num_pages, "Offset overflow: " + "0x%llx >= 0x%x", cur->offset, num_pages); + //pr_info("fetch 0x%x page %llu\n", *fetch_index, cur->offset); + set_bit_le(cur->offset, bitmap); + dirty_ring_last_page = cur->offset; + dirty_gfn_set_collected(cur); + (*fetch_index)++; + count++; + } + + return count; +} + +static void dirty_ring_wait_vcpu(void) +{ + sem_wait_until(&dirty_ring_vcpu_stop); +} + +static void dirty_ring_continue_vcpu(void) +{ + pr_info("Notifying vcpu to continue\n"); + sem_post(&dirty_ring_vcpu_cont); +} + +static void dirty_ring_collect_dirty_pages(struct kvm_vm *vm, int slot, + void *bitmap, uint32_t num_pages) +{ + /* We only have one vcpu */ + static uint32_t fetch_index = 0; + uint32_t count = 0, cleared; + + dirty_ring_wait_vcpu(); + + /* Only have one vcpu */ + count = dirty_ring_collect_one(vcpu_map_dirty_ring(vm, VCPU_ID), + slot, bitmap, num_pages, &fetch_index); + + cleared = kvm_vm_reset_dirty_ring(vm); + + /* Cleared pages should be the same as collected */ + TEST_ASSERT(cleared == count, "Reset dirty pages (%u) mismatch " + "with collected (%u)", cleared, count); + + dirty_ring_continue_vcpu(); + + pr_info("Iteration %ld collected %u pages\n", iteration, count); +} + +static void dirty_ring_after_vcpu_run(struct kvm_vm *vm, int ret, int err) +{ + struct kvm_run *run = vcpu_state(vm, VCPU_ID); + + /* A ucall-sync or ring-full event is allowed */ + if (get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC) { + /* We should allow this to continue */ + ; + } else if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL) { + /* Update the flag first before pause */ + sem_post(&dirty_ring_vcpu_stop); + pr_info("vcpu stops because dirty ring is full...\n"); + sem_wait_until(&dirty_ring_vcpu_cont); + pr_info("vcpu continues now.\n"); + } else { + TEST_ASSERT(false, "Invalid guest sync status: " + "exit_reason=%s\n", + exit_reason_str(run->exit_reason)); + } +} + +static void dirty_ring_before_vcpu_join(void) +{ + /* Kick another round of vcpu just to make sure it will quit */ + sem_post(&dirty_ring_vcpu_cont); +} + struct log_mode { const char *name; /* Return true if this mode is supported, otherwise false */ @@ -197,7 +356,8 @@ struct log_mode { void (*collect_dirty_pages) (struct kvm_vm *vm, int slot, void *bitmap, uint32_t num_pages); /* Hook to call when after each vcpu run */ - void (*after_vcpu_run)(struct kvm_vm *vm); + void (*after_vcpu_run)(struct kvm_vm *vm, int ret, int err); + void (*before_vcpu_join) (void); } log_modes[LOG_MODE_NUM] = { { .name = "dirty-log", @@ -211,6 +371,14 @@ struct log_mode { .collect_dirty_pages = clear_log_collect_dirty_pages, .after_vcpu_run = default_after_vcpu_run, }, + { + .name = "dirty-ring", + .supported = dirty_ring_supported, + .create_vm_done = dirty_ring_create_vm_done, + .collect_dirty_pages = dirty_ring_collect_dirty_pages, + .before_vcpu_join = dirty_ring_before_vcpu_join, + .after_vcpu_run = dirty_ring_after_vcpu_run, + }, }; /* @@ -260,12 +428,20 @@ static void log_mode_collect_dirty_pages(struct kvm_vm *vm, int slot, mode->collect_dirty_pages(vm, slot, bitmap, num_pages); } -static void log_mode_after_vcpu_run(struct kvm_vm *vm) +static void log_mode_after_vcpu_run(struct kvm_vm *vm, int ret, int err) { struct log_mode *mode = &log_modes[host_log_mode]; if (mode->after_vcpu_run) - mode->after_vcpu_run(vm); + mode->after_vcpu_run(vm, ret, err); +} + +static void log_mode_before_vcpu_join(void) +{ + struct log_mode *mode = &log_modes[host_log_mode]; + + if (mode->before_vcpu_join) + mode->before_vcpu_join(); } static void generate_random_array(uint64_t *guest_array, uint64_t size) @@ -278,20 +454,22 @@ static void generate_random_array(uint64_t *guest_array, uint64_t size) static void *vcpu_worker(void *data) { - int ret; + int ret, vcpu_fd; struct kvm_vm *vm = data; uint64_t *guest_array; uint64_t pages_count = 0; + vcpu_fd = vcpu_get_fd(vm, VCPU_ID); + guest_array = addr_gva2hva(vm, (vm_vaddr_t)random_array); while (!READ_ONCE(host_quit)) { + /* Clear any existing kick signals */ generate_random_array(guest_array, TEST_PAGES_PER_LOOP); pages_count += TEST_PAGES_PER_LOOP; /* Let the guest dirty the random pages */ - ret = _vcpu_run(vm, VCPU_ID); - TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); - log_mode_after_vcpu_run(vm); + ret = ioctl(vcpu_fd, KVM_RUN, NULL); + log_mode_after_vcpu_run(vm, ret, errno); } pr_info("Dirtied %"PRIu64" pages\n", pages_count); @@ -304,6 +482,7 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap) uint64_t step = vm_num_host_pages(mode, 1); uint64_t page; uint64_t *value_ptr; + uint64_t min_iter = 0; for (page = 0; page < host_num_pages; page += step) { value_ptr = host_test_mem + page * host_page_size; @@ -318,14 +497,64 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap) } if (test_and_clear_bit_le(page, bmap)) { + bool matched; + host_dirty_count++; + /* * If the bit is set, the value written onto * the corresponding page should be either the * previous iteration number or the current one. */ - TEST_ASSERT(*value_ptr == iteration || - *value_ptr == iteration - 1, + matched = (*value_ptr == iteration || + *value_ptr == iteration - 1); + + if (host_log_mode == LOG_MODE_DIRTY_RING && !matched) { + if (*value_ptr == iteration - 2 && min_iter <= iteration - 2) { + /* + * Short answer: this case is special + * only for dirty ring test where the + * page is the last page before a kvm + * dirty ring full in iteration N-2. + * + * Long answer: Assuming ring size R, + * one possible condition is: + * + * main thr vcpu thr + * -------- -------- + * iter=1 + * write 1 to page 0~(R-1) + * full, vmexit + * collect 0~(R-1) + * kick vcpu + * write 1 to (R-1)~(2R-2) + * full, vmexit + * iter=2 + * collect (R-1)~(2R-2) + * kick vcpu + * write 1 to (2R-2) + * (NOTE!!! "1" cached in cpu reg) + * write 2 to (2R-1)~(3R-3) + * full, vmexit + * iter=3 + * collect (2R-2)~(3R-3) + * (here if we read value on page + * "2R-2" is 1, while iter=3!!!) + * + * This however can only happen once per iteration. + */ + min_iter = iteration - 1; + continue; + } else if (page == dirty_ring_last_page) { + /* + * Please refer to comments in + * dirty_ring_last_page. + */ + continue; + } + } + + TEST_ASSERT(matched, "Set page %"PRIu64" value %"PRIu64 " incorrect (iteration=%"PRIu64")", page, *value_ptr, iteration); @@ -390,7 +619,6 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid, static void run_test(enum vm_guest_mode mode, unsigned long iterations, unsigned long interval, uint64_t phys_offset) { - pthread_t vcpu_thread; struct kvm_vm *vm; unsigned long *bmap; @@ -488,6 +716,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, /* Tell the vcpu thread to quit */ host_quit = true; + log_mode_before_vcpu_join(); pthread_join(vcpu_thread, NULL); pr_info("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), " @@ -548,6 +777,9 @@ int main(int argc, char *argv[]) unsigned int mode; int opt, i, j; + sem_init(&dirty_ring_vcpu_stop, 0, 0); + sem_init(&dirty_ring_vcpu_cont, 0, 0); + #ifdef __x86_64__ guest_mode_init(VM_MODE_PXXV48_4K, true, true); #endif diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index b2c1364c0402f..710009cc17fd6 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -74,6 +74,7 @@ void kvm_vm_release(struct kvm_vm *vmp); void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log); void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log, uint64_t first_page, uint32_t num_pages); +uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm); int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva, size_t len); @@ -148,6 +149,7 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva); struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid); void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid); int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid); +int vcpu_get_fd(struct kvm_vm *vm, uint32_t vcpuid); void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid); void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_guest_debug *debug); @@ -201,6 +203,7 @@ void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid, int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_nested_state *state, bool ignore_error); #endif +void *vcpu_map_dirty_ring(struct kvm_vm *vm, uint32_t vcpuid); const char *exit_reason_str(unsigned int exit_reason); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index ef2114e1b7ad7..a04302666b025 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -114,6 +114,16 @@ int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id, return r; } +void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size) +{ + struct kvm_enable_cap cap = { 0 }; + + cap.cap = KVM_CAP_DIRTY_LOG_RING; + cap.args[0] = ring_size; + vm_enable_cap(vm, &cap); + vm->dirty_ring_size = ring_size; +} + static void vm_open(struct kvm_vm *vm, int perm) { vm->kvm_fd = open(KVM_DEV_PATH, perm); @@ -328,6 +338,11 @@ void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log, __func__, strerror(-ret)); } +uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm) +{ + return ioctl(vm->fd, KVM_RESET_DIRTY_RINGS); +} + /* * Userspace Memory Region Find * @@ -432,10 +447,17 @@ struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid) * * Removes a vCPU from a VM and frees its resources. */ -static void vm_vcpu_rm(struct vcpu *vcpu) +static void vm_vcpu_rm(struct kvm_vm *vm, struct vcpu *vcpu) { int ret; + if (vcpu->dirty_gfns) { + ret = munmap(vcpu->dirty_gfns, vm->dirty_ring_size); + TEST_ASSERT(ret == 0, "munmap of VCPU dirty ring failed, " + "rc: %i errno: %i", ret, errno); + vcpu->dirty_gfns = NULL; + } + ret = munmap(vcpu->state, sizeof(*vcpu->state)); TEST_ASSERT(ret == 0, "munmap of VCPU fd failed, rc: %i " "errno: %i", ret, errno); @@ -453,7 +475,7 @@ void kvm_vm_release(struct kvm_vm *vmp) int ret; list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list) - vm_vcpu_rm(vcpu); + vm_vcpu_rm(vmp, vcpu); ret = close(vmp->fd); TEST_ASSERT(ret == 0, "Close of vm fd failed,\n" @@ -1233,6 +1255,15 @@ int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid) return rc; } +int vcpu_get_fd(struct kvm_vm *vm, uint32_t vcpuid) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); + + return vcpu->fd; +} + void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid) { struct vcpu *vcpu = vcpu_find(vm, vcpuid); @@ -1561,6 +1592,42 @@ int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, return ret; } +void *vcpu_map_dirty_ring(struct kvm_vm *vm, uint32_t vcpuid) +{ + struct vcpu *vcpu; + uint32_t size = vm->dirty_ring_size; + + TEST_ASSERT(size > 0, "Should enable dirty ring first"); + + vcpu = vcpu_find(vm, vcpuid); + + TEST_ASSERT(vcpu, "Cannot find vcpu %u", vcpuid); + + if (!vcpu->dirty_gfns) { + void *addr; + + addr = mmap(NULL, size, PROT_READ, + MAP_PRIVATE, vcpu->fd, + vm->page_size * KVM_DIRTY_LOG_PAGE_OFFSET); + TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped private"); + + addr = mmap(NULL, size, PROT_READ | PROT_EXEC, + MAP_PRIVATE, vcpu->fd, + vm->page_size * KVM_DIRTY_LOG_PAGE_OFFSET); + TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped exec"); + + addr = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_SHARED, vcpu->fd, + vm->page_size * KVM_DIRTY_LOG_PAGE_OFFSET); + TEST_ASSERT(addr != MAP_FAILED, "Dirty ring map failed"); + + vcpu->dirty_gfns = addr; + vcpu->dirty_gfns_count = size / sizeof(struct kvm_dirty_gfn); + } + + return vcpu->dirty_gfns; +} + /* * VM Ioctl * @@ -1680,6 +1747,7 @@ static struct exit_reason { {KVM_EXIT_INTERNAL_ERROR, "INTERNAL_ERROR"}, {KVM_EXIT_OSI, "OSI"}, {KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"}, + {KVM_EXIT_DIRTY_RING_FULL, "DIRTY_RING_FULL"}, #ifdef KVM_EXIT_MEMORY_NOT_PRESENT {KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"}, #endif diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h index f07d383d03a11..34465dc562d8c 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h +++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h @@ -28,6 +28,9 @@ struct vcpu { uint32_t id; int fd; struct kvm_run *state; + struct kvm_dirty_gfn *dirty_gfns; + uint32_t fetch_index; + uint32_t dirty_gfns_count; }; struct kvm_vm { @@ -52,6 +55,7 @@ struct kvm_vm { vm_vaddr_t tss; vm_vaddr_t idt; vm_vaddr_t handlers; + uint32_t dirty_ring_size; }; struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid); -- GitLab From 019d321a68ea07efcfcbc308443251644ff3e71c Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 30 Sep 2020 21:22:39 -0400 Subject: [PATCH 0186/1894] KVM: selftests: Run dirty ring test asynchronously Previously the dirty ring test was working in synchronous way, because only with a vmexit (with that it was the ring full event) we'll know the hardware dirty bits will be flushed to the dirty ring. With this patch we first introduce a vcpu kick mechanism using SIGUSR1, which guarantees a vmexit and also therefore the flushing of hardware dirty bits. Once this is in place, we can keep the vcpu dirty work asynchronous of the whole collection procedure now. Still, we need to be very careful that when reaching the ring buffer soft limit (KVM_EXIT_DIRTY_RING_FULL) we must collect the dirty bits before continuing the vcpu. Further increase the dirty ring size to current maximum to make sure we torture more on the no-ring-full case, which should be the major scenario when the hypervisors like QEMU would like to use this feature. Reviewed-by: Andrew Jones Signed-off-by: Peter Xu Message-Id: <20201001012239.6159-1-peterx@redhat.com> [Use KVM_SET_SIGNAL_MASK+sigwait instead of a signal handler. - Paolo] Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/dirty_log_test.c | 64 ++++++++++++++++++-- 1 file changed, 60 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index a7da4d9471e1a..85c5d07dd243f 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -62,7 +62,9 @@ # define test_and_clear_bit_le test_and_clear_bit #endif -#define TEST_DIRTY_RING_COUNT 1024 +#define TEST_DIRTY_RING_COUNT 65536 + +#define SIG_IPI SIGUSR1 /* * Guest/Host shared variables. Ensure addr_gva2hva() and/or @@ -138,6 +140,12 @@ static uint64_t host_track_next_count; /* Whether dirty ring reset is requested, or finished */ static sem_t dirty_ring_vcpu_stop; static sem_t dirty_ring_vcpu_cont; +/* + * This is updated by the vcpu thread to tell the host whether it's a + * ring-full event. It should only be read until a sem_wait() of + * dirty_ring_vcpu_stop and before vcpu continues to run. + */ +static bool dirty_ring_vcpu_ring_full; /* * This is only used for verifying the dirty pages. Dirty ring has a very * tricky case when the ring just got full, kvm will do userspace exit due to @@ -176,6 +184,11 @@ static enum log_mode_t host_log_mode_option = LOG_MODE_ALL; static enum log_mode_t host_log_mode; static pthread_t vcpu_thread; +static void vcpu_kick(void) +{ + pthread_kill(vcpu_thread, SIG_IPI); +} + /* * In our test we do signal tricks, let's use a better version of * sem_wait to avoid signal interrupts @@ -286,6 +299,8 @@ static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns, static void dirty_ring_wait_vcpu(void) { + /* This makes sure that hardware PML cache flushed */ + vcpu_kick(); sem_wait_until(&dirty_ring_vcpu_stop); } @@ -301,9 +316,19 @@ static void dirty_ring_collect_dirty_pages(struct kvm_vm *vm, int slot, /* We only have one vcpu */ static uint32_t fetch_index = 0; uint32_t count = 0, cleared; + bool continued_vcpu = false; dirty_ring_wait_vcpu(); + if (!dirty_ring_vcpu_ring_full) { + /* + * This is not a ring-full event, it's safe to allow + * vcpu to continue + */ + dirty_ring_continue_vcpu(); + continued_vcpu = true; + } + /* Only have one vcpu */ count = dirty_ring_collect_one(vcpu_map_dirty_ring(vm, VCPU_ID), slot, bitmap, num_pages, &fetch_index); @@ -314,7 +339,11 @@ static void dirty_ring_collect_dirty_pages(struct kvm_vm *vm, int slot, TEST_ASSERT(cleared == count, "Reset dirty pages (%u) mismatch " "with collected (%u)", cleared, count); - dirty_ring_continue_vcpu(); + if (!continued_vcpu) { + TEST_ASSERT(dirty_ring_vcpu_ring_full, + "Didn't continue vcpu even without ring full"); + dirty_ring_continue_vcpu(); + } pr_info("Iteration %ld collected %u pages\n", iteration, count); } @@ -327,10 +356,15 @@ static void dirty_ring_after_vcpu_run(struct kvm_vm *vm, int ret, int err) if (get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC) { /* We should allow this to continue */ ; - } else if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL) { + } else if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL || + (ret == -1 && err == EINTR)) { /* Update the flag first before pause */ + WRITE_ONCE(dirty_ring_vcpu_ring_full, + run->exit_reason == KVM_EXIT_DIRTY_RING_FULL); sem_post(&dirty_ring_vcpu_stop); - pr_info("vcpu stops because dirty ring is full...\n"); + pr_info("vcpu stops because %s...\n", + dirty_ring_vcpu_ring_full ? + "dirty ring is full" : "vcpu is kicked out"); sem_wait_until(&dirty_ring_vcpu_cont); pr_info("vcpu continues now.\n"); } else { @@ -458,9 +492,26 @@ static void *vcpu_worker(void *data) struct kvm_vm *vm = data; uint64_t *guest_array; uint64_t pages_count = 0; + struct kvm_signal_mask *sigmask = alloca(offsetof(struct kvm_signal_mask, sigset) + + sizeof(sigset_t)); + sigset_t *sigset = (sigset_t *) &sigmask->sigset; vcpu_fd = vcpu_get_fd(vm, VCPU_ID); + /* + * SIG_IPI is unblocked atomically while in KVM_RUN. It causes the + * ioctl to return with -EINTR, but it is still pending and we need + * to accept it with the sigwait. + */ + sigmask->len = 8; + pthread_sigmask(0, NULL, sigset); + vcpu_ioctl(vm, VCPU_ID, KVM_SET_SIGNAL_MASK, sigmask); + sigaddset(sigset, SIG_IPI); + pthread_sigmask(SIG_BLOCK, sigset, NULL); + + sigemptyset(sigset); + sigaddset(sigset, SIG_IPI); + guest_array = addr_gva2hva(vm, (vm_vaddr_t)random_array); while (!READ_ONCE(host_quit)) { @@ -469,6 +520,11 @@ static void *vcpu_worker(void *data) pages_count += TEST_PAGES_PER_LOOP; /* Let the guest dirty the random pages */ ret = ioctl(vcpu_fd, KVM_RUN, NULL); + if (ret == -1 && errno == EINTR) { + int sig = -1; + sigwait(sigset, &sig); + assert(sig == SIG_IPI); + } log_mode_after_vcpu_run(vm, ret, errno); } -- GitLab From edd3de6fc3d57deddb5cc7c7f1d8316ad26ac4e4 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 30 Sep 2020 21:22:41 -0400 Subject: [PATCH 0187/1894] KVM: selftests: Add "-c" parameter to dirty log test It's only used to override the existing dirty ring size/count. If with a bigger ring count, we test async of dirty ring. If with a smaller ring count, we test ring full code path. Async is default. It has no use for non-dirty-ring tests. Reviewed-by: Andrew Jones Signed-off-by: Peter Xu Message-Id: <20201001012241.6208-1-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/dirty_log_test.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 85c5d07dd243f..b1f8731721b96 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -183,6 +183,7 @@ static enum log_mode_t host_log_mode_option = LOG_MODE_ALL; /* Logging mode for current run */ static enum log_mode_t host_log_mode; static pthread_t vcpu_thread; +static uint32_t test_dirty_ring_count = TEST_DIRTY_RING_COUNT; static void vcpu_kick(void) { @@ -257,7 +258,7 @@ static void dirty_ring_create_vm_done(struct kvm_vm *vm) * Switch to dirty ring mode after VM creation but before any * of the vcpu creation. */ - vm_enable_dirty_ring(vm, TEST_DIRTY_RING_COUNT * + vm_enable_dirty_ring(vm, test_dirty_ring_count * sizeof(struct kvm_dirty_gfn)); } @@ -279,7 +280,7 @@ static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns, uint32_t count = 0; while (true) { - cur = &dirty_gfns[*fetch_index % TEST_DIRTY_RING_COUNT]; + cur = &dirty_gfns[*fetch_index % test_dirty_ring_count]; if (!dirty_gfn_is_dirtied(cur)) break; TEST_ASSERT(cur->slot == slot, "Slot number didn't match: " @@ -803,6 +804,9 @@ static void help(char *name) printf("usage: %s [-h] [-i iterations] [-I interval] " "[-p offset] [-m mode]\n", name); puts(""); + printf(" -c: specify dirty ring size, in number of entries\n"); + printf(" (only useful for dirty-ring test; default: %"PRIu32")\n", + TEST_DIRTY_RING_COUNT); printf(" -i: specify iteration counts (default: %"PRIu64")\n", TEST_HOST_LOOP_N); printf(" -I: specify interval in ms (default: %"PRIu64" ms)\n", @@ -858,8 +862,11 @@ int main(int argc, char *argv[]) guest_mode_init(VM_MODE_P40V48_4K, true, true); #endif - while ((opt = getopt(argc, argv, "hi:I:p:m:M:")) != -1) { + while ((opt = getopt(argc, argv, "c:hi:I:p:m:M:")) != -1) { switch (opt) { + case 'c': + test_dirty_ring_count = strtol(optarg, NULL, 10); + break; case 'i': iterations = strtol(optarg, NULL, 10); break; -- GitLab From 8aa426e854c475504033c176a66d038259bf64ea Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 6 Nov 2020 07:39:26 -0500 Subject: [PATCH 0188/1894] selftests: kvm: keep .gitignore add to date Add tsc_msrs_test, remove clear_dirty_log_test and alphabetize everything. Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/.gitignore | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 7a2c242b7152e..5468db7dd6742 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -14,17 +14,17 @@ /x86_64/set_sregs_test /x86_64/smm_test /x86_64/state_test -/x86_64/user_msr_test -/x86_64/vmx_preemption_timer_test /x86_64/svm_vmcall_test /x86_64/sync_regs_test +/x86_64/tsc_msrs_test +/x86_64/user_msr_test /x86_64/vmx_apic_access_test /x86_64/vmx_close_while_nested_test /x86_64/vmx_dirty_log_test +/x86_64/vmx_preemption_timer_test /x86_64/vmx_set_nested_state_test /x86_64/vmx_tsc_adjust_test /x86_64/xss_msr_test -/clear_dirty_log_test /demand_paging_test /dirty_log_test /dirty_log_perf_test -- GitLab From 2259c17f01887666220a35619c44c576aeed2a30 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Thu, 29 Oct 2020 10:06:48 -0700 Subject: [PATCH 0189/1894] kvm: x86: Sink cpuid update into vendor-specific set_cr4 functions On emulated VM-entry and VM-exit, update the CPUID bits that reflect CR4.OSXSAVE and CR4.PKE. This fixes a bug where the CPUID bits could continue to reflect L2 CR4 values after emulated VM-exit to L1. It also fixes a related bug where the CPUID bits could continue to reflect L1 CR4 values after emulated VM-entry to L2. The latter bug is mainly relevant to SVM, wherein CPUID is not a required intercept. However, it could also be relevant to VMX, because the code to conditionally update these CPUID bits assumes that the guest CPUID and the guest CR4 are always in sync. Fixes: 8eb3f87d903168 ("KVM: nVMX: fix guest CR4 loading when emulating L2 to L1 exit") Fixes: 2acf923e38fb6a ("KVM: VMX: Enable XSAVE/XRSTOR for guest") Fixes: b9baba86148904 ("KVM, pkeys: expose CPUID/CR4 to guest") Reported-by: Abhiroop Dabral Signed-off-by: Jim Mattson Reviewed-by: Ricardo Koller Reviewed-by: Peter Shier Cc: Haozhong Zhang Cc: Dexuan Cui Cc: Huaitong Han Message-Id: <20201029170648.483210-1-jmattson@google.com> --- arch/x86/kvm/cpuid.c | 1 + arch/x86/kvm/svm/svm.c | 3 +++ arch/x86/kvm/vmx/vmx.c | 4 ++++ arch/x86/kvm/x86.c | 8 -------- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index f87b5dfbaba4f..5d352cc204cec 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -146,6 +146,7 @@ void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu) MSR_IA32_MISC_ENABLE_MWAIT); } } +EXPORT_SYMBOL_GPL(kvm_update_cpuid_runtime); static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) { diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 8c858f80f3992..253809216cc73 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1701,6 +1701,9 @@ void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) cr4 |= host_cr4_mce; to_svm(vcpu)->vmcb->save.cr4 = cr4; vmcb_mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR); + + if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE)) + kvm_update_cpuid_runtime(vcpu); } static void svm_set_segment(struct kvm_vcpu *vcpu, diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 2b6d538454a61..c3441e7e5a87b 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -3113,6 +3113,7 @@ static bool vmx_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { + unsigned long old_cr4 = vcpu->arch.cr4; struct vcpu_vmx *vmx = to_vmx(vcpu); /* * Pass through host's Machine Check Enable value to hw_cr4, which @@ -3169,6 +3170,9 @@ void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) vmcs_writel(CR4_READ_SHADOW, cr4); vmcs_writel(GUEST_CR4, hw_cr4); + + if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE)) + kvm_update_cpuid_runtime(vcpu); } void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6c704a597b7ca..a3fdc16cfd6f3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1012,9 +1012,6 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE))) kvm_mmu_reset_context(vcpu); - if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE)) - kvm_update_cpuid_runtime(vcpu); - return 0; } EXPORT_SYMBOL_GPL(kvm_set_cr4); @@ -9576,7 +9573,6 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { struct msr_data apic_base_msr; int mmu_reset_needed = 0; - int cpuid_update_needed = 0; int pending_vec, max_bits, idx; struct desc_ptr dt; int ret = -EINVAL; @@ -9611,11 +9607,7 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) vcpu->arch.cr0 = sregs->cr0; mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4; - cpuid_update_needed |= ((kvm_read_cr4(vcpu) ^ sregs->cr4) & - (X86_CR4_OSXSAVE | X86_CR4_PKE)); kvm_x86_ops.set_cr4(vcpu, sregs->cr4); - if (cpuid_update_needed) - kvm_update_cpuid_runtime(vcpu); idx = srcu_read_lock(&vcpu->kvm->srcu); if (is_pae_paging(vcpu)) { -- GitLab From f63f0b68c864edea801de678bed279a3d7674f1a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 13 Nov 2020 11:36:49 -0500 Subject: [PATCH 0190/1894] KVM: selftests: always use manual clear in dirty_log_perf_test Nothing sets USE_CLEAR_DIRTY_LOG anymore, so anything it surrounds is dead code. However, it is the recommended way to use the dirty page bitmap for new enough kernel, so use it whenever KVM has the KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 capability. Signed-off-by: Paolo Bonzini --- .../selftests/kvm/dirty_log_perf_test.c | 55 ++++++++----------- 1 file changed, 22 insertions(+), 33 deletions(-) diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index 85c9b8f731420..9c6a7be31e033 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -27,6 +27,7 @@ #define TEST_HOST_LOOP_N 2UL /* Host variables */ +static u64 dirty_log_manual_caps; static bool host_quit; static uint64_t iteration; static uint64_t vcpu_last_completed_iteration[MAX_VCPUS]; @@ -88,10 +89,6 @@ static void *vcpu_worker(void *data) return NULL; } -#ifdef USE_CLEAR_DIRTY_LOG -static u64 dirty_log_manual_caps; -#endif - static void run_test(enum vm_guest_mode mode, unsigned long iterations, uint64_t phys_offset, int wr_fract) { @@ -106,10 +103,8 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, struct timespec get_dirty_log_total = (struct timespec){0}; struct timespec vcpu_dirty_total = (struct timespec){0}; struct timespec avg; -#ifdef USE_CLEAR_DIRTY_LOG struct kvm_enable_cap cap = {}; struct timespec clear_dirty_log_total = (struct timespec){0}; -#endif vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size); @@ -120,11 +115,11 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, host_num_pages = vm_num_host_pages(mode, guest_num_pages); bmap = bitmap_alloc(host_num_pages); -#ifdef USE_CLEAR_DIRTY_LOG - cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2; - cap.args[0] = dirty_log_manual_caps; - vm_enable_cap(vm, &cap); -#endif + if (dirty_log_manual_caps) { + cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2; + cap.args[0] = dirty_log_manual_caps; + vm_enable_cap(vm, &cap); + } vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads)); TEST_ASSERT(vcpu_threads, "Memory allocation failed"); @@ -190,17 +185,17 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, pr_info("Iteration %lu get dirty log time: %ld.%.9lds\n", iteration, ts_diff.tv_sec, ts_diff.tv_nsec); -#ifdef USE_CLEAR_DIRTY_LOG - clock_gettime(CLOCK_MONOTONIC, &start); - kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0, - host_num_pages); + if (dirty_log_manual_caps) { + clock_gettime(CLOCK_MONOTONIC, &start); + kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0, + host_num_pages); - ts_diff = timespec_diff_now(start); - clear_dirty_log_total = timespec_add(clear_dirty_log_total, - ts_diff); - pr_info("Iteration %lu clear dirty log time: %ld.%.9lds\n", - iteration, ts_diff.tv_sec, ts_diff.tv_nsec); -#endif + ts_diff = timespec_diff_now(start); + clear_dirty_log_total = timespec_add(clear_dirty_log_total, + ts_diff); + pr_info("Iteration %lu clear dirty log time: %ld.%.9lds\n", + iteration, ts_diff.tv_sec, ts_diff.tv_nsec); + } } /* Tell the vcpu thread to quit */ @@ -220,12 +215,12 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, iterations, get_dirty_log_total.tv_sec, get_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec); -#ifdef USE_CLEAR_DIRTY_LOG - avg = timespec_div(clear_dirty_log_total, iterations); - pr_info("Clear dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", - iterations, clear_dirty_log_total.tv_sec, - clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec); -#endif + if (dirty_log_manual_caps) { + avg = timespec_div(clear_dirty_log_total, iterations); + pr_info("Clear dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", + iterations, clear_dirty_log_total.tv_sec, + clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec); + } free(bmap); free(vcpu_threads); @@ -284,16 +279,10 @@ int main(int argc, char *argv[]) int opt, i; int wr_fract = 1; -#ifdef USE_CLEAR_DIRTY_LOG dirty_log_manual_caps = kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); - if (!dirty_log_manual_caps) { - print_skip("KVM_CLEAR_DIRTY_LOG not available"); - exit(KSFT_SKIP); - } dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | KVM_DIRTY_LOG_INITIALLY_SET); -#endif #ifdef __x86_64__ guest_mode_init(VM_MODE_PXXV48_4K, true, true); -- GitLab From ec2f18bb4783648041498b06d4bff222821efed1 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Wed, 11 Nov 2020 13:26:29 +0100 Subject: [PATCH 0191/1894] KVM: selftests: Make vm_create_default common The code is almost 100% the same anyway. Just move it to common and add a few arch-specific macros. Reviewed-by: Peter Xu Reviewed-by: Ben Gardon Signed-off-by: Andrew Jones Message-Id: <20201111122636.73346-5-drjones@redhat.com> Signed-off-by: Paolo Bonzini --- .../testing/selftests/kvm/include/kvm_util.h | 23 ++++++++++--- .../selftests/kvm/lib/aarch64/processor.c | 17 ---------- tools/testing/selftests/kvm/lib/kvm_util.c | 26 +++++++++++++++ .../selftests/kvm/lib/s390x/processor.c | 22 ------------- .../selftests/kvm/lib/x86_64/processor.c | 32 ------------------- 5 files changed, 45 insertions(+), 75 deletions(-) diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 710009cc17fd6..8154785196cbd 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -45,13 +45,28 @@ enum vm_guest_mode { }; #if defined(__aarch64__) -#define VM_MODE_DEFAULT VM_MODE_P40V48_4K + +#define VM_MODE_DEFAULT VM_MODE_P40V48_4K +#define MIN_PAGE_SHIFT 12U +#define ptes_per_page(page_size) ((page_size) / 8) + #elif defined(__x86_64__) -#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K -#else -#define VM_MODE_DEFAULT VM_MODE_P52V48_4K + +#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K +#define MIN_PAGE_SHIFT 12U +#define ptes_per_page(page_size) ((page_size) / 8) + +#elif defined(__s390x__) + +#define VM_MODE_DEFAULT VM_MODE_P52V48_4K +#define MIN_PAGE_SHIFT 12U +#define ptes_per_page(page_size) ((page_size) / 16) + #endif +#define MIN_PAGE_SIZE (1U << MIN_PAGE_SHIFT) +#define PTES_PER_MIN_PAGE ptes_per_page(MIN_PAGE_SIZE) + #define vm_guest_mode_string(m) vm_guest_mode_string[m] extern const char * const vm_guest_mode_string[]; diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index d6c32c328e9a8..cee92d477dc0c 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -5,8 +5,6 @@ * Copyright (C) 2018, Red Hat, Inc. */ -#define _GNU_SOURCE /* for program_invocation_name */ - #include #include "kvm_util.h" @@ -219,21 +217,6 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) } } -struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, - void *guest_code) -{ - uint64_t ptrs_per_4k_pte = 512; - uint64_t extra_pg_pages = (extra_mem_pages / ptrs_per_4k_pte) * 2; - struct kvm_vm *vm; - - vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); - - kvm_vm_elf_load(vm, program_invocation_name, 0, 0); - vm_vcpu_add_default(vm, vcpuid, guest_code); - - return vm; -} - void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *init) { struct kvm_vcpu_init default_init = { .target = -1, }; diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index a04302666b025..70676a3055235 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -5,6 +5,7 @@ * Copyright (C) 2018, Google LLC. */ +#define _GNU_SOURCE /* for program_invocation_name */ #include "test_util.h" #include "kvm_util.h" #include "kvm_util_internal.h" @@ -281,6 +282,31 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) return vm; } +struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, + void *guest_code) +{ + /* The maximum page table size for a memory region will be when the + * smallest pages are used. Considering each page contains x page + * table descriptors, the total extra size for page tables (for extra + * N pages) will be: N/x+N/x^2+N/x^3+... which is definitely smaller + * than N/x*2. + */ + uint64_t extra_pg_pages = (extra_mem_pages / PTES_PER_MIN_PAGE) * 2; + struct kvm_vm *vm; + + vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); + + kvm_vm_elf_load(vm, program_invocation_name, 0, 0); + +#ifdef __x86_64__ + vm_create_irqchip(vm); +#endif + + vm_vcpu_add_default(vm, vcpuid, guest_code); + + return vm; +} + /* * VM Restart * diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c index 7349bb2e1a243..0152f356c0994 100644 --- a/tools/testing/selftests/kvm/lib/s390x/processor.c +++ b/tools/testing/selftests/kvm/lib/s390x/processor.c @@ -5,8 +5,6 @@ * Copyright (C) 2019, Red Hat, Inc. */ -#define _GNU_SOURCE /* for program_invocation_name */ - #include "processor.h" #include "kvm_util.h" #include "../kvm_util_internal.h" @@ -160,26 +158,6 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) virt_dump_region(stream, vm, indent, vm->pgd); } -struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, - void *guest_code) -{ - /* - * The additional amount of pages required for the page tables is: - * 1 * n / 256 + 4 * (n / 256) / 2048 + 4 * (n / 256) / 2048^2 + ... - * which is definitely smaller than (n / 256) * 2. - */ - uint64_t extra_pg_pages = extra_mem_pages / 256 * 2; - struct kvm_vm *vm; - - vm = vm_create(VM_MODE_DEFAULT, - DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); - - kvm_vm_elf_load(vm, program_invocation_name, 0, 0); - vm_vcpu_add_default(vm, vcpuid, guest_code); - - return vm; -} - void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code) { size_t stack_size = DEFAULT_STACK_PGS * getpagesize(); diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index d10c5c05bdf0b..95e1a757c6294 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -5,8 +5,6 @@ * Copyright (C) 2018, Google LLC. */ -#define _GNU_SOURCE /* for program_invocation_name */ - #include "test_util.h" #include "kvm_util.h" #include "../kvm_util_internal.h" @@ -731,36 +729,6 @@ void vcpu_set_cpuid(struct kvm_vm *vm, } -struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, - void *guest_code) -{ - struct kvm_vm *vm; - /* - * For x86 the maximum page table size for a memory region - * will be when only 4K pages are used. In that case the - * total extra size for page tables (for extra N pages) will - * be: N/512+N/512^2+N/512^3+... which is definitely smaller - * than N/512*2. - */ - uint64_t extra_pg_pages = extra_mem_pages / 512 * 2; - - /* Create VM */ - vm = vm_create(VM_MODE_DEFAULT, - DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, - O_RDWR); - - /* Setup guest code */ - kvm_vm_elf_load(vm, program_invocation_name, 0, 0); - - /* Setup IRQ Chip */ - vm_create_irqchip(vm); - - /* Add the first vCPU. */ - vm_vcpu_add_default(vm, vcpuid, guest_code); - - return vm; -} - /* * VCPU Get MSR * -- GitLab From 0aa9ec45d42779af711c7a209b5780ff7391b5bd Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Wed, 11 Nov 2020 13:26:30 +0100 Subject: [PATCH 0192/1894] KVM: selftests: Introduce vm_create_[default_]_with_vcpus Introduce new vm_create variants that also takes a number of vcpus, an amount of per-vcpu pages, and optionally a list of vcpuids. These variants will create default VMs with enough additional pages to cover the vcpu stacks, per-vcpu pages, and pagetable pages for all. The new 'default' variant uses VM_MODE_DEFAULT, whereas the other new variant accepts the mode as a parameter. Reviewed-by: Peter Xu Reviewed-by: Ben Gardon Signed-off-by: Andrew Jones Message-Id: <20201111122636.73346-6-drjones@redhat.com> Signed-off-by: Paolo Bonzini --- .../testing/selftests/kvm/include/kvm_util.h | 10 ++++++ tools/testing/selftests/kvm/lib/kvm_util.c | 35 ++++++++++++++++--- 2 files changed, 40 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 8154785196cbd..dfa9d369e8fc6 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -266,6 +266,16 @@ vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, void *guest_code); +/* Same as vm_create_default, but can be used for more than one vcpu */ +struct kvm_vm *vm_create_default_with_vcpus(uint32_t nr_vcpus, uint64_t extra_mem_pages, + uint32_t num_percpu_pages, void *guest_code, + uint32_t vcpuids[]); + +/* Like vm_create_default_with_vcpus, but accepts mode as a parameter */ +struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus, + uint64_t extra_mem_pages, uint32_t num_percpu_pages, + void *guest_code, uint32_t vcpuids[]); + /* * Adds a vCPU with reasonable defaults (e.g. a stack) * diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 70676a3055235..df2035ac54a63 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -282,8 +282,9 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) return vm; } -struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, - void *guest_code) +struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus, + uint64_t extra_mem_pages, uint32_t num_percpu_pages, + void *guest_code, uint32_t vcpuids[]) { /* The maximum page table size for a memory region will be when the * smallest pages are used. Considering each page contains x page @@ -291,10 +292,18 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, * N pages) will be: N/x+N/x^2+N/x^3+... which is definitely smaller * than N/x*2. */ - uint64_t extra_pg_pages = (extra_mem_pages / PTES_PER_MIN_PAGE) * 2; + uint64_t vcpu_pages = (DEFAULT_STACK_PGS + num_percpu_pages) * nr_vcpus; + uint64_t extra_pg_pages = (extra_mem_pages + vcpu_pages) / PTES_PER_MIN_PAGE * 2; + uint64_t pages = DEFAULT_GUEST_PHY_PAGES + vcpu_pages + extra_pg_pages; struct kvm_vm *vm; + int i; + + TEST_ASSERT(nr_vcpus <= kvm_check_cap(KVM_CAP_MAX_VCPUS), + "nr_vcpus = %d too large for host, max-vcpus = %d", + nr_vcpus, kvm_check_cap(KVM_CAP_MAX_VCPUS)); - vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); + pages = vm_adjust_num_guest_pages(mode, pages); + vm = vm_create(mode, pages, O_RDWR); kvm_vm_elf_load(vm, program_invocation_name, 0, 0); @@ -302,11 +311,27 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, vm_create_irqchip(vm); #endif - vm_vcpu_add_default(vm, vcpuid, guest_code); + for (i = 0; i < nr_vcpus; ++i) + vm_vcpu_add_default(vm, vcpuids ? vcpuids[i] : i, guest_code); return vm; } +struct kvm_vm *vm_create_default_with_vcpus(uint32_t nr_vcpus, uint64_t extra_mem_pages, + uint32_t num_percpu_pages, void *guest_code, + uint32_t vcpuids[]) +{ + return vm_create_with_vcpus(VM_MODE_DEFAULT, nr_vcpus, extra_mem_pages, + num_percpu_pages, guest_code, vcpuids); +} + +struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, + void *guest_code) +{ + return vm_create_default_with_vcpus(1, extra_mem_pages, 0, guest_code, + (uint32_t []){ vcpuid }); +} + /* * VM Restart * -- GitLab From 87c5f35e5c958278174979f13a9e40d3c9962c0f Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Wed, 11 Nov 2020 13:26:34 +0100 Subject: [PATCH 0193/1894] KVM: selftests: Also build dirty_log_perf_test on AArch64 Signed-off-by: Andrew Jones Message-Id: <20201111122636.73346-10-drjones@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 3d14ef77755e5..4febf4d5ead9a 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -70,6 +70,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list-sve TEST_GEN_PROGS_aarch64 += demand_paging_test TEST_GEN_PROGS_aarch64 += dirty_log_test +TEST_GEN_PROGS_aarch64 += dirty_log_perf_test TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus TEST_GEN_PROGS_aarch64 += set_memory_region_test TEST_GEN_PROGS_aarch64 += steal_time -- GitLab From 8934c8454064757efd8d3fb0a729db7eb2d0e5f5 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 13 Nov 2020 11:38:38 +0000 Subject: [PATCH 0194/1894] KVM: arm64: Remove redundant Spectre-v2 code from kvm_map_vector() '__kvm_bp_vect_base' is only used when dealing with the hardened vectors so remove the redundant assignments in kvm_map_vectors(). Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Cc: Marc Zyngier Cc: Quentin Perret Link: https://lore.kernel.org/r/20201113113847.21619-2-will@kernel.org --- arch/arm64/kvm/arm.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 5750ec34960e9..b43b637ded144 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1306,11 +1306,6 @@ static int kvm_map_vectors(void) * !SV2 + HEL2 -> allocate one vector slot and use exec mapping * SV2 + HEL2 -> use hardened vectors and use exec mapping */ - if (cpus_have_const_cap(ARM64_SPECTRE_V2)) { - __kvm_bp_vect_base = kvm_ksym_ref(__bp_harden_hyp_vecs); - __kvm_bp_vect_base = kern_hyp_va(__kvm_bp_vect_base); - } - if (cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)) { phys_addr_t vect_pa = __pa_symbol(__bp_harden_hyp_vecs); unsigned long size = __BP_HARDEN_HYP_VECS_SZ; -- GitLab From de5bcdb48498abeb019ae075d139850c52661627 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 13 Nov 2020 11:38:39 +0000 Subject: [PATCH 0195/1894] KVM: arm64: Tidy up kvm_map_vector() The bulk of the work in kvm_map_vector() is conditional on the ARM64_HARDEN_EL2_VECTORS capability, so return early if that is not set and make the code a bit easier to read. Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Cc: Marc Zyngier Cc: Quentin Perret Link: https://lore.kernel.org/r/20201113113847.21619-3-will@kernel.org --- arch/arm64/kvm/arm.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index b43b637ded144..476bc613d0e65 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1297,6 +1297,8 @@ static unsigned long nvhe_percpu_order(void) static int kvm_map_vectors(void) { + int slot; + /* * SV2 = ARM64_SPECTRE_V2 * HEL2 = ARM64_HARDEN_EL2_VECTORS @@ -1306,22 +1308,20 @@ static int kvm_map_vectors(void) * !SV2 + HEL2 -> allocate one vector slot and use exec mapping * SV2 + HEL2 -> use hardened vectors and use exec mapping */ - if (cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)) { - phys_addr_t vect_pa = __pa_symbol(__bp_harden_hyp_vecs); - unsigned long size = __BP_HARDEN_HYP_VECS_SZ; + if (!cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)) + return 0; - /* - * Always allocate a spare vector slot, as we don't - * know yet which CPUs have a BP hardening slot that - * we can reuse. - */ - __kvm_harden_el2_vector_slot = atomic_inc_return(&arm64_el2_vector_last_slot); - BUG_ON(__kvm_harden_el2_vector_slot >= BP_HARDEN_EL2_SLOTS); - return create_hyp_exec_mappings(vect_pa, size, - &__kvm_bp_vect_base); - } + /* + * Always allocate a spare vector slot, as we don't know yet which CPUs + * have a BP hardening slot that we can reuse. + */ + slot = atomic_inc_return(&arm64_el2_vector_last_slot); + BUG_ON(slot >= BP_HARDEN_EL2_SLOTS); + __kvm_harden_el2_vector_slot = slot; - return 0; + return create_hyp_exec_mappings(__pa_symbol(__bp_harden_hyp_vecs), + __BP_HARDEN_HYP_VECS_SZ, + &__kvm_bp_vect_base); } static void cpu_init_hyp_mode(void) -- GitLab From 042c76a9502bf281befc0ae2793ef1de55b65544 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 13 Nov 2020 11:38:40 +0000 Subject: [PATCH 0196/1894] KVM: arm64: Move kvm_get_hyp_vector() out of header file kvm_get_hyp_vector() has only one caller, so move it out of kvm_mmu.h and inline it into a new function, cpu_set_hyp_vector(), for setting the vector. Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Cc: Marc Zyngier Cc: Quentin Perret Link: https://lore.kernel.org/r/20201113113847.21619-4-will@kernel.org --- arch/arm64/include/asm/kvm_mmu.h | 43 ----------------------------- arch/arm64/kvm/arm.c | 46 ++++++++++++++++++++++++++++++-- 2 files changed, 44 insertions(+), 45 deletions(-) diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 331394306ccee..23182e7d94134 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -208,52 +208,9 @@ static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa, return ret; } -/* - * EL2 vectors can be mapped and rerouted in a number of ways, - * depending on the kernel configuration and CPU present: - * - * - If the CPU is affected by Spectre-v2, the hardening sequence is - * placed in one of the vector slots, which is executed before jumping - * to the real vectors. - * - * - If the CPU also has the ARM64_HARDEN_EL2_VECTORS cap, the slot - * containing the hardening sequence is mapped next to the idmap page, - * and executed before jumping to the real vectors. - * - * - If the CPU only has the ARM64_HARDEN_EL2_VECTORS cap, then an - * empty slot is selected, mapped next to the idmap page, and - * executed before jumping to the real vectors. - * - * Note that ARM64_HARDEN_EL2_VECTORS is somewhat incompatible with - * VHE, as we don't have hypervisor-specific mappings. If the system - * is VHE and yet selects this capability, it will be ignored. - */ extern void *__kvm_bp_vect_base; extern int __kvm_harden_el2_vector_slot; -static inline void *kvm_get_hyp_vector(void) -{ - struct bp_hardening_data *data = arm64_get_bp_hardening_data(); - void *vect = kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector)); - int slot = -1; - - if (cpus_have_const_cap(ARM64_SPECTRE_V2) && data->fn) { - vect = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs)); - slot = data->hyp_vectors_slot; - } - - if (this_cpu_has_cap(ARM64_HARDEN_EL2_VECTORS) && !has_vhe()) { - vect = __kvm_bp_vect_base; - if (slot == -1) - slot = __kvm_harden_el2_vector_slot; - } - - if (slot != -1) - vect += slot * SZ_2K; - - return vect; -} - #define kvm_phys_to_vttbr(addr) phys_to_ttbr(addr) static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 476bc613d0e65..c63c0b3c9b17b 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1375,13 +1375,55 @@ static void cpu_hyp_reset(void) __hyp_reset_vectors(); } +/* + * EL2 vectors can be mapped and rerouted in a number of ways, + * depending on the kernel configuration and CPU present: + * + * - If the CPU is affected by Spectre-v2, the hardening sequence is + * placed in one of the vector slots, which is executed before jumping + * to the real vectors. + * + * - If the CPU also has the ARM64_HARDEN_EL2_VECTORS cap, the slot + * containing the hardening sequence is mapped next to the idmap page, + * and executed before jumping to the real vectors. + * + * - If the CPU only has the ARM64_HARDEN_EL2_VECTORS cap, then an + * empty slot is selected, mapped next to the idmap page, and + * executed before jumping to the real vectors. + * + * Note that ARM64_HARDEN_EL2_VECTORS is somewhat incompatible with + * VHE, as we don't have hypervisor-specific mappings. If the system + * is VHE and yet selects this capability, it will be ignored. + */ +static void cpu_set_hyp_vector(void) +{ + struct bp_hardening_data *data = arm64_get_bp_hardening_data(); + void *vect = kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector)); + int slot = -1; + + if (cpus_have_const_cap(ARM64_SPECTRE_V2) && data->fn) { + vect = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs)); + slot = data->hyp_vectors_slot; + } + + if (this_cpu_has_cap(ARM64_HARDEN_EL2_VECTORS) && !has_vhe()) { + vect = __kvm_bp_vect_base; + if (slot == -1) + slot = __kvm_harden_el2_vector_slot; + } + + if (slot != -1) + vect += slot * SZ_2K; + + *this_cpu_ptr_hyp_sym(kvm_hyp_vector) = (unsigned long)vect; +} + static void cpu_hyp_reinit(void) { kvm_init_host_cpu_context(&this_cpu_ptr_hyp_sym(kvm_host_data)->host_ctxt); cpu_hyp_reset(); - - *this_cpu_ptr_hyp_sym(kvm_hyp_vector) = (unsigned long)kvm_get_hyp_vector(); + cpu_set_hyp_vector(); if (is_kernel_in_hyp_mode()) kvm_timer_init_vhe(); -- GitLab From 07cf8aa922db7747cd6e100d2e3f7ca839c7a419 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 13 Nov 2020 11:38:41 +0000 Subject: [PATCH 0197/1894] KVM: arm64: Make BP hardening globals static instead Branch predictor hardening of the hyp vectors is partially driven by a couple of global variables ('__kvm_bp_vect_base' and '__kvm_harden_el2_vector_slot'). However, these are only used within a single compilation unit, so internalise them there instead. Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Cc: Marc Zyngier Cc: Quentin Perret Link: https://lore.kernel.org/r/20201113113847.21619-5-will@kernel.org --- arch/arm64/include/asm/kvm_mmu.h | 3 --- arch/arm64/kvm/arm.c | 8 ++++++++ arch/arm64/kvm/va_layout.c | 3 --- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 23182e7d94134..db721be0df62e 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -208,9 +208,6 @@ static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa, return ret; } -extern void *__kvm_bp_vect_base; -extern int __kvm_harden_el2_vector_slot; - #define kvm_phys_to_vttbr(addr) phys_to_ttbr(addr) static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index c63c0b3c9b17b..3262c16f04492 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -51,6 +51,14 @@ DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector); static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); unsigned long kvm_arm_hyp_percpu_base[NR_CPUS]; +/* Hypervisor VA of the indirect vector trampoline page */ +static void *__kvm_bp_vect_base; +/* + * Slot in the hyp vector page for use by the indirect vector trampoline + * when mitigation against Spectre-v2 is not required. + */ +static int __kvm_harden_el2_vector_slot; + /* The VMID used in the VTTBR */ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1); static u32 kvm_next_vmid; diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c index e0404bcab019a..d1195c288c9f0 100644 --- a/arch/arm64/kvm/va_layout.c +++ b/arch/arm64/kvm/va_layout.c @@ -131,9 +131,6 @@ void __init kvm_update_va_mask(struct alt_instr *alt, } } -void *__kvm_bp_vect_base; -int __kvm_harden_el2_vector_slot; - void kvm_patch_vector_branch(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst) { -- GitLab From 6279017e807708a07db5edace462713a93625da3 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 13 Nov 2020 11:38:42 +0000 Subject: [PATCH 0198/1894] KVM: arm64: Move BP hardening helpers into spectre.h The BP hardening helpers are an integral part of the Spectre-v2 mitigation, so move them into asm/spectre.h and inline the arm64_get_bp_hardening_data() function at the same time. Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Cc: Marc Zyngier Cc: Quentin Perret Link: https://lore.kernel.org/r/20201113113847.21619-6-will@kernel.org --- arch/arm64/include/asm/mmu.h | 29 ----------------------------- arch/arm64/include/asm/spectre.h | 30 ++++++++++++++++++++++++++++++ arch/arm64/kvm/arm.c | 2 +- arch/arm64/kvm/hyp/hyp-entry.S | 1 + 4 files changed, 32 insertions(+), 30 deletions(-) diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index b2e91c187e2a6..75beffe2ee8a8 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -12,9 +12,6 @@ #define USER_ASID_FLAG (UL(1) << USER_ASID_BIT) #define TTBR_ASID_MASK (UL(0xffff) << 48) -#define BP_HARDEN_EL2_SLOTS 4 -#define __BP_HARDEN_HYP_VECS_SZ (BP_HARDEN_EL2_SLOTS * SZ_2K) - #ifndef __ASSEMBLY__ #include @@ -41,32 +38,6 @@ static inline bool arm64_kernel_unmapped_at_el0(void) return cpus_have_const_cap(ARM64_UNMAP_KERNEL_AT_EL0); } -typedef void (*bp_hardening_cb_t)(void); - -struct bp_hardening_data { - int hyp_vectors_slot; - bp_hardening_cb_t fn; -}; - -DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); - -static inline struct bp_hardening_data *arm64_get_bp_hardening_data(void) -{ - return this_cpu_ptr(&bp_hardening_data); -} - -static inline void arm64_apply_bp_hardening(void) -{ - struct bp_hardening_data *d; - - if (!cpus_have_const_cap(ARM64_SPECTRE_V2)) - return; - - d = arm64_get_bp_hardening_data(); - if (d->fn) - d->fn(); -} - extern void arm64_memblock_init(void); extern void paging_init(void); extern void bootmem_init(void); diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h index fcdfbce302bdf..d22f8b7d9c50d 100644 --- a/arch/arm64/include/asm/spectre.h +++ b/arch/arm64/include/asm/spectre.h @@ -9,7 +9,15 @@ #ifndef __ASM_SPECTRE_H #define __ASM_SPECTRE_H +#define BP_HARDEN_EL2_SLOTS 4 +#define __BP_HARDEN_HYP_VECS_SZ (BP_HARDEN_EL2_SLOTS * SZ_2K) + +#ifndef __ASSEMBLY__ + +#include + #include +#include /* Watch out, ordering is important here. */ enum mitigation_state { @@ -20,6 +28,27 @@ enum mitigation_state { struct task_struct; +typedef void (*bp_hardening_cb_t)(void); + +struct bp_hardening_data { + int hyp_vectors_slot; + bp_hardening_cb_t fn; +}; + +DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); + +static inline void arm64_apply_bp_hardening(void) +{ + struct bp_hardening_data *d; + + if (!cpus_have_const_cap(ARM64_SPECTRE_V2)) + return; + + d = this_cpu_ptr(&bp_hardening_data); + if (d->fn) + d->fn(); +} + enum mitigation_state arm64_get_spectre_v2_state(void); bool has_spectre_v2(const struct arm64_cpu_capabilities *cap, int scope); void spectre_v2_enable_mitigation(const struct arm64_cpu_capabilities *__unused); @@ -29,4 +58,5 @@ bool has_spectre_v4(const struct arm64_cpu_capabilities *cap, int scope); void spectre_v4_enable_mitigation(const struct arm64_cpu_capabilities *__unused); void spectre_v4_enable_task_mitigation(struct task_struct *tsk); +#endif /* __ASSEMBLY__ */ #endif /* __ASM_SPECTRE_H */ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 3262c16f04492..044c5fc81f90f 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1405,7 +1405,7 @@ static void cpu_hyp_reset(void) */ static void cpu_set_hyp_vector(void) { - struct bp_hardening_data *data = arm64_get_bp_hardening_data(); + struct bp_hardening_data *data = this_cpu_ptr(&bp_hardening_data); void *vect = kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector)); int slot = -1; diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 0a5b36eb54b3e..874eacdabc64f 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -13,6 +13,7 @@ #include #include #include +#include .macro save_caller_saved_regs_vect /* x0 and x1 were saved in the vector entry */ -- GitLab From da592e68a5a333b81111bd6336838764732f723e Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 13 Nov 2020 11:38:43 +0000 Subject: [PATCH 0199/1894] KVM: arm64: Re-jig logic when patching hardened hyp vectors The hardened hyp vectors are not used on systems running with VHE or CPUs without the ARM64_HARDEN_EL2_VECTORS capability. Re-jig the checking logic slightly in kvm_patch_vector_branch() so that it's a bit clearer what we're looking for. This is purely cosmetic. Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Cc: Marc Zyngier Cc: Quentin Perret Link: https://lore.kernel.org/r/20201113113847.21619-7-will@kernel.org --- arch/arm64/kvm/va_layout.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c index d1195c288c9f0..760db2c84b9bd 100644 --- a/arch/arm64/kvm/va_layout.c +++ b/arch/arm64/kvm/va_layout.c @@ -139,8 +139,8 @@ void kvm_patch_vector_branch(struct alt_instr *alt, BUG_ON(nr_inst != 5); - if (has_vhe() || !cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)) { - WARN_ON_ONCE(cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)); + if (!cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS) || + WARN_ON_ONCE(has_vhe())) { return; } -- GitLab From b881cdce77b48bd488f268041f32951bab89bb0f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 13 Nov 2020 11:38:44 +0000 Subject: [PATCH 0200/1894] KVM: arm64: Allocate hyp vectors statically The EL2 vectors installed when a guest is running point at one of the following configurations for a given CPU: - Straight at __kvm_hyp_vector - A trampoline containing an SMC sequence to mitigate Spectre-v2 and then a direct branch to __kvm_hyp_vector - A dynamically-allocated trampoline which has an indirect branch to __kvm_hyp_vector - A dynamically-allocated trampoline containing an SMC sequence to mitigate Spectre-v2 and then an indirect branch to __kvm_hyp_vector The indirect branches mean that VA randomization at EL2 isn't trivially bypassable using Spectre-v3a (where the vector base is readable by the guest). Rather than populate these vectors dynamically, configure everything statically and use an enumerated type to identify the vector "slot" corresponding to one of the configurations above. This both simplifies the code, but also makes it much easier to implement at EL2 later on. Signed-off-by: Will Deacon [maz: fixed double call to kvm_init_vector_slots() on nVHE] Signed-off-by: Marc Zyngier Cc: Marc Zyngier Cc: Quentin Perret Link: https://lore.kernel.org/r/20201113113847.21619-8-will@kernel.org --- arch/arm64/include/asm/kvm_asm.h | 5 -- arch/arm64/include/asm/spectre.h | 36 ++++++++++++- arch/arm64/kernel/cpu_errata.c | 2 + arch/arm64/kernel/proton-pack.c | 63 +++++------------------ arch/arm64/kvm/arm.c | 86 +++++++++++++------------------- arch/arm64/kvm/hyp/Makefile | 2 +- arch/arm64/kvm/hyp/hyp-entry.S | 72 ++++++++++++++------------ arch/arm64/kvm/hyp/smccc_wa.S | 32 ------------ arch/arm64/kvm/va_layout.c | 11 +--- 9 files changed, 126 insertions(+), 183 deletions(-) delete mode 100644 arch/arm64/kvm/hyp/smccc_wa.S diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 54387ccd1ab26..94b7c9a99576b 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -34,8 +34,6 @@ */ #define KVM_VECTOR_PREAMBLE (2 * AARCH64_INSN_SIZE) -#define __SMCCC_WORKAROUND_1_SMC_SZ 36 - #define KVM_HOST_SMCCC_ID(id) \ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ ARM_SMCCC_SMC_64, \ @@ -175,7 +173,6 @@ extern unsigned long kvm_arm_hyp_percpu_base[NR_CPUS]; DECLARE_KVM_NVHE_SYM(__per_cpu_start); DECLARE_KVM_NVHE_SYM(__per_cpu_end); -extern atomic_t arm64_el2_vector_last_slot; DECLARE_KVM_HYP_SYM(__bp_harden_hyp_vecs); #define __bp_harden_hyp_vecs CHOOSE_HYP_SYM(__bp_harden_hyp_vecs) @@ -198,8 +195,6 @@ extern void __vgic_v3_init_lrs(void); extern u32 __kvm_get_mdcr_el2(void); -extern char __smccc_workaround_1_smc[__SMCCC_WORKAROUND_1_SMC_SZ]; - /* * Obtain the PC-relative address of a kernel symbol * s: symbol diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h index d22f8b7d9c50d..fa86b8f655b7b 100644 --- a/arch/arm64/include/asm/spectre.h +++ b/arch/arm64/include/asm/spectre.h @@ -28,11 +28,41 @@ enum mitigation_state { struct task_struct; +/* + * Note: the order of this enum corresponds to __bp_harden_hyp_vecs and + * we rely on having the direct vectors first. + */ +enum arm64_hyp_spectre_vector { + /* + * Take exceptions directly to __kvm_hyp_vector. This must be + * 0 so that it used by default when mitigations are not needed. + */ + HYP_VECTOR_DIRECT, + + /* + * Bounce via a slot in the hypervisor text mapping of + * __bp_harden_hyp_vecs, which contains an SMC call. + */ + HYP_VECTOR_SPECTRE_DIRECT, + + /* + * Bounce via a slot in a special mapping of __bp_harden_hyp_vecs + * next to the idmap page. + */ + HYP_VECTOR_INDIRECT, + + /* + * Bounce via a slot in a special mapping of __bp_harden_hyp_vecs + * next to the idmap page, which contains an SMC call. + */ + HYP_VECTOR_SPECTRE_INDIRECT, +}; + typedef void (*bp_hardening_cb_t)(void); struct bp_hardening_data { - int hyp_vectors_slot; - bp_hardening_cb_t fn; + enum arm64_hyp_spectre_vector slot; + bp_hardening_cb_t fn; }; DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); @@ -53,6 +83,8 @@ enum mitigation_state arm64_get_spectre_v2_state(void); bool has_spectre_v2(const struct arm64_cpu_capabilities *cap, int scope); void spectre_v2_enable_mitigation(const struct arm64_cpu_capabilities *__unused); +void cpu_el2_vector_harden_enable(const struct arm64_cpu_capabilities *__unused); + enum mitigation_state arm64_get_spectre_v4_state(void); bool has_spectre_v4(const struct arm64_cpu_capabilities *cap, int scope); void spectre_v4_enable_mitigation(const struct arm64_cpu_capabilities *__unused); diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 61314fd70f13b..7a040abaedeac 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -459,9 +459,11 @@ const struct arm64_cpu_capabilities arm64_errata[] = { }, #ifdef CONFIG_RANDOMIZE_BASE { + /* Must come after the Spectre-v2 entry */ .desc = "EL2 vector hardening", .capability = ARM64_HARDEN_EL2_VECTORS, ERRATA_MIDR_RANGE_LIST(ca57_a72), + .cpu_enable = cpu_el2_vector_harden_enable, }, #endif { diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index c18eb7d41274b..a4ba941297509 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -26,6 +26,7 @@ #include #include +#include /* * We try to ensure that the mitigation state can never change as the result of @@ -169,72 +170,26 @@ bool has_spectre_v2(const struct arm64_cpu_capabilities *entry, int scope) return true; } -DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); - enum mitigation_state arm64_get_spectre_v2_state(void) { return spectre_v2_state; } -#ifdef CONFIG_KVM -#include -#include - -atomic_t arm64_el2_vector_last_slot = ATOMIC_INIT(-1); - -static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start, - const char *hyp_vecs_end) -{ - void *dst = lm_alias(__bp_harden_hyp_vecs + slot * SZ_2K); - int i; - - for (i = 0; i < SZ_2K; i += 0x80) - memcpy(dst + i, hyp_vecs_start, hyp_vecs_end - hyp_vecs_start); - - __flush_icache_range((uintptr_t)dst, (uintptr_t)dst + SZ_2K); -} +DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); static void install_bp_hardening_cb(bp_hardening_cb_t fn) { - static DEFINE_RAW_SPINLOCK(bp_lock); - int cpu, slot = -1; - const char *hyp_vecs_start = __smccc_workaround_1_smc; - const char *hyp_vecs_end = __smccc_workaround_1_smc + - __SMCCC_WORKAROUND_1_SMC_SZ; + __this_cpu_write(bp_hardening_data.fn, fn); /* * Vinz Clortho takes the hyp_vecs start/end "keys" at * the door when we're a guest. Skip the hyp-vectors work. */ - if (!is_hyp_mode_available()) { - __this_cpu_write(bp_hardening_data.fn, fn); + if (!is_hyp_mode_available()) return; - } - raw_spin_lock(&bp_lock); - for_each_possible_cpu(cpu) { - if (per_cpu(bp_hardening_data.fn, cpu) == fn) { - slot = per_cpu(bp_hardening_data.hyp_vectors_slot, cpu); - break; - } - } - - if (slot == -1) { - slot = atomic_inc_return(&arm64_el2_vector_last_slot); - BUG_ON(slot >= BP_HARDEN_EL2_SLOTS); - __copy_hyp_vect_bpi(slot, hyp_vecs_start, hyp_vecs_end); - } - - __this_cpu_write(bp_hardening_data.hyp_vectors_slot, slot); - __this_cpu_write(bp_hardening_data.fn, fn); - raw_spin_unlock(&bp_lock); + __this_cpu_write(bp_hardening_data.slot, HYP_VECTOR_SPECTRE_DIRECT); } -#else -static void install_bp_hardening_cb(bp_hardening_cb_t fn) -{ - __this_cpu_write(bp_hardening_data.fn, fn); -} -#endif /* CONFIG_KVM */ static void call_smc_arch_workaround_1(void) { @@ -315,6 +270,14 @@ void spectre_v2_enable_mitigation(const struct arm64_cpu_capabilities *__unused) update_mitigation_state(&spectre_v2_state, state); } +void cpu_el2_vector_harden_enable(const struct arm64_cpu_capabilities *__unused) +{ + struct bp_hardening_data *data = this_cpu_ptr(&bp_hardening_data); + + if (this_cpu_has_cap(ARM64_HARDEN_EL2_VECTORS)) + data->slot += HYP_VECTOR_INDIRECT; +} + /* * Spectre v4. * diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 044c5fc81f90f..5e6fe5eef3ec4 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -51,14 +51,6 @@ DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector); static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); unsigned long kvm_arm_hyp_percpu_base[NR_CPUS]; -/* Hypervisor VA of the indirect vector trampoline page */ -static void *__kvm_bp_vect_base; -/* - * Slot in the hyp vector page for use by the indirect vector trampoline - * when mitigation against Spectre-v2 is not required. - */ -static int __kvm_harden_el2_vector_slot; - /* The VMID used in the VTTBR */ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1); static u32 kvm_next_vmid; @@ -1303,33 +1295,38 @@ static unsigned long nvhe_percpu_order(void) return size ? get_order(size) : 0; } -static int kvm_map_vectors(void) +/* A lookup table holding the hypervisor VA for each vector slot */ +static void *hyp_spectre_vector_selector[BP_HARDEN_EL2_SLOTS]; + +static void kvm_init_vector_slot(void *base, enum arm64_hyp_spectre_vector slot) { - int slot; + hyp_spectre_vector_selector[slot] = base + (slot * SZ_2K); +} + +static int kvm_init_vector_slots(void) +{ + int err; + void *base; + + base = kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector)); + kvm_init_vector_slot(base, HYP_VECTOR_DIRECT); + + base = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs)); + kvm_init_vector_slot(base, HYP_VECTOR_SPECTRE_DIRECT); - /* - * SV2 = ARM64_SPECTRE_V2 - * HEL2 = ARM64_HARDEN_EL2_VECTORS - * - * !SV2 + !HEL2 -> use direct vectors - * SV2 + !HEL2 -> use hardened vectors in place - * !SV2 + HEL2 -> allocate one vector slot and use exec mapping - * SV2 + HEL2 -> use hardened vectors and use exec mapping - */ if (!cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)) return 0; - /* - * Always allocate a spare vector slot, as we don't know yet which CPUs - * have a BP hardening slot that we can reuse. - */ - slot = atomic_inc_return(&arm64_el2_vector_last_slot); - BUG_ON(slot >= BP_HARDEN_EL2_SLOTS); - __kvm_harden_el2_vector_slot = slot; + if (!has_vhe()) { + err = create_hyp_exec_mappings(__pa_symbol(__bp_harden_hyp_vecs), + __BP_HARDEN_HYP_VECS_SZ, &base); + if (err) + return err; + } - return create_hyp_exec_mappings(__pa_symbol(__bp_harden_hyp_vecs), - __BP_HARDEN_HYP_VECS_SZ, - &__kvm_bp_vect_base); + kvm_init_vector_slot(base, HYP_VECTOR_INDIRECT); + kvm_init_vector_slot(base, HYP_VECTOR_SPECTRE_INDIRECT); + return 0; } static void cpu_init_hyp_mode(void) @@ -1406,24 +1403,9 @@ static void cpu_hyp_reset(void) static void cpu_set_hyp_vector(void) { struct bp_hardening_data *data = this_cpu_ptr(&bp_hardening_data); - void *vect = kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector)); - int slot = -1; - - if (cpus_have_const_cap(ARM64_SPECTRE_V2) && data->fn) { - vect = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs)); - slot = data->hyp_vectors_slot; - } - - if (this_cpu_has_cap(ARM64_HARDEN_EL2_VECTORS) && !has_vhe()) { - vect = __kvm_bp_vect_base; - if (slot == -1) - slot = __kvm_harden_el2_vector_slot; - } - - if (slot != -1) - vect += slot * SZ_2K; + void *vector = hyp_spectre_vector_selector[data->slot]; - *this_cpu_ptr_hyp_sym(kvm_hyp_vector) = (unsigned long)vect; + *this_cpu_ptr_hyp_sym(kvm_hyp_vector) = (unsigned long)vector; } static void cpu_hyp_reinit(void) @@ -1661,12 +1643,6 @@ static int init_hyp_mode(void) goto out_err; } - err = kvm_map_vectors(); - if (err) { - kvm_err("Cannot map vectors\n"); - goto out_err; - } - /* * Map the Hyp stack pages */ @@ -1810,6 +1786,12 @@ int kvm_arch_init(void *opaque) goto out_err; } + err = kvm_init_vector_slots(); + if (err) { + kvm_err("Cannot initialise vector slots\n"); + goto out_err; + } + err = init_subsystems(); if (err) goto out_hyp; diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index 4a81eddabcd83..687598e41b21f 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -10,4 +10,4 @@ subdir-ccflags-y := -I$(incdir) \ -DDISABLE_BRANCH_PROFILING \ $(DISABLE_STACKLEAK_PLUGIN) -obj-$(CONFIG_KVM) += vhe/ nvhe/ pgtable.o smccc_wa.o +obj-$(CONFIG_KVM) += vhe/ nvhe/ pgtable.o diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 874eacdabc64f..d0a3660c72568 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -188,52 +188,62 @@ SYM_CODE_START(__kvm_hyp_vector) valid_vect el1_error // Error 32-bit EL1 SYM_CODE_END(__kvm_hyp_vector) -.macro hyp_ventry - .align 7 +.macro spectrev2_smccc_wa1_smc + sub sp, sp, #(8 * 4) + stp x2, x3, [sp, #(8 * 0)] + stp x0, x1, [sp, #(8 * 2)] + mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1 + smc #0 + ldp x2, x3, [sp, #(8 * 0)] + add sp, sp, #(8 * 2) +.endm + +.macro hyp_ventry indirect, spectrev2 + .align 7 1: esb - .rept 26 - nop - .endr -/* - * The default sequence is to directly branch to the KVM vectors, - * using the computed offset. This applies for VHE as well as - * !ARM64_HARDEN_EL2_VECTORS. The first vector must always run the preamble. - * - * For ARM64_HARDEN_EL2_VECTORS configurations, this gets replaced - * with: - * - * stp x0, x1, [sp, #-16]! - * movz x0, #(addr & 0xffff) - * movk x0, #((addr >> 16) & 0xffff), lsl #16 - * movk x0, #((addr >> 32) & 0xffff), lsl #32 - * br x0 - * - * Where: - * addr = kern_hyp_va(__kvm_hyp_vector) + vector-offset + KVM_VECTOR_PREAMBLE. - * See kvm_patch_vector_branch for details. - */ -alternative_cb kvm_patch_vector_branch + .if \spectrev2 != 0 + spectrev2_smccc_wa1_smc + .else stp x0, x1, [sp, #-16]! - b __kvm_hyp_vector + (1b - 0b + KVM_VECTOR_PREAMBLE) + .endif + .if \indirect != 0 + alternative_cb kvm_patch_vector_branch + /* + * For ARM64_HARDEN_EL2_VECTORS configurations, these NOPs get replaced + * with: + * + * movz x0, #(addr & 0xffff) + * movk x0, #((addr >> 16) & 0xffff), lsl #16 + * movk x0, #((addr >> 32) & 0xffff), lsl #32 + * br x0 + * + * Where: + * addr = kern_hyp_va(__kvm_hyp_vector) + vector-offset + KVM_VECTOR_PREAMBLE. + * See kvm_patch_vector_branch for details. + */ nop nop nop -alternative_cb_end + nop + alternative_cb_end + .endif + b __kvm_hyp_vector + (1b - 0b + KVM_VECTOR_PREAMBLE) .endm -.macro generate_vectors +.macro generate_vectors indirect, spectrev2 0: .rept 16 - hyp_ventry + hyp_ventry \indirect, \spectrev2 .endr .org 0b + SZ_2K // Safety measure .endm .align 11 SYM_CODE_START(__bp_harden_hyp_vecs) - .rept BP_HARDEN_EL2_SLOTS - generate_vectors - .endr + generate_vectors indirect = 0, spectrev2 = 0 // HYP_VECTOR_DIRECT + generate_vectors indirect = 0, spectrev2 = 1 // HYP_VECTOR_SPECTRE_DIRECT + generate_vectors indirect = 1, spectrev2 = 0 // HYP_VECTOR_INDIRECT + generate_vectors indirect = 1, spectrev2 = 1 // HYP_VECTOR_SPECTRE_INDIRECT 1: .org __bp_harden_hyp_vecs + __BP_HARDEN_HYP_VECS_SZ .org 1b SYM_CODE_END(__bp_harden_hyp_vecs) diff --git a/arch/arm64/kvm/hyp/smccc_wa.S b/arch/arm64/kvm/hyp/smccc_wa.S deleted file mode 100644 index b0441dbdf68bd..0000000000000 --- a/arch/arm64/kvm/hyp/smccc_wa.S +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2015-2018 - ARM Ltd - * Author: Marc Zyngier - */ - -#include -#include - -#include -#include - - /* - * This is not executed directly and is instead copied into the vectors - * by install_bp_hardening_cb(). - */ - .data - .pushsection .rodata - .global __smccc_workaround_1_smc -SYM_DATA_START(__smccc_workaround_1_smc) - esb - sub sp, sp, #(8 * 4) - stp x2, x3, [sp, #(8 * 0)] - stp x0, x1, [sp, #(8 * 2)] - mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1 - smc #0 - ldp x2, x3, [sp, #(8 * 0)] - ldp x0, x1, [sp, #(8 * 2)] - add sp, sp, #(8 * 4) -1: .org __smccc_workaround_1_smc + __SMCCC_WORKAROUND_1_SMC_SZ - .org 1b -SYM_DATA_END(__smccc_workaround_1_smc) diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c index 760db2c84b9bd..cc8e8756600fc 100644 --- a/arch/arm64/kvm/va_layout.c +++ b/arch/arm64/kvm/va_layout.c @@ -137,7 +137,7 @@ void kvm_patch_vector_branch(struct alt_instr *alt, u64 addr; u32 insn; - BUG_ON(nr_inst != 5); + BUG_ON(nr_inst != 4); if (!cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS) || WARN_ON_ONCE(has_vhe())) { @@ -160,15 +160,6 @@ void kvm_patch_vector_branch(struct alt_instr *alt, */ addr += KVM_VECTOR_PREAMBLE; - /* stp x0, x1, [sp, #-16]! */ - insn = aarch64_insn_gen_load_store_pair(AARCH64_INSN_REG_0, - AARCH64_INSN_REG_1, - AARCH64_INSN_REG_SP, - -16, - AARCH64_INSN_VARIANT_64BIT, - AARCH64_INSN_LDST_STORE_PAIR_PRE_INDEX); - *updptr++ = cpu_to_le32(insn); - /* movz x0, #(addr & 0xffff) */ insn = aarch64_insn_gen_movewide(AARCH64_INSN_REG_0, (u16)addr, -- GitLab From c4792b6dbc5070fe67f4cdcfdad39416333acbe0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 13 Nov 2020 11:38:45 +0000 Subject: [PATCH 0201/1894] arm64: spectre: Rename ARM64_HARDEN_EL2_VECTORS to ARM64_SPECTRE_V3A Since ARM64_HARDEN_EL2_VECTORS is really a mitigation for Spectre-v3a, rename it accordingly for consistency with the v2 and v4 mitigation. Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Cc: Marc Zyngier Cc: Quentin Perret Link: https://lore.kernel.org/r/20201113113847.21619-9-will@kernel.org --- Documentation/arm64/memory.rst | 2 +- arch/arm64/include/asm/cpucaps.h | 2 +- arch/arm64/include/asm/spectre.h | 2 +- arch/arm64/kernel/cpu_errata.c | 6 +++--- arch/arm64/kernel/proton-pack.c | 13 ++++++++++--- arch/arm64/kvm/arm.c | 8 ++++---- arch/arm64/kvm/hyp/hyp-entry.S | 3 +-- arch/arm64/kvm/va_layout.c | 4 +--- 8 files changed, 22 insertions(+), 18 deletions(-) diff --git a/Documentation/arm64/memory.rst b/Documentation/arm64/memory.rst index cf03b3290800c..75df7fb30a7ba 100644 --- a/Documentation/arm64/memory.rst +++ b/Documentation/arm64/memory.rst @@ -100,7 +100,7 @@ hypervisor maps kernel pages in EL2 at a fixed (and potentially random) offset from the linear mapping. See the kern_hyp_va macro and kvm_update_va_mask function for more details. MMIO devices such as GICv2 gets mapped next to the HYP idmap page, as do vectors when -ARM64_HARDEN_EL2_VECTORS is selected for particular CPUs. +ARM64_SPECTRE_V3A is enabled for particular CPUs. When using KVM with the Virtualization Host Extensions, no additional mappings are created, since the host kernel runs directly in EL2. diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index e7d98997c09c3..162539d4c8cd0 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -21,7 +21,7 @@ #define ARM64_HAS_VIRT_HOST_EXTN 11 #define ARM64_WORKAROUND_CAVIUM_27456 12 #define ARM64_HAS_32BIT_EL0 13 -#define ARM64_HARDEN_EL2_VECTORS 14 +#define ARM64_SPECTRE_V3A 14 #define ARM64_HAS_CNP 15 #define ARM64_HAS_NO_FPSIMD 16 #define ARM64_WORKAROUND_REPEAT_TLBI 17 diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h index fa86b8f655b7b..b4df683ed8009 100644 --- a/arch/arm64/include/asm/spectre.h +++ b/arch/arm64/include/asm/spectre.h @@ -83,7 +83,7 @@ enum mitigation_state arm64_get_spectre_v2_state(void); bool has_spectre_v2(const struct arm64_cpu_capabilities *cap, int scope); void spectre_v2_enable_mitigation(const struct arm64_cpu_capabilities *__unused); -void cpu_el2_vector_harden_enable(const struct arm64_cpu_capabilities *__unused); +void spectre_v3a_enable_mitigation(const struct arm64_cpu_capabilities *__unused); enum mitigation_state arm64_get_spectre_v4_state(void); bool has_spectre_v4(const struct arm64_cpu_capabilities *cap, int scope); diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 7a040abaedeac..949d5615a47eb 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -460,10 +460,10 @@ const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_RANDOMIZE_BASE { /* Must come after the Spectre-v2 entry */ - .desc = "EL2 vector hardening", - .capability = ARM64_HARDEN_EL2_VECTORS, + .desc = "Spectre-v3a", + .capability = ARM64_SPECTRE_V3A, ERRATA_MIDR_RANGE_LIST(ca57_a72), - .cpu_enable = cpu_el2_vector_harden_enable, + .cpu_enable = spectre_v3a_enable_mitigation, }, #endif { diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index a4ba941297509..cf9f8b885aea2 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Handle detection, reporting and mitigation of Spectre v1, v2 and v4, as + * Handle detection, reporting and mitigation of Spectre v1, v2, v3a and v4, as * detailed at: * * https://developer.arm.com/support/arm-security-updates/speculative-processor-vulnerability @@ -270,11 +270,18 @@ void spectre_v2_enable_mitigation(const struct arm64_cpu_capabilities *__unused) update_mitigation_state(&spectre_v2_state, state); } -void cpu_el2_vector_harden_enable(const struct arm64_cpu_capabilities *__unused) +/* + * Spectre-v3a. + * + * Phew, there's not an awful lot to do here! We just instruct EL2 to use + * an indirect trampoline for the hyp vectors so that guests can't read + * VBAR_EL2 to defeat randomisation of the hypervisor VA layout. + */ +void spectre_v3a_enable_mitigation(const struct arm64_cpu_capabilities *__unused) { struct bp_hardening_data *data = this_cpu_ptr(&bp_hardening_data); - if (this_cpu_has_cap(ARM64_HARDEN_EL2_VECTORS)) + if (this_cpu_has_cap(ARM64_SPECTRE_V3A)) data->slot += HYP_VECTOR_INDIRECT; } diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 5e6fe5eef3ec4..4cc29300f5337 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1314,7 +1314,7 @@ static int kvm_init_vector_slots(void) base = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs)); kvm_init_vector_slot(base, HYP_VECTOR_SPECTRE_DIRECT); - if (!cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)) + if (!cpus_have_const_cap(ARM64_SPECTRE_V3A)) return 0; if (!has_vhe()) { @@ -1388,15 +1388,15 @@ static void cpu_hyp_reset(void) * placed in one of the vector slots, which is executed before jumping * to the real vectors. * - * - If the CPU also has the ARM64_HARDEN_EL2_VECTORS cap, the slot + * - If the CPU also has the ARM64_SPECTRE_V3A cap, the slot * containing the hardening sequence is mapped next to the idmap page, * and executed before jumping to the real vectors. * - * - If the CPU only has the ARM64_HARDEN_EL2_VECTORS cap, then an + * - If the CPU only has the ARM64_SPECTRE_V3A cap, then an * empty slot is selected, mapped next to the idmap page, and * executed before jumping to the real vectors. * - * Note that ARM64_HARDEN_EL2_VECTORS is somewhat incompatible with + * Note that ARM64_SPECTRE_V3A is somewhat incompatible with * VHE, as we don't have hypervisor-specific mappings. If the system * is VHE and yet selects this capability, it will be ignored. */ diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index d0a3660c72568..e3249e2dda09e 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -209,8 +209,7 @@ SYM_CODE_END(__kvm_hyp_vector) .if \indirect != 0 alternative_cb kvm_patch_vector_branch /* - * For ARM64_HARDEN_EL2_VECTORS configurations, these NOPs get replaced - * with: + * For ARM64_SPECTRE_V3A configurations, these NOPs get replaced with: * * movz x0, #(addr & 0xffff) * movk x0, #((addr >> 16) & 0xffff), lsl #16 diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c index cc8e8756600fc..02362fa27be46 100644 --- a/arch/arm64/kvm/va_layout.c +++ b/arch/arm64/kvm/va_layout.c @@ -139,10 +139,8 @@ void kvm_patch_vector_branch(struct alt_instr *alt, BUG_ON(nr_inst != 4); - if (!cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS) || - WARN_ON_ONCE(has_vhe())) { + if (!cpus_have_const_cap(ARM64_SPECTRE_V3A) || WARN_ON_ONCE(has_vhe())) return; - } /* * Compute HYP VA by using the same computation as kern_hyp_va() -- GitLab From cd1f56b930e857c170d8a04f0f989bfb8a1b5ac1 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 13 Nov 2020 11:38:46 +0000 Subject: [PATCH 0202/1894] arm64: spectre: Consolidate spectre-v3a detection The spectre-v3a mitigation is split between cpu_errata.c and spectre.c, with the former handling detection of the problem and the latter handling enabling of the workaround. Move the detection logic alongside the enabling logic, like we do for the other spectre mitigations. Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Cc: Marc Zyngier Cc: Quentin Perret Link: https://lore.kernel.org/r/20201113113847.21619-10-will@kernel.org --- arch/arm64/include/asm/spectre.h | 1 + arch/arm64/kernel/cpu_errata.c | 13 ++----------- arch/arm64/kernel/proton-pack.c | 12 ++++++++++++ 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h index b4df683ed8009..12a4eb5e4e6b1 100644 --- a/arch/arm64/include/asm/spectre.h +++ b/arch/arm64/include/asm/spectre.h @@ -83,6 +83,7 @@ enum mitigation_state arm64_get_spectre_v2_state(void); bool has_spectre_v2(const struct arm64_cpu_capabilities *cap, int scope); void spectre_v2_enable_mitigation(const struct arm64_cpu_capabilities *__unused); +bool has_spectre_v3a(const struct arm64_cpu_capabilities *cap, int scope); void spectre_v3a_enable_mitigation(const struct arm64_cpu_capabilities *__unused); enum mitigation_state arm64_get_spectre_v4_state(void); diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 949d5615a47eb..0709c827f2b3e 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -196,16 +196,6 @@ has_neoverse_n1_erratum_1542419(const struct arm64_cpu_capabilities *entry, return is_midr_in_range(midr, &range) && has_dic; } -#ifdef CONFIG_RANDOMIZE_BASE - -static const struct midr_range ca57_a72[] = { - MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), - MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), - {}, -}; - -#endif - #ifdef CONFIG_ARM64_WORKAROUND_REPEAT_TLBI static const struct arm64_cpu_capabilities arm64_repeat_tlbi_list[] = { #ifdef CONFIG_QCOM_FALKOR_ERRATUM_1009 @@ -462,7 +452,8 @@ const struct arm64_cpu_capabilities arm64_errata[] = { /* Must come after the Spectre-v2 entry */ .desc = "Spectre-v3a", .capability = ARM64_SPECTRE_V3A, - ERRATA_MIDR_RANGE_LIST(ca57_a72), + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + .matches = has_spectre_v3a, .cpu_enable = spectre_v3a_enable_mitigation, }, #endif diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index cf9f8b885aea2..d89be98829980 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -277,6 +277,18 @@ void spectre_v2_enable_mitigation(const struct arm64_cpu_capabilities *__unused) * an indirect trampoline for the hyp vectors so that guests can't read * VBAR_EL2 to defeat randomisation of the hypervisor VA layout. */ +bool has_spectre_v3a(const struct arm64_cpu_capabilities *entry, int scope) +{ + static const struct midr_range spectre_v3a_unsafe_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), + {}, + }; + + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + return is_midr_in_range_list(read_cpuid_id(), spectre_v3a_unsafe_list); +} + void spectre_v3a_enable_mitigation(const struct arm64_cpu_capabilities *__unused) { struct bp_hardening_data *data = this_cpu_ptr(&bp_hardening_data); -- GitLab From 4f6a36fed71dfe51df0ae9a282dc87c76d629bff Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 13 Nov 2020 11:38:47 +0000 Subject: [PATCH 0203/1894] KVM: arm64: Remove redundant hyp vectors entry The hyp vectors entry corresponding to HYP_VECTOR_DIRECT (i.e. when neither Spectre-v2 nor Spectre-v3a are present) is unused, as we can simply dispatch straight to __kvm_hyp_vector in this case. Remove the redundant vector, and massage the logic for resolving a slot to a vectors entry. Reported-by: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201113113847.21619-11-will@kernel.org --- arch/arm64/include/asm/spectre.h | 2 +- arch/arm64/kvm/arm.c | 9 ++++++++- arch/arm64/kvm/hyp/hyp-entry.S | 1 - 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h index 12a4eb5e4e6b1..4e6d90a4fbe07 100644 --- a/arch/arm64/include/asm/spectre.h +++ b/arch/arm64/include/asm/spectre.h @@ -10,7 +10,7 @@ #define __ASM_SPECTRE_H #define BP_HARDEN_EL2_SLOTS 4 -#define __BP_HARDEN_HYP_VECS_SZ (BP_HARDEN_EL2_SLOTS * SZ_2K) +#define __BP_HARDEN_HYP_VECS_SZ ((BP_HARDEN_EL2_SLOTS - 1) * SZ_2K) #ifndef __ASSEMBLY__ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 4cc29300f5337..9af9652ab9a31 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1298,9 +1298,16 @@ static unsigned long nvhe_percpu_order(void) /* A lookup table holding the hypervisor VA for each vector slot */ static void *hyp_spectre_vector_selector[BP_HARDEN_EL2_SLOTS]; +static int __kvm_vector_slot2idx(enum arm64_hyp_spectre_vector slot) +{ + return slot - (slot != HYP_VECTOR_DIRECT); +} + static void kvm_init_vector_slot(void *base, enum arm64_hyp_spectre_vector slot) { - hyp_spectre_vector_selector[slot] = base + (slot * SZ_2K); + int idx = __kvm_vector_slot2idx(slot); + + hyp_spectre_vector_selector[slot] = base + (idx * SZ_2K); } static int kvm_init_vector_slots(void) diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index e3249e2dda09e..d179056e1af81 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -239,7 +239,6 @@ SYM_CODE_END(__kvm_hyp_vector) .align 11 SYM_CODE_START(__bp_harden_hyp_vecs) - generate_vectors indirect = 0, spectrev2 = 0 // HYP_VECTOR_DIRECT generate_vectors indirect = 0, spectrev2 = 1 // HYP_VECTOR_SPECTRE_DIRECT generate_vectors indirect = 1, spectrev2 = 0 // HYP_VECTOR_INDIRECT generate_vectors indirect = 1, spectrev2 = 1 // HYP_VECTOR_SPECTRE_INDIRECT -- GitLab From 601366678c93618f37a685332c0ba07e5556798c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 30 Oct 2020 14:47:42 +0900 Subject: [PATCH 0204/1894] perf data: Allow to use stdio functions for pipe mode When perf data is in a pipe, it reads each event separately using read(2) syscall. This is a huge performance bottleneck when processing large data like in perf inject. Also perf inject needs to use write(2) syscall for the output. So convert it to use buffer I/O functions in stdio library for pipe data. This makes inject-build-id bench time drops from 20ms to 8ms. $ perf bench internals inject-build-id # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 8.074 msec (+- 0.013 msec) Average time per event: 0.792 usec (+- 0.001 usec) Average memory usage: 8328 KB (+- 0 KB) Average build-id-all injection took: 5.490 msec (+- 0.008 msec) Average time per event: 0.538 usec (+- 0.001 usec) Average memory usage: 7563 KB (+- 0 KB) This patch enables it just for perf inject when used with pipe (it's a default behavior). Maybe we could do it for perf record and/or report later.. Committer testing: Before: $ perf stat -r 5 perf bench internals inject-build-id # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 13.605 msec (+- 0.064 msec) Average time per event: 1.334 usec (+- 0.006 usec) Average memory usage: 12220 KB (+- 7 KB) Average build-id-all injection took: 11.458 msec (+- 0.058 msec) Average time per event: 1.123 usec (+- 0.006 usec) Average memory usage: 11546 KB (+- 8 KB) # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 13.673 msec (+- 0.057 msec) Average time per event: 1.341 usec (+- 0.006 usec) Average memory usage: 12508 KB (+- 8 KB) Average build-id-all injection took: 11.437 msec (+- 0.046 msec) Average time per event: 1.121 usec (+- 0.004 usec) Average memory usage: 11812 KB (+- 7 KB) # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 13.641 msec (+- 0.069 msec) Average time per event: 1.337 usec (+- 0.007 usec) Average memory usage: 12302 KB (+- 8 KB) Average build-id-all injection took: 10.820 msec (+- 0.106 msec) Average time per event: 1.061 usec (+- 0.010 usec) Average memory usage: 11616 KB (+- 7 KB) # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 13.379 msec (+- 0.074 msec) Average time per event: 1.312 usec (+- 0.007 usec) Average memory usage: 12334 KB (+- 8 KB) Average build-id-all injection took: 11.288 msec (+- 0.071 msec) Average time per event: 1.107 usec (+- 0.007 usec) Average memory usage: 11657 KB (+- 8 KB) # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 13.534 msec (+- 0.058 msec) Average time per event: 1.327 usec (+- 0.006 usec) Average memory usage: 12264 KB (+- 8 KB) Average build-id-all injection took: 11.557 msec (+- 0.076 msec) Average time per event: 1.133 usec (+- 0.007 usec) Average memory usage: 11593 KB (+- 8 KB) Performance counter stats for 'perf bench internals inject-build-id' (5 runs): 4,060.05 msec task-clock:u # 1.566 CPUs utilized ( +- 0.65% ) 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 101,888 page-faults:u # 0.025 M/sec ( +- 0.12% ) 3,745,833,163 cycles:u # 0.923 GHz ( +- 0.10% ) (83.22%) 194,346,613 stalled-cycles-frontend:u # 5.19% frontend cycles idle ( +- 0.57% ) (83.30%) 708,495,034 stalled-cycles-backend:u # 18.91% backend cycles idle ( +- 0.48% ) (83.48%) 5,629,328,628 instructions:u # 1.50 insn per cycle # 0.13 stalled cycles per insn ( +- 0.21% ) (83.57%) 1,236,697,927 branches:u # 304.602 M/sec ( +- 0.16% ) (83.44%) 17,564,877 branch-misses:u # 1.42% of all branches ( +- 0.23% ) (82.99%) 2.5934 +- 0.0128 seconds time elapsed ( +- 0.49% ) $ After: $ perf stat -r 5 perf bench internals inject-build-id # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 8.560 msec (+- 0.125 msec) Average time per event: 0.839 usec (+- 0.012 usec) Average memory usage: 12520 KB (+- 8 KB) Average build-id-all injection took: 5.789 msec (+- 0.054 msec) Average time per event: 0.568 usec (+- 0.005 usec) Average memory usage: 11919 KB (+- 9 KB) # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 8.639 msec (+- 0.111 msec) Average time per event: 0.847 usec (+- 0.011 usec) Average memory usage: 12732 KB (+- 8 KB) Average build-id-all injection took: 5.647 msec (+- 0.069 msec) Average time per event: 0.554 usec (+- 0.007 usec) Average memory usage: 12093 KB (+- 7 KB) # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 8.551 msec (+- 0.096 msec) Average time per event: 0.838 usec (+- 0.009 usec) Average memory usage: 12739 KB (+- 8 KB) Average build-id-all injection took: 5.617 msec (+- 0.061 msec) Average time per event: 0.551 usec (+- 0.006 usec) Average memory usage: 12105 KB (+- 7 KB) # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 8.403 msec (+- 0.097 msec) Average time per event: 0.824 usec (+- 0.010 usec) Average memory usage: 12770 KB (+- 8 KB) Average build-id-all injection took: 5.611 msec (+- 0.085 msec) Average time per event: 0.550 usec (+- 0.008 usec) Average memory usage: 12134 KB (+- 8 KB) # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 8.518 msec (+- 0.102 msec) Average time per event: 0.835 usec (+- 0.010 usec) Average memory usage: 12518 KB (+- 10 KB) Average build-id-all injection took: 5.503 msec (+- 0.073 msec) Average time per event: 0.540 usec (+- 0.007 usec) Average memory usage: 11882 KB (+- 8 KB) Performance counter stats for 'perf bench internals inject-build-id' (5 runs): 2,394.88 msec task-clock:u # 1.577 CPUs utilized ( +- 0.83% ) 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 103,181 page-faults:u # 0.043 M/sec ( +- 0.11% ) 3,548,172,030 cycles:u # 1.482 GHz ( +- 0.30% ) (83.26%) 81,537,700 stalled-cycles-frontend:u # 2.30% frontend cycles idle ( +- 1.54% ) (83.24%) 876,631,544 stalled-cycles-backend:u # 24.71% backend cycles idle ( +- 1.14% ) (83.45%) 5,960,361,707 instructions:u # 1.68 insn per cycle # 0.15 stalled cycles per insn ( +- 0.27% ) (83.26%) 1,269,413,491 branches:u # 530.054 M/sec ( +- 0.10% ) (83.48%) 11,372,453 branch-misses:u # 0.90% of all branches ( +- 0.52% ) (83.31%) 1.51874 +- 0.00642 seconds time elapsed ( +- 0.42% ) $ Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Ian Rogers Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201030054742.87740-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-inject.c | 2 ++ tools/perf/util/data.c | 41 ++++++++++++++++++++++++++++++++++--- tools/perf/util/data.h | 11 +++++++++- tools/perf/util/header.c | 8 ++++---- tools/perf/util/session.c | 7 ++++--- 5 files changed, 58 insertions(+), 11 deletions(-) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 452a75fe68e5f..14d6c88fed76f 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -853,10 +853,12 @@ int cmd_inject(int argc, const char **argv) .output = { .path = "-", .mode = PERF_DATA_MODE_WRITE, + .use_stdio = true, }, }; struct perf_data data = { .mode = PERF_DATA_MODE_READ, + .use_stdio = true, }; int ret; diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index c47aa34fdc0a7..05bbcb663c41c 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -174,8 +174,21 @@ static bool check_pipe(struct perf_data *data) is_pipe = true; } - if (is_pipe) - data->file.fd = fd; + if (is_pipe) { + if (data->use_stdio) { + const char *mode; + + mode = perf_data__is_read(data) ? "r" : "w"; + data->file.fptr = fdopen(fd, mode); + + if (data->file.fptr == NULL) { + data->file.fd = fd; + data->use_stdio = false; + } + } else { + data->file.fd = fd; + } + } return data->is_pipe = is_pipe; } @@ -334,6 +347,9 @@ int perf_data__open(struct perf_data *data) if (check_pipe(data)) return 0; + /* currently it allows stdio for pipe only */ + data->use_stdio = false; + if (!data->path) data->path = "perf.data"; @@ -353,7 +369,21 @@ void perf_data__close(struct perf_data *data) perf_data__close_dir(data); zfree(&data->file.path); - close(data->file.fd); + + if (data->use_stdio) + fclose(data->file.fptr); + else + close(data->file.fd); +} + +ssize_t perf_data__read(struct perf_data *data, void *buf, size_t size) +{ + if (data->use_stdio) { + if (fread(buf, size, 1, data->file.fptr) == 1) + return size; + return feof(data->file.fptr) ? 0 : -1; + } + return readn(data->file.fd, buf, size); } ssize_t perf_data_file__write(struct perf_data_file *file, @@ -365,6 +395,11 @@ ssize_t perf_data_file__write(struct perf_data_file *file, ssize_t perf_data__write(struct perf_data *data, void *buf, size_t size) { + if (data->use_stdio) { + if (fwrite(buf, size, 1, data->file.fptr) == 1) + return size; + return -1; + } return perf_data_file__write(&data->file, buf, size); } diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h index 75947ef6bc170..c563fcbb0288c 100644 --- a/tools/perf/util/data.h +++ b/tools/perf/util/data.h @@ -2,6 +2,7 @@ #ifndef __PERF_DATA_H #define __PERF_DATA_H +#include #include enum perf_data_mode { @@ -16,7 +17,10 @@ enum perf_dir_version { struct perf_data_file { char *path; - int fd; + union { + int fd; + FILE *fptr; + }; unsigned long size; }; @@ -26,6 +30,7 @@ struct perf_data { bool is_pipe; bool is_dir; bool force; + bool use_stdio; enum perf_data_mode mode; struct { @@ -62,11 +67,15 @@ static inline bool perf_data__is_single_file(struct perf_data *data) static inline int perf_data__fd(struct perf_data *data) { + if (data->use_stdio) + return fileno(data->file.fptr); + return data->file.fd; } int perf_data__open(struct perf_data *data); void perf_data__close(struct perf_data *data); +ssize_t perf_data__read(struct perf_data *data, void *buf, size_t size); ssize_t perf_data__write(struct perf_data *data, void *buf, size_t size); ssize_t perf_data_file__write(struct perf_data_file *file, diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 598285a21dadd..b9171bd11fe69 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -3647,7 +3647,8 @@ static int perf_file_section__process(struct perf_file_section *section, } static int perf_file_header__read_pipe(struct perf_pipe_file_header *header, - struct perf_header *ph, int fd, + struct perf_header *ph, + struct perf_data* data, bool repipe) { struct feat_fd ff = { @@ -3656,7 +3657,7 @@ static int perf_file_header__read_pipe(struct perf_pipe_file_header *header, }; ssize_t ret; - ret = readn(fd, header, sizeof(*header)); + ret = perf_data__read(data, header, sizeof(*header)); if (ret <= 0) return -1; @@ -3679,8 +3680,7 @@ static int perf_header__read_pipe(struct perf_session *session) struct perf_header *header = &session->header; struct perf_pipe_file_header f_header; - if (perf_file_header__read_pipe(&f_header, header, - perf_data__fd(session->data), + if (perf_file_header__read_pipe(&f_header, header, session->data, session->repipe) < 0) { pr_debug("incompatible file format\n"); return -EINVAL; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 098080287c687..5cc722b6fe7c0 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1937,7 +1937,6 @@ static int __perf_session__process_pipe_events(struct perf_session *session) { struct ordered_events *oe = &session->ordered_events; struct perf_tool *tool = session->tool; - int fd = perf_data__fd(session->data); union perf_event *event; uint32_t size, cur_size = 0; void *buf = NULL; @@ -1957,7 +1956,8 @@ static int __perf_session__process_pipe_events(struct perf_session *session) ordered_events__set_copy_on_queue(oe, true); more: event = buf; - err = readn(fd, event, sizeof(struct perf_event_header)); + err = perf_data__read(session->data, event, + sizeof(struct perf_event_header)); if (err <= 0) { if (err == 0) goto done; @@ -1989,7 +1989,8 @@ more: p += sizeof(struct perf_event_header); if (size - sizeof(struct perf_event_header)) { - err = readn(fd, p, size - sizeof(struct perf_event_header)); + err = perf_data__read(session->data, p, + size - sizeof(struct perf_event_header)); if (err <= 0) { if (err == 0) { pr_err("unexpected end of event stream\n"); -- GitLab From 3d05181a085c7a070746c838ea25aebf25f17d52 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Mon, 2 Nov 2020 16:00:25 +0800 Subject: [PATCH 0205/1894] perf vendor events: Update Skylake client events to v50 - Update Skylake events to v50. - Update Skylake JSON metrics from TMAM 4.0. - Fix the issue in DRAM_Parallel_Reads - Fix the perf test warning Before: root@kbl-ppc:~# perf stat -M DRAM_Parallel_Reads -- sleep 1 event syntax error: '{arb/event=0x80,umask=0x2/,arb/event=0x80,umask=0x2,thresh=1/}:W' \___ unknown term 'thresh' for pmu 'uncore_arb' valid terms: event,edge,inv,umask,cmask,config,config1,config2,name,period,percore Initial error: event syntax error: '..umask=0x2/,arb/event=0x80,umask=0x2,thresh=1/}:W' \___ Cannot find PMU `arb'. Missing kernel support? root@kbl-ppc:~# perf test metrics 10: PMU events : 10.3: Parsing of PMU event table metrics : Skip (some metrics failed) 10.4: Parsing of PMU event table metrics with fake PMUs: Ok 67: Parse and process metrics : Ok After: root@kbl-ppc:~# perf stat -M MEM_Parallel_Reads -- sleep 1 Performance counter stats for 'system wide': 4,951,646 arb/event=0x80,umask=0x2/ # 26.30 MEM_Parallel_Reads (50.04%) 188,251 arb/event=0x80,umask=0x2,cmask=1/ (49.96%) 1.000867010 seconds time elapsed root@kbl-ppc:~# perf test metrics 10: PMU events : 10.3: Parsing of PMU event table metrics : Ok 10.4: Parsing of PMU event table metrics with fake PMUs: Ok 67: Parse and process metrics : Ok Signed-off-by: Jin Yao Tested-by: Namhyung Kim Acked-by: Andi Kleen Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/lkml/93fae76f-ce2b-ab0b-3ae9-cc9a2b4cbaec@linux.intel.com/ Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/x86/skylake/cache.json | 4100 ++++++++--------- .../arch/x86/skylake/floating-point.json | 76 +- .../pmu-events/arch/x86/skylake/frontend.json | 644 +-- .../pmu-events/arch/x86/skylake/memory.json | 2279 ++++----- .../pmu-events/arch/x86/skylake/other.json | 60 +- .../pmu-events/arch/x86/skylake/pipeline.json | 1266 ++--- .../arch/x86/skylake/skl-metrics.json | 271 +- .../arch/x86/skylake/virtual-memory.json | 374 +- 8 files changed, 4560 insertions(+), 4510 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/skylake/cache.json b/tools/perf/pmu-events/arch/x86/skylake/cache.json index 720458139049c..27ea2b00ad007 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/cache.json +++ b/tools/perf/pmu-events/arch/x86/skylake/cache.json @@ -1,2928 +1,2926 @@ [ { - "PublicDescription": "Counts the number of demand Data Read requests that miss L2 cache. Only not rejected loads are counted.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x21", - "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS", - "SampleAfterValue": "200003", - "BriefDescription": "Demand Data Read miss L2, no rejects", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.", - "EventCode": "0x24", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x22", - "EventName": "L2_RQSTS.RFO_MISS", - "SampleAfterValue": "200003", - "BriefDescription": "RFO requests that miss L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0400400002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts L2 cache misses when fetching instructions.", - "EventCode": "0x24", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x24", - "EventName": "L2_RQSTS.CODE_RD_MISS", - "SampleAfterValue": "200003", - "BriefDescription": "L2 cache misses when fetching instructions", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0400108000", + "Offcore": "1", + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Demand requests that miss L2 cache.", - "EventCode": "0x24", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x27", - "EventName": "L2_RQSTS.ALL_DEMAND_MISS", - "SampleAfterValue": "200003", - "BriefDescription": "Demand requests that miss L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0100040002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache.", - "EventCode": "0x24", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x38", - "EventName": "L2_RQSTS.PF_MISS", - "SampleAfterValue": "200003", - "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC0080001", + "Offcore": "1", + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "All requests that miss L2 cache.", - "EventCode": "0x24", + "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM", "Counter": "0,1,2,3", - "UMask": "0x3f", - "EventName": "L2_RQSTS.MISS", - "SampleAfterValue": "200003", - "BriefDescription": "All requests that miss L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xB0", + "EventName": "OFFCORE_REQUESTS.DEMAND_RFO", + "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.", + "SampleAfterValue": "100003", + "UMask": "0x4" }, { - "PublicDescription": "Counts the number of demand Data Read requests, initiated by load instructions, that hit L2 cache", - "EventCode": "0x24", + "BriefDescription": "Counts all demand code readshave any response type.", "Counter": "0,1,2,3", - "UMask": "0xc1", - "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", - "SampleAfterValue": "200003", - "BriefDescription": "Demand Data Read requests that hit L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0000010004", + "Offcore": "1", + "PublicDescription": "Counts all demand code readshave any response type.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.", - "EventCode": "0x24", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0xc2", - "EventName": "L2_RQSTS.RFO_HIT", - "SampleAfterValue": "200003", - "BriefDescription": "RFO requests that hit L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0100020002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.", - "EventCode": "0x24", + "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache", "Counter": "0,1,2,3", - "UMask": "0xc4", - "EventName": "L2_RQSTS.CODE_RD_HIT", - "SampleAfterValue": "200003", - "BriefDescription": "L2 cache hits when fetching instructions, code reads.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache.", + "CounterHTOff": "0,1,2,3,4,5,6,7", "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0xd8", - "EventName": "L2_RQSTS.PF_HIT", + "EventName": "L2_RQSTS.PF_MISS", + "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache.", "SampleAfterValue": "200003", - "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x38" }, { - "PublicDescription": "Counts the number of demand Data Read requests (including requests from L1D hardware prefetchers). These loads may hit or miss L2 cache. Only non rejected loads are counted.", - "EventCode": "0x24", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0xe1", - "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD", - "SampleAfterValue": "200003", - "BriefDescription": "Demand Data Read requests", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC01C0004", + "Offcore": "1", + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.", - "EventCode": "0x24", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0xe2", - "EventName": "L2_RQSTS.ALL_RFO", - "SampleAfterValue": "200003", - "BriefDescription": "RFO requests to L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0100040001", + "Offcore": "1", + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts the total number of L2 code requests.", - "EventCode": "0x24", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0xe4", - "EventName": "L2_RQSTS.ALL_CODE_RD", - "SampleAfterValue": "200003", - "BriefDescription": "L2 code requests", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0040100001", + "Offcore": "1", + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Demand requests to L2 cache.", - "EventCode": "0x24", + "BriefDescription": "Demand requests that miss L2 cache", "Counter": "0,1,2,3", - "UMask": "0xe7", - "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES", - "SampleAfterValue": "200003", - "BriefDescription": "Demand requests to L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Counts the total number of requests from the L2 hardware prefetchers.", + "CounterHTOff": "0,1,2,3,4,5,6,7", "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0xf8", - "EventName": "L2_RQSTS.ALL_PF", + "EventName": "L2_RQSTS.ALL_DEMAND_MISS", + "PublicDescription": "Demand requests that miss L2 cache.", "SampleAfterValue": "200003", - "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x27" }, { - "PublicDescription": "All L2 requests.", - "EventCode": "0x24", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0xff", - "EventName": "L2_RQSTS.REFERENCES", - "SampleAfterValue": "200003", - "BriefDescription": "All L2 requests", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0400100002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all misses to the L3.", - "EventCode": "0x2E", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x41", - "Errata": "SKL057", - "EventName": "LONGEST_LAT_CACHE.MISS", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0080028000", + "Offcore": "1", + "PublicDescription": "Counts any other requests", "SampleAfterValue": "100003", - "BriefDescription": "Core-originated cacheable demand requests missed L3", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all accesses to the L3.", - "EventCode": "0x2E", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x4f", - "Errata": "SKL057", - "EventName": "LONGEST_LAT_CACHE.REFERENCE", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0040080004", + "Offcore": "1", + "PublicDescription": "Counts all demand code reads", "SampleAfterValue": "100003", - "BriefDescription": "Core-originated cacheable demand requests that refer to L3", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Counts duration of L1D miss outstanding, that is each cycle number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch.Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.", - "EventCode": "0x48", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "L1D_PEND_MISS.PENDING", - "SampleAfterValue": "2000003", - "BriefDescription": "L1D miss outstandings duration in cycles", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC0408000", + "Offcore": "1", + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts duration of L1D miss outstanding in cycles.", - "EventCode": "0x48", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "L1D_PEND_MISS.PENDING_CYCLES", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with L1D load Misses outstanding.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0400400004", + "Offcore": "1", + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0x48", + "BriefDescription": "Retired load instructions with L3 cache hits as data sources", "Counter": "0,1,2,3", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD1", + "EventName": "MEM_LOAD_RETIRED.L3_HIT", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L3 cache.", + "SampleAfterValue": "50021", + "UMask": "0x4" }, { - "PublicDescription": "Number of times a request needed a FB (Fill Buffer) entry but there was no entry available for it. A request includes cacheable/uncacheable demands that are load, store or SW prefetch instructions.", - "EventCode": "0x48", + "BriefDescription": "L2 writebacks that access L2 cache", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "L1D_PEND_MISS.FB_FULL", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times a request needed a FB entry but there was no entry available for it. That is the FB unavailability was dominant reason for blocking the request. A request includes cacheable/uncacheable demands that is load, store or SW prefetch.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xF0", + "EventName": "L2_TRANS.L2_WB", + "PublicDescription": "Counts L2 writebacks that access L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0x40" }, { - "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.", - "EventCode": "0x51", + "BriefDescription": "L2 cache lines filling L2", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "L1D.REPLACEMENT", - "SampleAfterValue": "2000003", - "BriefDescription": "L1D data line replacements", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xF1", + "EventName": "L2_LINES_IN.ALL", + "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.", + "SampleAfterValue": "100003", + "UMask": "0x1f" }, { - "PublicDescription": "Counts the number of offcore outstanding Demand Data Read transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor. See the corresponding Umask under OFFCORE_REQUESTS.Note: A prefetch promoted to Demand is counted from the promotion point.", - "EventCode": "0x60", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD", - "SampleAfterValue": "2000003", - "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0400408000", + "Offcore": "1", + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).", - "EventCode": "0x60", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x00401C0002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0x60", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.", - "CounterMask": "6", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0400040004", + "Offcore": "1", + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", - "EventCode": "0x60", + "BriefDescription": "Demand Data Read requests sent to uncore", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD", - "SampleAfterValue": "2000003", - "BriefDescription": "Offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore, every cycle.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xB0", + "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD", + "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", - "EventCode": "0x60", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC0400004", + "Offcore": "1", + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts the number of offcore outstanding RFO (store) transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", - "EventCode": "0x60", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO", - "SampleAfterValue": "2000003", - "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1000400001", + "Offcore": "1", + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", - "EventCode": "0x60", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0080080002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", - "EventCode": "0x60", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", - "SampleAfterValue": "2000003", - "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0080100001", + "Offcore": "1", + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", - "EventCode": "0x60", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0100400004", + "Offcore": "1", + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.", - "EventCode": "0xB0", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10001C0004", + "Offcore": "1", + "PublicDescription": "Counts all demand code reads", "SampleAfterValue": "100003", - "BriefDescription": "Demand Data Read requests sent to uncore", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Counts both cacheable and non-cacheable code read requests.", - "EventCode": "0xB0", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x00801C0002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", "SampleAfterValue": "100003", - "BriefDescription": "Cacheable and noncachaeble code read requests", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.", - "EventCode": "0xB0", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "OFFCORE_REQUESTS.DEMAND_RFO", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0080100004", + "Offcore": "1", + "PublicDescription": "Counts all demand code reads", "SampleAfterValue": "100003", - "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.", - "EventCode": "0xB0", + "BriefDescription": "Retired load instructions missed L3 cache as data sources", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD", - "SampleAfterValue": "100003", - "BriefDescription": "Demand and prefetch data reads", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD1", + "EventName": "MEM_LOAD_RETIRED.L3_MISS", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L3 cache.", + "SampleAfterValue": "100007", + "UMask": "0x20" }, { - "PublicDescription": "Counts memory transactions reached the super queue including requests initiated by the core, all L3 prefetches, page walks, etc..", - "EventCode": "0xB0", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x80", - "EventName": "OFFCORE_REQUESTS.ALL_REQUESTS", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0040020001", + "Offcore": "1", + "PublicDescription": "Counts demand data reads", "SampleAfterValue": "100003", - "BriefDescription": "Any memory transaction that reached the SQ.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Counts the number of cases when the offcore requests buffer cannot take more entries for the core. This can happen when the superqueue does not contain eligible entries, or when L1D writeback pending FIFO requests is full.Note: Writeback pending FIFO has six entries.", - "EventCode": "0xB2", + "BriefDescription": "All retired store instructions.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD0", + "EventName": "MEM_INST_RETIRED.ALL_STORES", + "L1_Hit_Indication": "1", + "PEBS": "1", "SampleAfterValue": "2000003", - "BriefDescription": "Offcore requests buffer cannot take more entries for this thread core.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x82" }, { - "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", - "EventCode": "0xB7, 0xBB", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0080020004", + "Offcore": "1", + "PublicDescription": "Counts all demand code reads", "SampleAfterValue": "100003", - "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PEBS": "1", - "PublicDescription": "Retired load instructions that miss the STLB.", - "EventCode": "0xD0", + "BriefDescription": "Counts the number of lines that are silently dropped by L2 cache when triggered by an L2 cache fill. These lines are typically in Shared or Exclusive state. A non-threaded event.", "Counter": "0,1,2,3", - "UMask": "0x11", - "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS", - "SampleAfterValue": "100003", - "BriefDescription": "Retired load instructions that miss the STLB. (Precise Event)", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xF2", + "EventName": "L2_LINES_OUT.SILENT", + "SampleAfterValue": "200003", + "UMask": "0x1" }, { - "PEBS": "1", - "PublicDescription": "Retired store instructions that miss the STLB.", - "EventCode": "0xD0", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x12", - "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES", - "SampleAfterValue": "100003", - "BriefDescription": "Retired store instructions that miss the STLB. (Precise Event)", "CounterHTOff": "0,1,2,3", - "Data_LA": "1", - "L1_Hit_Indication": "1" + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0040048000", + "Offcore": "1", + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PEBS": "1", - "EventCode": "0xD0", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x21", - "EventName": "MEM_INST_RETIRED.LOCK_LOADS", - "SampleAfterValue": "100007", - "BriefDescription": "Retired load instructions with locked access. (Precise Event)", "CounterHTOff": "0,1,2,3", - "Data_LA": "1" + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1000088000", + "Offcore": "1", + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PEBS": "1", - "EventCode": "0xD0", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x41", - "EventName": "MEM_INST_RETIRED.SPLIT_LOADS", - "SampleAfterValue": "100003", - "BriefDescription": "Retired load instructions that split across a cacheline boundary. (Precise Event)", "CounterHTOff": "0,1,2,3", - "Data_LA": "1" + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0040400004", + "Offcore": "1", + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PEBS": "1", - "EventCode": "0xD0", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x42", - "EventName": "MEM_INST_RETIRED.SPLIT_STORES", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x02001C0002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", "SampleAfterValue": "100003", - "BriefDescription": "Retired store instructions that split across a cacheline boundary. (Precise Event)", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1", - "L1_Hit_Indication": "1" + "UMask": "0x1" }, { - "PEBS": "1", - "EventCode": "0xD0", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x81", - "EventName": "MEM_INST_RETIRED.ALL_LOADS", - "SampleAfterValue": "2000003", - "BriefDescription": "All retired load instructions. (Precise Event)", "CounterHTOff": "0,1,2,3", - "Data_LA": "1" + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0080020001", + "Offcore": "1", + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PEBS": "1", - "PublicDescription": "All retired store instructions.", - "EventCode": "0xD0", + "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.", "Counter": "0,1,2,3", - "UMask": "0x82", - "EventName": "MEM_INST_RETIRED.ALL_STORES", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD", + "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", "SampleAfterValue": "2000003", - "BriefDescription": "All retired store instructions. (Precise Event)", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1", - "L1_Hit_Indication": "1" + "UMask": "0x8" }, { - "PEBS": "1", - "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.", - "EventCode": "0xD1", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "MEM_LOAD_RETIRED.L1_HIT", - "SampleAfterValue": "2000003", - "BriefDescription": "Retired load instructions with L1 cache hits as data sources", "CounterHTOff": "0,1,2,3", - "Data_LA": "1" + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x02001C0004", + "Offcore": "1", + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PEBS": "1", - "PublicDescription": "Retired load instructions with L2 cache hits as data sources.", - "EventCode": "0xD1", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "MEM_LOAD_RETIRED.L2_HIT", - "SampleAfterValue": "100003", - "BriefDescription": "Retired load instructions with L2 cache hits as data sources", "CounterHTOff": "0,1,2,3", - "Data_LA": "1" + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0400040002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PEBS": "1", - "PublicDescription": "Retired load instructions with L3 cache hits as data sources.", - "EventCode": "0xD1", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "MEM_LOAD_RETIRED.L3_HIT", - "SampleAfterValue": "50021", - "BriefDescription": "Retired load instructions with L3 cache hits as data sources", "CounterHTOff": "0,1,2,3", - "Data_LA": "1" + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1000028000", + "Offcore": "1", + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PEBS": "1", - "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L1 cache.", - "EventCode": "0xD1", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "MEM_LOAD_RETIRED.L1_MISS", - "SampleAfterValue": "100003", - "BriefDescription": "Retired load instructions missed L1 cache as data sources", "CounterHTOff": "0,1,2,3", - "Data_LA": "1" + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0200028000", + "Offcore": "1", + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PEBS": "1", - "PublicDescription": "Retired load instructions missed L2 cache as data sources.", - "EventCode": "0xD1", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "MEM_LOAD_RETIRED.L2_MISS", - "SampleAfterValue": "50021", - "BriefDescription": "Retired load instructions missed L2 cache as data sources", "CounterHTOff": "0,1,2,3", - "Data_LA": "1" + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x04001C0001", + "Offcore": "1", + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PEBS": "1", - "PublicDescription": "Retired load instructions missed L3 cache as data sources.", - "EventCode": "0xD1", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "MEM_LOAD_RETIRED.L3_MISS", - "SampleAfterValue": "100007", - "BriefDescription": "Retired load instructions missed L3 cache as data sources", "CounterHTOff": "0,1,2,3", - "Data_LA": "1" + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0100400001", + "Offcore": "1", + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PEBS": "1", - "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready.", - "EventCode": "0xD1", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x40", - "EventName": "MEM_LOAD_RETIRED.FB_HIT", - "SampleAfterValue": "100007", - "BriefDescription": "Retired load instructions which data sources were load missed L1 but hit FB due to preceding miss to the same cache line with data not ready", "CounterHTOff": "0,1,2,3", - "Data_LA": "1" + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x04001C8000", + "Offcore": "1", + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PEBS": "1", - "EventCode": "0xD2", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS", - "SampleAfterValue": "20011", - "BriefDescription": "Retired load instructions which data sources were L3 hit and cross-core snoop missed in on-pkg core cache.", "CounterHTOff": "0,1,2,3", - "Data_LA": "1" + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1000080001", + "Offcore": "1", + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PEBS": "1", - "PublicDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache.", - "EventCode": "0xD2", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT", - "SampleAfterValue": "20011", - "BriefDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache", "CounterHTOff": "0,1,2,3", - "Data_LA": "1" + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0200020001", + "Offcore": "1", + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PEBS": "1", - "PublicDescription": "Retired load instructions which data sources were HitM responses from shared L3.", - "EventCode": "0xD2", + "BriefDescription": "Core-originated cacheable demand requests missed L3", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM", - "SampleAfterValue": "20011", - "BriefDescription": "Retired load instructions which data sources were HitM responses from shared L3", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Errata": "SKL057", + "EventCode": "0x2E", + "EventName": "LONGEST_LAT_CACHE.MISS", + "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all misses to the L3.", + "SampleAfterValue": "100003", + "UMask": "0x41" }, { - "PEBS": "1", - "PublicDescription": "Retired load instructions which data sources were hits in L3 without snoops required.", - "EventCode": "0xD2", + "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NONE", - "SampleAfterValue": "100003", - "BriefDescription": "Retired load instructions which data sources were hits in L3 without snoops required", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.ALL_PF", + "PublicDescription": "Counts the total number of requests from the L2 hardware prefetchers.", + "SampleAfterValue": "200003", + "UMask": "0xf8" }, { - "PEBS": "1", - "EventCode": "0xD4", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "MEM_LOAD_MISC_RETIRED.UC", - "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions with at least 1 uncacheable load or lock.", "CounterHTOff": "0,1,2,3", - "Data_LA": "1" + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC0080004", + "Offcore": "1", + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts L2 writebacks that access L2 cache.", - "EventCode": "0xF0", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x40", - "EventName": "L2_TRANS.L2_WB", - "SampleAfterValue": "200003", - "BriefDescription": "L2 writebacks that access L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0100400002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.", - "EventCode": "0xF1", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1f", - "EventName": "L2_LINES_IN.ALL", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0040040002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", "SampleAfterValue": "100003", - "BriefDescription": "L2 cache lines filling L2", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0xF2", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "L2_LINES_OUT.SILENT", - "SampleAfterValue": "200003", - "BriefDescription": "Counts the number of lines that are silently dropped by L2 cache when triggered by an L2 cache fill. These lines are typically in Shared or Exclusive state. A non-threaded event.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1000400002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0xF2", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "L2_LINES_OUT.NON_SILENT", - "SampleAfterValue": "200003", - "BriefDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines are in Modified state. Modified lines are written back to L3", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0100028000", + "Offcore": "1", + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "This event is deprecated. Refer to new event L2_LINES_OUT.USELESS_HWPF", - "EventCode": "0xF2", + "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "L2_LINES_OUT.USELESS_PREF", - "SampleAfterValue": "200003", - "BriefDescription": "This event is deprecated. Refer to new event L2_LINES_OUT.USELESS_HWPF", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO", + "PublicDescription": "Counts the number of offcore outstanding RFO (store) transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", + "SampleAfterValue": "2000003", + "UMask": "0x4" }, { - "EventCode": "0xF2", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "L2_LINES_OUT.USELESS_HWPF", - "SampleAfterValue": "200003", - "BriefDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0200080004", + "Offcore": "1", + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts the number of cache line split locks sent to the uncore.", - "EventCode": "0xF4", + "BriefDescription": "RFO requests that miss L2 cache", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "SQ_MISC.SPLIT_LOCK", - "SampleAfterValue": "100003", - "BriefDescription": "Number of cache line split locks sent to uncore.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.RFO_MISS", + "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0x22" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC0408000", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0400040001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000408000", + "BriefDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines are in Modified state. Modified lines are written back to L3", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xF2", + "EventName": "L2_LINES_OUT.NON_SILENT", + "SampleAfterValue": "200003", + "UMask": "0x2" + }, + { "BriefDescription": "Counts any other requests", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0040028000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400408000", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0200040002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200408000", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0200408000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100408000", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080408000", + "BriefDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xF2", + "EventName": "L2_LINES_OUT.USELESS_HWPF", + "SampleAfterValue": "200003", + "UMask": "0x4" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040408000", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0200040001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC01C8000", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0200100001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10001C8000", + "BriefDescription": "Counts all demand data writes (RFOs)have any response type.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0000010002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)have any response type.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04001C8000", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x04001C0002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02001C8000", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x00401C0004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01001C8000", + "BriefDescription": "All requests that miss L2 cache", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.MISS", + "PublicDescription": "All requests that miss L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0x3f" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00801C8000", + "BriefDescription": "L2 code requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.ALL_CODE_RD", + "PublicDescription": "Counts the total number of L2 code requests.", + "SampleAfterValue": "200003", + "UMask": "0xe4" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00401C8000", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC0100002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC0108000", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0400100004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000108000", + "BriefDescription": "RFO requests that hit L2 cache", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.RFO_HIT", + "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0xc2" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400108000", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", "BriefDescription": "Counts any other requests", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0080048000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200108000", + "BriefDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD2", + "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT", + "PEBS": "1", + "PublicDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache.", + "SampleAfterValue": "20011", + "UMask": "0x2" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100108000", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10001C0001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080108000", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1000080002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040108000", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1000040002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { "BriefDescription": "Counts any other requests", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC0028000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC0088000", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000088000", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0400080001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400088000", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1000080004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200088000", + "BriefDescription": "Retired load instructions missed L1 cache as data sources", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD1", + "EventName": "MEM_LOAD_RETIRED.L1_MISS", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L1 cache.", "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x8" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100088000", + "BriefDescription": "L2 cache misses when fetching instructions", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.CODE_RD_MISS", + "PublicDescription": "Counts L2 cache misses when fetching instructions.", + "SampleAfterValue": "200003", + "UMask": "0x24" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080088000", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x01001C0001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040088000", + "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD", + "PublicDescription": "Counts the number of offcore outstanding Demand Data Read transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor. See the corresponding Umask under OFFCORE_REQUESTS.Note: A prefetch promoted to Demand is counted from the promotion point.", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC0048000", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0040080002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000048000", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC01C0001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400048000", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0400028000", + "Offcore": "1", + "PublicDescription": "Counts any other requests", "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { "BriefDescription": "Counts any other requests", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0400088000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200048000", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0080400001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100048000", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0080400002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { "BriefDescription": "Counts any other requests", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x02001C8000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080048000", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0100020001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040048000", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0100080002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC0028000", + "BriefDescription": "Cycles with offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD", + "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", + "SampleAfterValue": "2000003", + "UMask": "0x2" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000028000", + "BriefDescription": "Demand requests to L2 cache", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES", + "PublicDescription": "Demand requests to L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0xe7" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400028000", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0100048000", + "Offcore": "1", + "PublicDescription": "Counts any other requests", "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { "BriefDescription": "Counts any other requests", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0100108000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200028000", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1000020001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100028000", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0040020002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { "BriefDescription": "Counts any other requests", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1000108000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080028000", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0040100002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040028000", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC0040002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests have any response type.", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000018000", + "AnyThread": "1", + "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.ANY_RESPONSE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests have any response type.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x48", + "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC0400004", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC01C8000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000400004", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x02001C0001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400400004", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0200088000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200400004", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1000100001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100400004", + "BriefDescription": "Core-originated cacheable demand requests that refer to L3", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Errata": "SKL057", + "EventCode": "0x2E", + "EventName": "LONGEST_LAT_CACHE.REFERENCE", + "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all accesses to the L3.", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x4f" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080400004", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0200048000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040400004", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC0400001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC01C0004", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC0020004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10001C0004", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0080108000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04001C0004", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0040080001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02001C0004", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0100100002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01001C0004", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC0040004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00801C0004", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0040040001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00401C0004", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0100020004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC0100004", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0200020004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000100004", + "BriefDescription": "Retired load instructions that miss the STLB.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD0", + "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS", + "PEBS": "1", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x11" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400100004", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0200108000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200100004", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0200400001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100100004", + "BriefDescription": "Counts demand data readshave any response type.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0000010001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data readshave any response type.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080100004", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1000400004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040100004", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0080080004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC0080004", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10001C8000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000080004", + "BriefDescription": "L1D data line replacements", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x51", + "EventName": "L1D.REPLACEMENT", + "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400080004", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0200040004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200080004", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC0400002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100080004", + "BriefDescription": "Retired instructions with at least 1 uncacheable load or lock.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD4", + "EventName": "MEM_LOAD_MISC_RETIRED.UC", + "PEBS": "1", + "SampleAfterValue": "100007", + "UMask": "0x4" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080080004", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1000040004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040080004", + "BriefDescription": "Retired load instructions which data sources were load missed L1 but hit FB due to preceding miss to the same cache line with data not ready", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD1", + "EventName": "MEM_LOAD_RETIRED.FB_HIT", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready.", + "SampleAfterValue": "100007", + "UMask": "0x40" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC0040004", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC0020001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000040004", + "BriefDescription": "This event is deprecated. Refer to new event L2_LINES_OUT.USELESS_HWPF", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xF2", + "EventName": "L2_LINES_OUT.USELESS_PREF", + "SampleAfterValue": "200003", + "UMask": "0x4" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400040004", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1000020002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200040004", + "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.PF_HIT", + "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0xd8" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100040004", + "BriefDescription": "Demand Data Read miss L2, no rejects", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS", + "PublicDescription": "Counts the number of demand Data Read requests that miss L2 cache. Only not rejected loads are counted.", + "SampleAfterValue": "200003", + "UMask": "0x21" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080040004", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0040020004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040040004", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0400100001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC0020004", + "BriefDescription": "Retired load instructions which data sources were hits in L3 without snoops required", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD2", + "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NONE", + "PEBS": "1", + "PublicDescription": "Retired load instructions which data sources were hits in L3 without snoops required.", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x8" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000020004", + "BriefDescription": "All retired load instructions.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD0", + "EventName": "MEM_INST_RETIRED.ALL_LOADS", + "PEBS": "1", + "SampleAfterValue": "2000003", + "UMask": "0x81" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400020004", + "BriefDescription": "Retired load instructions which data sources were L3 hit and cross-core snoop missed in on-pkg core cache.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD2", + "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS", + "PEBS": "1", + "SampleAfterValue": "20011", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200020004", + "BriefDescription": "Demand Data Read requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD", + "PublicDescription": "Counts the number of demand Data Read requests (including requests from L1D hardware prefetchers). These loads may hit or miss L2 cache. Only non rejected loads are counted.", + "SampleAfterValue": "200003", + "UMask": "0xe1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100020004", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0080020002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080020004", + "BriefDescription": "All L2 requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.REFERENCES", + "PublicDescription": "All L2 requests.", + "SampleAfterValue": "200003", + "UMask": "0xff" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040020004", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1000100002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that have any response type.", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000010004", + "BriefDescription": "Cycles with L1D load Misses outstanding.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that have any response type.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x48", + "EventName": "L1D_PEND_MISS.PENDING_CYCLES", + "PublicDescription": "Counts duration of L1D miss outstanding in cycles.", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC0400002", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0100088000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000400002", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0200400004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400400002", + "BriefDescription": "Cycles with offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", + "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", + "SampleAfterValue": "2000003", + "UMask": "0x4" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200400002", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC0048000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100400002", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x00401C8000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080400002", + "BriefDescription": "Number of cache line split locks sent to uncore.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xF4", + "EventName": "SQ_MISC.SPLIT_LOCK", + "PublicDescription": "Counts the number of cache line split locks sent to the uncore.", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x10" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040400002", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", "BriefDescription": "Counts all demand data writes (RFOs)", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC01C0002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC01C0002", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0040408000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10001C0002", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10001C0002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04001C0002", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02001C0002", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", "BriefDescription": "Counts all demand data writes (RFOs)", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0400080002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01001C0002", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00801C0002", + "BriefDescription": "Retired load instructions with L2 cache hits as data sources", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD1", + "EventName": "MEM_LOAD_RETIRED.L2_HIT", + "PEBS": "1", + "PublicDescription": "Retired load instructions with L2 cache hits as data sources.", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x2" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00401C0002", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC0040001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC0100002", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0100100001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000100002", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", "BriefDescription": "Counts all demand data writes (RFOs)", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0200100002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400100002", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0080040004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200100002", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1000020004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100100002", + "BriefDescription": "L1D miss outstandings duration in cycles", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x48", + "EventName": "L1D_PEND_MISS.PENDING", + "PublicDescription": "Counts duration of L1D miss outstanding, that is each cycle number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch.Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080100002", + "BriefDescription": "Demand Data Read requests that hit L2 cache", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", + "PublicDescription": "Counts the number of demand Data Read requests, initiated by load instructions, that hit L2 cache", + "SampleAfterValue": "200003", + "UMask": "0xc1" + }, + { "BriefDescription": "Counts all demand data writes (RFOs)", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0040400002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040100002", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x04001C0004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC0080002", + "BriefDescription": "Retired load instructions which data sources were HitM responses from shared L3", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD2", + "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM", + "PEBS": "1", + "PublicDescription": "Retired load instructions which data sources were HitM responses from shared L3.", + "SampleAfterValue": "20011", + "UMask": "0x4" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000080002", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC0100001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400080002", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", "BriefDescription": "Counts all demand data writes (RFOs)", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x01001C0002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200080002", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100080002", + "BriefDescription": "Retired store instructions that split across a cacheline boundary.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD0", + "EventName": "MEM_INST_RETIRED.SPLIT_STORES", + "L1_Hit_Indication": "1", + "PEBS": "1", + "PublicDescription": "Counts retired store instructions that split across a cacheline boundary.", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x42" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080080002", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0080400004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040080002", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1000100004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC0040002", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x00801C0004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000040002", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", "BriefDescription": "Counts all demand data writes (RFOs)", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC0020002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400040002", + "BriefDescription": "Any memory transaction that reached the SQ.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xB0", + "EventName": "OFFCORE_REQUESTS.ALL_REQUESTS", + "PublicDescription": "Counts memory transactions reached the super queue including requests initiated by the core, all L3 prefetches, page walks, etc..", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x80" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200040002", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0040400001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100040002", + "BriefDescription": "Counts any other requestshave any response type.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0000018000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requestshave any response type.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080040002", + "BriefDescription": "Cacheable and noncachaeble code read requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xB0", + "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD", + "PublicDescription": "Counts both cacheable and non-cacheable code read requests.", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x2" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040040002", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0040040004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC0020002", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0100408000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000020002", + "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", + "PublicDescription": "Counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400020002", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC0088000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200020002", + "BriefDescription": "L2 cache hits when fetching instructions, code reads.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.CODE_RD_HIT", + "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.", + "SampleAfterValue": "200003", + "UMask": "0xc4" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100020002", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0080040001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080020002", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0100040004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040020002", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0080088000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs) have any response type.", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000010002", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs) have any response type.", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1000040001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC0400001", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0040088000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000400001", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0200020002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400400001", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0100100004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200400001", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0040100004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100400001", + "BriefDescription": "Number of times a request needed a FB entry but there was no entry available for it. That is the FB unavailability was dominant reason for blocking the request. A request includes cacheable/uncacheable demands that is load, store or SW prefetch.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x48", + "EventName": "L1D_PEND_MISS.FB_FULL", + "PublicDescription": "Number of times a request needed a FB (Fill Buffer) entry but there was no entry available for it. A request includes cacheable/uncacheable demands that are load, store or SW prefetch instructions.", + "SampleAfterValue": "2000003", + "UMask": "0x2" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080400001", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0400048000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040400001", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0040108000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC01C0001", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC0108000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10001C0001", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1000048000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04001C0001", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0400080004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02001C0001", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0100080004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01001C0001", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0200100004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00801C0001", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x00801C0001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00401C0001", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC0100001", + "BriefDescription": "Retired load instructions with L1 cache hits as data sources", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD1", + "EventName": "MEM_LOAD_RETIRED.L1_HIT", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000100001", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1000408000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400100001", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0400020004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200100001", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC0100004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100100001", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", "BriefDescription": "Counts demand data reads", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0400400001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080100001", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040100001", + "BriefDescription": "Retired load instructions with locked access.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD0", + "EventName": "MEM_INST_RETIRED.LOCK_LOADS", + "PEBS": "1", + "SampleAfterValue": "100007", + "UMask": "0x21" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC0080001", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x00801C8000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000080001", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0080040002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400080001", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x01001C0004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200080001", + "BriefDescription": "Demand and prefetch data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xB0", + "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD", + "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x8" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100080001", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0080408000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080080001", + "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040080001", + "BriefDescription": "Retired load instructions that split across a cacheline boundary.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD0", + "EventName": "MEM_INST_RETIRED.SPLIT_LOADS", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions that split across a cacheline boundary.", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x41" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC0040001", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", "BriefDescription": "Counts demand data reads", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0100080001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000040001", + "BriefDescription": "Offcore requests buffer cannot take more entries for this thread core.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xB2", + "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL", + "PublicDescription": "Counts the number of cases when the offcore requests buffer cannot take more entries for the core. This can happen when the superqueue does not contain eligible entries, or when L1D writeback pending FIFO requests is full.Note: Writeback pending FIFO has six entries.", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400040001", + "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "6", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200040001", + "BriefDescription": "Offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore, every cycle.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD", + "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", + "SampleAfterValue": "2000003", + "UMask": "0x2" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100040001", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0200080002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { "BriefDescription": "Counts demand data reads", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0400020001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080040001", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0080100002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040040001", + "BriefDescription": "Retired store instructions that miss the STLB.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD0", + "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES", + "L1_Hit_Indication": "1", + "PEBS": "1", "SampleAfterValue": "100003", + "UMask": "0x12" + }, + { "BriefDescription": "Counts demand data reads", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0200080001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC0020001", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x01001C8000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000020001", + "BriefDescription": "RFO requests to L2 cache", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.ALL_RFO", + "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.", + "SampleAfterValue": "200003", + "UMask": "0xe2" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400020001", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0200400002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200020001", + "BriefDescription": "Retired load instructions missed L2 cache as data sources", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD1", + "EventName": "MEM_LOAD_RETIRED.L2_MISS", + "PEBS": "1", + "PublicDescription": "Retired load instructions missed L2 cache as data sources.", + "SampleAfterValue": "50021", + "UMask": "0x10" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100020001", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0400020002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { "BriefDescription": "Counts demand data reads", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0080080001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080020001", + "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", + "PublicDescription": "Counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", + "SampleAfterValue": "2000003", + "UMask": "0x8" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040020001", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC0080002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads have any response type.", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000010001", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads have any response type.", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x00401C0001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/skylake/floating-point.json b/tools/perf/pmu-events/arch/x86/skylake/floating-point.json index 213dd6230cf2b..834e1cd841fc9 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/skylake/floating-point.json @@ -1,67 +1,67 @@ [ { - "EventCode": "0xC7", + "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC7", + "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE", "SampleAfterValue": "2000003", - "BriefDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x4" }, { - "EventCode": "0xC7", + "BriefDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", "Counter": "0,1,2,3", - "UMask": "0x2", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC7", "EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE", "SampleAfterValue": "2000003", - "BriefDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "EventCode": "0xC7", + "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC7", + "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE", "SampleAfterValue": "2000003", - "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired. Each count represents 2 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x10" }, { - "EventCode": "0xC7", + "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC7", + "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE", "SampleAfterValue": "2000003", - "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x20" }, { - "EventCode": "0xC7", + "BriefDescription": "Cycles with any input/output SSE or FP assist", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0xCA", + "EventName": "FP_ASSIST.ANY", + "PublicDescription": "Counts cycles with any input and output SSE or x87 FP assist. If an input and output assist are detected on the same cycle the event increments by 1.", + "SampleAfterValue": "100003", + "UMask": "0x1e" }, { - "EventCode": "0xC7", + "BriefDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC7", + "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE", "SampleAfterValue": "2000003", - "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired. Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Counts cycles with any input and output SSE or x87 FP assist. If an input and output assist are detected on the same cycle the event increments by 1.", - "EventCode": "0xCA", + "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", "Counter": "0,1,2,3", - "UMask": "0x1e", - "EventName": "FP_ASSIST.ANY", - "SampleAfterValue": "100003", - "BriefDescription": "Cycles with any input/output SSE or FP assist", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC7", + "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE", + "SampleAfterValue": "2000003", + "UMask": "0x8" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/skylake/frontend.json b/tools/perf/pmu-events/arch/x86/skylake/frontend.json index 7fa95a35e3cac..e84504d6adea2 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/frontend.json +++ b/tools/perf/pmu-events/arch/x86/skylake/frontend.json @@ -1,482 +1,516 @@ [ { - "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", - "EventCode": "0x79", + "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss.", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "IDQ.MITE_UOPS", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x80", + "EventName": "ICACHE_16B.IFDATA_STALL", + "PublicDescription": "Cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity.", "SampleAfterValue": "2000003", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x4" }, { - "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ.", - "EventCode": "0x79", + "BriefDescription": "Retired Instructions who experienced iTLB true miss.", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "IDQ.MITE_CYCLES", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.ITLB_MISS", + "MSRIndex": "0x3F7", + "MSRValue": "0x14", + "PEBS": "1", + "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss.", + "SampleAfterValue": "100007", + "TakenAlone": "1", + "UMask": "0x1" }, { - "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.", - "EventCode": "0x79", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall.", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "IDQ.DSB_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_128", + "MSRIndex": "0x3F7", + "MSRValue": "0x408006", + "PEBS": "1", + "SampleAfterValue": "100007", + "TakenAlone": "1", + "UMask": "0x1" }, { - "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.", - "EventCode": "0x79", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path", "Counter": "0,1,2,3", - "UMask": "0x8", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x79", "EventName": "IDQ.DSB_CYCLES", + "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x8" }, { - "PublicDescription": "Counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.", - "EventCode": "0x79", + "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "IDQ.MS_DSB_CYCLES", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "3", + "EventCode": "0x9C", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE", + "PublicDescription": "Counts, on the per-thread basis, cycles when less than 1 uop is delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core >= 3.", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.", - "EventCode": "0x79", + "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.", "Counter": "0,1,2,3", - "UMask": "0x18", - "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xE6", + "EventName": "BACLEARS.ANY", + "PublicDescription": "Counts the number of times the front-end is resteered when it finds a branch instruction in a fetch line. This occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.", - "EventCode": "0x79", + "BriefDescription": "Retired Instructions who experienced decode stream buffer (DSB - the decoded instruction-cache) miss.", "Counter": "0,1,2,3", - "UMask": "0x18", - "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.DSB_MISS", + "MSRIndex": "0x3F7", + "MSRValue": "0x11", + "PEBS": "1", + "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.", + "SampleAfterValue": "100007", + "TakenAlone": "1", + "UMask": "0x1" }, { - "PublicDescription": "Counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.", - "EventCode": "0x79", + "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "IDQ.MS_MITE_UOPS", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "4", + "EventCode": "0x9C", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE", + "PublicDescription": "Counts, on the per-thread basis, cycles when no uops are delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core =4.", "SampleAfterValue": "2000003", - "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Counts the number of cycles 4 uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).", - "EventCode": "0x79", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall.", "Counter": "0,1,2,3", - "UMask": "0x24", - "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles MITE is delivering 4 Uops", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_16", + "MSRIndex": "0x3F7", + "MSRValue": "0x401006", + "PEBS": "1", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.", + "SampleAfterValue": "100007", + "TakenAlone": "1", + "UMask": "0x1" }, { - "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).", - "EventCode": "0x79", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path", "Counter": "0,1,2,3", - "UMask": "0x24", - "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x79", + "EventName": "IDQ.MITE_UOPS", + "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles MITE is delivering any Uop", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x4" }, { - "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.", - "EventCode": "0x79", + "BriefDescription": "Cycles with less than 2 uops delivered by the front end.", "Counter": "0,1,2,3", - "UMask": "0x30", - "EventName": "IDQ.MS_CYCLES", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "2", + "EventCode": "0x9C", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE", + "PublicDescription": "Cycles with less than 2 uops delivered by the front-end.", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", - "EventCode": "0x79", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", "Counter": "0,1,2,3", - "UMask": "0x30", - "EdgeDetect": "1", - "EventName": "IDQ.MS_SWITCHES", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer", + "CounterHTOff": "0,1,2,3,4,5,6,7", "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS). Any instruction over 4 uops will be delivered by the MS. Some instructions such as transcendentals may additionally generate uops from the MS.", "EventCode": "0x79", - "Counter": "0,1,2,3", - "UMask": "0x30", - "EventName": "IDQ.MS_UOPS", + "EventName": "IDQ.MS_CYCLES", + "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.", "SampleAfterValue": "2000003", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x30" }, { - "PublicDescription": "Cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity.", - "EventCode": "0x80", + "BriefDescription": "Cycles MITE is delivering any Uop", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "ICACHE_16B.IFDATA_STALL", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x79", + "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS", + "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x24" }, { - "EventCode": "0x83", + "BriefDescription": "Instruction fetch tag lookups that hit in the instruction cache (L1I). Counts at 64-byte cache-line granularity.", "Counter": "0,1,2,3", - "UMask": "0x1", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x83", "EventName": "ICACHE_64B.IFTAG_HIT", "SampleAfterValue": "200003", - "BriefDescription": "Instruction fetch tag lookups that hit in the instruction cache (L1I). Counts at 64-byte cache-line granularity.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0x83", + "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "ICACHE_64B.IFTAG_MISS", - "SampleAfterValue": "200003", - "BriefDescription": "Instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EdgeDetect": "1", + "EventCode": "0x79", + "EventName": "IDQ.MS_SWITCHES", + "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", + "SampleAfterValue": "2000003", + "UMask": "0x30" }, { - "EventCode": "0x83", + "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss.", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "ICACHE_64B.IFTAG_STALL", - "SampleAfterValue": "200003", - "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.L2_MISS", + "MSRIndex": "0x3F7", + "MSRValue": "0x13", + "PEBS": "1", + "SampleAfterValue": "100007", + "TakenAlone": "1", + "UMask": "0x1" }, { - "PublicDescription": "Counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding \u201c4 \u2013 x\u201d when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when: a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread. b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions). c. Instruction Decode Queue (IDQ) delivers four uops.", - "EventCode": "0x9C", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x79", + "EventName": "IDQ.MITE_CYCLES", + "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ.", "SampleAfterValue": "2000003", - "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x4" }, { - "PublicDescription": "Counts, on the per-thread basis, cycles when no uops are delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core =4.", - "EventCode": "0x9C", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_64", + "MSRIndex": "0x3F7", + "MSRValue": "0x404006", + "PEBS": "1", + "SampleAfterValue": "100007", + "TakenAlone": "1", + "UMask": "0x1" }, { - "PublicDescription": "Counts, on the per-thread basis, cycles when less than 1 uop is delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core >= 3.", - "EventCode": "0x9C", + "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x9C", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE", + "PublicDescription": "Counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding 4 x when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when: a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread. b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions). c. Instruction Decode Queue (IDQ) delivers four uops.", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled", - "CounterMask": "3", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Cycles with less than 2 uops delivered by the front-end.", - "EventCode": "0x9C", + "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x79", + "EventName": "IDQ.MS_MITE_UOPS", + "PublicDescription": "Counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with less than 2 uops delivered by the front end.", - "CounterMask": "2", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x20" }, { - "PublicDescription": "Cycles with less than 3 uops delivered by the front-end.", - "EventCode": "0x9C", + "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with less than 3 uops delivered by the front end.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x83", + "EventName": "ICACHE_64B.IFTAG_STALL", + "SampleAfterValue": "200003", + "UMask": "0x4" }, { - "EventCode": "0x9C", - "Invert": "1", + "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xAB", + "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES", + "PublicDescription": "Counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. MM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.Penalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 02 cycles.", "SampleAfterValue": "2000003", - "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "PublicDescription": "Counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. MM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.Penalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 0\u20132 cycles.", - "EventCode": "0xAB", + "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x79", + "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS", + "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.", "SampleAfterValue": "2000003", - "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x18" }, { - "PEBS": "1", - "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.", - "EventCode": "0xC6", - "MSRValue": "0x11", + "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "FRONTEND_RETIRED.DSB_MISS", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.STLB_MISS", "MSRIndex": "0x3F7", + "MSRValue": "0x15", + "PEBS": "1", + "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.", "SampleAfterValue": "100007", - "BriefDescription": "Retired Instructions who experienced decode stream buffer (DSB - the decoded instruction-cache) miss. Precise Event.", "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PEBS": "1", - "EventCode": "0xC6", - "MSRValue": "0x12", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "FRONTEND_RETIRED.L1I_MISS", - "MSRIndex": "0x3F7", - "SampleAfterValue": "100007", - "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss. Precise Event.", - "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x79", + "EventName": "IDQ.DSB_UOPS", + "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.", + "SampleAfterValue": "2000003", + "UMask": "0x8" }, { - "PEBS": "1", - "EventCode": "0xC6", - "MSRValue": "0x13", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "FRONTEND_RETIRED.L2_MISS", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_512", "MSRIndex": "0x3F7", + "MSRValue": "0x420006", + "PEBS": "1", "SampleAfterValue": "100007", - "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss. Precise Event.", "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PEBS": "1", - "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss.", - "EventCode": "0xC6", - "MSRValue": "0x14", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 8 cycles which was not interrupted by a back-end stall.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "FRONTEND_RETIRED.ITLB_MISS", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_8", "MSRIndex": "0x3F7", + "MSRValue": "0x400806", + "PEBS": "1", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops.", "SampleAfterValue": "100007", - "BriefDescription": "Retired Instructions who experienced iTLB true miss. Precise Event.", "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PEBS": "1", - "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.", - "EventCode": "0xC6", - "MSRValue": "0x15", + "BriefDescription": "Retired instructions after front-end starvation of at least 1 cycle", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "FRONTEND_RETIRED.STLB_MISS", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_1", "MSRIndex": "0x3F7", + "MSRValue": "0x400106", + "PEBS": "2", + "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 1 cycle which was not interrupted by a back-end stall.", "SampleAfterValue": "100007", - "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss. Precise Event.", "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PEBS": "1", - "EventCode": "0xC6", - "MSRValue": "0x400206", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 2 cycles which was not interrupted by a back-end stall.", "Counter": "0,1,2,3", - "UMask": "0x1", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", "EventName": "FRONTEND_RETIRED.LATENCY_GE_2", "MSRIndex": "0x3F7", + "MSRValue": "0x400206", + "PEBS": "1", "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.", "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PEBS": "1", - "EventCode": "0xC6", - "MSRValue": "0x200206", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_2", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_4", "MSRIndex": "0x3F7", + "MSRValue": "0x400406", + "PEBS": "1", "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 2 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.", "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PEBS": "1", - "EventCode": "0xC6", - "MSRValue": "0x400406", + "BriefDescription": "Cycles MITE is delivering 4 Uops", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_4", - "MSRIndex": "0x3F7", - "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall. Precise Event.", - "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "4", + "EventCode": "0x79", + "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS", + "PublicDescription": "Counts the number of cycles 4 uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).", + "SampleAfterValue": "2000003", + "UMask": "0x24" }, { - "PEBS": "1", - "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops.", - "EventCode": "0xC6", - "MSRValue": "0x400806", + "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_8", - "MSRIndex": "0x3F7", - "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 8 cycles which was not interrupted by a back-end stall.", - "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x79", + "EventName": "IDQ.MS_DSB_CYCLES", + "PublicDescription": "Counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.", + "SampleAfterValue": "2000003", + "UMask": "0x10" }, { - "PEBS": "1", - "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.", - "EventCode": "0xC6", - "MSRValue": "0x401006", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_16", - "MSRIndex": "0x3F7", - "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall. Precise Event.", - "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x79", + "EventName": "IDQ.MS_UOPS", + "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS). Any instruction over 4 uops will be delivered by the MS. Some instructions such as transcendentals may additionally generate uops from the MS.", + "SampleAfterValue": "2000003", + "UMask": "0x30" }, { - "PEBS": "1", - "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops.", - "EventCode": "0xC6", - "MSRValue": "0x402006", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_32", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_256", "MSRIndex": "0x3F7", + "MSRValue": "0x410006", + "PEBS": "1", "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall. Precise Event.", "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PEBS": "1", - "EventCode": "0xC6", - "MSRValue": "0x404006", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 2 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_64", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_2", "MSRIndex": "0x3F7", + "MSRValue": "0x200206", + "PEBS": "1", "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall. Precise Event.", "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PEBS": "1", - "EventCode": "0xC6", - "MSRValue": "0x408006", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 3 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_128", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_3", "MSRIndex": "0x3F7", + "MSRValue": "0x300206", + "PEBS": "1", "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall. Precise Event.", "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PEBS": "1", - "EventCode": "0xC6", - "MSRValue": "0x410006", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_256", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1", "MSRIndex": "0x3F7", + "MSRValue": "0x100206", + "PEBS": "1", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.", "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall. Precise Event.", "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PEBS": "1", - "EventCode": "0xC6", - "MSRValue": "0x420006", + "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_512", - "MSRIndex": "0x3F7", - "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall. Precise Event.", - "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "4", + "EventCode": "0x79", + "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS", + "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.", + "SampleAfterValue": "2000003", + "UMask": "0x18" }, { - "PEBS": "1", - "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.", - "EventCode": "0xC6", - "MSRValue": "0x100206", + "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switches", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xAB", + "EventName": "DSB2MITE_SWITCHES.COUNT", + "PublicDescription": "This event counts the number of the Decode Stream Buffer (DSB)-to-MITE switches including all misses because of missing Decode Stream Buffer (DSB) cache and u-arch forced misses.\nNote: Invoking MITE requires two or three cycles delay.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_32", "MSRIndex": "0x3F7", + "MSRValue": "0x402006", + "PEBS": "1", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops.", "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.", "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PEBS": "1", - "EventCode": "0xC6", - "MSRValue": "0x300206", + "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x9C", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK", + "Invert": "1", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x83", + "EventName": "ICACHE_64B.IFTAG_MISS", + "SampleAfterValue": "200003", + "UMask": "0x2" + }, + { + "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.L1I_MISS", "MSRIndex": "0x3F7", + "MSRValue": "0x12", + "PEBS": "1", "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 3 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.", "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" + }, + { + "BriefDescription": "Cycles with less than 3 uops delivered by the front end.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x9C", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE", + "PublicDescription": "Cycles with less than 3 uops delivered by the front-end.", + "SampleAfterValue": "2000003", + "UMask": "0x1" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/skylake/memory.json b/tools/perf/pmu-events/arch/x86/skylake/memory.json index f197b4c7695be..7bd3ae3383437 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/memory.json +++ b/tools/perf/pmu-events/arch/x86/skylake/memory.json @@ -1,1604 +1,1611 @@ [ { - "PublicDescription": "Number of times a TSX line had a cache conflict.", - "EventCode": "0x54", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "TX_MEM.ABORT_CONFLICT", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times a transactional abort was signaled due to a data conflict on a transactionally accessed address", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x54", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "TX_MEM.ABORT_CAPACITY", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times a transactional abort was signaled due to a data capacity limitation for transactional reads or writes.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2000080004", + "Offcore": "1", + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Number of times a TSX Abort was triggered due to a non-release/commit store to lock.", - "EventCode": "0x54", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "TX_MEM.ABORT_HLE_STORE_TO_ELIDED_LOCK", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times a HLE transactional region aborted due to a non XRELEASE prefixed instruction writing to an elided lock in the elision buffer", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0104000001", + "Offcore": "1", + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Number of times a TSX Abort was triggered due to commit but Lock Buffer not empty.", - "EventCode": "0x54", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_NOT_EMPTY", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an HLE transactional execution aborted due to NoAllocatedElisionBuffer being non-zero.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2000100002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Number of times a TSX Abort was triggered due to release/commit but data and address mismatch.", - "EventCode": "0x54", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_MISMATCH", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an HLE transactional execution aborted due to XRELEASE lock not satisfying the address and value requirements in the elision buffer", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0084008000", + "Offcore": "1", + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Number of times a TSX Abort was triggered due to attempting an unsupported alignment from Lock Buffer.", - "EventCode": "0x54", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_UNSUPPORTED_ALIGNMENT", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an HLE transactional execution aborted due to an unsupported read alignment from the elision buffer.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x007C400004", + "Offcore": "1", + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Number of times we could not allocate Lock Buffer.", - "EventCode": "0x54", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x40", - "EventName": "TX_MEM.HLE_ELISION_BUFFER_FULL", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times HLE lock could not be elided due to ElisionBufferAvailable being zero.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x043C408000", + "Offcore": "1", + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0x5d", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "TX_EXEC.MISC1", - "SampleAfterValue": "2000003", - "BriefDescription": "Counts the number of times a class of instructions that may cause a transactional abort was executed. Since this is the count of execution, it may not always cause a transactional abort.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0204000004", + "Offcore": "1", + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Unfriendly TSX abort triggered by a vzeroupper instruction.", - "EventCode": "0x5d", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "TX_EXEC.MISC2", - "SampleAfterValue": "2000003", - "BriefDescription": "Counts the number of times a class of instructions (e.g., vzeroupper) that may cause a transactional abort was executed inside a transactional region", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2000088000", + "Offcore": "1", + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Unfriendly TSX abort triggered by a nest count that is too deep.", - "EventCode": "0x5d", + "BriefDescription": "Cycles with at least 6 Demand Data Read requests that miss L3 cache in the superQ.", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "TX_EXEC.MISC3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "6", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6", "SampleAfterValue": "2000003", - "BriefDescription": "Counts the number of times an instruction execution caused the transactional nest count supported to be exceeded", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x10" }, { - "PublicDescription": "RTM region detected inside HLE.", - "EventCode": "0x5d", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "TX_EXEC.MISC4", - "SampleAfterValue": "2000003", - "BriefDescription": "Counts the number of times a XBEGIN instruction was executed inside an HLE transactional region.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x00BC408000", + "Offcore": "1", + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region.", - "EventCode": "0x5d", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "TX_EXEC.MISC5", - "SampleAfterValue": "2000003", - "BriefDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x103C408000", + "Offcore": "1", + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0x60", + "BriefDescription": "Number of times an HLE execution aborted due to hardware timer expiration.", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC8", + "EventName": "HLE_RETIRED.ABORTED_TIMER", "SampleAfterValue": "2000003", - "BriefDescription": "Counts number of Offcore outstanding Demand Data Read requests that miss L3 cache in the superQ every cycle.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x10" }, { - "EventCode": "0x60", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with at least 1 Demand Data Read requests who miss L3 cache in the superQ.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x023C400001", + "Offcore": "1", + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0x60", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with at least 6 Demand Data Read requests that miss L3 cache in the superQ.", - "CounterMask": "6", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0204000002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0xA3", + "BriefDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts)", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "CYCLE_ACTIVITY.CYCLES_L3_MISS", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC9", + "EventName": "RTM_RETIRED.ABORTED_MEM", + "PublicDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts).", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles while L3 cache miss demand load is outstanding.", - "CounterMask": "2", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x8" }, { - "EventCode": "0xA3", + "BriefDescription": "Number of times an RTM execution aborted due to incompatible memory type", "Counter": "0,1,2,3", - "UMask": "0x6", - "EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC9", + "EventName": "RTM_RETIRED.ABORTED_MEMTYPE", + "PublicDescription": "Number of times an RTM execution aborted due to incompatible memory type.", "SampleAfterValue": "2000003", - "BriefDescription": "Execution stalls while L3 cache miss demand load is outstanding.", - "CounterMask": "6", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x40" }, { - "PublicDescription": "Demand Data Read requests who miss L3 cache.", - "EventCode": "0xB0", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2000080002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", "SampleAfterValue": "100003", - "BriefDescription": "Demand Data Read requests who miss L3 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Counts the number of memory ordering Machine Clears detected. Memory Ordering Machine Clears can result from one of the following:a. memory disambiguation,b. external snoop, orc. cross SMT-HW-thread snoop (stores) hitting load buffer.", - "EventCode": "0xC3", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x2", - "Errata": "SKL089", - "EventName": "MACHINE_CLEARS.MEMORY_ORDERING", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1004008000", + "Offcore": "1", + "PublicDescription": "Counts any other requests", "SampleAfterValue": "100003", - "BriefDescription": "Counts the number of machine clears due to memory order conflicts.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Number of times we entered an HLE region. Does not count nested transactions.", - "EventCode": "0xC8", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "HLE_RETIRED.START", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an HLE execution started.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Number of times HLE commit succeeded.", - "EventCode": "0xC8", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "HLE_RETIRED.COMMIT", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an HLE execution successfully committed", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PEBS": "1", - "PublicDescription": "Number of times HLE abort was triggered. (PEBS)", - "EventCode": "0xC8", + "BriefDescription": "Cycles while L3 cache miss demand load is outstanding.", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "HLE_RETIRED.ABORTED", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "2", + "EventCode": "0xA3", + "EventName": "CYCLE_ACTIVITY.CYCLES_L3_MISS", "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one).", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "EventCode": "0xC8", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "HLE_RETIRED.ABORTED_MEM", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an HLE execution aborted due to various memory events (e.g., read/write capacity and conflicts).", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2000028000", + "Offcore": "1", + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0xC8", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "HLE_RETIRED.ABORTED_TIMER", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an HLE execution aborted due to hardware timer expiration.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FFC408000", + "Offcore": "1", + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).", - "EventCode": "0xC8", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "HLE_RETIRED.ABORTED_UNFRIENDLY", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32", + "MSRIndex": "0x3F6", + "MSRValue": "0x20", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles. Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "100007", + "TakenAlone": "1", + "UMask": "0x1" }, { - "PublicDescription": "Number of times an HLE execution aborted due to incompatible memory type.", - "EventCode": "0xC8", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x40", - "EventName": "HLE_RETIRED.ABORTED_MEMTYPE", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an HLE execution aborted due to incompatible memory type", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x043C400002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0xC8", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x80", - "EventName": "HLE_RETIRED.ABORTED_EVENTS", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an HLE execution aborted due to unfriendly events (such as interrupts).", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2000020004", + "Offcore": "1", + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Number of times we entered an RTM region. Does not count nested transactions.", - "EventCode": "0xC9", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "RTM_RETIRED.START", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an RTM execution started.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0044000002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Number of times RTM commit succeeded.", - "EventCode": "0xC9", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "RTM_RETIRED.COMMIT", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an RTM execution successfully committed", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0204008000", + "Offcore": "1", + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PEBS": "1", - "PublicDescription": "Number of times RTM abort was triggered. (PEBS)", - "EventCode": "0xC9", + "BriefDescription": "Number of times a HLE transactional region aborted due to a non XRELEASE prefixed instruction writing to an elided lock in the elision buffer", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "RTM_RETIRED.ABORTED", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x54", + "EventName": "TX_MEM.ABORT_HLE_STORE_TO_ELIDED_LOCK", + "PublicDescription": "Number of times a TSX Abort was triggered due to a non-release/commit store to lock.", "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an RTM execution aborted due to any reasons (multiple categories may count as one).", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x4" }, { - "PublicDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts).", - "EventCode": "0xC9", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "RTM_RETIRED.ABORTED_MEM", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts)", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x103C400001", + "Offcore": "1", + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0xC9", + "BriefDescription": "Number of times a transactional abort was signaled due to a data conflict on a transactionally accessed address", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "RTM_RETIRED.ABORTED_TIMER", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x54", + "EventName": "TX_MEM.ABORT_CONFLICT", + "PublicDescription": "Number of times a TSX line had a cache conflict.", "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an RTM execution aborted due to uncommon conditions.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions.", - "EventCode": "0xC9", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "RTM_RETIRED.ABORTED_UNFRIENDLY", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1004000001", + "Offcore": "1", + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Number of times an RTM execution aborted due to incompatible memory type.", - "EventCode": "0xC9", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x40", - "EventName": "RTM_RETIRED.ABORTED_MEMTYPE", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an RTM execution aborted due to incompatible memory type", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2000080001", + "Offcore": "1", + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt).", - "EventCode": "0xC9", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.", "Counter": "0,1,2,3", - "UMask": "0x80", - "EventName": "RTM_RETIRED.ABORTED_EVENTS", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt)", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64", + "MSRIndex": "0x3F6", + "MSRValue": "0x40", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles. Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "2003", + "TakenAlone": "1", + "UMask": "0x1" }, { - "PEBS": "2", - "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles. Reported latency may be longer than just the memory latency.", - "EventCode": "0xCD", - "MSRValue": "0x4", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4", - "MSRIndex": "0x3F6", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0204000001", + "Offcore": "1", + "PublicDescription": "Counts demand data reads", "SampleAfterValue": "100003", - "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.", - "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PEBS": "2", - "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles. Reported latency may be longer than just the memory latency.", - "EventCode": "0xCD", - "MSRValue": "0x8", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8", - "MSRIndex": "0x3F6", - "SampleAfterValue": "50021", - "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.", - "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2000020002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PEBS": "2", - "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles. Reported latency may be longer than just the memory latency.", - "EventCode": "0xCD", - "MSRValue": "0x10", + "BriefDescription": "Number of times an HLE transactional execution aborted due to NoAllocatedElisionBuffer being non-zero.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16", - "MSRIndex": "0x3F6", - "SampleAfterValue": "20011", - "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.", - "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x54", + "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_NOT_EMPTY", + "PublicDescription": "Number of times a TSX Abort was triggered due to commit but Lock Buffer not empty.", + "SampleAfterValue": "2000003", + "UMask": "0x8" }, { - "PEBS": "2", - "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles. Reported latency may be longer than just the memory latency.", - "EventCode": "0xCD", - "MSRValue": "0x20", + "BriefDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32", - "MSRIndex": "0x3F6", - "SampleAfterValue": "100007", - "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.", - "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x5d", + "EventName": "TX_EXEC.MISC5", + "PublicDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region.", + "SampleAfterValue": "2000003", + "UMask": "0x10" }, { - "PEBS": "2", - "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles. Reported latency may be longer than just the memory latency.", - "EventCode": "0xCD", - "MSRValue": "0x40", + "BriefDescription": "Counts the number of times a XBEGIN instruction was executed inside an HLE transactional region.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64", - "MSRIndex": "0x3F6", - "SampleAfterValue": "2003", - "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.", - "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x5d", + "EventName": "TX_EXEC.MISC4", + "PublicDescription": "RTM region detected inside HLE.", + "SampleAfterValue": "2000003", + "UMask": "0x8" }, { - "PEBS": "2", - "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles. Reported latency may be longer than just the memory latency.", - "EventCode": "0xCD", - "MSRValue": "0x80", + "BriefDescription": "Counts the number of times an instruction execution caused the transactional nest count supported to be exceeded", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128", - "MSRIndex": "0x3F6", - "SampleAfterValue": "1009", - "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.", - "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x5d", + "EventName": "TX_EXEC.MISC3", + "PublicDescription": "Unfriendly TSX abort triggered by a nest count that is too deep.", + "SampleAfterValue": "2000003", + "UMask": "0x4" }, { - "PEBS": "2", - "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles. Reported latency may be longer than just the memory latency.", - "EventCode": "0xCD", - "MSRValue": "0x100", + "BriefDescription": "Counts the number of times a class of instructions (e.g., vzeroupper) that may cause a transactional abort was executed inside a transactional region", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256", - "MSRIndex": "0x3F6", - "SampleAfterValue": "503", - "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.", - "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x5d", + "EventName": "TX_EXEC.MISC2", + "PublicDescription": "Unfriendly TSX abort triggered by a vzeroupper instruction.", + "SampleAfterValue": "2000003", + "UMask": "0x2" }, { - "PEBS": "2", - "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles. Reported latency may be longer than just the memory latency.", - "EventCode": "0xCD", - "MSRValue": "0x200", + "BriefDescription": "Counts the number of times a class of instructions that may cause a transactional abort was executed. Since this is the count of execution, it may not always cause a transactional abort.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512", - "MSRIndex": "0x3F6", - "SampleAfterValue": "101", - "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.", - "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x5d", + "EventName": "TX_EXEC.MISC1", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FFC408000", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0404000001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x203C408000", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0084000004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x103C408000", + "BriefDescription": "Number of times an RTM execution successfully committed", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC9", + "EventName": "RTM_RETIRED.COMMIT", + "PublicDescription": "Number of times RTM commit succeeded.", + "SampleAfterValue": "2000003", + "UMask": "0x2" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043C408000", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2000100001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023C408000", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x103C400004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013C408000", + "BriefDescription": "Counts number of Offcore outstanding Demand Data Read requests that miss L3 cache in the superQ every cycle.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD", + "SampleAfterValue": "2000003", + "UMask": "0x10" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00BC408000", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", "BriefDescription": "Counts any other requests", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2000048000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x007C408000", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0104000002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC4008000", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x203C400004", + "Offcore": "1", + "PublicDescription": "Counts all demand code reads", "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { "BriefDescription": "Counts any other requests", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x007C408000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2004008000", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0104000004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1004008000", + "BriefDescription": "Number of times an RTM execution aborted due to uncommon conditions.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC9", + "EventName": "RTM_RETIRED.ABORTED_TIMER", + "SampleAfterValue": "2000003", + "UMask": "0x10" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0404008000", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2004000004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0204008000", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1004000004", + "Offcore": "1", + "PublicDescription": "Counts all demand code reads", "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { "BriefDescription": "Counts any other requests", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0044008000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0104008000", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0044000001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0084008000", + "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC8", + "EventName": "HLE_RETIRED.ABORTED_UNFRIENDLY", + "SampleAfterValue": "2000003", + "UMask": "0x20" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0044008000", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0084000002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000408000", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x043C400004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20001C8000", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x00BC400002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { "BriefDescription": "Counts any other requests", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2000408000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000108000", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2000040004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000088000", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2000400004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000048000", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2000400002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000028000", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x20001C0004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FFC400004", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x103C400002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x203C400004", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x013C400004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x103C400004", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0404008000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043C400004", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0104008000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023C400004", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2000400001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013C400004", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x013C408000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00BC400004", + "BriefDescription": "Number of times an HLE execution successfully committed", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC8", + "EventName": "HLE_RETIRED.COMMIT", + "PublicDescription": "Number of times HLE commit succeeded.", + "SampleAfterValue": "2000003", + "UMask": "0x2" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x007C400004", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2000020001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC4000004", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x203C408000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2004000004", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x023C400002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1004000004", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2000040002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0404000004", + "BriefDescription": "Demand Data Read requests who miss L3 cache", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xB0", + "EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD", + "PublicDescription": "Demand Data Read requests who miss L3 cache.", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x10" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0204000004", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x00BC400001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0104000004", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2004000002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0084000004", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x023C400004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0044000004", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FFC400001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000400004", + "BriefDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC9", + "EventName": "RTM_RETIRED.ABORTED_UNFRIENDLY", + "PublicDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions.", + "SampleAfterValue": "2000003", + "UMask": "0x20" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20001C0004", + "BriefDescription": "Number of times an HLE execution aborted due to various memory events (e.g., read/write capacity and conflicts).", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC8", + "EventName": "HLE_RETIRED.ABORTED_MEM", + "SampleAfterValue": "2000003", + "UMask": "0x8" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000100004", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256", + "MSRIndex": "0x3F6", + "MSRValue": "0x100", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles. Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "503", + "TakenAlone": "1", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000080004", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x013C400001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000040004", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x203C400001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000020004", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x007C400002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FFC400002", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC4008000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x203C400002", + "BriefDescription": "Number of times an RTM execution aborted due to any reasons (multiple categories may count as one).", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC9", + "EventName": "RTM_RETIRED.ABORTED", + "PEBS": "1", + "PublicDescription": "Number of times RTM abort was triggered.", + "SampleAfterValue": "2000003", + "UMask": "0x4" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x103C400002", + "BriefDescription": "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one).", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC8", + "EventName": "HLE_RETIRED.ABORTED", + "PEBS": "1", + "PublicDescription": "Number of times HLE abort was triggered.", + "SampleAfterValue": "2000003", + "UMask": "0x4" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043C400002", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", "BriefDescription": "Counts all demand data writes (RFOs)", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x203C400002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023C400002", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0404000004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013C400002", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16", + "MSRIndex": "0x3F6", + "MSRValue": "0x10", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles. Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "20011", + "TakenAlone": "1", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00BC400002", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2004008000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x007C400002", + "BriefDescription": "Number of times an HLE transactional execution aborted due to an unsupported read alignment from the elision buffer.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x54", + "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_UNSUPPORTED_ALIGNMENT", + "PublicDescription": "Number of times a TSX Abort was triggered due to attempting an unsupported alignment from Lock Buffer.", + "SampleAfterValue": "2000003", + "UMask": "0x20" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC4000002", + "BriefDescription": "Cycles with at least 1 Demand Data Read requests who miss L3 cache in the superQ.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD", + "SampleAfterValue": "2000003", + "UMask": "0x10" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2004000002", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC4000004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1004000002", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512", + "MSRIndex": "0x3F6", + "MSRValue": "0x200", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles. Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "101", + "TakenAlone": "1", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0404000002", + "BriefDescription": "Number of times a transactional abort was signaled due to a data capacity limitation for transactional reads or writes.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x54", + "EventName": "TX_MEM.ABORT_CAPACITY", + "SampleAfterValue": "2000003", + "UMask": "0x2" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0204000002", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0084000001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0104000002", + "BriefDescription": "Number of times an HLE execution aborted due to incompatible memory type", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC8", + "EventName": "HLE_RETIRED.ABORTED_MEMTYPE", + "PublicDescription": "Number of times an HLE execution aborted due to incompatible memory type.", + "SampleAfterValue": "2000003", + "UMask": "0x40" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0084000002", + "BriefDescription": "Number of times an RTM execution started.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC9", + "EventName": "RTM_RETIRED.START", + "PublicDescription": "Number of times we entered an RTM region. Does not count nested transactions.", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0044000002", + "BriefDescription": "Counts the number of machine clears due to memory order conflicts.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Errata": "SKL089", + "EventCode": "0xC3", + "EventName": "MACHINE_CLEARS.MEMORY_ORDERING", + "PublicDescription": "Counts the number of memory ordering Machine Clears detected. Memory Ordering Machine Clears can result from one of the following:a. memory disambiguation,b. external snoop, orc. cross SMT-HW-thread snoop (stores) hitting load buffer.", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x2" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000400002", + "BriefDescription": "Number of times an HLE transactional execution aborted due to XRELEASE lock not satisfying the address and value requirements in the elision buffer", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x54", + "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_MISMATCH", + "PublicDescription": "Number of times a TSX Abort was triggered due to release/commit but data and address mismatch.", + "SampleAfterValue": "2000003", + "UMask": "0x10" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20001C0002", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x20001C0002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000100002", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000080002", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", "BriefDescription": "Counts all demand data writes (RFOs)", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1004000002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000040002", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000020002", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x20001C0001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FFC400001", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC4000002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { "BriefDescription": "Counts demand data reads", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x007C400001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x203C400001", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2000100004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x103C400001", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x00BC400004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043C400001", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x043C400001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023C400001", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013C400001", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FFC400002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00BC400001", + "BriefDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC9", + "EventName": "RTM_RETIRED.ABORTED_EVENTS", + "PublicDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt).", + "SampleAfterValue": "2000003", + "UMask": "0x80" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x007C400001", + "BriefDescription": "Number of times an HLE execution started.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC8", + "EventName": "HLE_RETIRED.START", + "PublicDescription": "Number of times we entered an HLE region. Does not count nested transactions.", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC4000001", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FFC400004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2004000001", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2004000001", + "Offcore": "1", + "PublicDescription": "Counts demand data reads", "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { "BriefDescription": "Counts demand data reads", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC4000001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1004000001", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128", + "MSRIndex": "0x3F6", + "MSRValue": "0x80", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles. Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "1009", + "TakenAlone": "1", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0404000001", + "BriefDescription": "Number of times HLE lock could not be elided due to ElisionBufferAvailable being zero.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x54", + "EventName": "TX_MEM.HLE_ELISION_BUFFER_FULL", + "PublicDescription": "Number of times we could not allocate Lock Buffer.", + "SampleAfterValue": "2000003", + "UMask": "0x40" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0204000001", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2000108000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0104000001", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0044000004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0084000001", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x013C400002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0044000001", + "BriefDescription": "Execution stalls while L3 cache miss demand load is outstanding.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "6", + "EventCode": "0xA3", + "EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS", + "SampleAfterValue": "2000003", + "UMask": "0x6" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000400001", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x023C408000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20001C0001", + "BriefDescription": "Number of times an HLE execution aborted due to unfriendly events (such as interrupts).", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC8", + "EventName": "HLE_RETIRED.ABORTED_EVENTS", + "SampleAfterValue": "2000003", + "UMask": "0x80" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000100001", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x20001C8000", + "Offcore": "1", + "PublicDescription": "Counts any other requests", "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { "BriefDescription": "Counts demand data reads", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2000040001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000080001", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4", + "MSRIndex": "0x3F6", + "MSRValue": "0x4", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles. Reported latency may be longer than just the memory latency.", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "TakenAlone": "1", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000040001", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8", + "MSRIndex": "0x3F6", + "MSRValue": "0x8", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles. Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "50021", + "TakenAlone": "1", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000020001", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0404000002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/skylake/other.json b/tools/perf/pmu-events/arch/x86/skylake/other.json index 84a316d380acb..1a3683f1de918 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/other.json +++ b/tools/perf/pmu-events/arch/x86/skylake/other.json @@ -1,48 +1,56 @@ [ { - "EventCode": "0x32", + "BriefDescription": "Number of PREFETCHW instructions executed.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "SW_PREFETCH_ACCESS.NTA", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x32", + "EventName": "SW_PREFETCH_ACCESS.PREFETCHW", "SampleAfterValue": "2000003", - "BriefDescription": "Number of PREFETCHNTA instructions executed.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x8" }, { - "EventCode": "0x32", + "BriefDescription": "Number of PREFETCHT0 instructions executed.", "Counter": "0,1,2,3", - "UMask": "0x2", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x32", "EventName": "SW_PREFETCH_ACCESS.T0", "SampleAfterValue": "2000003", - "BriefDescription": "Number of PREFETCHT0 instructions executed.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "EventCode": "0x32", + "BriefDescription": "Number of hardware interrupts received by the processor.", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "SW_PREFETCH_ACCESS.T1_T2", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of PREFETCHT1 or PREFETCHT2 instructions executed.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xCB", + "EventName": "HW_INTERRUPTS.RECEIVED", + "PublicDescription": "Counts the number of hardware interruptions received by the processor.", + "SampleAfterValue": "203", + "UMask": "0x1" }, { + "BriefDescription": "Number of PREFETCHNTA instructions executed.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", "EventCode": "0x32", + "EventName": "SW_PREFETCH_ACCESS.NTA", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Number of PREFETCHT1 or PREFETCHT2 instructions executed.", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "SW_PREFETCH_ACCESS.PREFETCHW", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x32", + "EventName": "SW_PREFETCH_ACCESS.T1_T2", "SampleAfterValue": "2000003", - "BriefDescription": "Number of PREFETCHW instructions executed.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x4" }, { - "PublicDescription": "Counts the number of hardware interruptions received by the processor.", - "EventCode": "0xCB", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "HW_INTERRUPTS.RECEIVED", - "SampleAfterValue": "203", - "BriefDescription": "Number of hardware interrupts received by the processor.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x09", + "EventName": "MEMORY_DISAMBIGUATION.HISTORY_RESET", + "SampleAfterValue": "2000003", + "UMask": "0x1" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/skylake/pipeline.json b/tools/perf/pmu-events/arch/x86/skylake/pipeline.json index 4a891fbbc4bb2..f46e93a57fb49 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/skylake/pipeline.json @@ -1,967 +1,969 @@ [ { - "PublicDescription": "Counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, Counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. Counting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.", - "Counter": "Fixed counter 0", - "UMask": "0x1", - "EventName": "INST_RETIRED.ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Instructions retired from execution.", - "CounterHTOff": "Fixed counter 0" - }, - { - "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.", - "Counter": "Fixed counter 1", - "UMask": "0x2", - "EventName": "CPU_CLK_UNHALTED.THREAD", - "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles when the thread is not in halt state", - "CounterHTOff": "Fixed counter 1" + "BriefDescription": "Number of instructions retired. General Counter - architectural event", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Errata": "SKL091, SKL044", + "EventCode": "0xC0", + "EventName": "INST_RETIRED.ANY_P", + "PublicDescription": "Counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).", + "SampleAfterValue": "2000003" }, { - "Counter": "Fixed counter 1", - "UMask": "0x2", - "AnyThread": "1", - "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", + "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.STALL_CYCLES", + "Invert": "1", + "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.", "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", - "CounterHTOff": "Fixed counter 1" + "UMask": "0x1" }, { - "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.", - "Counter": "Fixed counter 2", - "UMask": "0x3", - "EventName": "CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station was not empty.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA6", + "EventName": "EXE_ACTIVITY.4_PORTS_UTIL", + "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty.", "SampleAfterValue": "2000003", - "BriefDescription": "Reference cycles when the core is not in halt state.", - "CounterHTOff": "Fixed counter 2" + "UMask": "0x10" }, { - "PublicDescription": "Counts how many times the load operation got the true Block-on-Store blocking code preventing store forwarding. This includes cases when:a. preceding store conflicts with the load (incomplete overlap),b. store forwarding is impossible due to u-arch limitations,c. preceding lock RMW operations are not forwarded,d. store has the no-forward bit set (uncacheable/page-split/masked stores),e. all-blocking stores are used (mostly, fences and port I/O), and others.The most common case is a load blocked due to its address range overlapping with a preceding smaller uncompleted store. Note: This event does not take into account cases of out-of-SW-control (for example, SbTailHit), unknown physical STA, and cases of blocking loads on store due to being non-WB memory type or a lock. These cases are covered by other events. See the table of not supported store forwards in the Optimization Guide.", - "EventCode": "0x03", + "BriefDescription": "Cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "LD_BLOCKS.STORE_FORWARD", - "SampleAfterValue": "100003", - "BriefDescription": "Loads blocked by overlapping with store buffer that cannot be forwarded .", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x14", + "EventName": "ARITH.DIVIDER_ACTIVE", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "PublicDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.", - "EventCode": "0x03", + "BriefDescription": "False dependencies in MOB due to partial compare on address.", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "LD_BLOCKS.NO_SR", - "SampleAfterValue": "100003", - "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Counts false dependencies in MOB when the partial comparison upon loose net check and dependency was resolved by the Enhanced Loose net mechanism. This may not result in high performance penalties. Loose net checks can fail when loads and stores are 4k aliased.", + "CounterHTOff": "0,1,2,3,4,5,6,7", "EventCode": "0x07", - "Counter": "0,1,2,3", - "UMask": "0x1", "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS", + "PublicDescription": "Counts false dependencies in MOB when the partial comparison upon loose net check and dependency was resolved by the Enhanced Loose net mechanism. This may not result in high performance penalties. Loose net checks can fail when loads and stores are 4k aliased.", "SampleAfterValue": "100003", - "BriefDescription": "False dependencies in MOB due to partial compare on address.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Core cycles the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.", - "EventCode": "0x0D", + "BriefDescription": "Far branch instructions retired.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "INT_MISC.RECOVERY_CYCLES", - "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. misprediction or memory nuke)", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Errata": "SKL091", + "EventCode": "0xC4", + "EventName": "BR_INST_RETIRED.FAR_BRANCH", + "PEBS": "1", + "PublicDescription": "This event counts far branch instructions retired.", + "SampleAfterValue": "100007", + "UMask": "0x40" }, { - "EventCode": "0x0D", + "BriefDescription": "Counts the number of x87 uops dispatched.", "Counter": "0,1,2,3", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "INT_MISC.RECOVERY_CYCLES_ANY", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.X87", + "PublicDescription": "Counts the number of x87 uops executed.", "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x10" }, { - "EventCode": "0x0D", + "BriefDescription": "Demand load dispatches that hit L1D fill buffer (FB) allocated for software prefetch.", "Counter": "0,1,2,3", - "UMask": "0x80", - "EventName": "INT_MISC.CLEAR_RESTEER_CYCLES", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles the issue-stage is waiting for front-end to fetch from resteered path following branch misprediction or machine clear events.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x4C", + "EventName": "LOAD_HIT_PRE.SW_PF", + "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).", - "EventCode": "0x0E", + "BriefDescription": "Mispredicted direct and indirect near call instructions retired.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_ISSUED.ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC5", + "EventName": "BR_MISP_RETIRED.NEAR_CALL", + "PEBS": "1", + "PublicDescription": "Counts both taken and not taken retired mispredicted direct and indirect near calls, including both register and memory indirect.", + "SampleAfterValue": "400009", + "UMask": "0x2" }, { - "PublicDescription": "Counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.", - "EventCode": "0x0E", - "Invert": "1", + "BriefDescription": "Total execution stalls.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_ISSUED.STALL_CYCLES", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "4", + "EventCode": "0xA3", + "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x4" }, { - "PublicDescription": "Counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. Starting with the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register. For more information, refer to \u201cMixing Intel AVX and Intel SSE Code\u201d section of the Optimization Guide.", - "EventCode": "0x0E", + "BriefDescription": "Number of slow LEA uops being allocated. A uop is generally considered SlowLea if it has 3 sources (e.g. 2 sources + immediate) regardless if as a result of LEA instruction or not.", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH", - "SampleAfterValue": "2000003", - "BriefDescription": "Uops inserted at issue-stage in order to preserve upper bits of vector registers.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { + "CounterHTOff": "0,1,2,3,4,5,6,7", "EventCode": "0x0E", - "Counter": "0,1,2,3", - "UMask": "0x20", "EventName": "UOPS_ISSUED.SLOW_LEA", "SampleAfterValue": "2000003", - "BriefDescription": "Number of slow LEA uops being allocated. A uop is generally considered SlowLea if it has 3 sources (e.g. 2 sources + immediate) regardless if as a result of LEA instruction or not.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x20" }, { - "EventCode": "0x14", + "BriefDescription": "Cycles with less than 10 actually retired uops.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "ARITH.DIVIDER_ACTIVE", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "10", + "EventCode": "0xC2", + "EventName": "UOPS_RETIRED.TOTAL_CYCLES", + "Invert": "1", + "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.", - "EventCode": "0x3C", + "BriefDescription": "Thread cycles when thread is not in halt state", "Counter": "0,1,2,3", - "UMask": "0x0", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x3C", "EventName": "CPU_CLK_UNHALTED.THREAD_P", - "SampleAfterValue": "2000003", - "BriefDescription": "Thread cycles when thread is not in halt state", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.", + "SampleAfterValue": "2000003" }, { - "EventCode": "0x3C", + "BriefDescription": "Cycles where at least 2 uops were executed per-thread", "Counter": "0,1,2,3", - "UMask": "0x0", - "AnyThread": "1", - "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "2", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC", + "PublicDescription": "Cycles where at least 2 uops were executed per-thread.", "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Counts when the Current Privilege Level (CPL) transitions from ring 1, 2 or 3 to ring 0 (Kernel).", - "EventCode": "0x3C", + "BriefDescription": "Core crystal clock cycles when the thread is unhalted.", "Counter": "0,1,2,3", - "UMask": "0x0", - "EdgeDetect": "1", - "EventName": "CPU_CLK_UNHALTED.RING0_TRANS", - "SampleAfterValue": "100007", - "BriefDescription": "Counts when there is a transition from ring 1, 2 or 3 to ring 0.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { + "CounterHTOff": "0,1,2,3,4,5,6,7", "EventCode": "0x3C", - "Counter": "0,1,2,3", - "UMask": "0x1", "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK", - "SampleAfterValue": "2503", - "BriefDescription": "Core crystal clock cycles when the thread is unhalted.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "SampleAfterValue": "25003", + "UMask": "0x1" }, { - "EventCode": "0x3C", + "BriefDescription": "Number of machine clears (nukes) of any type.", "Counter": "0,1,2,3", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY", - "SampleAfterValue": "2503", - "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EdgeDetect": "1", + "EventCode": "0xC3", + "EventName": "MACHINE_CLEARS.COUNT", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0x3C", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK", - "SampleAfterValue": "2503", - "BriefDescription": "Core crystal clock cycles when the thread is unhalted.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "AnyThread": "1", + "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", + "Counter": "Fixed counter 1", + "CounterHTOff": "Fixed counter 1", + "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", + "SampleAfterValue": "2000003", + "UMask": "0x2" }, { - "EventCode": "0x3C", + "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.", "Counter": "0,1,2,3", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", - "SampleAfterValue": "2503", - "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.THREAD", + "PublicDescription": "Number of uops to be executed per-thread each cycle.", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "EventCode": "0x3C", + "BriefDescription": "Cycles where at least 3 uops were executed per-thread", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "3", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC", + "PublicDescription": "Cycles where at least 3 uops were executed per-thread.", "SampleAfterValue": "2000003", - "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0x3C", + "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", - "SampleAfterValue": "2503", - "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", + "Invert": "1", + "SampleAfterValue": "2000003", + "UMask": "0x2" }, { - "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.", - "EventCode": "0x4C", + "BriefDescription": "Cycles where the Store Buffer was full and no outstanding load.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "LOAD_HIT_PRE.SW_PF", - "SampleAfterValue": "100003", - "BriefDescription": "Demand load dispatches that hit L1D fill buffer (FB) allocated for software prefetch.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA6", + "EventName": "EXE_ACTIVITY.BOUND_ON_STORES", + "SampleAfterValue": "2000003", + "UMask": "0x40" }, { - "PublicDescription": "This event counts cycles during which the microcode scoreboard stalls happen.", - "EventCode": "0x59", + "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "PARTIAL_RAT_STALLS.SCOREBOARD", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "8", + "EventCode": "0xA3", + "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where the pipeline is stalled due to serializing operations.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x8" }, { - "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for the thread.; Note: In ST-mode, not active thread should drive 0. This is usually caused by severely costly branch mispredictions, or allocator/FE issues.", - "EventCode": "0x5E", + "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "RS_EVENTS.EMPTY_CYCLES", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0xA8", + "EventName": "LSD.CYCLES_ACTIVE", + "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate front-end Latency Bound issues.", - "EventCode": "0x5E", - "Invert": "1", + "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. misprediction or memory nuke)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EdgeDetect": "1", - "EventName": "RS_EVENTS.EMPTY_END", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x0D", + "EventName": "INT_MISC.RECOVERY_CYCLES", + "PublicDescription": "Core cycles the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.", "SampleAfterValue": "2000003", - "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.", - "EventCode": "0x87", + "BriefDescription": "Core crystal clock cycles when the thread is unhalted.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "ILD_STALL.LCP", - "SampleAfterValue": "2000003", - "BriefDescription": "Stalls caused by changing prefix length of the instruction.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x3C", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK", + "SampleAfterValue": "25003", + "UMask": "0x1" }, { - "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 0.", - "EventCode": "0xA1", + "BriefDescription": "Cycles per thread when uops are executed in port 0", "Counter": "0,1,2,3", - "UMask": "0x1", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA1", "EventName": "UOPS_DISPATCHED_PORT.PORT_0", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 0.", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 0", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 1.", - "EventCode": "0xA1", + "BriefDescription": "Cycles per thread when uops are executed in port 1", "Counter": "0,1,2,3", - "UMask": "0x2", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA1", "EventName": "UOPS_DISPATCHED_PORT.PORT_1", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 1.", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 2.", - "EventCode": "0xA1", + "BriefDescription": "Cycles per thread when uops are executed in port 2", "Counter": "0,1,2,3", - "UMask": "0x4", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA1", "EventName": "UOPS_DISPATCHED_PORT.PORT_2", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 2.", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 2", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x4" }, { - "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 3.", - "EventCode": "0xA1", + "BriefDescription": "Cycles per thread when uops are executed in port 3", "Counter": "0,1,2,3", - "UMask": "0x8", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA1", "EventName": "UOPS_DISPATCHED_PORT.PORT_3", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 3.", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 3", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x8" }, { - "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 4.", - "EventCode": "0xA1", + "BriefDescription": "Cycles per thread when uops are executed in port 4", "Counter": "0,1,2,3", - "UMask": "0x10", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA1", "EventName": "UOPS_DISPATCHED_PORT.PORT_4", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 4.", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 4", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x10" }, { - "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 5.", - "EventCode": "0xA1", + "BriefDescription": "Cycles per thread when uops are executed in port 5", "Counter": "0,1,2,3", - "UMask": "0x20", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA1", "EventName": "UOPS_DISPATCHED_PORT.PORT_5", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 5.", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 5", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x20" }, { - "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 6.", - "EventCode": "0xA1", + "BriefDescription": "Cycles per thread when uops are executed in port 6", "Counter": "0,1,2,3", - "UMask": "0x40", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA1", "EventName": "UOPS_DISPATCHED_PORT.PORT_6", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 6.", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 6", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x40" }, { - "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 7.", - "EventCode": "0xA1", + "BriefDescription": "Cycles per thread when uops are executed in port 7", "Counter": "0,1,2,3", - "UMask": "0x80", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA1", "EventName": "UOPS_DISPATCHED_PORT.PORT_7", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 7.", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 7", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x80" }, { - "PublicDescription": "Counts resource-related stall cycles.", - "EventCode": "0xa2", + "AnyThread": "1", + "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "RESOURCE_STALLS.ANY", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x0D", + "EventName": "INT_MISC.RECOVERY_CYCLES_ANY", "SampleAfterValue": "2000003", - "BriefDescription": "Resource-related stall cycles", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.", - "EventCode": "0xA2", - "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "RESOURCE_STALLS.SB", + "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution", + "Counter": "1", + "CounterHTOff": "1", + "Errata": "SKL091, SKL044", + "EventCode": "0xC0", + "EventName": "INST_RETIRED.PREC_DIST", + "PEBS": "2", + "PublicDescription": "A version of INST_RETIRED that allows for a more unbiased distribution of samples across instructions retired. It utilizes the Precise Distribution of Instructions Retired (PDIR) feature to mitigate some bias in how retired instructions get sampled.", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0xA3", + "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "4", + "EventCode": "0xA8", + "EventName": "LSD.CYCLES_4_UOPS", + "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector).", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0xA3", + "BriefDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station was not empty.", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA6", + "EventName": "EXE_ACTIVITY.3_PORTS_UTIL", + "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty.", "SampleAfterValue": "2000003", - "BriefDescription": "Total execution stalls.", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x8" }, { - "EventCode": "0xA3", + "BriefDescription": "Loads blocked due to overlapping with a preceding store that cannot be forwarded.", "Counter": "0,1,2,3", - "UMask": "0x5", - "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS", - "SampleAfterValue": "2000003", - "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.", - "CounterMask": "5", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x03", + "EventName": "LD_BLOCKS.STORE_FORWARD", + "PublicDescription": "Counts the number of times where store forwarding was prevented for a load operation. The most common case is a load blocked due to the address of memory access (partially) overlapping with a preceding uncompleted store. Note: See the table of not supported store forwards in the Optimization Guide.", + "SampleAfterValue": "100003", + "UMask": "0x2" }, { - "EventCode": "0xA3", + "AnyThread": "1", + "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", - "CounterMask": "8", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x3C", + "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY", + "SampleAfterValue": "25003", + "UMask": "0x1" }, { - "EventCode": "0xA3", + "BriefDescription": "Cycles where the pipeline is stalled due to serializing operations.", "Counter": "0,1,2,3", - "UMask": "0xc", - "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x59", + "EventName": "PARTIAL_RAT_STALLS.SCOREBOARD", + "PublicDescription": "This event counts cycles during which the microcode scoreboard stalls happen.", "SampleAfterValue": "2000003", - "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", - "CounterMask": "12", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0xA3", + "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x0E", + "EventName": "UOPS_ISSUED.STALL_CYCLES", + "Invert": "1", + "PublicDescription": "Counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles while memory subsystem has an outstanding load.", - "CounterMask": "16", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0xA3", + "BriefDescription": "Not taken branch instructions retired.", "Counter": "0,1,2,3", - "UMask": "0x14", - "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", - "CounterMask": "20", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Errata": "SKL091", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.COND_NTAKEN", + "PublicDescription": "This event counts not taken branch instructions retired.", + "SampleAfterValue": "400009", + "UMask": "0x10" }, { - "PublicDescription": "Counts cycles during which no uops were executed on all ports and Reservation Station (RS) was not empty.", - "EventCode": "0xA6", + "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "EXE_ACTIVITY.EXE_BOUND_0_PORTS", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "3", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where no uops were executed, the Reservation Station was not empty, the Store Buffer was full and there was no outstanding load.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.", - "EventCode": "0xA6", + "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "EXE_ACTIVITY.1_PORTS_UTIL", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles total of 1 uop is executed on all ports and Reservation Station was not empty.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.", - "EventCode": "0xA6", + "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "EXE_ACTIVITY.2_PORTS_UTIL", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "4", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles total of 2 uops are executed on all ports and Reservation Station was not empty.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty.", - "EventCode": "0xA6", - "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "EXE_ACTIVITY.3_PORTS_UTIL", + "BriefDescription": "Reference cycles when the core is not in halt state.", + "Counter": "Fixed counter 2", + "CounterHTOff": "Fixed counter 2", + "EventName": "CPU_CLK_UNHALTED.REF_TSC", + "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station was not empty.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x3" }, { - "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty.", - "EventCode": "0xA6", + "BriefDescription": "All mispredicted macro branch instructions retired.", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "EXE_ACTIVITY.4_PORTS_UTIL", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station was not empty.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC5", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", + "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch. When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.", + "SampleAfterValue": "400009" }, { - "EventCode": "0xA6", + "BriefDescription": "Number of times a microcode assist is invoked by HW other than FP-assist. Examples include AD (page Access Dirty) and AVX* related assists.", "Counter": "0,1,2,3", - "UMask": "0x40", - "EventName": "EXE_ACTIVITY.BOUND_ON_STORES", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where the Store Buffer was full and no outstanding load.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC1", + "EventName": "OTHER_ASSISTS.ANY", + "SampleAfterValue": "100003", + "UMask": "0x3f" }, { - "PublicDescription": "Number of uops delivered to the back-end by the LSD(Loop Stream Detector).", - "EventCode": "0xA8", + "BriefDescription": "Cycles without actually retired uops.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "LSD.UOPS", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0xC2", + "EventName": "UOPS_RETIRED.STALL_CYCLES", + "Invert": "1", + "PublicDescription": "This event counts cycles without actually retired uops.", "SampleAfterValue": "2000003", - "BriefDescription": "Number of Uops delivered by the LSD.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).", - "EventCode": "0xA8", + "BriefDescription": "Number of Uops delivered by the LSD.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "LSD.CYCLES_ACTIVE", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA8", + "EventName": "LSD.UOPS", + "PublicDescription": "Number of uops delivered to the back-end by the LSD(Loop Stream Detector).", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector).", - "EventCode": "0xA8", + "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "LSD.CYCLES_4_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x3C", + "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE", + "SampleAfterValue": "25003", + "UMask": "0x2" }, { - "PublicDescription": "Number of uops to be executed per-thread each cycle.", - "EventCode": "0xB1", + "BriefDescription": "Stalls caused by changing prefix length of the instruction.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.THREAD", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x87", + "EventName": "ILD_STALL.LCP", + "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.", "SampleAfterValue": "2000003", - "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.", - "EventCode": "0xB1", - "Invert": "1", + "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.STALL_CYCLES", - "SampleAfterValue": "2000003", - "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.", + "CounterHTOff": "0,1,2,3,4,5,6,7", "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "EdgeDetect": "1", + "EventCode": "0x5E", + "EventName": "RS_EVENTS.EMPTY_END", + "Invert": "1", + "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate front-end Latency Bound issues.", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "PublicDescription": "Cycles where at least 1 uop was executed per-thread.", - "EventCode": "0xB1", + "BriefDescription": "Cycles while memory subsystem has an outstanding load.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "16", + "EventCode": "0xA3", + "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where at least 1 uop was executed per-thread", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x10" }, { - "PublicDescription": "Cycles where at least 2 uops were executed per-thread.", - "EventCode": "0xB1", + "BriefDescription": "Taken branch instructions retired.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where at least 2 uops were executed per-thread", - "CounterMask": "2", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Errata": "SKL091", + "EventCode": "0xC4", + "EventName": "BR_INST_RETIRED.NEAR_TAKEN", + "PEBS": "1", + "PublicDescription": "This event counts taken branch instructions retired.", + "SampleAfterValue": "400009", + "UMask": "0x20" }, { - "PublicDescription": "Cycles where at least 3 uops were executed per-thread.", - "EventCode": "0xB1", + "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where at least 3 uops were executed per-thread", - "CounterMask": "3", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x03", + "EventName": "LD_BLOCKS.NO_SR", + "PublicDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.", + "SampleAfterValue": "100003", + "UMask": "0x8" }, { - "PublicDescription": "Cycles where at least 4 uops were executed per-thread.", - "EventCode": "0xB1", + "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x0E", + "EventName": "UOPS_ISSUED.ANY", + "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where at least 4 uops were executed per-thread", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Number of uops executed from any thread.", - "EventCode": "0xB1", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE", + "BriefDescription": "Core cycles when the thread is not in halt state", + "Counter": "Fixed counter 1", + "CounterHTOff": "Fixed counter 1", + "EventName": "CPU_CLK_UNHALTED.THREAD", + "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.", "SampleAfterValue": "2000003", - "BriefDescription": "Number of uops executed on the core.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "EventCode": "0xB1", + "AnyThread": "1", + "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x3C", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", + "SampleAfterValue": "25003", + "UMask": "0x1" }, { - "EventCode": "0xB1", + "BriefDescription": "Direct and indirect near call instructions retired.", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", - "CounterMask": "2", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Errata": "SKL091", + "EventCode": "0xC4", + "EventName": "BR_INST_RETIRED.NEAR_CALL", + "PEBS": "1", + "PublicDescription": "This event counts both direct and indirect near call instructions retired.", + "SampleAfterValue": "100007", + "UMask": "0x2" }, { - "EventCode": "0xB1", + "BriefDescription": "Number of retired PAUSE instructions (that do not end up with a VMExit to the VMM; TSX aborted Instructions may be counted). This event is not supported on first SKL and KBL products.", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xCC", + "EventName": "ROB_MISC_EVENTS.PAUSE_INST", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.", - "CounterMask": "3", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x40" }, { - "EventCode": "0xB1", + "BriefDescription": "Resource-related stall cycles", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xa2", + "EventName": "RESOURCE_STALLS.ANY", + "PublicDescription": "Counts resource-related stall cycles.", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0xB1", - "Invert": "1", + "BriefDescription": "Self-modifying code (SMC) detected.", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC3", + "EventName": "MACHINE_CLEARS.SMC", + "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.", + "SampleAfterValue": "100003", + "UMask": "0x4" + }, + { + "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "5", + "EventCode": "0xA3", + "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x5" }, { - "PublicDescription": "Counts the number of x87 uops executed.", - "EventCode": "0xB1", + "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "UOPS_EXECUTED.X87", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x3C", + "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", + "SampleAfterValue": "25003", + "UMask": "0x2" + }, + { + "BriefDescription": "Cycles where at least 4 uops were executed per-thread", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "4", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC", + "PublicDescription": "Cycles where at least 4 uops were executed per-thread.", "SampleAfterValue": "2000003", - "BriefDescription": "Counts the number of x87 uops dispatched.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).", - "EventCode": "0xC0", + "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.", "Counter": "0,1,2,3", - "UMask": "0x0", - "Errata": "SKL091, SKL044", - "EventName": "INST_RETIRED.ANY_P", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC5", + "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", + "PEBS": "1", + "SampleAfterValue": "400009", + "UMask": "0x20" + }, + { + "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "CounterMask": "20", + "EventCode": "0xA3", + "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY", "SampleAfterValue": "2000003", - "BriefDescription": "Number of instructions retired. General Counter - architectural event", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x14" }, { - "PEBS": "2", - "PublicDescription": "A version of INST_RETIRED that allows for a more unbiased distribution of samples across instructions retired. It utilizes the Precise Distribution of Instructions Retired (PDIR) feature to mitigate some bias in how retired instructions get sampled.", - "EventCode": "0xC0", - "Counter": "1", - "UMask": "0x1", - "Errata": "SKL091, SKL044", - "EventName": "INST_RETIRED.PREC_DIST", + "BriefDescription": "Cycles where no uops were executed, the Reservation Station was not empty, the Store Buffer was full and there was no outstanding load.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA6", + "EventName": "EXE_ACTIVITY.EXE_BOUND_0_PORTS", + "PublicDescription": "Counts cycles during which no uops were executed on all ports and Reservation Station (RS) was not empty.", "SampleAfterValue": "2000003", - "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution", - "CounterHTOff": "1" + "UMask": "0x1" }, { - "PEBS": "2", - "PublicDescription": "Number of cycles using an always true condition applied to PEBS instructions retired event. (inst_ret< 16)", - "EventCode": "0xC0", - "Invert": "1", + "BriefDescription": "Number of cycles using always true condition applied to PEBS instructions retired event.", "Counter": "0,2,3", - "UMask": "0x1", + "CounterHTOff": "0,2,3", + "CounterMask": "10", "Errata": "SKL091, SKL044", + "EventCode": "0xC0", "EventName": "INST_RETIRED.TOTAL_CYCLES_PS", + "Invert": "1", + "PEBS": "2", + "PublicDescription": "Number of cycles using an always true condition applied to PEBS instructions retired event. (inst_ret< 16)", "SampleAfterValue": "2000003", - "BriefDescription": "Number of cycles using always true condition applied to PEBS instructions retired event.", - "CounterMask": "10", - "CounterHTOff": "0,2,3" + "UMask": "0x1" }, { - "EventCode": "0xC1", + "BriefDescription": "Retirement slots used.", "Counter": "0,1,2,3", - "UMask": "0x3f", - "EventName": "OTHER_ASSISTS.ANY", - "SampleAfterValue": "100003", - "BriefDescription": "Number of times a microcode assist is invoked by HW other than FP-assist. Examples include AD (page Access Dirty) and AVX* related assists.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Counts the retirement slots used.", + "CounterHTOff": "0,1,2,3,4,5,6,7", "EventCode": "0xC2", - "Counter": "0,1,2,3", - "UMask": "0x2", "EventName": "UOPS_RETIRED.RETIRE_SLOTS", + "PublicDescription": "Counts the retirement slots used.", "SampleAfterValue": "2000003", - "BriefDescription": "Retirement slots used.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "PublicDescription": "This event counts cycles without actually retired uops.", - "EventCode": "0xC2", - "Invert": "1", + "AnyThread": "1", + "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_RETIRED.STALL_CYCLES", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x3C", + "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY", + "SampleAfterValue": "2000003" + }, + { + "BriefDescription": "Uops inserted at issue-stage in order to preserve upper bits of vector registers.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x0E", + "EventName": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH", + "PublicDescription": "Counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. Starting with the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register. For more information, refer to Mixing Intel AVX and Intel SSE Code section of the Optimization Guide.", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles without actually retired uops.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.", - "EventCode": "0xC2", - "Invert": "1", + "BriefDescription": "Number of macro-fused uops retired. (non precise)", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_RETIRED.TOTAL_CYCLES", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xc2", + "EventName": "UOPS_RETIRED.MACRO_FUSED", + "PublicDescription": "Counts the number of macro-fused uops retired. (non precise)", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with less than 10 actually retired uops.", - "CounterMask": "10", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x4" }, { - "PublicDescription": "Number of machine clears (nukes) of any type.", - "EventCode": "0xC3", + "BriefDescription": "Increments whenever there is an update to the LBR array.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EdgeDetect": "1", - "EventName": "MACHINE_CLEARS.COUNT", - "SampleAfterValue": "100003", - "BriefDescription": "Number of machine clears (nukes) of any type.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xCC", + "EventName": "ROB_MISC_EVENTS.LBR_INSERTS", + "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT.", + "SampleAfterValue": "2000003", + "UMask": "0x20" }, { - "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.", - "EventCode": "0xC3", + "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "MACHINE_CLEARS.SMC", - "SampleAfterValue": "100003", - "BriefDescription": "Self-modifying code (SMC) detected.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x5E", + "EventName": "RS_EVENTS.EMPTY_CYCLES", + "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for the thread.; Note: In ST-mode, not active thread should drive 0. This is usually caused by severely costly branch mispredictions, or allocator/FE issues.", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all (macro) branch instructions retired.", - "EventCode": "0xC4", + "BriefDescription": "Instructions retired from execution.", + "Counter": "Fixed counter 0", + "CounterHTOff": "Fixed counter 0", + "EventName": "INST_RETIRED.ANY", + "PublicDescription": "Counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, Counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. Counting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", "Counter": "0,1,2,3", - "UMask": "0x0", - "Errata": "SKL091", - "EventName": "BR_INST_RETIRED.ALL_BRANCHES", - "SampleAfterValue": "400009", - "BriefDescription": "All (macro) branch instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "2", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", + "SampleAfterValue": "2000003", + "UMask": "0x2" }, { - "PEBS": "1", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts conditional branch instructions retired.", - "EventCode": "0xC4", + "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).", "Counter": "0,1,2,3", - "UMask": "0x1", - "Errata": "SKL091", - "EventName": "BR_INST_RETIRED.CONDITIONAL", - "SampleAfterValue": "400009", - "BriefDescription": "Conditional branch instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA2", + "EventName": "RESOURCE_STALLS.SB", + "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.", + "SampleAfterValue": "2000003", + "UMask": "0x8" }, { - "PEBS": "1", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts both direct and indirect near call instructions retired.", - "EventCode": "0xC4", + "BriefDescription": "Counts when there is a transition from ring 1, 2 or 3 to ring 0.", "Counter": "0,1,2,3", - "UMask": "0x2", - "Errata": "SKL091", - "EventName": "BR_INST_RETIRED.NEAR_CALL", - "SampleAfterValue": "100007", - "BriefDescription": "Direct and indirect near call instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EdgeDetect": "1", + "EventCode": "0x3C", + "EventName": "CPU_CLK_UNHALTED.RING0_TRANS", + "PublicDescription": "Counts when the Current Privilege Level (CPL) transitions from ring 1, 2 or 3 to ring 0 (Kernel).", + "SampleAfterValue": "100007" }, { - "PEBS": "2", - "PublicDescription": "This is a precise version of BR_INST_RETIRED.ALL_BRANCHES that counts all (macro) branch instructions retired.", - "EventCode": "0xC4", + "BriefDescription": "All (macro) branch instructions retired.", "Counter": "0,1,2,3", - "UMask": "0x4", + "CounterHTOff": "0,1,2,3", "Errata": "SKL091", + "EventCode": "0xC4", "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS", + "PEBS": "2", + "PublicDescription": "This is a precise version of BR_INST_RETIRED.ALL_BRANCHES that counts all (macro) branch instructions retired.", "SampleAfterValue": "400009", - "BriefDescription": "All (macro) branch instructions retired.", - "CounterHTOff": "0,1,2,3" + "UMask": "0x4" }, { - "PEBS": "1", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts return instructions retired.", - "EventCode": "0xC4", + "BriefDescription": "Mispredicted macro branch instructions retired.", "Counter": "0,1,2,3", - "UMask": "0x8", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC5", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS", + "PEBS": "2", + "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.", + "SampleAfterValue": "400009", + "UMask": "0x4" + }, + { + "BriefDescription": "Return instructions retired.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", "Errata": "SKL091", + "EventCode": "0xC4", "EventName": "BR_INST_RETIRED.NEAR_RETURN", + "PEBS": "1", + "PublicDescription": "This event counts return instructions retired.", "SampleAfterValue": "100007", - "BriefDescription": "Return instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x8" }, { - "PEBS": "1", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts not taken branch instructions retired.", - "EventCode": "0xC4", + "BriefDescription": "Cycles total of 1 uop is executed on all ports and Reservation Station was not empty.", "Counter": "0,1,2,3", - "UMask": "0x10", - "Errata": "SKL091", - "EventName": "BR_INST_RETIRED.NOT_TAKEN", - "SampleAfterValue": "400009", - "BriefDescription": "Counts all not taken macro branch instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA6", + "EventName": "EXE_ACTIVITY.1_PORTS_UTIL", + "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.", + "SampleAfterValue": "2000003", + "UMask": "0x2" }, { - "PEBS": "1", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts taken branch instructions retired.", - "EventCode": "0xC4", + "BriefDescription": "Not taken branch instructions retired.", "Counter": "0,1,2,3", - "UMask": "0x20", + "CounterHTOff": "0,1,2,3,4,5,6,7", "Errata": "SKL091", - "EventName": "BR_INST_RETIRED.NEAR_TAKEN", + "EventCode": "0xC4", + "EventName": "BR_INST_RETIRED.NOT_TAKEN", + "PublicDescription": "This event counts not taken branch instructions retired.", "SampleAfterValue": "400009", - "BriefDescription": "Taken branch instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x10" }, { - "PEBS": "1", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts far branch instructions retired.", - "EventCode": "0xC4", + "BriefDescription": "Conditional branch instructions retired.", "Counter": "0,1,2,3", - "UMask": "0x40", + "CounterHTOff": "0,1,2,3,4,5,6,7", "Errata": "SKL091", - "EventName": "BR_INST_RETIRED.FAR_BRANCH", - "SampleAfterValue": "100007", - "BriefDescription": "Counts the number of far branch instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch. When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.", - "EventCode": "0xC5", - "Counter": "0,1,2,3", - "UMask": "0x0", - "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", + "EventCode": "0xC4", + "EventName": "BR_INST_RETIRED.CONDITIONAL", + "PEBS": "1", + "PublicDescription": "This event counts conditional branch instructions retired.", "SampleAfterValue": "400009", - "BriefDescription": "All mispredicted macro branch instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PEBS": "1", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts mispredicted conditional branch instructions retired.", - "EventCode": "0xC5", + "BriefDescription": "Mispredicted conditional branch instructions retired.", "Counter": "0,1,2,3", - "UMask": "0x1", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC5", "EventName": "BR_MISP_RETIRED.CONDITIONAL", + "PEBS": "1", + "PublicDescription": "This event counts mispredicted conditional branch instructions retired.", "SampleAfterValue": "400009", - "BriefDescription": "Mispredicted conditional branch instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PEBS": "1", - "PublicDescription": "This event counts both taken and not taken retired mispredicted direct and indirect near calls, including both register and memory indirect.", - "EventCode": "0xC5", + "BriefDescription": "Number of uops executed on the core.", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "BR_MISP_RETIRED.NEAR_CALL", - "SampleAfterValue": "400009", - "BriefDescription": "Mispredicted direct and indirect near call instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.CORE", + "PublicDescription": "Number of uops executed from any thread.", + "SampleAfterValue": "2000003", + "UMask": "0x2" }, { - "PEBS": "2", - "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.", - "EventCode": "0xC5", + "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS", - "SampleAfterValue": "400009", - "BriefDescription": "Mispredicted macro branch instructions retired.", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "12", + "EventCode": "0xA3", + "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS", + "SampleAfterValue": "2000003", + "UMask": "0xc" }, { - "PEBS": "1", - "PublicDescription": "Number of near branch instructions retired that were mispredicted and taken.", - "EventCode": "0xC5", + "BriefDescription": "Cycles total of 2 uops are executed on all ports and Reservation Station was not empty.", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", - "SampleAfterValue": "400009", - "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA6", + "EventName": "EXE_ACTIVITY.2_PORTS_UTIL", + "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.", + "SampleAfterValue": "2000003", + "UMask": "0x4" }, { - "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT.", - "EventCode": "0xCC", + "BriefDescription": "Cycles the issue-stage is waiting for front-end to fetch from resteered path following branch misprediction or machine clear events.", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "ROB_MISC_EVENTS.LBR_INSERTS", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x0D", + "EventName": "INT_MISC.CLEAR_RESTEER_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Increments whenever there is an update to the LBR array.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x80" }, { - "EventCode": "0xCC", + "BriefDescription": "All (macro) branch instructions retired.", "Counter": "0,1,2,3", - "UMask": "0x40", - "EventName": "ROB_MISC_EVENTS.PAUSE_INST", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Errata": "SKL091", + "EventCode": "0xC4", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES", + "PublicDescription": "Counts all (macro) branch instructions retired.", + "SampleAfterValue": "400009" + }, + { + "BriefDescription": "Cycles where at least 1 uop was executed per-thread", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", + "PublicDescription": "Cycles where at least 1 uop was executed per-thread.", "SampleAfterValue": "2000003", - "BriefDescription": "Number of retired PAUSE instructions (that do not end up with a VMExit to the VMM; TSX aborted Instructions may be counted). This event is not supported on first SKL and KBL products.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Counts the number of times the front-end is resteered when it finds a branch instruction in a fetch line. This occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.", - "EventCode": "0xE6", + "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "BACLEARS.ANY", - "SampleAfterValue": "100003", - "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0xA3", + "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS", + "SampleAfterValue": "2000003", + "UMask": "0x1" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json index 8704efeb8d311..4cd246782dde1 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json +++ b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json @@ -1,370 +1,371 @@ [ { - "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend", "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.", + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend", "MetricGroup": "TopdownL1", - "MetricName": "Frontend_Bound", - "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound." + "MetricName": "Frontend_Bound" }, { + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU.", "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", - "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", "MetricGroup": "TopdownL1_SMT", - "MetricName": "Frontend_Bound_SMT", - "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU." + "MetricName": "Frontend_Bound_SMT" }, { - "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations", "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.", + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations", "MetricGroup": "TopdownL1", - "MetricName": "Bad_Speculation", - "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example." + "MetricName": "Bad_Speculation" }, { + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU.", "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. SMT version; use when SMT is enabled and measuring per logical CPU.", - "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", "MetricGroup": "TopdownL1_SMT", - "MetricName": "Bad_Speculation_SMT", - "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU." + "MetricName": "Bad_Speculation_SMT" }, { + "MetricConstraint": "NO_NMI_WATCHDOG", + "MetricGroup": "TopdownL1", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound.", "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend", "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)) )", - "MetricGroup": "TopdownL1", - "MetricName": "Backend_Bound", - "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound." + "MetricName": "Backend_Bound" }, { + "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) )", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU.", "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", - "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) )", "MetricGroup": "TopdownL1_SMT", - "MetricName": "Backend_Bound_SMT", - "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU." + "MetricName": "Backend_Bound_SMT" }, { - "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired", "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. ", + "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired", "MetricGroup": "TopdownL1", - "MetricName": "Retiring", - "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. " + "MetricName": "Retiring" }, { + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. SMT version; use when SMT is enabled and measuring per logical CPU.", "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. SMT version; use when SMT is enabled and measuring per logical CPU.", - "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", "MetricGroup": "TopdownL1_SMT", - "MetricName": "Retiring_SMT", - "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. SMT version; use when SMT is enabled and measuring per logical CPU." + "MetricName": "Retiring_SMT" }, { - "BriefDescription": "Instructions Per Cycle (per Logical Processor)", "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", - "MetricGroup": "TopDownL1", + "BriefDescription": "Instructions Per Cycle (per Logical Processor)", + "MetricGroup": "Summary", "MetricName": "IPC" }, { - "BriefDescription": "Uops Per Instruction", "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY", + "BriefDescription": "Uops Per Instruction", "MetricGroup": "Pipeline;Retire", "MetricName": "UPI" }, { - "BriefDescription": "Instruction per taken branch", "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN", + "BriefDescription": "Instruction per taken branch", "MetricGroup": "Branches;Fetch_BW;PGO", "MetricName": "IpTB" }, { - "BriefDescription": "Branch instructions per taken branch. ", - "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN", - "MetricGroup": "Branches;PGO", - "MetricName": "BpTB" - }, - { - "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely (includes speculatively fetches) consumed by program instructions", - "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1 ) )", - "MetricGroup": "PGO;IcMiss", - "MetricName": "IFetch_Line_Utilization" - }, - { - "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)", - "MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)", - "MetricGroup": "DSB;Fetch_BW", - "MetricName": "DSB_Coverage" - }, - { - "BriefDescription": "Cycles Per Instruction (per Logical Processor)", "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)", - "MetricGroup": "Pipeline;Summary", + "BriefDescription": "Cycles Per Instruction (per Logical Processor)", + "MetricGroup": "Pipeline", "MetricName": "CPI" }, { - "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.", "MetricExpr": "CPU_CLK_UNHALTED.THREAD", + "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.", "MetricGroup": "Summary", "MetricName": "CLKS" }, { - "BriefDescription": "Total issue-pipeline slots (per-Physical Core)", "MetricExpr": "4 * cycles", + "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)", "MetricGroup": "TopDownL1", "MetricName": "SLOTS" }, { - "BriefDescription": "Total issue-pipeline slots (per-Physical Core)", - "MetricExpr": "4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "MetricExpr": "4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", + "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)", "MetricGroup": "TopDownL1_SMT", "MetricName": "SLOTS_SMT" }, { - "BriefDescription": "Instructions per Load (lower number means higher occurance rate)", - "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS", - "MetricGroup": "Instruction_Type", - "MetricName": "IpL" - }, - { - "BriefDescription": "Instructions per Store (lower number means higher occurance rate)", - "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES", - "MetricGroup": "Instruction_Type", - "MetricName": "IpS" - }, - { - "BriefDescription": "Instructions per Branch (lower number means higher occurance rate)", - "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES", - "MetricGroup": "Branches;Instruction_Type", - "MetricName": "IpB" - }, - { - "BriefDescription": "Instruction per (near) call (lower number means higher occurance rate)", - "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL", - "MetricGroup": "Branches", - "MetricName": "IpCall" - }, - { - "BriefDescription": "Total number of retired Instructions", - "MetricExpr": "INST_RETIRED.ANY", - "MetricGroup": "Summary", - "MetricName": "Instructions" - }, - { - "BriefDescription": "Instructions Per Cycle (per physical core)", "MetricExpr": "INST_RETIRED.ANY / cycles", - "MetricGroup": "SMT", + "BriefDescription": "Instructions Per Cycle (per physical core)", + "MetricGroup": "SMT;TopDownL1", "MetricName": "CoreIPC" }, { + "MetricExpr": "INST_RETIRED.ANY / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", "BriefDescription": "Instructions Per Cycle (per physical core)", - "MetricExpr": "INST_RETIRED.ANY / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", - "MetricGroup": "SMT", + "MetricGroup": "SMT;TopDownL1", "MetricName": "CoreIPC_SMT" }, { + "MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE ) / cycles", "BriefDescription": "Floating Point Operations Per Cycle", - "MetricExpr": "(( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / cycles", "MetricGroup": "FLOPS", "MetricName": "FLOPc" }, { + "MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE ) / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", "BriefDescription": "Floating Point Operations Per Cycle", - "MetricExpr": "(( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", "MetricGroup": "FLOPS_SMT", "MetricName": "FLOPc_SMT" }, { + "MetricExpr": "UOPS_EXECUTED.THREAD / ( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 )", "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", - "MetricExpr": "UOPS_EXECUTED.THREAD / (( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 ) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)", - "MetricGroup": "Pipeline", + "MetricGroup": "Pipeline;Ports_Utilization", "MetricName": "ILP" }, { - "BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per non-speculative branch misprediction (jeclear)", "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * cycles)) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * cycles)) ) * (4 * cycles) / BR_MISP_RETIRED.ALL_BRANCHES", + "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)", "MetricGroup": "BrMispredicts", "MetricName": "Branch_Misprediction_Cost" }, { - "BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per non-speculative branch misprediction (jeclear)", - "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) / BR_MISP_RETIRED.ALL_BRANCHES", + "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) ) * (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) / BR_MISP_RETIRED.ALL_BRANCHES", + "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)", "MetricGroup": "BrMispredicts_SMT", "MetricName": "Branch_Misprediction_Cost_SMT" }, { - "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)", "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES", + "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)", "MetricGroup": "BrMispredicts", "MetricName": "IpMispredict" }, { - "BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core", "MetricExpr": "( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", + "BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core", "MetricGroup": "SMT", "MetricName": "CORE_CLKS" }, { - "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads (in core cycles)", + "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS", + "BriefDescription": "Instructions per Load (lower number means higher occurrence rate)", + "MetricGroup": "Instruction_Type", + "MetricName": "IpLoad" + }, + { + "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES", + "BriefDescription": "Instructions per Store (lower number means higher occurrence rate)", + "MetricGroup": "Instruction_Type", + "MetricName": "IpStore" + }, + { + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES", + "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)", + "MetricGroup": "Branches;Instruction_Type", + "MetricName": "IpBranch" + }, + { + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL", + "BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)", + "MetricGroup": "Branches", + "MetricName": "IpCall" + }, + { + "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN", + "BriefDescription": "Branch instructions per taken branch. ", + "MetricGroup": "Branches;PGO", + "MetricName": "BpTkBranch" + }, + { + "MetricExpr": "INST_RETIRED.ANY / ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )", + "BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)", + "MetricGroup": "FLOPS;FP_Arith;Instruction_Type", + "MetricName": "IpFLOP" + }, + { + "MetricExpr": "INST_RETIRED.ANY", + "BriefDescription": "Total number of retired Instructions", + "MetricGroup": "Summary;TopDownL1", + "MetricName": "Instructions" + }, + { + "MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)", + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)", + "MetricGroup": "DSB;Fetch_BW", + "MetricName": "DSB_Coverage" + }, + { "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )", + "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads (in core cycles)", "MetricGroup": "Memory_Bound;Memory_Lat", "MetricName": "Load_Miss_Real_Latency" }, { - "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)", "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES", + "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)", "MetricGroup": "Memory_Bound;Memory_BW", "MetricName": "MLP" }, { - "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricConstraint": "NO_NMI_WATCHDOG", "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles )", + "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", "MetricGroup": "TLB", - "MetricName": "Page_Walks_Utilization", - "MetricConstraint": "NO_NMI_WATCHDOG" + "MetricName": "Page_Walks_Utilization" }, { + "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ) )", "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", - "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) )", "MetricGroup": "TLB_SMT", "MetricName": "Page_Walks_Utilization_SMT" }, { - "BriefDescription": "Average data fill bandwidth to the L1 data cache [GB / sec]", "MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time", + "BriefDescription": "Average data fill bandwidth to the L1 data cache [GB / sec]", "MetricGroup": "Memory_BW", "MetricName": "L1D_Cache_Fill_BW" }, { - "BriefDescription": "Average data fill bandwidth to the L2 cache [GB / sec]", "MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time", + "BriefDescription": "Average data fill bandwidth to the L2 cache [GB / sec]", "MetricGroup": "Memory_BW", "MetricName": "L2_Cache_Fill_BW" }, { - "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]", "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time", + "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]", "MetricGroup": "Memory_BW", "MetricName": "L3_Cache_Fill_BW" }, { - "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]", "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1000000000 / duration_time", - "MetricGroup": "Memory_BW", + "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]", + "MetricGroup": "Memory_BW;Offcore", "MetricName": "L3_Cache_Access_BW" }, { - "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads", "MetricExpr": "1000 * MEM_LOAD_RETIRED.L1_MISS / INST_RETIRED.ANY", + "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads", "MetricGroup": "Cache_Misses", "MetricName": "L1MPKI" }, { - "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads", "MetricExpr": "1000 * MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY", + "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads", "MetricGroup": "Cache_Misses", "MetricName": "L2MPKI" }, { - "BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)", "MetricExpr": "1000 * L2_RQSTS.MISS / INST_RETIRED.ANY", - "MetricGroup": "Cache_Misses", + "BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)", + "MetricGroup": "Cache_Misses;Offcore", "MetricName": "L2MPKI_All" }, { - "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)", "MetricExpr": "1000 * ( L2_RQSTS.REFERENCES - L2_RQSTS.MISS ) / INST_RETIRED.ANY", + "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)", "MetricGroup": "Cache_Misses", "MetricName": "L2HPKI_All" }, { - "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads", "MetricExpr": "1000 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY", + "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads", "MetricGroup": "Cache_Misses", "MetricName": "L3MPKI" }, { - "BriefDescription": "Average CPU Utilization", "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", - "MetricGroup": "Summary", + "BriefDescription": "Average CPU Utilization", + "MetricGroup": "HPC;Summary", "MetricName": "CPU_Utilization" }, { + "MetricExpr": "( ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE ) / 1000000000 ) / duration_time", "BriefDescription": "Giga Floating Point Operations Per Second", - "MetricExpr": "( (( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / 1000000000 ) / duration_time", - "MetricGroup": "FLOPS;Summary", + "MetricGroup": "FLOPS;HPC", "MetricName": "GFLOPs" }, { - "BriefDescription": "Average Frequency Utilization relative nominal frequency", "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Average Frequency Utilization relative nominal frequency", "MetricGroup": "Power", "MetricName": "Turbo_Utilization" }, { + "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 )", "BriefDescription": "Fraction of cycles where both hardware Logical Processors were active", - "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", - "MetricGroup": "SMT;Summary", + "MetricGroup": "SMT", "MetricName": "SMT_2T_Utilization" }, { - "BriefDescription": "Fraction of cycles spent in Kernel mode", "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD", - "MetricGroup": "Summary", + "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode", + "MetricGroup": "OS", "MetricName": "Kernel_Utilization" }, { - "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]", "MetricExpr": "64 * ( arb@event\\=0x81\\,umask\\=0x1@ + arb@event\\=0x84\\,umask\\=0x1@ ) / 1000000 / duration_time / 1000", - "MetricGroup": "Memory_BW", + "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]", + "MetricGroup": "HPC;Memory_BW;SoC", "MetricName": "DRAM_BW_Use" }, { + "MetricExpr": "arb@event\\=0x80\\,umask\\=0x2@ / arb@event\\=0x80\\,umask\\=0x2\\,cmask\\=1@", "BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches", - "MetricExpr": "arb@event\\=0x80\\,umask\\=0x2@ / arb@event\\=0x80\\,umask\\=0x2\\,thresh\\=1@", - "MetricGroup": "Memory_BW", - "MetricName": "DRAM_Parallel_Reads" + "MetricGroup": "Memory_BW;SoC", + "MetricName": "MEM_Parallel_Reads" }, { - "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions. )", "MetricExpr": "INST_RETIRED.ANY / ( BR_INST_RETIRED.FAR_BRANCH / 2 )", - "MetricGroup": "", + "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]", + "MetricGroup": "Branches;OS", "MetricName": "IpFarBranch" }, { - "BriefDescription": "C3 residency percent per core", "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100", + "BriefDescription": "C3 residency percent per core", "MetricGroup": "Power", "MetricName": "C3_Core_Residency" }, { - "BriefDescription": "C6 residency percent per core", "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", + "BriefDescription": "C6 residency percent per core", "MetricGroup": "Power", "MetricName": "C6_Core_Residency" }, { - "BriefDescription": "C7 residency percent per core", "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100", + "BriefDescription": "C7 residency percent per core", "MetricGroup": "Power", "MetricName": "C7_Core_Residency" }, { - "BriefDescription": "C2 residency percent per package", "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", + "BriefDescription": "C2 residency percent per package", "MetricGroup": "Power", "MetricName": "C2_Pkg_Residency" }, { - "BriefDescription": "C3 residency percent per package", "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100", + "BriefDescription": "C3 residency percent per package", "MetricGroup": "Power", "MetricName": "C3_Pkg_Residency" }, { - "BriefDescription": "C6 residency percent per package", "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", + "BriefDescription": "C6 residency percent per package", "MetricGroup": "Power", "MetricName": "C6_Pkg_Residency" }, { - "BriefDescription": "C7 residency percent per package", "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", + "BriefDescription": "C7 residency percent per package", "MetricGroup": "Power", "MetricName": "C7_Pkg_Residency" } diff --git a/tools/perf/pmu-events/arch/x86/skylake/virtual-memory.json b/tools/perf/pmu-events/arch/x86/skylake/virtual-memory.json index 2bcba7daca14d..432530d15c26a 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/virtual-memory.json +++ b/tools/perf/pmu-events/arch/x86/skylake/virtual-memory.json @@ -1,284 +1,284 @@ [ { - "PublicDescription": "Counts demand data loads that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.", - "EventCode": "0x08", + "BriefDescription": "Store misses in all DTLB levels that cause page walks", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK", + "PublicDescription": "Counts demand data stores that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.", "SampleAfterValue": "100003", - "BriefDescription": "Load misses in all DTLB levels that cause page walks", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.", - "EventCode": "0x08", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K", - "SampleAfterValue": "2000003", - "BriefDescription": "Page walk completed due to a demand data load to a 4K page", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 2M/4M pages. The page walks can end with or without a page fault.", - "EventCode": "0x08", + "BriefDescription": "Page walk completed due to a demand data store to a 2M/4M page", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M", - "SampleAfterValue": "2000003", - "BriefDescription": "Page walk completed due to a demand data load to a 2M/4M page", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M", + "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "UMask": "0x4" }, { - "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.", - "EventCode": "0x08", + "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake.", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G", - "SampleAfterValue": "2000003", - "BriefDescription": "Page walk completed due to a demand data load to a 1G page", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.WALK_PENDING", + "PublicDescription": "Counts 1 per cycle for each PMH (Page Miss Handler) that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake michroarchitecture.", + "SampleAfterValue": "100003", + "UMask": "0x10" }, { - "PublicDescription": "Counts demand data loads that caused a completed page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels. The page walk can end with or without a fault.", - "EventCode": "0x08", + "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)", "Counter": "0,1,2,3", - "UMask": "0xe", - "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.WALK_COMPLETED_4K", + "PublicDescription": "Counts completed page walks (4K page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.", "SampleAfterValue": "100003", - "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (All page sizes)", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake microarchitecture.", - "EventCode": "0x08", + "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "DTLB_LOAD_MISSES.WALK_PENDING", - "SampleAfterValue": "2000003", - "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xAE", + "EventName": "ITLB.ITLB_FLUSH", + "PublicDescription": "Counts the number of flushes of the big or small ITLB pages. Counting include both TLB Flush (covering all sets) and TLB Set Clear (set-specific).", + "SampleAfterValue": "100007", + "UMask": "0x1" }, { - "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a load.", - "EventCode": "0x08", + "BriefDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake.", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE", - "SampleAfterValue": "100003", - "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.", + "CounterHTOff": "0,1,2,3,4,5,6,7", "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "EventCode": "0x85", + "EventName": "ITLB_MISSES.WALK_ACTIVE", + "PublicDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake microarchitecture.", + "SampleAfterValue": "100003", + "UMask": "0x10" }, { - "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB).", - "EventCode": "0x08", + "BriefDescription": "Loads that miss the DTLB and hit the STLB.", "Counter": "0,1,2,3", - "UMask": "0x20", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x08", "EventName": "DTLB_LOAD_MISSES.STLB_HIT", + "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB).", "SampleAfterValue": "2000003", - "BriefDescription": "Loads that miss the DTLB and hit the STLB.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x20" }, { - "PublicDescription": "Counts demand data stores that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.", + "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.WALK_PENDING", + "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake microarchitecture.", + "SampleAfterValue": "2000003", + "UMask": "0x10" + }, + { + "BriefDescription": "DTLB flush attempts of the thread-specific entries", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK", - "SampleAfterValue": "100003", - "BriefDescription": "Store misses in all DTLB levels that cause page walks", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xBD", + "EventName": "TLB_FLUSH.DTLB_THREAD", + "PublicDescription": "Counts the number of DTLB flush attempts of the thread-specific entries.", + "SampleAfterValue": "100007", + "UMask": "0x1" }, { - "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.", - "EventCode": "0x49", + "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K", - "SampleAfterValue": "100003", - "BriefDescription": "Page walk completed due to a demand data store to a 4K page", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.WALK_PENDING", + "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake microarchitecture.", + "SampleAfterValue": "2000003", + "UMask": "0x10" }, { - "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 2M/4M pages. The page walks can end with or without a page fault.", - "EventCode": "0x49", + "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.WALK_ACTIVE", + "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store.", "SampleAfterValue": "100003", - "BriefDescription": "Page walk completed due to a demand data store to a 2M/4M page", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x10" }, { - "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 1G pages. The page walks can end with or without a page fault.", - "EventCode": "0x49", + "BriefDescription": "Misses at all ITLB levels that cause page walks", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK", + "PublicDescription": "Counts page walks of any page size (4K/2M/4M/1G) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB, but the walk need not have completed.", "SampleAfterValue": "100003", - "BriefDescription": "Page walk completed due to a demand data store to a 1G page", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data stores that caused a completed page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels. The page walk can end with or without a fault.", - "EventCode": "0x49", + "BriefDescription": "Stores that miss the DTLB and hit the STLB.", "Counter": "0,1,2,3", - "UMask": "0xe", - "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.STLB_HIT", + "PublicDescription": "Stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).", "SampleAfterValue": "100003", - "BriefDescription": "Store misses in all TLB levels causes a page walk that completes. (All page sizes)", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x20" }, { - "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake microarchitecture.", - "EventCode": "0x49", + "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (All page sizes)", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "DTLB_STORE_MISSES.WALK_PENDING", - "SampleAfterValue": "2000003", - "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data loads. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "UMask": "0xe" }, { - "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store.", - "EventCode": "0x49", + "BriefDescription": "Page walk completed due to a demand data store to a 4K page", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "DTLB_STORE_MISSES.WALK_ACTIVE", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K", + "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.", "SampleAfterValue": "100003", - "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "PublicDescription": "Stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).", - "EventCode": "0x49", + "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (1G)", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "DTLB_STORE_MISSES.STLB_HIT", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.WALK_COMPLETED_1G", + "PublicDescription": "Counts completed page walks (1G page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.", "SampleAfterValue": "100003", - "BriefDescription": "Stores that miss the DTLB and hit the STLB.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x8" }, { - "PublicDescription": "Counts cycles for each PMH (Page Miss Handler) that is busy with an EPT (Extended Page Table) walk for any request type.", - "EventCode": "0x4F", + "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (All page sizes)", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "EPT.WALK_PENDING", - "SampleAfterValue": "2000003", - "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a EPT (Extended Page Table) walk for any request type.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts completed page walks (all page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "UMask": "0xe" }, { - "PublicDescription": "Counts page walks of any page size (4K/2M/4M/1G) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB, but the walk need not have completed.", - "EventCode": "0x85", + "BriefDescription": "Page walk completed due to a demand data load to a 4K page", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK", - "SampleAfterValue": "100003", - "BriefDescription": "Misses at all ITLB levels that cause page walks", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K", + "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.", + "SampleAfterValue": "2000003", + "UMask": "0x2" }, { - "PublicDescription": "Counts completed page walks (4K page size) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB. The page walk can end with or without a fault.", - "EventCode": "0x85", + "BriefDescription": "Instruction fetch requests that miss the ITLB and hit the STLB.", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "ITLB_MISSES.WALK_COMPLETED_4K", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.STLB_HIT", "SampleAfterValue": "100003", - "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x20" }, { - "PublicDescription": "Counts code misses in all ITLB levels that caused a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.", - "EventCode": "0x85", + "BriefDescription": "Page walk completed due to a demand data load to a 2M/4M page", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M", - "SampleAfterValue": "100003", - "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M", + "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.", + "SampleAfterValue": "2000003", + "UMask": "0x4" }, { - "PublicDescription": "Counts store misses in all DTLB levels that cause a completed page walk (1G page size). The page walk can end with or without a fault.", - "EventCode": "0x85", + "BriefDescription": "Load misses in all DTLB levels that cause page walks", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "ITLB_MISSES.WALK_COMPLETED_1G", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK", + "PublicDescription": "Counts demand data loads that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.", "SampleAfterValue": "100003", - "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (1G)", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Counts completed page walks (2M and 4M page sizes) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB. The page walk can end with or without a fault.", - "EventCode": "0x85", + "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a EPT (Extended Page Table) walk for any request type.", "Counter": "0,1,2,3", - "UMask": "0xe", - "EventName": "ITLB_MISSES.WALK_COMPLETED", - "SampleAfterValue": "100003", - "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (All page sizes)", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x4f", + "EventName": "EPT.WALK_PENDING", + "PublicDescription": "Counts cycles for each PMH (Page Miss Handler) that is busy with an EPT (Extended Page Table) walk for any request type.", + "SampleAfterValue": "2000003", + "UMask": "0x10" }, { - "PublicDescription": "Counts 1 per cycle for each PMH (Page Miss Handler) that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake michroarchitecture.", - "EventCode": "0x85", + "BriefDescription": "STLB flush attempts", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "ITLB_MISSES.WALK_PENDING", - "SampleAfterValue": "100003", - "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xBD", + "EventName": "TLB_FLUSH.STLB_ANY", + "PublicDescription": "Counts the number of any STLB flush attempts (such as entire, VPID, PCID, InvPage, CR3 write, etc.).", + "SampleAfterValue": "100007", + "UMask": "0x20" }, { - "PublicDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake microarchitecture.", - "EventCode": "0x85", + "BriefDescription": "Page walk completed due to a demand data load to a 1G page", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "ITLB_MISSES.WALK_ACTIVE", - "SampleAfterValue": "100003", - "BriefDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G", + "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.", + "SampleAfterValue": "2000003", + "UMask": "0x8" }, { - "EventCode": "0x85", + "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "ITLB_MISSES.STLB_HIT", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE", + "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a load.", "SampleAfterValue": "100003", - "BriefDescription": "Instruction fetch requests that miss the ITLB and hit the STLB.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x10" }, { - "PublicDescription": "Counts the number of flushes of the big or small ITLB pages. Counting include both TLB Flush (covering all sets) and TLB Set Clear (set-specific).", - "EventCode": "0xAE", + "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "ITLB.ITLB_FLUSH", - "SampleAfterValue": "100007", - "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M", + "PublicDescription": "Counts completed page walks (2M/4M page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "UMask": "0x4" }, { - "PublicDescription": "Counts the number of DTLB flush attempts of the thread-specific entries.", - "EventCode": "0xBD", + "BriefDescription": "Store misses in all TLB levels causes a page walk that completes. (All page sizes)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "TLB_FLUSH.DTLB_THREAD", - "SampleAfterValue": "100007", - "BriefDescription": "DTLB flush attempts of the thread-specific entries", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data stores. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "UMask": "0xe" }, { - "PublicDescription": "Counts the number of any STLB flush attempts (such as entire, VPID, PCID, InvPage, CR3 write, etc.).", - "EventCode": "0xBD", + "BriefDescription": "Page walk completed due to a demand data store to a 1G page", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "TLB_FLUSH.STLB_ANY", - "SampleAfterValue": "100007", - "BriefDescription": "STLB flush attempts", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G", + "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "UMask": "0x8" } ] \ No newline at end of file -- GitLab From 29396cd573da08ae9ab0b75925c2f6b3cabb9dfa Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 26 Aug 2020 08:30:55 -0700 Subject: [PATCH 0206/1894] perf expr: Force encapsulation on expr_id_data This patch resolves some undefined behavior where variables in expr_id_data were accessed (for debugging) without being defined. To better enforce the tagged union behavior, the struct is moved into expr.c and accessors provided. Tag values (kinds) are explicitly identified. Signed-off-by: Ian Rogers Reviewed-By: Kajol Jain Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20200826153055.2067780-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/expr.c | 68 ++++++++++++++++++++++++++++++----- tools/perf/util/expr.h | 17 +++------ tools/perf/util/expr.y | 2 +- tools/perf/util/metricgroup.c | 4 +-- 4 files changed, 66 insertions(+), 25 deletions(-) diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index 53482ef53c411..a850fd0be3ee2 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -17,6 +17,29 @@ extern int expr_debug; #endif +struct expr_id_data { + union { + double val; + struct { + double val; + const char *metric_name; + const char *metric_expr; + } ref; + struct expr_id *parent; + }; + + enum { + /* Holding a double value. */ + EXPR_ID_DATA__VALUE, + /* Reference to another metric. */ + EXPR_ID_DATA__REF, + /* A reference but the value has been computed. */ + EXPR_ID_DATA__REF_VALUE, + /* A parent is remembered for the recursion check. */ + EXPR_ID_DATA__PARENT, + } kind; +}; + static size_t key_hash(const void *key, void *ctx __maybe_unused) { const char *str = (const char *)key; @@ -48,6 +71,7 @@ int expr__add_id(struct expr_parse_ctx *ctx, const char *id) return -ENOMEM; data_ptr->parent = ctx->parent; + data_ptr->kind = EXPR_ID_DATA__PARENT; ret = hashmap__set(&ctx->ids, id, data_ptr, (const void **)&old_key, (void **)&old_data); @@ -69,7 +93,7 @@ int expr__add_id_val(struct expr_parse_ctx *ctx, const char *id, double val) if (!data_ptr) return -ENOMEM; data_ptr->val = val; - data_ptr->is_ref = false; + data_ptr->kind = EXPR_ID_DATA__VALUE; ret = hashmap__set(&ctx->ids, id, data_ptr, (const void **)&old_key, (void **)&old_data); @@ -114,8 +138,7 @@ int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref) */ data_ptr->ref.metric_name = ref->metric_name; data_ptr->ref.metric_expr = ref->metric_expr; - data_ptr->ref.counted = false; - data_ptr->is_ref = true; + data_ptr->kind = EXPR_ID_DATA__REF; ret = hashmap__set(&ctx->ids, name, data_ptr, (const void **)&old_key, (void **)&old_data); @@ -148,17 +171,30 @@ int expr__resolve_id(struct expr_parse_ctx *ctx, const char *id, data = *datap; - pr_debug2("lookup: is_ref %d, counted %d, val %f: %s\n", - data->is_ref, data->ref.counted, data->val, id); - - if (data->is_ref && !data->ref.counted) { - data->ref.counted = true; + switch (data->kind) { + case EXPR_ID_DATA__VALUE: + pr_debug2("lookup(%s): val %f\n", id, data->val); + break; + case EXPR_ID_DATA__PARENT: + pr_debug2("lookup(%s): parent %s\n", id, data->parent->id); + break; + case EXPR_ID_DATA__REF: + pr_debug2("lookup(%s): ref metric name %s\n", id, + data->ref.metric_name); pr_debug("processing metric: %s ENTRY\n", id); - if (expr__parse(&data->val, ctx, data->ref.metric_expr, 1)) { + data->kind = EXPR_ID_DATA__REF_VALUE; + if (expr__parse(&data->ref.val, ctx, data->ref.metric_expr, 1)) { pr_debug("%s failed to count\n", id); return -1; } pr_debug("processing metric: %s EXIT: %f\n", id, data->val); + break; + case EXPR_ID_DATA__REF_VALUE: + pr_debug2("lookup(%s): ref val %f metric name %s\n", id, + data->ref.val, data->ref.metric_name); + break; + default: + assert(0); /* Unreachable. */ } return 0; @@ -241,3 +277,17 @@ int expr__find_other(const char *expr, const char *one, return ret; } + +double expr_id_data__value(const struct expr_id_data *data) +{ + if (data->kind == EXPR_ID_DATA__VALUE) + return data->val; + assert(data->kind == EXPR_ID_DATA__REF_VALUE); + return data->ref.val; +} + +struct expr_id *expr_id_data__parent(struct expr_id_data *data) +{ + assert(data->kind == EXPR_ID_DATA__PARENT); + return data->parent; +} diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h index fc2b5e824a663..dcf8d19b83c85 100644 --- a/tools/perf/util/expr.h +++ b/tools/perf/util/expr.h @@ -23,19 +23,7 @@ struct expr_parse_ctx { struct expr_id *parent; }; -struct expr_id_data { - union { - double val; - struct { - const char *metric_name; - const char *metric_expr; - bool counted; - } ref; - struct expr_id *parent; - }; - - bool is_ref; -}; +struct expr_id_data; struct expr_scanner_ctx { int start_token; @@ -57,4 +45,7 @@ int expr__parse(double *final_val, struct expr_parse_ctx *ctx, int expr__find_other(const char *expr, const char *one, struct expr_parse_ctx *ids, int runtime); +double expr_id_data__value(const struct expr_id_data *data); +struct expr_id *expr_id_data__parent(struct expr_id_data *data); + #endif diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y index d34b370391c67..b2ada8f8309a9 100644 --- a/tools/perf/util/expr.y +++ b/tools/perf/util/expr.y @@ -93,7 +93,7 @@ expr: NUMBER YYABORT; } - $$ = data->val; + $$ = expr_id_data__value(data); free($1); } | expr '|' expr { $$ = (long)$1 | (long)$3; } diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 060454a172935..81d201c8b833d 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -833,7 +833,7 @@ static int recursion_check(struct metric *m, const char *id, struct expr_id **pa if (ret) return ret; - p = data->parent; + p = expr_id_data__parent(data); while (p->parent) { if (!strcmp(p->id, id)) { @@ -854,7 +854,7 @@ static int recursion_check(struct metric *m, const char *id, struct expr_id **pa } p->id = strdup(id); - p->parent = data->parent; + p->parent = expr_id_data__parent(data); *parent = p; return p->id ? 0 : -ENOMEM; -- GitLab From 08d3e27718bd45ea3284b1b99a2082a233b8667c Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Wed, 11 Nov 2020 13:26:36 +0100 Subject: [PATCH 0207/1894] KVM: selftests: Make test skipping consistent Signed-off-by: Andrew Jones Message-Id: <20201111122636.73346-12-drjones@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/x86_64/kvm_pv_test.c | 4 ++-- tools/testing/selftests/kvm/x86_64/user_msr_test.c | 6 +++++- .../kvm/x86_64/vmx_preemption_timer_test.c | 14 +++++++------- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c index b10a27485bad9..732b244d69564 100644 --- a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c +++ b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c @@ -211,8 +211,8 @@ int main(void) struct kvm_vm *vm; if (!kvm_check_cap(KVM_CAP_ENFORCE_PV_FEATURE_CPUID)) { - pr_info("will skip kvm paravirt restriction tests.\n"); - return 0; + print_skip("KVM_CAP_ENFORCE_PV_FEATURE_CPUID not supported"); + exit(KSFT_SKIP); } vm = vm_create_default(VCPU_ID, 0, guest_main); diff --git a/tools/testing/selftests/kvm/x86_64/user_msr_test.c b/tools/testing/selftests/kvm/x86_64/user_msr_test.c index cbe1b08890ff9..f2a667ca49c1f 100644 --- a/tools/testing/selftests/kvm/x86_64/user_msr_test.c +++ b/tools/testing/selftests/kvm/x86_64/user_msr_test.c @@ -209,7 +209,11 @@ int main(int argc, char *argv[]) run = vcpu_state(vm, VCPU_ID); rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR); - TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available"); + if (!rc) { + print_skip("KVM_CAP_X86_USER_SPACE_MSR not supported"); + exit(KSFT_SKIP); + } + vm_enable_cap(vm, &cap); rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER); diff --git a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c index a7737af1224fe..25b783070d214 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c @@ -169,6 +169,11 @@ int main(int argc, char *argv[]) */ nested_vmx_check_supported(); + if (!kvm_check_cap(KVM_CAP_NESTED_STATE)) { + print_skip("KVM_CAP_NESTED_STATE not supported"); + exit(KSFT_SKIP); + } + /* Create VM */ vm = vm_create_default(VCPU_ID, 0, guest_code); vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); @@ -176,13 +181,8 @@ int main(int argc, char *argv[]) vcpu_regs_get(vm, VCPU_ID, ®s1); - if (kvm_check_cap(KVM_CAP_NESTED_STATE)) { - vcpu_alloc_vmx(vm, &vmx_pages_gva); - vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); - } else { - pr_info("will skip vmx preemption timer checks\n"); - goto done; - } + vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); for (stage = 1;; stage++) { _vcpu_run(vm, VCPU_ID); -- GitLab From 22f232d134e142022f5e4cf2de4587a34d5b7d65 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Wed, 11 Nov 2020 13:26:35 +0100 Subject: [PATCH 0208/1894] KVM: selftests: x86: Set supported CPUIDs on default VM Almost all tests do this anyway and the ones that don't don't appear to care. Only vmx_set_nested_state_test assumes that a feature (VMX) is disabled until later setting the supported CPUIDs. It's better to disable that explicitly anyway. Signed-off-by: Andrew Jones Message-Id: <20201111122636.73346-11-drjones@redhat.com> [Restore CPUID_VMX, or vmx_set_nested_state breaks. - Paolo] Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/dirty_log_test.c | 3 --- .../selftests/kvm/include/perf_test_util.h | 4 ---- tools/testing/selftests/kvm/lib/kvm_util.c | 11 ++++++++-- .../selftests/kvm/set_memory_region_test.c | 2 -- .../kvm/x86_64/cr4_cpuid_sync_test.c | 1 - .../testing/selftests/kvm/x86_64/debug_regs.c | 1 - .../testing/selftests/kvm/x86_64/evmcs_test.c | 2 -- tools/testing/selftests/kvm/x86_64/smm_test.c | 2 -- .../testing/selftests/kvm/x86_64/state_test.c | 1 - .../selftests/kvm/x86_64/svm_vmcall_test.c | 1 - .../selftests/kvm/x86_64/tsc_msrs_test.c | 1 - .../selftests/kvm/x86_64/user_msr_test.c | 1 - .../kvm/x86_64/vmx_apic_access_test.c | 1 - .../kvm/x86_64/vmx_close_while_nested_test.c | 1 - .../selftests/kvm/x86_64/vmx_dirty_log_test.c | 1 - .../kvm/x86_64/vmx_preemption_timer_test.c | 1 - .../kvm/x86_64/vmx_set_nested_state_test.c | 21 +++++++++++++++++++ .../kvm/x86_64/vmx_tsc_adjust_test.c | 1 - 18 files changed, 30 insertions(+), 26 deletions(-) diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index b1f8731721b96..471baecb7772a 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -740,9 +740,6 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, /* Cache the HVA pointer of the region */ host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem); -#ifdef __x86_64__ - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); -#endif ucall_init(vm, NULL); /* Export the shared variables to the guest */ diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h index 2618052057b19..239421e4f6b81 100644 --- a/tools/testing/selftests/kvm/include/perf_test_util.h +++ b/tools/testing/selftests/kvm/include/perf_test_util.h @@ -179,10 +179,6 @@ static void add_vcpus(struct kvm_vm *vm, int vcpus, uint64_t vcpu_memory_bytes) vm_vcpu_add_default(vm, vcpu_id, guest_code); -#ifdef __x86_64__ - vcpu_set_cpuid(vm, vcpu_id, kvm_get_supported_cpuid()); -#endif - vcpu_args->vcpu_id = vcpu_id; vcpu_args->gva = guest_test_virt_mem + (vcpu_id * vcpu_memory_bytes); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index df2035ac54a63..b2c426adb87f8 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -311,8 +311,15 @@ struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus, vm_create_irqchip(vm); #endif - for (i = 0; i < nr_vcpus; ++i) - vm_vcpu_add_default(vm, vcpuids ? vcpuids[i] : i, guest_code); + for (i = 0; i < nr_vcpus; ++i) { + uint32_t vcpuid = vcpuids ? vcpuids[i] : i; + + vm_vcpu_add_default(vm, vcpuid, guest_code); + +#ifdef __x86_64__ + vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid()); +#endif + } return vm; } diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index b3ece55a2da69..5fa5823661da4 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -121,8 +121,6 @@ static struct kvm_vm *spawn_vm(pthread_t *vcpu_thread, void *guest_code) vm = vm_create_default(VCPU_ID, 0, guest_code); - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_THP, MEM_REGION_GPA, MEM_REGION_SLOT, MEM_REGION_SIZE / getpagesize(), 0); diff --git a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c index 140e91901582b..f40fd097cb359 100644 --- a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c +++ b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c @@ -81,7 +81,6 @@ int main(int argc, char *argv[]) /* Create VM */ vm = vm_create_default(VCPU_ID, 0, guest_code); - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); run = vcpu_state(vm, VCPU_ID); while (1) { diff --git a/tools/testing/selftests/kvm/x86_64/debug_regs.c b/tools/testing/selftests/kvm/x86_64/debug_regs.c index 2fc6b3af81a11..6097a8283377f 100644 --- a/tools/testing/selftests/kvm/x86_64/debug_regs.c +++ b/tools/testing/selftests/kvm/x86_64/debug_regs.c @@ -85,7 +85,6 @@ int main(void) } vm = vm_create_default(VCPU_ID, 0, guest_code); - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); run = vcpu_state(vm, VCPU_ID); /* Test software BPs - int3 */ diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c index 757928199f19a..37b8a78f6b74a 100644 --- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c +++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c @@ -92,8 +92,6 @@ int main(int argc, char *argv[]) /* Create VM */ vm = vm_create_default(VCPU_ID, 0, guest_code); - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); - if (!nested_vmx_supported() || !kvm_check_cap(KVM_CAP_NESTED_STATE) || !kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) { diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c index ae39a220609f5..613c42c5a9b8d 100644 --- a/tools/testing/selftests/kvm/x86_64/smm_test.c +++ b/tools/testing/selftests/kvm/x86_64/smm_test.c @@ -102,8 +102,6 @@ int main(int argc, char *argv[]) /* Create VM */ vm = vm_create_default(VCPU_ID, 0, guest_code); - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); - run = vcpu_state(vm, VCPU_ID); vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, SMRAM_GPA, diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c index f6c8b9042f8ab..32854c1462ad2 100644 --- a/tools/testing/selftests/kvm/x86_64/state_test.c +++ b/tools/testing/selftests/kvm/x86_64/state_test.c @@ -165,7 +165,6 @@ int main(int argc, char *argv[]) /* Create VM */ vm = vm_create_default(VCPU_ID, 0, guest_code); - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); run = vcpu_state(vm, VCPU_ID); vcpu_regs_get(vm, VCPU_ID, ®s1); diff --git a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c index 0e1adb4e3199f..be2ca157485b6 100644 --- a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c +++ b/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c @@ -44,7 +44,6 @@ int main(int argc, char *argv[]) nested_svm_check_supported(); vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code); - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); vcpu_alloc_svm(vm, &svm_gva); vcpu_args_set(vm, VCPU_ID, 1, svm_gva); diff --git a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c index f8e761149daac..e357d8e222d47 100644 --- a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c +++ b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c @@ -107,7 +107,6 @@ int main(void) uint64_t val; vm = vm_create_default(VCPU_ID, 0, guest_code); - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); val = 0; ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val); diff --git a/tools/testing/selftests/kvm/x86_64/user_msr_test.c b/tools/testing/selftests/kvm/x86_64/user_msr_test.c index f2a667ca49c1f..fe88b98908ee1 100644 --- a/tools/testing/selftests/kvm/x86_64/user_msr_test.c +++ b/tools/testing/selftests/kvm/x86_64/user_msr_test.c @@ -205,7 +205,6 @@ int main(int argc, char *argv[]) /* Create VM */ vm = vm_create_default(VCPU_ID, 0, guest_code); - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); run = vcpu_state(vm, VCPU_ID); rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR); diff --git a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c index 1f65342d6cb72..d14888b34adb3 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c @@ -87,7 +87,6 @@ int main(int argc, char *argv[]) nested_vmx_check_supported(); vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code); - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); kvm_get_cpu_address_width(&paddr_width, &vaddr_width); high_gpa = (1ul << paddr_width) - getpagesize(); diff --git a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c index fe40ade06a490..2835a17f1b7ad 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c @@ -57,7 +57,6 @@ int main(int argc, char *argv[]) nested_vmx_check_supported(); vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code); - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); /* Allocate VMX pages and shared descriptors (vmx_pages). */ vcpu_alloc_vmx(vm, &vmx_pages_gva); diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c index e894a638a155c..537de10685544 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c @@ -82,7 +82,6 @@ int main(int argc, char *argv[]) /* Create VM */ vm = vm_create_default(VCPU_ID, 0, l1_guest_code); - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva); vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); run = vcpu_state(vm, VCPU_ID); diff --git a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c index 25b783070d214..a07480aed397b 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c @@ -176,7 +176,6 @@ int main(int argc, char *argv[]) /* Create VM */ vm = vm_create_default(VCPU_ID, 0, guest_code); - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); run = vcpu_state(vm, VCPU_ID); vcpu_regs_get(vm, VCPU_ID, ®s1); diff --git a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c index d59f3eb67c8f5..5827b9bae4682 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c @@ -244,6 +244,22 @@ void test_vmx_nested_state(struct kvm_vm *vm) free(state); } +void disable_vmx(struct kvm_vm *vm) +{ + struct kvm_cpuid2 *cpuid = kvm_get_supported_cpuid(); + int i; + + for (i = 0; i < cpuid->nent; ++i) + if (cpuid->entries[i].function == 1 && + cpuid->entries[i].index == 0) + break; + TEST_ASSERT(i != cpuid->nent, "CPUID function 1 not found"); + + cpuid->entries[i].ecx &= ~CPUID_VMX; + vcpu_set_cpuid(vm, VCPU_ID, cpuid); + cpuid->entries[i].ecx |= CPUID_VMX; +} + int main(int argc, char *argv[]) { struct kvm_vm *vm; @@ -264,6 +280,11 @@ int main(int argc, char *argv[]) vm = vm_create_default(VCPU_ID, 0, 0); + /* + * First run tests with VMX disabled to check error handling. + */ + disable_vmx(vm); + /* Passing a NULL kvm_nested_state causes a EFAULT. */ test_nested_state_expect_efault(vm, NULL); diff --git a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c index fbe8417cbc2c3..7e33a350b053a 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c @@ -132,7 +132,6 @@ int main(int argc, char *argv[]) nested_vmx_check_supported(); vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code); - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); /* Allocate VMX pages and shared descriptors (vmx_pages). */ vcpu_alloc_vmx(vm, &vmx_pages_gva); -- GitLab From 789f52c071a0fdaa15ed119912fedd840458e25f Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Fri, 6 Nov 2020 16:39:23 +0800 Subject: [PATCH 0209/1894] x86/kvm: remove unused macro HV_CLOCK_SIZE This macro is useless, and could cause gcc warning: arch/x86/kernel/kvmclock.c:47:0: warning: macro "HV_CLOCK_SIZE" is not used [-Wunused-macros] Let's remove it. Signed-off-by: Alex Shi Cc: Paolo Bonzini Cc: Sean Christopherson Cc: Vitaly Kuznetsov Cc: Wanpeng Li Cc: Jim Mattson Cc: Joerg Roedel Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: x86@kernel.org Cc: "H. Peter Anvin" Cc: kvm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Message-Id: <1604651963-10067-1-git-send-email-alex.shi@linux.alibaba.com> Signed-off-by: Paolo Bonzini --- arch/x86/kernel/kvmclock.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 34b18f6eeb2ce..aa593743acf67 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -44,7 +44,6 @@ static int __init parse_no_kvmclock_vsyscall(char *arg) early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall); /* Aligned to page sizes to match whats mapped via vsyscalls to userspace */ -#define HV_CLOCK_SIZE (sizeof(struct pvclock_vsyscall_time_info) * NR_CPUS) #define HVC_BOOT_ARRAY_SIZE \ (PAGE_SIZE / sizeof(struct pvclock_vsyscall_time_info)) -- GitLab From 7e8e6eed75e290526d5c98d023e88b141e2c93ec Mon Sep 17 00:00:00 2001 From: Cathy Avery Date: Sun, 11 Oct 2020 14:48:17 -0400 Subject: [PATCH 0210/1894] KVM: SVM: Move asid to vcpu_svm KVM does not have separate ASIDs for L1 and L2; either the nested hypervisor and nested guests share a single ASID, or on older processor the ASID is used only to implement TLB flushing. Either way, ASIDs are handled at the VM level. In preparation for having different VMCBs passed to VMLOAD/VMRUN/VMSAVE for L1 and L2, store the current ASID to struct vcpu_svm and only move it to the VMCB in svm_vcpu_run. This way, TLB flushes can be applied no matter which VMCB will be active during the next svm_vcpu_run. Signed-off-by: Cathy Avery Message-Id: <20201011184818.3609-2-cavery@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 10 +++++++--- arch/x86/kvm/svm/svm.h | 1 + 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 253809216cc73..3b53a7ead04bb 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1215,6 +1215,7 @@ static void init_vmcb(struct vcpu_svm *svm) save->cr4 = 0; } svm->asid_generation = 0; + svm->asid = 0; svm->nested.vmcb12_gpa = 0; svm->vcpu.arch.hflags = 0; @@ -1755,12 +1756,11 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) ++sd->asid_generation; sd->next_asid = sd->min_asid; svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID; + vmcb_mark_dirty(svm->vmcb, VMCB_ASID); } svm->asid_generation = sd->asid_generation; - svm->vmcb->control.asid = sd->next_asid++; - - vmcb_mark_dirty(svm->vmcb, VMCB_ASID); + svm->asid = sd->next_asid++; } static void svm_set_dr6(struct vcpu_svm *svm, unsigned long value) @@ -3570,6 +3570,10 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) sync_lapic_to_cr8(vcpu); + if (unlikely(svm->asid != svm->vmcb->control.asid)) { + svm->vmcb->control.asid = svm->asid; + vmcb_mark_dirty(svm->vmcb, VMCB_ASID); + } svm->vmcb->save.cr2 = vcpu->arch.cr2; /* diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index eb6fbafbe36c2..fdff76eb6ceb7 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -106,6 +106,7 @@ struct vcpu_svm { struct vmcb *vmcb; unsigned long vmcb_pa; struct svm_cpu_data *svm_data; + u32 asid; uint64_t asid_generation; uint64_t sysenter_esp; uint64_t sysenter_eip; -- GitLab From dc924b062488a0376aae41d3e0a27dc99f852a5e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sun, 15 Nov 2020 09:44:18 -0500 Subject: [PATCH 0211/1894] KVM: SVM: check CR4 changes against vcpu->arch Similarly to what vmx/vmx.c does, use vcpu->arch.cr4 to check if CR4 bits PGE, PKE and OSXSAVE have changed. When switching between VMCB01 and VMCB02, CPUID has to be adjusted every time if CR4.PKE or CR4.OSXSAVE change; without this patch, instead, CR4 would be checked against the previous value for L2 on vmentry, and against the previous value for L1 on vmexit, and CPUID would not be updated. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 3b53a7ead04bb..6dc337b9c2319 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1691,7 +1691,7 @@ static bool svm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { unsigned long host_cr4_mce = cr4_read_shadow() & X86_CR4_MCE; - unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4; + unsigned long old_cr4 = vcpu->arch.cr4; if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE)) svm_flush_tlb(vcpu); -- GitLab From 8d39cee0592e0129280e5a3cc480d64649c5e63f Mon Sep 17 00:00:00 2001 From: Chester Lin Date: Fri, 30 Oct 2020 14:08:40 +0800 Subject: [PATCH 0212/1894] arm64/ima: add ima_arch support Add arm64 IMA arch support. The code and arch policy is mainly inherited from x86. Co-developed-by: Chester Lin Signed-off-by: Chester Lin Acked-by: Mimi Zohar Acked-by: Catalin Marinas Signed-off-by: Ard Biesheuvel --- arch/arm64/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index f858c352f72a4..04e78a367c2c1 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1849,6 +1849,7 @@ config EFI select EFI_RUNTIME_WRAPPERS select EFI_STUB select EFI_GENERIC_STUB + imply IMA_SECURE_AND_OR_TRUSTED_BOOT default y help This option provides support for runtime services provided -- GitLab From b283477d394ac41ca59ee20eb9293ae9002eb1d7 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 3 Nov 2020 07:50:04 +0100 Subject: [PATCH 0213/1894] efi: x86/xen: switch to efi_get_secureboot_mode helper Now that we have a static inline helper to discover the platform's secure boot mode that can be shared between the EFI stub and the kernel proper, switch to it, and drop some comments about keeping them in sync manually. Signed-off-by: Ard Biesheuvel --- arch/x86/xen/efi.c | 37 ++++++----------------- drivers/firmware/efi/libstub/secureboot.c | 3 -- 2 files changed, 9 insertions(+), 31 deletions(-) diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c index 205a9bc981b0c..7d7ffb9c826a3 100644 --- a/arch/x86/xen/efi.c +++ b/arch/x86/xen/efi.c @@ -93,37 +93,22 @@ static efi_system_table_t __init *xen_efi_probe(void) /* * Determine whether we're in secure boot mode. - * - * Please keep the logic in sync with - * drivers/firmware/efi/libstub/secureboot.c:efi_get_secureboot(). */ static enum efi_secureboot_mode xen_efi_get_secureboot(void) { - static efi_guid_t efi_variable_guid = EFI_GLOBAL_VARIABLE_GUID; static efi_guid_t shim_guid = EFI_SHIM_LOCK_GUID; + enum efi_secureboot_mode mode; efi_status_t status; - u8 moksbstate, secboot, setupmode; + u8 moksbstate; unsigned long size; - size = sizeof(secboot); - status = efi.get_variable(L"SecureBoot", &efi_variable_guid, - NULL, &size, &secboot); - - if (status == EFI_NOT_FOUND) - return efi_secureboot_mode_disabled; - - if (status != EFI_SUCCESS) - goto out_efi_err; - - size = sizeof(setupmode); - status = efi.get_variable(L"SetupMode", &efi_variable_guid, - NULL, &size, &setupmode); - - if (status != EFI_SUCCESS) - goto out_efi_err; - - if (secboot == 0 || setupmode == 1) - return efi_secureboot_mode_disabled; + mode = efi_get_secureboot_mode(efi.get_variable); + if (mode == efi_secureboot_mode_unknown) { + pr_err("Could not determine UEFI Secure Boot status.\n"); + return efi_secureboot_mode_unknown; + } + if (mode != efi_secureboot_mode_enabled) + return mode; /* See if a user has put the shim into insecure mode. */ size = sizeof(moksbstate); @@ -140,10 +125,6 @@ static enum efi_secureboot_mode xen_efi_get_secureboot(void) secure_boot_enabled: pr_info("UEFI Secure Boot is enabled.\n"); return efi_secureboot_mode_enabled; - - out_efi_err: - pr_err("Could not determine UEFI Secure Boot status.\n"); - return efi_secureboot_mode_unknown; } void __init xen_efi_init(struct boot_params *boot_params) diff --git a/drivers/firmware/efi/libstub/secureboot.c b/drivers/firmware/efi/libstub/secureboot.c index af18d86c16041..8a18930f3eb69 100644 --- a/drivers/firmware/efi/libstub/secureboot.c +++ b/drivers/firmware/efi/libstub/secureboot.c @@ -24,9 +24,6 @@ static efi_status_t get_var(efi_char16_t *name, efi_guid_t *vendor, u32 *attr, /* * Determine whether we're in secure boot mode. - * - * Please keep the logic in sync with - * arch/x86/xen/efi.c:xen_efi_get_secureboot(). */ enum efi_secureboot_mode efi_get_secureboot(void) { -- GitLab From f6a46f8b302d9bfcf347577cbf1dd22f19dfe555 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 9 Nov 2020 00:20:00 +0100 Subject: [PATCH 0214/1894] rtc: at91rm9200: add correction support The sama5d4 and sama5d2 RTCs are able to correct for imprecise crystals, up to 1953 ppm. Signed-off-by: Alexandre Belloni Reviewed-by: Nicolas Ferre Link: https://lore.kernel.org/r/20201108232001.1580128-1-alexandre.belloni@bootlin.com --- drivers/rtc/rtc-at91rm9200.c | 103 +++++++++++++++++++++++++++++++++-- 1 file changed, 99 insertions(+), 4 deletions(-) diff --git a/drivers/rtc/rtc-at91rm9200.c b/drivers/rtc/rtc-at91rm9200.c index 5e811e04cb215..1eea187d9850a 100644 --- a/drivers/rtc/rtc-at91rm9200.c +++ b/drivers/rtc/rtc-at91rm9200.c @@ -36,6 +36,10 @@ #define AT91_RTC_UPDCAL BIT(1) /* Update Request Calendar Register */ #define AT91_RTC_MR 0x04 /* Mode Register */ +#define AT91_RTC_HRMOD BIT(0) /* 12/24 hour mode */ +#define AT91_RTC_NEGPPM BIT(4) /* Negative PPM correction */ +#define AT91_RTC_CORRECTION GENMASK(14, 8) /* Slow clock correction */ +#define AT91_RTC_HIGHPPM BIT(15) /* High PPM correction */ #define AT91_RTC_TIMR 0x08 /* Time Register */ #define AT91_RTC_SEC GENMASK(6, 0) /* Current Second */ @@ -77,6 +81,9 @@ #define AT91_RTC_NVTIMALR BIT(2) /* Non valid Time Alarm */ #define AT91_RTC_NVCALALR BIT(3) /* Non valid Calendar Alarm */ +#define AT91_RTC_CORR_DIVIDEND 3906000 +#define AT91_RTC_CORR_LOW_RATIO 20 + #define at91_rtc_read(field) \ readl_relaxed(at91_rtc_regs + field) #define at91_rtc_write(field, val) \ @@ -84,6 +91,7 @@ struct at91_rtc_config { bool use_shadow_imr; + bool has_correction; }; static const struct at91_rtc_config *at91_rtc_config; @@ -293,6 +301,75 @@ static int at91_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled) return 0; } +static int at91_rtc_readoffset(struct device *dev, long *offset) +{ + u32 mr = at91_rtc_read(AT91_RTC_MR); + long val = FIELD_GET(AT91_RTC_CORRECTION, mr); + + if (!val) { + *offset = 0; + return 0; + } + + val++; + + if (!(mr & AT91_RTC_NEGPPM)) + val = -val; + + if (!(mr & AT91_RTC_HIGHPPM)) + val *= AT91_RTC_CORR_LOW_RATIO; + + *offset = DIV_ROUND_CLOSEST(AT91_RTC_CORR_DIVIDEND, val); + + return 0; +} + +static int at91_rtc_setoffset(struct device *dev, long offset) +{ + long corr; + u32 mr; + + if (offset > AT91_RTC_CORR_DIVIDEND / 2) + return -ERANGE; + if (offset < -AT91_RTC_CORR_DIVIDEND / 2) + return -ERANGE; + + mr = at91_rtc_read(AT91_RTC_MR); + mr &= ~(AT91_RTC_NEGPPM | AT91_RTC_CORRECTION | AT91_RTC_HIGHPPM); + + if (offset > 0) + mr |= AT91_RTC_NEGPPM; + else + offset = -offset; + + /* offset less than 764 ppb, disable correction*/ + if (offset < 764) { + at91_rtc_write(AT91_RTC_MR, mr & ~AT91_RTC_NEGPPM); + + return 0; + } + + /* + * 29208 ppb is the perfect cutoff between low range and high range + * low range values are never better than high range value after that. + */ + if (offset < 29208) { + corr = DIV_ROUND_CLOSEST(AT91_RTC_CORR_DIVIDEND, offset * AT91_RTC_CORR_LOW_RATIO); + } else { + corr = DIV_ROUND_CLOSEST(AT91_RTC_CORR_DIVIDEND, offset); + mr |= AT91_RTC_HIGHPPM; + } + + if (corr > 128) + corr = 128; + + mr |= FIELD_PREP(AT91_RTC_CORRECTION, corr - 1); + + at91_rtc_write(AT91_RTC_MR, mr); + + return 0; +} + /* * IRQ handler for the RTC */ @@ -343,6 +420,10 @@ static const struct at91_rtc_config at91sam9x5_config = { .use_shadow_imr = true, }; +static const struct at91_rtc_config sama5d4_config = { + .has_correction = true, +}; + static const struct of_device_id at91_rtc_dt_ids[] = { { .compatible = "atmel,at91rm9200-rtc", @@ -352,10 +433,10 @@ static const struct of_device_id at91_rtc_dt_ids[] = { .data = &at91sam9x5_config, }, { .compatible = "atmel,sama5d4-rtc", - .data = &at91rm9200_config, + .data = &sama5d4_config, }, { .compatible = "atmel,sama5d2-rtc", - .data = &at91rm9200_config, + .data = &sama5d4_config, }, { /* sentinel */ } @@ -370,6 +451,16 @@ static const struct rtc_class_ops at91_rtc_ops = { .alarm_irq_enable = at91_rtc_alarm_irq_enable, }; +static const struct rtc_class_ops sama5d4_rtc_ops = { + .read_time = at91_rtc_readtime, + .set_time = at91_rtc_settime, + .read_alarm = at91_rtc_readalarm, + .set_alarm = at91_rtc_setalarm, + .alarm_irq_enable = at91_rtc_alarm_irq_enable, + .set_offset = at91_rtc_setoffset, + .read_offset = at91_rtc_readoffset, +}; + /* * Initialize and install RTC driver */ @@ -416,7 +507,7 @@ static int __init at91_rtc_probe(struct platform_device *pdev) } at91_rtc_write(AT91_RTC_CR, 0); - at91_rtc_write(AT91_RTC_MR, 0); /* 24 hour mode */ + at91_rtc_write(AT91_RTC_MR, at91_rtc_read(AT91_RTC_MR) & ~AT91_RTC_HRMOD); /* Disable all interrupts */ at91_rtc_write_idr(AT91_RTC_ACKUPD | AT91_RTC_ALARM | @@ -437,7 +528,11 @@ static int __init at91_rtc_probe(struct platform_device *pdev) if (!device_can_wakeup(&pdev->dev)) device_init_wakeup(&pdev->dev, 1); - rtc->ops = &at91_rtc_ops; + if (at91_rtc_config->has_correction) + rtc->ops = &sama5d4_rtc_ops; + else + rtc->ops = &at91_rtc_ops; + rtc->range_min = RTC_TIMESTAMP_BEGIN_1900; rtc->range_max = RTC_TIMESTAMP_END_2099; ret = rtc_register_device(rtc); -- GitLab From bfca1c924d97696303491ddae0458861653d3b88 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Tue, 17 Nov 2020 14:39:20 +0100 Subject: [PATCH 0215/1894] rtc: at91rm9200: Add sam9x60 compatible Handle the sam9x60 RTC. While it can work with the at91sam9x5 fallback, it has crystal correction support and doesn't need to shadow IMR. Signed-off-by: Alexandre Belloni Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20201117133920.1229679-1-alexandre.belloni@bootlin.com --- drivers/rtc/rtc-at91rm9200.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/rtc/rtc-at91rm9200.c b/drivers/rtc/rtc-at91rm9200.c index 1eea187d9850a..da24e68adccae 100644 --- a/drivers/rtc/rtc-at91rm9200.c +++ b/drivers/rtc/rtc-at91rm9200.c @@ -437,6 +437,9 @@ static const struct of_device_id at91_rtc_dt_ids[] = { }, { .compatible = "atmel,sama5d2-rtc", .data = &sama5d4_config, + }, { + .compatible = "microchip,sam9x60-rtc", + .data = &sama5d4_config, }, { /* sentinel */ } -- GitLab From a31111189bb1160f84cf4cf9f910aa2ba7553d18 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 16 Nov 2020 16:28:57 +0200 Subject: [PATCH 0216/1894] rtc: ds1307: Remove non-valid ACPI IDs The commit 9c19b8930d2c ("rtc: ds1307: Add ACPI support") added invalid ACPI IDs (all of them are abusing ACPI specification). Moreover there is not even a single evidence that vendor registered any of such devices. Remove broken ACPI IDs from the driver. For prototyping one may use PRP0001 with device properties adhering to a DT binding. The following patches will add support of that to the driver. Signed-off-by: Andy Shevchenko Signed-off-by: Alexandre Belloni Reviewed-by: Rafael J. Wysocki Cc: Tin Huynh Link: https://uefi.org/PNP_ACPI_Registry Link: https://lore.kernel.org/r/20201116142859.31257-1-andriy.shevchenko@linux.intel.com --- drivers/rtc/rtc-ds1307.c | 36 +----------------------------------- 1 file changed, 1 insertion(+), 35 deletions(-) diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c index 9f5f54ca039d0..fcb8e281abd51 100644 --- a/drivers/rtc/rtc-ds1307.c +++ b/drivers/rtc/rtc-ds1307.c @@ -8,7 +8,6 @@ * Copyright (C) 2012 Bertrand Achard (nvram access fixes) */ -#include #include #include #include @@ -1169,31 +1168,6 @@ static const struct of_device_id ds1307_of_match[] = { MODULE_DEVICE_TABLE(of, ds1307_of_match); #endif -#ifdef CONFIG_ACPI -static const struct acpi_device_id ds1307_acpi_ids[] = { - { .id = "DS1307", .driver_data = ds_1307 }, - { .id = "DS1308", .driver_data = ds_1308 }, - { .id = "DS1337", .driver_data = ds_1337 }, - { .id = "DS1338", .driver_data = ds_1338 }, - { .id = "DS1339", .driver_data = ds_1339 }, - { .id = "DS1388", .driver_data = ds_1388 }, - { .id = "DS1340", .driver_data = ds_1340 }, - { .id = "DS1341", .driver_data = ds_1341 }, - { .id = "DS3231", .driver_data = ds_3231 }, - { .id = "M41T0", .driver_data = m41t0 }, - { .id = "M41T00", .driver_data = m41t00 }, - { .id = "M41T11", .driver_data = m41t11 }, - { .id = "MCP7940X", .driver_data = mcp794xx }, - { .id = "MCP7941X", .driver_data = mcp794xx }, - { .id = "PT7C4338", .driver_data = ds_1307 }, - { .id = "RX8025", .driver_data = rx_8025 }, - { .id = "ISL12057", .driver_data = ds_1337 }, - { .id = "RX8130", .driver_data = rx_8130 }, - { } -}; -MODULE_DEVICE_TABLE(acpi, ds1307_acpi_ids); -#endif - /* * The ds1337 and ds1339 both have two alarms, but we only use the first * one (with a "seconds" field). For ds1337 we expect nINTA is our alarm @@ -1794,14 +1768,7 @@ static int ds1307_probe(struct i2c_client *client, chip = &chips[id->driver_data]; ds1307->type = id->driver_data; } else { - const struct acpi_device_id *acpi_id; - - acpi_id = acpi_match_device(ACPI_PTR(ds1307_acpi_ids), - ds1307->dev); - if (!acpi_id) - return -ENODEV; - chip = &chips[acpi_id->driver_data]; - ds1307->type = acpi_id->driver_data; + return -ENODEV; } want_irq = client->irq > 0 && chip->alarm; @@ -2065,7 +2032,6 @@ static struct i2c_driver ds1307_driver = { .driver = { .name = "rtc-ds1307", .of_match_table = of_match_ptr(ds1307_of_match), - .acpi_match_table = ACPI_PTR(ds1307_acpi_ids), }, .probe = ds1307_probe, .id_table = ds1307_id, -- GitLab From 227ec129ad7b035ee2ae2e57e9567a8126ad93f3 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 16 Nov 2020 16:28:58 +0200 Subject: [PATCH 0217/1894] rtc: ds1307: Make use of device properties Device property API allows to gather device resources from different sources, such as ACPI. Convert the drivers to unleash the power of device property API. Signed-off-by: Andy Shevchenko Reviewed-by: Rafael J. Wysocki Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201116142859.31257-2-andriy.shevchenko@linux.intel.com --- drivers/rtc/rtc-ds1307.c | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c index fcb8e281abd51..49260bc260e39 100644 --- a/drivers/rtc/rtc-ds1307.c +++ b/drivers/rtc/rtc-ds1307.c @@ -12,7 +12,8 @@ #include #include #include -#include +#include +#include #include #include #include @@ -30,6 +31,7 @@ * That's a natural job for a factory or repair bench. */ enum ds_type { + unknown_ds_type, /* always first and 0 */ ds_1307, ds_1308, ds_1337, @@ -1600,13 +1602,16 @@ static const struct clk_ops ds3231_clk_32khz_ops = { .recalc_rate = ds3231_clk_32khz_recalc_rate, }; +static const char *ds3231_clks_names[] = { + [DS3231_CLK_SQW] = "ds3231_clk_sqw", + [DS3231_CLK_32KHZ] = "ds3231_clk_32khz", +}; + static struct clk_init_data ds3231_clks_init[] = { [DS3231_CLK_SQW] = { - .name = "ds3231_clk_sqw", .ops = &ds3231_clk_sqw_ops, }, [DS3231_CLK_32KHZ] = { - .name = "ds3231_clk_32khz", .ops = &ds3231_clk_32khz_ops, }, }; @@ -1627,6 +1632,11 @@ static int ds3231_clks_register(struct ds1307 *ds1307) if (!onecell->clks) return -ENOMEM; + /* optional override of the clockname */ + device_property_read_string_array(ds1307->dev, "clock-output-names", + ds3231_clks_names, + ARRAY_SIZE(ds3231_clks_names)); + for (i = 0; i < ARRAY_SIZE(ds3231_clks_init); i++) { struct clk_init_data init = ds3231_clks_init[i]; @@ -1637,9 +1647,7 @@ static int ds3231_clks_register(struct ds1307 *ds1307) if (i == DS3231_CLK_SQW && test_bit(HAS_ALARM, &ds1307->flags)) continue; - /* optional override of the clockname */ - of_property_read_string_index(node, "clock-output-names", i, - &init.name); + init.name = ds3231_clks_names[i]; ds1307->clks[i].init = &init; onecell->clks[i] = devm_clk_register(ds1307->dev, @@ -1648,10 +1656,8 @@ static int ds3231_clks_register(struct ds1307 *ds1307) return PTR_ERR(onecell->clks[i]); } - if (!node) - return 0; - - of_clk_add_provider(node, of_clk_src_onecell_get, onecell); + if (node) + of_clk_add_provider(node, of_clk_src_onecell_get, onecell); return 0; } @@ -1735,6 +1741,7 @@ static int ds1307_probe(struct i2c_client *client, const struct i2c_device_id *id) { struct ds1307 *ds1307; + const void *match; int err = -ENODEV; int tmp; const struct chip_desc *chip; @@ -1760,9 +1767,9 @@ static int ds1307_probe(struct i2c_client *client, i2c_set_clientdata(client, ds1307); - if (client->dev.of_node) { - ds1307->type = (enum ds_type) - of_device_get_match_data(&client->dev); + match = device_get_match_data(&client->dev); + if (match) { + ds1307->type = (enum ds_type)match; chip = &chips[ds1307->type]; } else if (id) { chip = &chips[id->driver_data]; @@ -1786,7 +1793,6 @@ static int ds1307_probe(struct i2c_client *client, trickle_charger_setup); } -#ifdef CONFIG_OF /* * For devices with no IRQ directly connected to the SoC, the RTC chip * can be forced as a wakeup source by stating that explicitly in @@ -1795,10 +1801,8 @@ static int ds1307_probe(struct i2c_client *client, * This will guarantee the 'wakealarm' sysfs entry is available on the device, * if supported by the RTC. */ - if (chip->alarm && of_property_read_bool(client->dev.of_node, - "wakeup-source")) + if (chip->alarm && device_property_read_bool(&client->dev, "wakeup-source")) ds1307_can_wakeup_device = true; -#endif switch (ds1307->type) { case ds_1337: -- GitLab From 698fffc2705cc48804cc31021cdb2ae4290927be Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 16 Nov 2020 16:28:59 +0200 Subject: [PATCH 0218/1894] rtc: ds1307: Drop of_match_ptr and CONFIG_OF protections These prevent use of this driver with ACPI via PRP0001. Drop them to remove this restriction. Also added mod_devicetable.h include given use of struct of_device_id. Signed-off-by: Andy Shevchenko Reviewed-by: Rafael J. Wysocki Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201116142859.31257-3-andriy.shevchenko@linux.intel.com --- drivers/rtc/rtc-ds1307.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c index 49260bc260e39..fdf25db1b1b3a 100644 --- a/drivers/rtc/rtc-ds1307.c +++ b/drivers/rtc/rtc-ds1307.c @@ -11,8 +11,8 @@ #include #include #include +#include #include -#include #include #include #include @@ -1091,7 +1091,6 @@ static const struct i2c_device_id ds1307_id[] = { }; MODULE_DEVICE_TABLE(i2c, ds1307_id); -#ifdef CONFIG_OF static const struct of_device_id ds1307_of_match[] = { { .compatible = "dallas,ds1307", @@ -1168,7 +1167,6 @@ static const struct of_device_id ds1307_of_match[] = { { } }; MODULE_DEVICE_TABLE(of, ds1307_of_match); -#endif /* * The ds1337 and ds1339 both have two alarms, but we only use the first @@ -2035,7 +2033,7 @@ exit: static struct i2c_driver ds1307_driver = { .driver = { .name = "rtc-ds1307", - .of_match_table = of_match_ptr(ds1307_of_match), + .of_match_table = ds1307_of_match, }, .probe = ds1307_probe, .id_table = ds1307_id, -- GitLab From 7e6066ca1f1fa5c79915dfb4720ca20c5e62edcc Mon Sep 17 00:00:00 2001 From: Claudius Heine Date: Tue, 17 Nov 2020 13:18:16 +0100 Subject: [PATCH 0219/1894] rtc: Kconfig: Fix typo in help message of rx 6110 The help message in the Kconfig for the RX-6110 erronously stated RX-6610. Signed-off-by: Claudius Heine Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201117121817.953924-2-ch@denx.de --- drivers/rtc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index f784b52381b1a..fa47bafe09f98 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -821,7 +821,7 @@ config RTC_DRV_RX6110 tristate "Epson RX-6110" select REGMAP_SPI help - If you say yes here you will get support for the Epson RX-6610. + If you say yes here you will get support for the Epson RX-6110. This driver can also be built as a module. If so the module will be called rtc-rx6110. -- GitLab From afa819c2c6bf0d6b99d3e41217a2c7d3b3b53228 Mon Sep 17 00:00:00 2001 From: Claudius Heine Date: Tue, 17 Nov 2020 13:18:17 +0100 Subject: [PATCH 0220/1894] rtc: rx6110: add i2c support The RX6110 also supports I2C, so this patch adds support for it to the driver. This also renames the SPI specific functions and variables to include `_spi_` in their names. Signed-off-by: Claudius Heine Signed-off-by: Henning Schild Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201117121817.953924-3-ch@denx.de --- drivers/rtc/Kconfig | 20 ++--- drivers/rtc/rtc-rx6110.c | 165 +++++++++++++++++++++++++++++++++------ 2 files changed, 153 insertions(+), 32 deletions(-) diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index fa47bafe09f98..4d2c5d1f75cce 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -817,15 +817,6 @@ config RTC_DRV_RX4581 This driver can also be built as a module. If so the module will be called rtc-rx4581. -config RTC_DRV_RX6110 - tristate "Epson RX-6110" - select REGMAP_SPI - help - If you say yes here you will get support for the Epson RX-6110. - - This driver can also be built as a module. If so the module - will be called rtc-rx6110. - config RTC_DRV_RS5C348 tristate "Ricoh RS5C348A/B" help @@ -936,6 +927,17 @@ config RTC_DRV_RV3029_HWMON Say Y here if you want to expose temperature sensor data on rtc-rv3029. +config RTC_DRV_RX6110 + tristate "Epson RX-6110" + depends on RTC_I2C_AND_SPI + select REGMAP_SPI if SPI_MASTER + select REGMAP_I2C if I2C + help + If you say yes here you will get support for the Epson RX-6110. + + This driver can also be built as a module. If so the module + will be called rtc-rx6110. + comment "Platform RTC drivers" # this 'CMOS' RTC driver is arch dependent because it requires diff --git a/drivers/rtc/rtc-rx6110.c b/drivers/rtc/rtc-rx6110.c index 3a9eb7043f016..a7b671a210223 100644 --- a/drivers/rtc/rtc-rx6110.c +++ b/drivers/rtc/rtc-rx6110.c @@ -16,6 +16,7 @@ #include #include #include +#include /* RX-6110 Register definitions */ #define RX6110_REG_SEC 0x10 @@ -310,6 +311,27 @@ static const struct rtc_class_ops rx6110_rtc_ops = { .set_time = rx6110_set_time, }; +static int rx6110_probe(struct rx6110_data *rx6110, struct device *dev) +{ + int err; + + rx6110->rtc = devm_rtc_device_register(dev, + RX6110_DRIVER_NAME, + &rx6110_rtc_ops, THIS_MODULE); + + if (IS_ERR(rx6110->rtc)) + return PTR_ERR(rx6110->rtc); + + err = rx6110_init(rx6110); + if (err) + return err; + + rx6110->rtc->max_user_freq = 1; + + return 0; +} + +#ifdef CONFIG_SPI_MASTER static struct regmap_config regmap_spi_config = { .reg_bits = 8, .val_bits = 8, @@ -318,13 +340,12 @@ static struct regmap_config regmap_spi_config = { }; /** - * rx6110_probe - initialize rtc driver + * rx6110_spi_probe - initialize rtc driver * @spi: pointer to spi device */ -static int rx6110_probe(struct spi_device *spi) +static int rx6110_spi_probe(struct spi_device *spi) { struct rx6110_data *rx6110; - int err; if ((spi->bits_per_word && spi->bits_per_word != 8) || (spi->max_speed_hz > 2000000) || @@ -346,27 +367,14 @@ static int rx6110_probe(struct spi_device *spi) spi_set_drvdata(spi, rx6110); - rx6110->rtc = devm_rtc_device_register(&spi->dev, - RX6110_DRIVER_NAME, - &rx6110_rtc_ops, THIS_MODULE); - - if (IS_ERR(rx6110->rtc)) - return PTR_ERR(rx6110->rtc); - - err = rx6110_init(rx6110); - if (err) - return err; - - rx6110->rtc->max_user_freq = 1; - - return 0; + return rx6110_probe(rx6110, &spi->dev); } -static const struct spi_device_id rx6110_id[] = { +static const struct spi_device_id rx6110_spi_id[] = { { "rx6110", 0 }, { } }; -MODULE_DEVICE_TABLE(spi, rx6110_id); +MODULE_DEVICE_TABLE(spi, rx6110_spi_id); static const struct of_device_id rx6110_spi_of_match[] = { { .compatible = "epson,rx6110" }, @@ -374,16 +382,127 @@ static const struct of_device_id rx6110_spi_of_match[] = { }; MODULE_DEVICE_TABLE(of, rx6110_spi_of_match); -static struct spi_driver rx6110_driver = { +static struct spi_driver rx6110_spi_driver = { .driver = { .name = RX6110_DRIVER_NAME, .of_match_table = of_match_ptr(rx6110_spi_of_match), }, - .probe = rx6110_probe, - .id_table = rx6110_id, + .probe = rx6110_spi_probe, + .id_table = rx6110_spi_id, +}; + +static int rx6110_spi_register(void) +{ + return spi_register_driver(&rx6110_spi_driver); +} + +static void rx6110_spi_unregister(void) +{ + spi_unregister_driver(&rx6110_spi_driver); +} +#else +static int rx6110_spi_register(void) +{ + return 0; +} + +static void rx6110_spi_unregister(void) +{ +} +#endif /* CONFIG_SPI_MASTER */ + +#ifdef CONFIG_I2C +static struct regmap_config regmap_i2c_config = { + .reg_bits = 8, + .val_bits = 8, + .max_register = RX6110_REG_IRQ, + .read_flag_mask = 0x80, }; -module_spi_driver(rx6110_driver); +static int rx6110_i2c_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent); + struct rx6110_data *rx6110; + + if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA + | I2C_FUNC_SMBUS_I2C_BLOCK)) { + dev_err(&adapter->dev, + "doesn't support required functionality\n"); + return -EIO; + } + + rx6110 = devm_kzalloc(&client->dev, sizeof(*rx6110), GFP_KERNEL); + if (!rx6110) + return -ENOMEM; + + rx6110->regmap = devm_regmap_init_i2c(client, ®map_i2c_config); + if (IS_ERR(rx6110->regmap)) { + dev_err(&client->dev, "regmap init failed for rtc rx6110\n"); + return PTR_ERR(rx6110->regmap); + } + + i2c_set_clientdata(client, rx6110); + + return rx6110_probe(rx6110, &client->dev); +} + +static const struct i2c_device_id rx6110_i2c_id[] = { + { "rx6110", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, rx6110_i2c_id); + +static struct i2c_driver rx6110_i2c_driver = { + .driver = { + .name = RX6110_DRIVER_NAME, + }, + .probe = rx6110_i2c_probe, + .id_table = rx6110_i2c_id, +}; + +static int rx6110_i2c_register(void) +{ + return i2c_add_driver(&rx6110_i2c_driver); +} + +static void rx6110_i2c_unregister(void) +{ + i2c_del_driver(&rx6110_i2c_driver); +} +#else +static int rx6110_i2c_register(void) +{ + return 0; +} + +static void rx6110_i2c_unregister(void) +{ +} +#endif /* CONFIG_I2C */ + +static int __init rx6110_module_init(void) +{ + int ret; + + ret = rx6110_spi_register(); + if (ret) + return ret; + + ret = rx6110_i2c_register(); + if (ret) + rx6110_spi_unregister(); + + return ret; +} +module_init(rx6110_module_init); + +static void __exit rx6110_module_exit(void) +{ + rx6110_spi_unregister(); + rx6110_i2c_unregister(); +} +module_exit(rx6110_module_exit); MODULE_AUTHOR("Val Krutov "); MODULE_DESCRIPTION("RX-6110 SA RTC driver"); -- GitLab From 42882a8a22a86513c8c8c6bc7e0822bb14791999 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 16 Nov 2020 15:03:26 -0300 Subject: [PATCH 0221/1894] rtc: mxc: Convert the driver to DT-only Since 5.10-rc1 i.MX is a devicetree-only platform, so simplify the code by removing the unused non-DT support. Signed-off-by: Fabio Estevam Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201116180326.5199-1-festevam@gmail.com --- drivers/rtc/rtc-mxc.c | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/drivers/rtc/rtc-mxc.c b/drivers/rtc/rtc-mxc.c index a8cfbde048f42..018bfa952d66e 100644 --- a/drivers/rtc/rtc-mxc.c +++ b/drivers/rtc/rtc-mxc.c @@ -70,27 +70,12 @@ struct rtc_plat_data { enum imx_rtc_type devtype; }; -static const struct platform_device_id imx_rtc_devtype[] = { - { - .name = "imx1-rtc", - .driver_data = IMX1_RTC, - }, { - .name = "imx21-rtc", - .driver_data = IMX21_RTC, - }, { - /* sentinel */ - } -}; -MODULE_DEVICE_TABLE(platform, imx_rtc_devtype); - -#ifdef CONFIG_OF static const struct of_device_id imx_rtc_dt_ids[] = { { .compatible = "fsl,imx1-rtc", .data = (const void *)IMX1_RTC }, { .compatible = "fsl,imx21-rtc", .data = (const void *)IMX21_RTC }, {} }; MODULE_DEVICE_TABLE(of, imx_rtc_dt_ids); -#endif static inline int is_imx1_rtc(struct rtc_plat_data *data) { @@ -329,10 +314,7 @@ static int mxc_rtc_probe(struct platform_device *pdev) return -ENOMEM; of_id = of_match_device(imx_rtc_dt_ids, &pdev->dev); - if (of_id) - pdata->devtype = (enum imx_rtc_type)of_id->data; - else - pdata->devtype = pdev->id_entry->driver_data; + pdata->devtype = (enum imx_rtc_type)of_id->data; pdata->ioaddr = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(pdata->ioaddr)) @@ -438,7 +420,6 @@ static struct platform_driver mxc_rtc_driver = { .name = "mxc_rtc", .of_match_table = of_match_ptr(imx_rtc_dt_ids), }, - .id_table = imx_rtc_devtype, .probe = mxc_rtc_probe, }; -- GitLab From ba7aa63000f26c5a2c87d5a716601499a02a3156 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Tue, 17 Nov 2020 21:30:35 +0100 Subject: [PATCH 0222/1894] rtc: mxc: use of_device_get_match_data Use of_device_get_match_data to simplify mxc_rtc_probe. Signed-off-by: Alexandre Belloni Reviewed-by: Fabio Estevam Link: https://lore.kernel.org/r/20201117203035.1280099-1-alexandre.belloni@bootlin.com --- drivers/rtc/rtc-mxc.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/rtc/rtc-mxc.c b/drivers/rtc/rtc-mxc.c index 018bfa952d66e..0d253ce3a8f5e 100644 --- a/drivers/rtc/rtc-mxc.c +++ b/drivers/rtc/rtc-mxc.c @@ -307,14 +307,12 @@ static int mxc_rtc_probe(struct platform_device *pdev) u32 reg; unsigned long rate; int ret; - const struct of_device_id *of_id; pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); if (!pdata) return -ENOMEM; - of_id = of_match_device(imx_rtc_dt_ids, &pdev->dev); - pdata->devtype = (enum imx_rtc_type)of_id->data; + pdata->devtype = (enum imx_rtc_type)of_device_get_match_data(&pdev->dev); pdata->ioaddr = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(pdata->ioaddr)) -- GitLab From 1a57b1a3e11086a4f183b245754b213b1d9b2d40 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Wed, 18 Nov 2020 15:35:17 +0800 Subject: [PATCH 0223/1894] ACPI/nfit: avoid accessing uninitialized memory in acpi_nfit_ctl() The ACPI_ALLOCATE() does not zero the "buf", so when the condition "integer->type != ACPI_TYPE_INTEGER" in int_to_buf() is met, the result is unpredictable in acpi_nfit_ctl(). Signed-off-by: Zhen Lei Reviewed-by: Dan Williams Link: https://lore.kernel.org/r/20201118073517.1884-1-thunder.leizhen@huawei.com Signed-off-by: Dan Williams --- drivers/acpi/nfit/core.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 442608220b5c8..cda7b6c525049 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -282,18 +282,19 @@ err: static union acpi_object *int_to_buf(union acpi_object *integer) { - union acpi_object *buf = ACPI_ALLOCATE(sizeof(*buf) + 4); + union acpi_object *buf = NULL; void *dst = NULL; - if (!buf) - goto err; - if (integer->type != ACPI_TYPE_INTEGER) { WARN_ONCE(1, "BIOS bug, unexpected element type: %d\n", integer->type); goto err; } + buf = ACPI_ALLOCATE(sizeof(*buf) + 4); + if (!buf) + goto err; + dst = buf + 1; buf->type = ACPI_TYPE_BUFFER; buf->buffer.length = 4; -- GitLab From 7d9d4868ec0b34dbfc74b3075dc1e896cc98f783 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Tue, 17 Nov 2020 22:22:01 +0100 Subject: [PATCH 0224/1894] rtc: sc27xx: Always read normal alarm The RTC core only reads the alarm from the hardware at boot time, to know whether an alarm was already set before booting. It keeps track of all the alarms after that so there is no need to ever read the auxiliary alarm. Commit 3822d1bb0df1 ("rtc: sc27xx: Always read normal alarm when registering RTC device") already effectively removed the capability to read the auxiliary alarm as .read_alarm is always called with rtc->registered set to false. Signed-off-by: Alexandre Belloni Reviewed-by: Chunyan Zhang Link: https://lore.kernel.org/r/20201117212201.1288608-1-alexandre.belloni@bootlin.com --- drivers/rtc/rtc-sc27xx.c | 38 ++------------------------------------ 1 file changed, 2 insertions(+), 36 deletions(-) diff --git a/drivers/rtc/rtc-sc27xx.c b/drivers/rtc/rtc-sc27xx.c index 6e65f68ea86dc..a953bc0a5a5ba 100644 --- a/drivers/rtc/rtc-sc27xx.c +++ b/drivers/rtc/rtc-sc27xx.c @@ -299,33 +299,6 @@ static int sprd_rtc_set_secs(struct sprd_rtc *rtc, enum sprd_rtc_reg_types type, sts_mask); } -static int sprd_rtc_read_aux_alarm(struct device *dev, struct rtc_wkalrm *alrm) -{ - struct sprd_rtc *rtc = dev_get_drvdata(dev); - time64_t secs; - u32 val; - int ret; - - ret = sprd_rtc_get_secs(rtc, SPRD_RTC_AUX_ALARM, &secs); - if (ret) - return ret; - - rtc_time64_to_tm(secs, &alrm->time); - - ret = regmap_read(rtc->regmap, rtc->base + SPRD_RTC_INT_EN, &val); - if (ret) - return ret; - - alrm->enabled = !!(val & SPRD_RTC_AUXALM_EN); - - ret = regmap_read(rtc->regmap, rtc->base + SPRD_RTC_INT_RAW_STS, &val); - if (ret) - return ret; - - alrm->pending = !!(val & SPRD_RTC_AUXALM_EN); - return 0; -} - static int sprd_rtc_set_aux_alarm(struct device *dev, struct rtc_wkalrm *alrm) { struct sprd_rtc *rtc = dev_get_drvdata(dev); @@ -415,16 +388,9 @@ static int sprd_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) u32 val; /* - * Before RTC device is registered, it will check to see if there is an - * alarm already set in RTC hardware, and we always read the normal - * alarm at this time. - * - * Or if aie_timer is enabled, we should get the normal alarm time. - * Otherwise we should get auxiliary alarm time. + * The RTC core checks to see if there is an alarm already set in RTC + * hardware, and we always read the normal alarm at this time. */ - if (rtc->rtc && rtc->rtc->registered && rtc->rtc->aie_timer.enabled == 0) - return sprd_rtc_read_aux_alarm(dev, alrm); - ret = sprd_rtc_get_secs(rtc, SPRD_RTC_ALARM, &secs); if (ret) return ret; -- GitLab From 7c45c9741ab2063e76ed716ac7aae05f97143f9c Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 9 Nov 2020 17:34:02 +0100 Subject: [PATCH 0225/1894] rtc: omap: use devm_pinctrl_register() Use a managed variant of pinctrl_register(). This way we can shorten the remove() callback as well as drop a goto label from probe(). Signed-off-by: Bartosz Golaszewski Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201109163409.24301-2-brgl@bgdev.pl --- drivers/rtc/rtc-omap.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c index c20fc7937dfa8..606fa80ad6e0e 100644 --- a/drivers/rtc/rtc-omap.c +++ b/drivers/rtc/rtc-omap.c @@ -879,7 +879,7 @@ static int omap_rtc_probe(struct platform_device *pdev) /* Support ext_wakeup pinconf */ rtc_pinctrl_desc.name = dev_name(&pdev->dev); - rtc->pctldev = pinctrl_register(&rtc_pinctrl_desc, &pdev->dev, rtc); + rtc->pctldev = devm_pinctrl_register(&pdev->dev, &rtc_pinctrl_desc, rtc); if (IS_ERR(rtc->pctldev)) { dev_err(&pdev->dev, "Couldn't register pinctrl driver\n"); ret = PTR_ERR(rtc->pctldev); @@ -888,7 +888,7 @@ static int omap_rtc_probe(struct platform_device *pdev) ret = rtc_register_device(rtc->rtc); if (ret) - goto err_deregister_pinctrl; + goto err; rtc_nvmem_register(rtc->rtc, &omap_rtc_nvmem_config); @@ -901,8 +901,6 @@ static int omap_rtc_probe(struct platform_device *pdev) return 0; -err_deregister_pinctrl: - pinctrl_unregister(rtc->pctldev); err: clk_disable_unprepare(rtc->clk); device_init_wakeup(&pdev->dev, false); @@ -945,9 +943,6 @@ static int omap_rtc_remove(struct platform_device *pdev) pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); - /* Remove ext_wakeup pinconf */ - pinctrl_unregister(rtc->pctldev); - return 0; } -- GitLab From 4d49ffc7a20dd0b05efb82fbf5b52d7aa57e9f4b Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 9 Nov 2020 17:34:04 +0100 Subject: [PATCH 0226/1894] Documentation: list RTC devres helpers in devres.rst It's customary to list all devres helpers in devres.rst. Add missing RTC routines. Signed-off-by: Bartosz Golaszewski Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201109163409.24301-4-brgl@bgdev.pl --- Documentation/driver-api/driver-model/devres.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/driver-api/driver-model/devres.rst b/Documentation/driver-api/driver-model/devres.rst index bb676570acc38..6ffc0f07404f5 100644 --- a/Documentation/driver-api/driver-model/devres.rst +++ b/Documentation/driver-api/driver-model/devres.rst @@ -411,6 +411,10 @@ RESET devm_reset_control_get() devm_reset_controller_register() +RTC + devm_rtc_device_register() + devm_rtc_allocate_device() + SERDEV devm_serdev_device_open() -- GitLab From 25ece30561d247b2931b0d11d92e9c976a668771 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 9 Nov 2020 17:34:05 +0100 Subject: [PATCH 0227/1894] rtc: nvmem: remove nvram ABI The nvram sysfs attributes have been deprecated at least since v4.13, more than 3 years ago and nobody ever complained about the deprecation warning. Remove the sysfs attributes now. [Bartosz: remove the declaration of rtc_nvmem_unregister()] Signed-off-by: Alexandre Belloni Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20201109163409.24301-5-brgl@bgdev.pl --- drivers/rtc/class.c | 2 - drivers/rtc/nvmem.c | 82 +------------------------------------- drivers/rtc/rtc-cmos.c | 1 - drivers/rtc/rtc-ds1305.c | 1 - drivers/rtc/rtc-ds1307.c | 1 - drivers/rtc/rtc-ds1343.c | 1 - drivers/rtc/rtc-ds1511.c | 2 - drivers/rtc/rtc-ds1553.c | 1 - drivers/rtc/rtc-ds1685.c | 1 - drivers/rtc/rtc-ds1742.c | 1 - drivers/rtc/rtc-m48t59.c | 1 - drivers/rtc/rtc-m48t86.c | 1 - drivers/rtc/rtc-rp5c01.c | 1 - drivers/rtc/rtc-rv8803.c | 1 - drivers/rtc/rtc-stk17ta8.c | 1 - drivers/rtc/rtc-tx4939.c | 1 - include/linux/rtc.h | 6 --- 17 files changed, 1 insertion(+), 104 deletions(-) diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c index 7c88d190c51fc..a99b7d24b77c2 100644 --- a/drivers/rtc/class.c +++ b/drivers/rtc/class.c @@ -339,8 +339,6 @@ static void devm_rtc_release_device(struct device *dev, void *res) { struct rtc_device *rtc = *(struct rtc_device **)res; - rtc_nvmem_unregister(rtc); - if (rtc->registered) rtc_device_unregister(rtc); else diff --git a/drivers/rtc/nvmem.c b/drivers/rtc/nvmem.c index 4312096c77387..5e0b178a3b65f 100644 --- a/drivers/rtc/nvmem.c +++ b/drivers/rtc/nvmem.c @@ -9,74 +9,7 @@ #include #include #include -#include -#include -/* - * Deprecated ABI compatibility, this should be removed at some point - */ - -static const char nvram_warning[] = "Deprecated ABI, please use nvmem"; - -static ssize_t -rtc_nvram_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, - char *buf, loff_t off, size_t count) -{ - dev_warn_once(kobj_to_dev(kobj), nvram_warning); - - return nvmem_device_read(attr->private, off, count, buf); -} - -static ssize_t -rtc_nvram_write(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, - char *buf, loff_t off, size_t count) -{ - dev_warn_once(kobj_to_dev(kobj), nvram_warning); - - return nvmem_device_write(attr->private, off, count, buf); -} - -static int rtc_nvram_register(struct rtc_device *rtc, - struct nvmem_device *nvmem, size_t size) -{ - int err; - - rtc->nvram = kzalloc(sizeof(*rtc->nvram), GFP_KERNEL); - if (!rtc->nvram) - return -ENOMEM; - - rtc->nvram->attr.name = "nvram"; - rtc->nvram->attr.mode = 0644; - rtc->nvram->private = nvmem; - - sysfs_bin_attr_init(rtc->nvram); - - rtc->nvram->read = rtc_nvram_read; - rtc->nvram->write = rtc_nvram_write; - rtc->nvram->size = size; - - err = sysfs_create_bin_file(&rtc->dev.parent->kobj, - rtc->nvram); - if (err) { - kfree(rtc->nvram); - rtc->nvram = NULL; - } - - return err; -} - -static void rtc_nvram_unregister(struct rtc_device *rtc) -{ - sysfs_remove_bin_file(&rtc->dev.parent->kobj, rtc->nvram); - kfree(rtc->nvram); - rtc->nvram = NULL; -} - -/* - * New ABI, uses nvmem - */ int rtc_nvmem_register(struct rtc_device *rtc, struct nvmem_config *nvmem_config) { @@ -88,20 +21,7 @@ int rtc_nvmem_register(struct rtc_device *rtc, nvmem_config->dev = rtc->dev.parent; nvmem_config->owner = rtc->owner; nvmem = devm_nvmem_register(rtc->dev.parent, nvmem_config); - if (IS_ERR(nvmem)) - return PTR_ERR(nvmem); - - /* Register the old ABI */ - if (rtc->nvram_old_abi) - rtc_nvram_register(rtc, nvmem, nvmem_config->size); - return 0; + return PTR_ERR_OR_ZERO(nvmem); } EXPORT_SYMBOL_GPL(rtc_nvmem_register); - -void rtc_nvmem_unregister(struct rtc_device *rtc) -{ - /* unregister the old ABI */ - if (rtc->nvram) - rtc_nvram_unregister(rtc); -} diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index c633319cdb913..adca0de76e536 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -863,7 +863,6 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq) cmos_rtc.rtc->ops = &cmos_rtc_ops_no_alarm; } - cmos_rtc.rtc->nvram_old_abi = true; retval = rtc_register_device(cmos_rtc.rtc); if (retval) goto cleanup2; diff --git a/drivers/rtc/rtc-ds1305.c b/drivers/rtc/rtc-ds1305.c index a3d790889eea6..a1ed539d41b49 100644 --- a/drivers/rtc/rtc-ds1305.c +++ b/drivers/rtc/rtc-ds1305.c @@ -694,7 +694,6 @@ static int ds1305_probe(struct spi_device *spi) ds1305->rtc->range_max = RTC_TIMESTAMP_END_2099; ds1305_nvmem_cfg.priv = ds1305; - ds1305->rtc->nvram_old_abi = true; status = rtc_register_device(ds1305->rtc); if (status) return status; diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c index fdf25db1b1b3a..e359cbf7882b1 100644 --- a/drivers/rtc/rtc-ds1307.c +++ b/drivers/rtc/rtc-ds1307.c @@ -2016,7 +2016,6 @@ static int ds1307_probe(struct i2c_client *client, .priv = ds1307, }; - ds1307->rtc->nvram_old_abi = true; rtc_nvmem_register(ds1307->rtc, &nvmem_cfg); } diff --git a/drivers/rtc/rtc-ds1343.c b/drivers/rtc/rtc-ds1343.c index ba143423875b9..e7604e844cbdc 100644 --- a/drivers/rtc/rtc-ds1343.c +++ b/drivers/rtc/rtc-ds1343.c @@ -399,7 +399,6 @@ static int ds1343_probe(struct spi_device *spi) if (IS_ERR(priv->rtc)) return PTR_ERR(priv->rtc); - priv->rtc->nvram_old_abi = true; priv->rtc->ops = &ds1343_rtc_ops; priv->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; priv->rtc->range_max = RTC_TIMESTAMP_END_2099; diff --git a/drivers/rtc/rtc-ds1511.c b/drivers/rtc/rtc-ds1511.c index a63872c4c76d0..33c483d759c81 100644 --- a/drivers/rtc/rtc-ds1511.c +++ b/drivers/rtc/rtc-ds1511.c @@ -466,8 +466,6 @@ static int ds1511_rtc_probe(struct platform_device *pdev) pdata->rtc->ops = &ds1511_rtc_ops; - pdata->rtc->nvram_old_abi = true; - ret = rtc_register_device(pdata->rtc); if (ret) return ret; diff --git a/drivers/rtc/rtc-ds1553.c b/drivers/rtc/rtc-ds1553.c index cdf5e05b9489a..c6a5563504e55 100644 --- a/drivers/rtc/rtc-ds1553.c +++ b/drivers/rtc/rtc-ds1553.c @@ -294,7 +294,6 @@ static int ds1553_rtc_probe(struct platform_device *pdev) return PTR_ERR(pdata->rtc); pdata->rtc->ops = &ds1553_rtc_ops; - pdata->rtc->nvram_old_abi = true; ret = rtc_register_device(pdata->rtc); if (ret) diff --git a/drivers/rtc/rtc-ds1685.c b/drivers/rtc/rtc-ds1685.c index dfbd7b88b2b91..9043c96e88458 100644 --- a/drivers/rtc/rtc-ds1685.c +++ b/drivers/rtc/rtc-ds1685.c @@ -1316,7 +1316,6 @@ ds1685_rtc_probe(struct platform_device *pdev) if (ret) return ret; - rtc_dev->nvram_old_abi = true; nvmem_cfg.priv = rtc; ret = rtc_nvmem_register(rtc_dev, &nvmem_cfg); if (ret) diff --git a/drivers/rtc/rtc-ds1742.c b/drivers/rtc/rtc-ds1742.c index 2b949f0dbaa92..291bbed90ef85 100644 --- a/drivers/rtc/rtc-ds1742.c +++ b/drivers/rtc/rtc-ds1742.c @@ -190,7 +190,6 @@ static int ds1742_rtc_probe(struct platform_device *pdev) return PTR_ERR(rtc); rtc->ops = &ds1742_rtc_ops; - rtc->nvram_old_abi = true; ret = rtc_register_device(rtc); if (ret) diff --git a/drivers/rtc/rtc-m48t59.c b/drivers/rtc/rtc-m48t59.c index 67e218758a8b8..ee1d8f0146fd1 100644 --- a/drivers/rtc/rtc-m48t59.c +++ b/drivers/rtc/rtc-m48t59.c @@ -463,7 +463,6 @@ static int m48t59_rtc_probe(struct platform_device *pdev) if (IS_ERR(m48t59->rtc)) return PTR_ERR(m48t59->rtc); - m48t59->rtc->nvram_old_abi = true; m48t59->rtc->ops = ops; nvmem_cfg.size = pdata->offset; diff --git a/drivers/rtc/rtc-m48t86.c b/drivers/rtc/rtc-m48t86.c index 75a0e73071d8d..2b1135590dd56 100644 --- a/drivers/rtc/rtc-m48t86.c +++ b/drivers/rtc/rtc-m48t86.c @@ -254,7 +254,6 @@ static int m48t86_rtc_probe(struct platform_device *pdev) return PTR_ERR(info->rtc); info->rtc->ops = &m48t86_rtc_ops; - info->rtc->nvram_old_abi = true; err = rtc_register_device(info->rtc); if (err) diff --git a/drivers/rtc/rtc-rp5c01.c b/drivers/rtc/rtc-rp5c01.c index 8776eadbdd3a7..a69e8adcc4a11 100644 --- a/drivers/rtc/rtc-rp5c01.c +++ b/drivers/rtc/rtc-rp5c01.c @@ -251,7 +251,6 @@ static int __init rp5c01_rtc_probe(struct platform_device *dev) return PTR_ERR(rtc); rtc->ops = &rp5c01_rtc_ops; - rtc->nvram_old_abi = true; priv->rtc = rtc; diff --git a/drivers/rtc/rtc-rv8803.c b/drivers/rtc/rtc-rv8803.c index c6d8e3425688b..1d888da48c7c9 100644 --- a/drivers/rtc/rtc-rv8803.c +++ b/drivers/rtc/rtc-rv8803.c @@ -585,7 +585,6 @@ static int rv8803_probe(struct i2c_client *client, } rv8803->rtc->ops = &rv8803_rtc_ops; - rv8803->rtc->nvram_old_abi = true; rv8803->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; rv8803->rtc->range_max = RTC_TIMESTAMP_END_2099; err = rtc_register_device(rv8803->rtc); diff --git a/drivers/rtc/rtc-stk17ta8.c b/drivers/rtc/rtc-stk17ta8.c index 01a45044f468a..1ccf0d5d05b47 100644 --- a/drivers/rtc/rtc-stk17ta8.c +++ b/drivers/rtc/rtc-stk17ta8.c @@ -311,7 +311,6 @@ static int stk17ta8_rtc_probe(struct platform_device *pdev) return PTR_ERR(pdata->rtc); pdata->rtc->ops = &stk17ta8_rtc_ops; - pdata->rtc->nvram_old_abi = true; nvmem_cfg.priv = pdata; ret = rtc_nvmem_register(pdata->rtc, &nvmem_cfg); diff --git a/drivers/rtc/rtc-tx4939.c b/drivers/rtc/rtc-tx4939.c index 715b82981279d..abbb62b14d7a0 100644 --- a/drivers/rtc/rtc-tx4939.c +++ b/drivers/rtc/rtc-tx4939.c @@ -266,7 +266,6 @@ static int __init tx4939_rtc_probe(struct platform_device *pdev) return PTR_ERR(rtc); rtc->ops = &tx4939_rtc_ops; - rtc->nvram_old_abi = true; rtc->range_max = U32_MAX; pdata->rtc = rtc; diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 22d1575e4991b..0983ab9faffb3 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -120,10 +120,6 @@ struct rtc_device { bool registered; - /* Old ABI support */ - bool nvram_old_abi; - struct bin_attribute *nvram; - time64_t range_min; timeu64_t range_max; time64_t start_secs; @@ -250,14 +246,12 @@ extern int rtc_hctosys_ret; #ifdef CONFIG_RTC_NVMEM int rtc_nvmem_register(struct rtc_device *rtc, struct nvmem_config *nvmem_config); -void rtc_nvmem_unregister(struct rtc_device *rtc); #else static inline int rtc_nvmem_register(struct rtc_device *rtc, struct nvmem_config *nvmem_config) { return 0; } -static inline void rtc_nvmem_unregister(struct rtc_device *rtc) {} #endif #ifdef CONFIG_RTC_INTF_SYSFS -- GitLab From 3a905c2d9544a418953d6c18668f0f853fbd9be9 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 9 Nov 2020 17:34:06 +0100 Subject: [PATCH 0228/1894] rtc: add devm_ prefix to rtc_nvmem_register() rtc_nvmem_register() is a managed interface. It doesn't require any release function to be called at driver detach. To avoid confusing driver authors, let's rename it to devm_rtc_nvmem_register() and add it to the list of managed interfaces in Documentation/. Signed-off-by: Bartosz Golaszewski Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201109163409.24301-6-brgl@bgdev.pl --- Documentation/driver-api/driver-model/devres.rst | 1 + drivers/rtc/nvmem.c | 4 ++-- drivers/rtc/rtc-cmos.c | 2 +- drivers/rtc/rtc-ds1305.c | 2 +- drivers/rtc/rtc-ds1307.c | 2 +- drivers/rtc/rtc-ds1343.c | 2 +- drivers/rtc/rtc-ds1511.c | 2 +- drivers/rtc/rtc-ds1553.c | 2 +- drivers/rtc/rtc-ds1685.c | 2 +- drivers/rtc/rtc-ds1742.c | 2 +- drivers/rtc/rtc-ds3232.c | 2 +- drivers/rtc/rtc-isl12026.c | 2 +- drivers/rtc/rtc-isl1208.c | 2 +- drivers/rtc/rtc-m48t59.c | 2 +- drivers/rtc/rtc-m48t86.c | 2 +- drivers/rtc/rtc-meson.c | 2 +- drivers/rtc/rtc-omap.c | 2 +- drivers/rtc/rtc-pcf2127.c | 2 +- drivers/rtc/rtc-pcf85063.c | 2 +- drivers/rtc/rtc-pcf85363.c | 2 +- drivers/rtc/rtc-rp5c01.c | 2 +- drivers/rtc/rtc-rv3028.c | 4 ++-- drivers/rtc/rtc-rv3029c2.c | 2 +- drivers/rtc/rtc-rv3032.c | 4 ++-- drivers/rtc/rtc-rv8803.c | 2 +- drivers/rtc/rtc-rx8581.c | 2 +- drivers/rtc/rtc-stk17ta8.c | 2 +- drivers/rtc/rtc-tx4939.c | 2 +- include/linux/rtc.h | 8 ++++---- 29 files changed, 35 insertions(+), 34 deletions(-) diff --git a/Documentation/driver-api/driver-model/devres.rst b/Documentation/driver-api/driver-model/devres.rst index 6ffc0f07404f5..5df7ba54a4ba5 100644 --- a/Documentation/driver-api/driver-model/devres.rst +++ b/Documentation/driver-api/driver-model/devres.rst @@ -414,6 +414,7 @@ RESET RTC devm_rtc_device_register() devm_rtc_allocate_device() + devm_rtc_nvmem_register() SERDEV devm_serdev_device_open() diff --git a/drivers/rtc/nvmem.c b/drivers/rtc/nvmem.c index 5e0b178a3b65f..7502deb6390e3 100644 --- a/drivers/rtc/nvmem.c +++ b/drivers/rtc/nvmem.c @@ -10,7 +10,7 @@ #include #include -int rtc_nvmem_register(struct rtc_device *rtc, +int devm_rtc_nvmem_register(struct rtc_device *rtc, struct nvmem_config *nvmem_config) { struct nvmem_device *nvmem; @@ -24,4 +24,4 @@ int rtc_nvmem_register(struct rtc_device *rtc, return PTR_ERR_OR_ZERO(nvmem); } -EXPORT_SYMBOL_GPL(rtc_nvmem_register); +EXPORT_SYMBOL_GPL(devm_rtc_nvmem_register); diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index adca0de76e536..eea91c1538aa3 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -869,7 +869,7 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq) /* export at least the first block of NVRAM */ nvmem_cfg.size = address_space - NVRAM_OFFSET; - if (rtc_nvmem_register(cmos_rtc.rtc, &nvmem_cfg)) + if (devm_rtc_nvmem_register(cmos_rtc.rtc, &nvmem_cfg)) dev_err(dev, "nvmem registration failed\n"); dev_info(dev, "%s%s, %d bytes nvram%s\n", diff --git a/drivers/rtc/rtc-ds1305.c b/drivers/rtc/rtc-ds1305.c index a1ed539d41b49..a4e768261b431 100644 --- a/drivers/rtc/rtc-ds1305.c +++ b/drivers/rtc/rtc-ds1305.c @@ -698,7 +698,7 @@ static int ds1305_probe(struct spi_device *spi) if (status) return status; - rtc_nvmem_register(ds1305->rtc, &ds1305_nvmem_cfg); + devm_rtc_nvmem_register(ds1305->rtc, &ds1305_nvmem_cfg); /* Maybe set up alarm IRQ; be ready to handle it triggering right * away. NOTE that we don't share this. The signal is active low, diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c index e359cbf7882b1..216bc5d9b716e 100644 --- a/drivers/rtc/rtc-ds1307.c +++ b/drivers/rtc/rtc-ds1307.c @@ -2016,7 +2016,7 @@ static int ds1307_probe(struct i2c_client *client, .priv = ds1307, }; - rtc_nvmem_register(ds1307->rtc, &nvmem_cfg); + devm_rtc_nvmem_register(ds1307->rtc, &nvmem_cfg); } ds1307_hwmon_register(ds1307); diff --git a/drivers/rtc/rtc-ds1343.c b/drivers/rtc/rtc-ds1343.c index e7604e844cbdc..ea663e24a34c4 100644 --- a/drivers/rtc/rtc-ds1343.c +++ b/drivers/rtc/rtc-ds1343.c @@ -413,7 +413,7 @@ static int ds1343_probe(struct spi_device *spi) return res; nvmem_cfg.priv = priv; - rtc_nvmem_register(priv->rtc, &nvmem_cfg); + devm_rtc_nvmem_register(priv->rtc, &nvmem_cfg); priv->irq = spi->irq; diff --git a/drivers/rtc/rtc-ds1511.c b/drivers/rtc/rtc-ds1511.c index 33c483d759c81..d5f48216e851d 100644 --- a/drivers/rtc/rtc-ds1511.c +++ b/drivers/rtc/rtc-ds1511.c @@ -470,7 +470,7 @@ static int ds1511_rtc_probe(struct platform_device *pdev) if (ret) return ret; - rtc_nvmem_register(pdata->rtc, &ds1511_nvmem_cfg); + devm_rtc_nvmem_register(pdata->rtc, &ds1511_nvmem_cfg); /* * if the platform has an interrupt in mind for this device, diff --git a/drivers/rtc/rtc-ds1553.c b/drivers/rtc/rtc-ds1553.c index c6a5563504e55..2d2eb739d92b4 100644 --- a/drivers/rtc/rtc-ds1553.c +++ b/drivers/rtc/rtc-ds1553.c @@ -309,7 +309,7 @@ static int ds1553_rtc_probe(struct platform_device *pdev) } } - if (rtc_nvmem_register(pdata->rtc, &nvmem_cfg)) + if (devm_rtc_nvmem_register(pdata->rtc, &nvmem_cfg)) dev_err(&pdev->dev, "unable to register nvmem\n"); return 0; diff --git a/drivers/rtc/rtc-ds1685.c b/drivers/rtc/rtc-ds1685.c index 9043c96e88458..bef588fce2660 100644 --- a/drivers/rtc/rtc-ds1685.c +++ b/drivers/rtc/rtc-ds1685.c @@ -1317,7 +1317,7 @@ ds1685_rtc_probe(struct platform_device *pdev) return ret; nvmem_cfg.priv = rtc; - ret = rtc_nvmem_register(rtc_dev, &nvmem_cfg); + ret = devm_rtc_nvmem_register(rtc_dev, &nvmem_cfg); if (ret) return ret; diff --git a/drivers/rtc/rtc-ds1742.c b/drivers/rtc/rtc-ds1742.c index 291bbed90ef85..29792a8cce978 100644 --- a/drivers/rtc/rtc-ds1742.c +++ b/drivers/rtc/rtc-ds1742.c @@ -195,7 +195,7 @@ static int ds1742_rtc_probe(struct platform_device *pdev) if (ret) return ret; - if (rtc_nvmem_register(rtc, &nvmem_cfg)) + if (devm_rtc_nvmem_register(rtc, &nvmem_cfg)) dev_err(&pdev->dev, "Unable to register nvmem\n"); return 0; diff --git a/drivers/rtc/rtc-ds3232.c b/drivers/rtc/rtc-ds3232.c index 69c37ab643520..16b89035d1350 100644 --- a/drivers/rtc/rtc-ds3232.c +++ b/drivers/rtc/rtc-ds3232.c @@ -518,7 +518,7 @@ static int ds3232_probe(struct device *dev, struct regmap *regmap, int irq, if (IS_ERR(ds3232->rtc)) return PTR_ERR(ds3232->rtc); - ret = rtc_nvmem_register(ds3232->rtc, &nvmem_cfg); + ret = devm_rtc_nvmem_register(ds3232->rtc, &nvmem_cfg); if(ret) return ret; diff --git a/drivers/rtc/rtc-isl12026.c b/drivers/rtc/rtc-isl12026.c index 5b6b17fb6d628..fff8d82536698 100644 --- a/drivers/rtc/rtc-isl12026.c +++ b/drivers/rtc/rtc-isl12026.c @@ -465,7 +465,7 @@ static int isl12026_probe_new(struct i2c_client *client) priv->rtc->ops = &isl12026_rtc_ops; nvm_cfg.priv = priv; - ret = rtc_nvmem_register(priv->rtc, &nvm_cfg); + ret = devm_rtc_nvmem_register(priv->rtc, &nvm_cfg); if (ret) return ret; diff --git a/drivers/rtc/rtc-isl1208.c b/drivers/rtc/rtc-isl1208.c index ebb691fa48a6b..08d778b10e9ec 100644 --- a/drivers/rtc/rtc-isl1208.c +++ b/drivers/rtc/rtc-isl1208.c @@ -890,7 +890,7 @@ isl1208_probe(struct i2c_client *client, const struct i2c_device_id *id) if (rc) return rc; - rc = rtc_nvmem_register(isl1208->rtc, &isl1208->nvmem_config); + rc = devm_rtc_nvmem_register(isl1208->rtc, &isl1208->nvmem_config); if (rc) return rc; diff --git a/drivers/rtc/rtc-m48t59.c b/drivers/rtc/rtc-m48t59.c index ee1d8f0146fd1..e966a66ab2d35 100644 --- a/drivers/rtc/rtc-m48t59.c +++ b/drivers/rtc/rtc-m48t59.c @@ -466,7 +466,7 @@ static int m48t59_rtc_probe(struct platform_device *pdev) m48t59->rtc->ops = ops; nvmem_cfg.size = pdata->offset; - ret = rtc_nvmem_register(m48t59->rtc, &nvmem_cfg); + ret = devm_rtc_nvmem_register(m48t59->rtc, &nvmem_cfg); if (ret) return ret; diff --git a/drivers/rtc/rtc-m48t86.c b/drivers/rtc/rtc-m48t86.c index 2b1135590dd56..182cfe59e4e04 100644 --- a/drivers/rtc/rtc-m48t86.c +++ b/drivers/rtc/rtc-m48t86.c @@ -259,7 +259,7 @@ static int m48t86_rtc_probe(struct platform_device *pdev) if (err) return err; - rtc_nvmem_register(info->rtc, &m48t86_nvmem_cfg); + devm_rtc_nvmem_register(info->rtc, &m48t86_nvmem_cfg); /* read battery status */ reg = m48t86_readb(&pdev->dev, M48T86_D); diff --git a/drivers/rtc/rtc-meson.c b/drivers/rtc/rtc-meson.c index 47ebcf834cc21..938267713a4dc 100644 --- a/drivers/rtc/rtc-meson.c +++ b/drivers/rtc/rtc-meson.c @@ -365,7 +365,7 @@ static int meson_rtc_probe(struct platform_device *pdev) } meson_rtc_nvmem_config.priv = rtc; - ret = rtc_nvmem_register(rtc->rtc, &meson_rtc_nvmem_config); + ret = devm_rtc_nvmem_register(rtc->rtc, &meson_rtc_nvmem_config); if (ret) goto out_disable_vdd; diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c index 606fa80ad6e0e..e65f79fc77180 100644 --- a/drivers/rtc/rtc-omap.c +++ b/drivers/rtc/rtc-omap.c @@ -890,7 +890,7 @@ static int omap_rtc_probe(struct platform_device *pdev) if (ret) goto err; - rtc_nvmem_register(rtc->rtc, &omap_rtc_nvmem_config); + devm_rtc_nvmem_register(rtc->rtc, &omap_rtc_nvmem_config); if (rtc->is_pmic_controller) { if (!pm_power_off) { diff --git a/drivers/rtc/rtc-pcf2127.c b/drivers/rtc/rtc-pcf2127.c index fd46860152e15..432cd627359b3 100644 --- a/drivers/rtc/rtc-pcf2127.c +++ b/drivers/rtc/rtc-pcf2127.c @@ -608,7 +608,7 @@ static int pcf2127_probe(struct device *dev, struct regmap *regmap, .size = 512, }; - ret = rtc_nvmem_register(pcf2127->rtc, &nvmem_cfg); + ret = devm_rtc_nvmem_register(pcf2127->rtc, &nvmem_cfg); } /* diff --git a/drivers/rtc/rtc-pcf85063.c b/drivers/rtc/rtc-pcf85063.c index f8b99cb729590..c19f139e9b8db 100644 --- a/drivers/rtc/rtc-pcf85063.c +++ b/drivers/rtc/rtc-pcf85063.c @@ -607,7 +607,7 @@ static int pcf85063_probe(struct i2c_client *client) } nvmem_cfg.priv = pcf85063->regmap; - rtc_nvmem_register(pcf85063->rtc, &nvmem_cfg); + devm_rtc_nvmem_register(pcf85063->rtc, &nvmem_cfg); #ifdef CONFIG_COMMON_CLK /* register clk in common clk framework */ diff --git a/drivers/rtc/rtc-pcf85363.c b/drivers/rtc/rtc-pcf85363.c index 3450d615974d5..23cf14ca2c96e 100644 --- a/drivers/rtc/rtc-pcf85363.c +++ b/drivers/rtc/rtc-pcf85363.c @@ -422,7 +422,7 @@ static int pcf85363_probe(struct i2c_client *client, for (i = 0; i < config->num_nvram; i++) { nvmem_cfg[i].priv = pcf85363; - rtc_nvmem_register(pcf85363->rtc, &nvmem_cfg[i]); + devm_rtc_nvmem_register(pcf85363->rtc, &nvmem_cfg[i]); } return ret; diff --git a/drivers/rtc/rtc-rp5c01.c b/drivers/rtc/rtc-rp5c01.c index a69e8adcc4a11..8bc476c0905f2 100644 --- a/drivers/rtc/rtc-rp5c01.c +++ b/drivers/rtc/rtc-rp5c01.c @@ -255,7 +255,7 @@ static int __init rp5c01_rtc_probe(struct platform_device *dev) priv->rtc = rtc; nvmem_cfg.priv = priv; - error = rtc_nvmem_register(rtc, &nvmem_cfg); + error = devm_rtc_nvmem_register(rtc, &nvmem_cfg); if (error) return error; diff --git a/drivers/rtc/rtc-rv3028.c b/drivers/rtc/rtc-rv3028.c index fa226f0fe67d7..f788df9797504 100644 --- a/drivers/rtc/rtc-rv3028.c +++ b/drivers/rtc/rtc-rv3028.c @@ -891,9 +891,9 @@ static int rv3028_probe(struct i2c_client *client) return ret; nvmem_cfg.priv = rv3028->regmap; - rtc_nvmem_register(rv3028->rtc, &nvmem_cfg); + devm_rtc_nvmem_register(rv3028->rtc, &nvmem_cfg); eeprom_cfg.priv = rv3028; - rtc_nvmem_register(rv3028->rtc, &eeprom_cfg); + devm_rtc_nvmem_register(rv3028->rtc, &eeprom_cfg); rv3028->rtc->max_user_freq = 1; diff --git a/drivers/rtc/rtc-rv3029c2.c b/drivers/rtc/rtc-rv3029c2.c index 62718231731b0..ad359b3b74b25 100644 --- a/drivers/rtc/rtc-rv3029c2.c +++ b/drivers/rtc/rtc-rv3029c2.c @@ -755,7 +755,7 @@ static int rv3029_probe(struct device *dev, struct regmap *regmap, int irq, return rc; nvmem_cfg.priv = rv3029->regmap; - rtc_nvmem_register(rv3029->rtc, &nvmem_cfg); + devm_rtc_nvmem_register(rv3029->rtc, &nvmem_cfg); return 0; } diff --git a/drivers/rtc/rtc-rv3032.c b/drivers/rtc/rtc-rv3032.c index 14e931d6f9c6a..ed9cba3292e68 100644 --- a/drivers/rtc/rtc-rv3032.c +++ b/drivers/rtc/rtc-rv3032.c @@ -890,9 +890,9 @@ static int rv3032_probe(struct i2c_client *client) return ret; nvmem_cfg.priv = rv3032->regmap; - rtc_nvmem_register(rv3032->rtc, &nvmem_cfg); + devm_rtc_nvmem_register(rv3032->rtc, &nvmem_cfg); eeprom_cfg.priv = rv3032; - rtc_nvmem_register(rv3032->rtc, &eeprom_cfg); + devm_rtc_nvmem_register(rv3032->rtc, &eeprom_cfg); rv3032->rtc->max_user_freq = 1; diff --git a/drivers/rtc/rtc-rv8803.c b/drivers/rtc/rtc-rv8803.c index 1d888da48c7c9..44e1818a751cc 100644 --- a/drivers/rtc/rtc-rv8803.c +++ b/drivers/rtc/rtc-rv8803.c @@ -591,7 +591,7 @@ static int rv8803_probe(struct i2c_client *client, if (err) return err; - rtc_nvmem_register(rv8803->rtc, &nvmem_cfg); + devm_rtc_nvmem_register(rv8803->rtc, &nvmem_cfg); rv8803->rtc->max_user_freq = 1; diff --git a/drivers/rtc/rtc-rx8581.c b/drivers/rtc/rtc-rx8581.c index 490f70f576368..017f74721cc06 100644 --- a/drivers/rtc/rtc-rx8581.c +++ b/drivers/rtc/rtc-rx8581.c @@ -302,7 +302,7 @@ static int rx8581_probe(struct i2c_client *client, for (i = 0; i < config->num_nvram; i++) { nvmem_cfg[i].priv = rx8581; - rtc_nvmem_register(rx8581->rtc, &nvmem_cfg[i]); + devm_rtc_nvmem_register(rx8581->rtc, &nvmem_cfg[i]); } return ret; diff --git a/drivers/rtc/rtc-stk17ta8.c b/drivers/rtc/rtc-stk17ta8.c index 1ccf0d5d05b47..ad616bce7bca3 100644 --- a/drivers/rtc/rtc-stk17ta8.c +++ b/drivers/rtc/rtc-stk17ta8.c @@ -313,7 +313,7 @@ static int stk17ta8_rtc_probe(struct platform_device *pdev) pdata->rtc->ops = &stk17ta8_rtc_ops; nvmem_cfg.priv = pdata; - ret = rtc_nvmem_register(pdata->rtc, &nvmem_cfg); + ret = devm_rtc_nvmem_register(pdata->rtc, &nvmem_cfg); if (ret) return ret; diff --git a/drivers/rtc/rtc-tx4939.c b/drivers/rtc/rtc-tx4939.c index abbb62b14d7a0..11f46272bad34 100644 --- a/drivers/rtc/rtc-tx4939.c +++ b/drivers/rtc/rtc-tx4939.c @@ -271,7 +271,7 @@ static int __init tx4939_rtc_probe(struct platform_device *pdev) pdata->rtc = rtc; nvmem_cfg.priv = pdata; - ret = rtc_nvmem_register(rtc, &nvmem_cfg); + ret = devm_rtc_nvmem_register(rtc, &nvmem_cfg); if (ret) return ret; diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 0983ab9faffb3..cbca651d8ca4d 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -244,11 +244,11 @@ extern int rtc_hctosys_ret; #endif #ifdef CONFIG_RTC_NVMEM -int rtc_nvmem_register(struct rtc_device *rtc, - struct nvmem_config *nvmem_config); +int devm_rtc_nvmem_register(struct rtc_device *rtc, + struct nvmem_config *nvmem_config); #else -static inline int rtc_nvmem_register(struct rtc_device *rtc, - struct nvmem_config *nvmem_config) +static inline int devm_rtc_nvmem_register(struct rtc_device *rtc, + struct nvmem_config *nvmem_config) { return 0; } -- GitLab From 6746bc095bbd1da719aadd9a11fe2c75a12f22e0 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 9 Nov 2020 17:34:07 +0100 Subject: [PATCH 0229/1894] rtc: nvmem: emit an error message when nvmem registration fails Some users check the return value of devm_rtc_nvmem_register() only in order to emit an error message and then continue probing. This is fine as an rtc can function without exposing nvmem but let's generalize it: let's make the registration function emit the error message so that users don't have to. Signed-off-by: Bartosz Golaszewski Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201109163409.24301-7-brgl@bgdev.pl --- drivers/rtc/nvmem.c | 7 +++++-- drivers/rtc/rtc-cmos.c | 3 +-- drivers/rtc/rtc-ds1553.c | 3 +-- drivers/rtc/rtc-ds1742.c | 3 +-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/rtc/nvmem.c b/drivers/rtc/nvmem.c index 7502deb6390e3..07ede21cee347 100644 --- a/drivers/rtc/nvmem.c +++ b/drivers/rtc/nvmem.c @@ -13,14 +13,17 @@ int devm_rtc_nvmem_register(struct rtc_device *rtc, struct nvmem_config *nvmem_config) { + struct device *dev = rtc->dev.parent; struct nvmem_device *nvmem; if (!nvmem_config) return -ENODEV; - nvmem_config->dev = rtc->dev.parent; + nvmem_config->dev = dev; nvmem_config->owner = rtc->owner; - nvmem = devm_nvmem_register(rtc->dev.parent, nvmem_config); + nvmem = devm_nvmem_register(dev, nvmem_config); + if (IS_ERR(nvmem)) + dev_err(dev, "failed to register nvmem device for RTC\n"); return PTR_ERR_OR_ZERO(nvmem); } diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index eea91c1538aa3..766074c04b53d 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -869,8 +869,7 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq) /* export at least the first block of NVRAM */ nvmem_cfg.size = address_space - NVRAM_OFFSET; - if (devm_rtc_nvmem_register(cmos_rtc.rtc, &nvmem_cfg)) - dev_err(dev, "nvmem registration failed\n"); + devm_rtc_nvmem_register(cmos_rtc.rtc, &nvmem_cfg); dev_info(dev, "%s%s, %d bytes nvram%s\n", !is_valid_irq(rtc_irq) ? "no alarms" : diff --git a/drivers/rtc/rtc-ds1553.c b/drivers/rtc/rtc-ds1553.c index 2d2eb739d92b4..bb40ea8b63735 100644 --- a/drivers/rtc/rtc-ds1553.c +++ b/drivers/rtc/rtc-ds1553.c @@ -309,8 +309,7 @@ static int ds1553_rtc_probe(struct platform_device *pdev) } } - if (devm_rtc_nvmem_register(pdata->rtc, &nvmem_cfg)) - dev_err(&pdev->dev, "unable to register nvmem\n"); + devm_rtc_nvmem_register(pdata->rtc, &nvmem_cfg); return 0; } diff --git a/drivers/rtc/rtc-ds1742.c b/drivers/rtc/rtc-ds1742.c index 29792a8cce978..39c6c3a85b347 100644 --- a/drivers/rtc/rtc-ds1742.c +++ b/drivers/rtc/rtc-ds1742.c @@ -195,8 +195,7 @@ static int ds1742_rtc_probe(struct platform_device *pdev) if (ret) return ret; - if (devm_rtc_nvmem_register(rtc, &nvmem_cfg)) - dev_err(&pdev->dev, "Unable to register nvmem\n"); + devm_rtc_nvmem_register(rtc, &nvmem_cfg); return 0; } -- GitLab From fdcfd854333be5b30377dc5daa9cd0fa1643a979 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 9 Nov 2020 17:34:08 +0100 Subject: [PATCH 0230/1894] rtc: rework rtc_register_device() resource management rtc_register_device() is a managed interface but it doesn't use devres by itself - instead it marks an rtc_device as "registered" and the devres callback for devm_rtc_allocate_device() takes care of resource release. This doesn't correspond with the design behind devres where managed structures should not be aware of being managed. The correct solution here is to register a separate devres callback for unregistering the device. While at it: rename rtc_register_device() to devm_rtc_register_device() and add it to the list of managed interfaces in devres.rst. This way we can avoid any potential confusion of driver developers who may expect there to exist a corresponding unregister function. Signed-off-by: Bartosz Golaszewski Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201109163409.24301-8-brgl@bgdev.pl --- .../driver-api/driver-model/devres.rst | 1 + arch/alpha/kernel/rtc.c | 2 +- drivers/mfd/menelaus.c | 2 +- drivers/rtc/class.c | 19 +++++++++---------- drivers/rtc/rtc-88pm80x.c | 2 +- drivers/rtc/rtc-88pm860x.c | 2 +- drivers/rtc/rtc-ab-b5ze-s3.c | 2 +- drivers/rtc/rtc-ab-eoz9.c | 2 +- drivers/rtc/rtc-ab3100.c | 2 +- drivers/rtc/rtc-ab8500.c | 2 +- drivers/rtc/rtc-abx80x.c | 2 +- drivers/rtc/rtc-ac100.c | 2 +- drivers/rtc/rtc-armada38x.c | 2 +- drivers/rtc/rtc-aspeed.c | 2 +- drivers/rtc/rtc-at91rm9200.c | 2 +- drivers/rtc/rtc-at91sam9.c | 2 +- drivers/rtc/rtc-au1xxx.c | 2 +- drivers/rtc/rtc-bd70528.c | 2 +- drivers/rtc/rtc-brcmstb-waketimer.c | 2 +- drivers/rtc/rtc-cadence.c | 2 +- drivers/rtc/rtc-cmos.c | 2 +- drivers/rtc/rtc-coh901331.c | 2 +- drivers/rtc/rtc-cpcap.c | 2 +- drivers/rtc/rtc-cros-ec.c | 2 +- drivers/rtc/rtc-da9052.c | 2 +- drivers/rtc/rtc-da9063.c | 2 +- drivers/rtc/rtc-davinci.c | 2 +- drivers/rtc/rtc-digicolor.c | 2 +- drivers/rtc/rtc-dm355evm.c | 2 +- drivers/rtc/rtc-ds1305.c | 2 +- drivers/rtc/rtc-ds1307.c | 2 +- drivers/rtc/rtc-ds1343.c | 2 +- drivers/rtc/rtc-ds1347.c | 2 +- drivers/rtc/rtc-ds1374.c | 2 +- drivers/rtc/rtc-ds1511.c | 2 +- drivers/rtc/rtc-ds1553.c | 2 +- drivers/rtc/rtc-ds1672.c | 2 +- drivers/rtc/rtc-ds1685.c | 2 +- drivers/rtc/rtc-ds1742.c | 2 +- drivers/rtc/rtc-ds2404.c | 2 +- drivers/rtc/rtc-ep93xx.c | 2 +- drivers/rtc/rtc-fsl-ftm-alarm.c | 2 +- drivers/rtc/rtc-ftrtc010.c | 2 +- drivers/rtc/rtc-goldfish.c | 2 +- drivers/rtc/rtc-imx-sc.c | 2 +- drivers/rtc/rtc-imxdi.c | 2 +- drivers/rtc/rtc-isl12026.c | 2 +- drivers/rtc/rtc-isl1208.c | 2 +- drivers/rtc/rtc-jz4740.c | 2 +- drivers/rtc/rtc-lpc32xx.c | 2 +- drivers/rtc/rtc-ls1x.c | 2 +- drivers/rtc/rtc-m41t80.c | 2 +- drivers/rtc/rtc-m48t59.c | 2 +- drivers/rtc/rtc-m48t86.c | 2 +- drivers/rtc/rtc-mc13xxx.c | 2 +- drivers/rtc/rtc-meson-vrtc.c | 2 +- drivers/rtc/rtc-meson.c | 2 +- drivers/rtc/rtc-mpc5121.c | 2 +- drivers/rtc/rtc-mrst.c | 2 +- drivers/rtc/rtc-mt2712.c | 2 +- drivers/rtc/rtc-mt6397.c | 2 +- drivers/rtc/rtc-mv.c | 2 +- drivers/rtc/rtc-mxc.c | 2 +- drivers/rtc/rtc-mxc_v2.c | 2 +- drivers/rtc/rtc-omap.c | 2 +- drivers/rtc/rtc-pcap.c | 2 +- drivers/rtc/rtc-pcf2123.c | 2 +- drivers/rtc/rtc-pcf2127.c | 2 +- drivers/rtc/rtc-pcf85063.c | 2 +- drivers/rtc/rtc-pcf85363.c | 2 +- drivers/rtc/rtc-pcf8563.c | 2 +- drivers/rtc/rtc-pic32.c | 2 +- drivers/rtc/rtc-pl030.c | 2 +- drivers/rtc/rtc-pl031.c | 2 +- drivers/rtc/rtc-pm8xxx.c | 2 +- drivers/rtc/rtc-ps3.c | 2 +- drivers/rtc/rtc-r9701.c | 2 +- drivers/rtc/rtc-rc5t619.c | 2 +- drivers/rtc/rtc-rk808.c | 2 +- drivers/rtc/rtc-rp5c01.c | 2 +- drivers/rtc/rtc-rs5c348.c | 2 +- drivers/rtc/rtc-rv3028.c | 2 +- drivers/rtc/rtc-rv3029c2.c | 2 +- drivers/rtc/rtc-rv3032.c | 2 +- drivers/rtc/rtc-rv8803.c | 2 +- drivers/rtc/rtc-rx8010.c | 2 +- drivers/rtc/rtc-rx8581.c | 2 +- drivers/rtc/rtc-s35390a.c | 2 +- drivers/rtc/rtc-sa1100.c | 2 +- drivers/rtc/rtc-sc27xx.c | 2 +- drivers/rtc/rtc-sd3078.c | 2 +- drivers/rtc/rtc-sh.c | 2 +- drivers/rtc/rtc-sirfsoc.c | 2 +- drivers/rtc/rtc-snvs.c | 2 +- drivers/rtc/rtc-st-lpc.c | 2 +- drivers/rtc/rtc-starfire.c | 2 +- drivers/rtc/rtc-stk17ta8.c | 2 +- drivers/rtc/rtc-stmp3xxx.c | 2 +- drivers/rtc/rtc-sun4v.c | 2 +- drivers/rtc/rtc-sun6i.c | 2 +- drivers/rtc/rtc-sunxi.c | 2 +- drivers/rtc/rtc-tegra.c | 2 +- drivers/rtc/rtc-test.c | 2 +- drivers/rtc/rtc-tps6586x.c | 2 +- drivers/rtc/rtc-tps65910.c | 2 +- drivers/rtc/rtc-tx4939.c | 2 +- drivers/rtc/rtc-vr41xx.c | 2 +- drivers/rtc/rtc-vt8500.c | 2 +- drivers/rtc/rtc-wilco-ec.c | 2 +- drivers/rtc/rtc-wm831x.c | 2 +- drivers/rtc/rtc-xgene.c | 2 +- drivers/rtc/rtc-zynqmp.c | 2 +- drivers/rtc/sysfs.c | 2 -- include/linux/rtc.h | 8 +++----- 114 files changed, 123 insertions(+), 127 deletions(-) diff --git a/Documentation/driver-api/driver-model/devres.rst b/Documentation/driver-api/driver-model/devres.rst index 5df7ba54a4ba5..cd8b6e657b94f 100644 --- a/Documentation/driver-api/driver-model/devres.rst +++ b/Documentation/driver-api/driver-model/devres.rst @@ -414,6 +414,7 @@ RESET RTC devm_rtc_device_register() devm_rtc_allocate_device() + devm_rtc_register_device() devm_rtc_nvmem_register() SERDEV diff --git a/arch/alpha/kernel/rtc.c b/arch/alpha/kernel/rtc.c index 1b1d5963ac550..ce3077946e1d9 100644 --- a/arch/alpha/kernel/rtc.c +++ b/arch/alpha/kernel/rtc.c @@ -216,6 +216,6 @@ alpha_rtc_init(void) rtc->ops = &remote_rtc_ops; #endif - return rtc_register_device(rtc); + return devm_rtc_register_device(rtc); } device_initcall(alpha_rtc_init); diff --git a/drivers/mfd/menelaus.c b/drivers/mfd/menelaus.c index b64d3315a5e13..07e0ca2e467cc 100644 --- a/drivers/mfd/menelaus.c +++ b/drivers/mfd/menelaus.c @@ -1119,7 +1119,7 @@ static inline void menelaus_rtc_init(struct menelaus_chip *m) menelaus_write_reg(MENELAUS_RTC_CTRL, m->rtc_control); } - err = rtc_register_device(m->rtc); + err = devm_rtc_register_device(m->rtc); if (err) { if (alarm) { menelaus_remove_irq_work(MENELAUS_RTCALM_IRQ); diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c index a99b7d24b77c2..b8a34ee039ad9 100644 --- a/drivers/rtc/class.c +++ b/drivers/rtc/class.c @@ -321,8 +321,10 @@ static void rtc_device_get_offset(struct rtc_device *rtc) * * @rtc: the RTC class device to destroy */ -static void rtc_device_unregister(struct rtc_device *rtc) +static void devm_rtc_unregister_device(void *data) { + struct rtc_device *rtc = data; + mutex_lock(&rtc->ops_lock); /* * Remove innards of this RTC, then disable it, before @@ -339,10 +341,7 @@ static void devm_rtc_release_device(struct device *dev, void *res) { struct rtc_device *rtc = *(struct rtc_device **)res; - if (rtc->registered) - rtc_device_unregister(rtc); - else - put_device(&rtc->dev); + put_device(&rtc->dev); } struct rtc_device *devm_rtc_allocate_device(struct device *dev) @@ -383,7 +382,7 @@ exit_ida: } EXPORT_SYMBOL_GPL(devm_rtc_allocate_device); -int __rtc_register_device(struct module *owner, struct rtc_device *rtc) +int __devm_rtc_register_device(struct module *owner, struct rtc_device *rtc) { struct rtc_wkalrm alrm; int err; @@ -413,7 +412,6 @@ int __rtc_register_device(struct module *owner, struct rtc_device *rtc) rtc_proc_add_device(rtc); - rtc->registered = true; dev_info(rtc->dev.parent, "registered as %s\n", dev_name(&rtc->dev)); @@ -422,9 +420,10 @@ int __rtc_register_device(struct module *owner, struct rtc_device *rtc) rtc_hctosys(rtc); #endif - return 0; + return devm_add_action_or_reset(rtc->dev.parent, + devm_rtc_unregister_device, rtc); } -EXPORT_SYMBOL_GPL(__rtc_register_device); +EXPORT_SYMBOL_GPL(__devm_rtc_register_device); /** * devm_rtc_device_register - resource managed rtc_device_register() @@ -454,7 +453,7 @@ struct rtc_device *devm_rtc_device_register(struct device *dev, rtc->ops = ops; - err = __rtc_register_device(owner, rtc); + err = __devm_rtc_register_device(owner, rtc); if (err) return ERR_PTR(err); diff --git a/drivers/rtc/rtc-88pm80x.c b/drivers/rtc/rtc-88pm80x.c index 75779e8501a30..6a3f44cf6ebe5 100644 --- a/drivers/rtc/rtc-88pm80x.c +++ b/drivers/rtc/rtc-88pm80x.c @@ -294,7 +294,7 @@ static int pm80x_rtc_probe(struct platform_device *pdev) info->rtc_dev->ops = &pm80x_rtc_ops; info->rtc_dev->range_max = U32_MAX; - ret = rtc_register_device(info->rtc_dev); + ret = devm_rtc_register_device(info->rtc_dev); if (ret) goto out_rtc; diff --git a/drivers/rtc/rtc-88pm860x.c b/drivers/rtc/rtc-88pm860x.c index c90457d001e95..2c809a1a445e8 100644 --- a/drivers/rtc/rtc-88pm860x.c +++ b/drivers/rtc/rtc-88pm860x.c @@ -307,7 +307,7 @@ static int pm860x_rtc_probe(struct platform_device *pdev) info->rtc_dev->ops = &pm860x_rtc_ops; info->rtc_dev->range_max = U32_MAX; - ret = rtc_register_device(info->rtc_dev); + ret = devm_rtc_register_device(info->rtc_dev); if (ret) return ret; diff --git a/drivers/rtc/rtc-ab-b5ze-s3.c b/drivers/rtc/rtc-ab-b5ze-s3.c index 2370ac0cdb5f8..6e3e320dc727d 100644 --- a/drivers/rtc/rtc-ab-b5ze-s3.c +++ b/drivers/rtc/rtc-ab-b5ze-s3.c @@ -892,7 +892,7 @@ static int abb5zes3_probe(struct i2c_client *client, } } - ret = rtc_register_device(data->rtc); + ret = devm_rtc_register_device(data->rtc); err: if (ret && data->irq) diff --git a/drivers/rtc/rtc-ab-eoz9.c b/drivers/rtc/rtc-ab-eoz9.c index d690985caa4cf..b20d8f26dcdb8 100644 --- a/drivers/rtc/rtc-ab-eoz9.c +++ b/drivers/rtc/rtc-ab-eoz9.c @@ -420,7 +420,7 @@ static int abeoz9_probe(struct i2c_client *client, data->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; data->rtc->range_max = RTC_TIMESTAMP_END_2099; - ret = rtc_register_device(data->rtc); + ret = devm_rtc_register_device(data->rtc); if (ret) return ret; diff --git a/drivers/rtc/rtc-ab3100.c b/drivers/rtc/rtc-ab3100.c index 2ed6def90975d..e4fd961e8bf67 100644 --- a/drivers/rtc/rtc-ab3100.c +++ b/drivers/rtc/rtc-ab3100.c @@ -238,7 +238,7 @@ static int __init ab3100_rtc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, rtc); - return rtc_register_device(rtc); + return devm_rtc_register_device(rtc); } static struct platform_driver ab3100_rtc_driver = { diff --git a/drivers/rtc/rtc-ab8500.c b/drivers/rtc/rtc-ab8500.c index 3d60f3283f11c..b40048871295d 100644 --- a/drivers/rtc/rtc-ab8500.c +++ b/drivers/rtc/rtc-ab8500.c @@ -404,7 +404,7 @@ static int ab8500_rtc_probe(struct platform_device *pdev) if (err) return err; - return rtc_register_device(rtc); + return devm_rtc_register_device(rtc); } static int ab8500_rtc_remove(struct platform_device *pdev) diff --git a/drivers/rtc/rtc-abx80x.c b/drivers/rtc/rtc-abx80x.c index 803725b3a02c3..6733bb0df6745 100644 --- a/drivers/rtc/rtc-abx80x.c +++ b/drivers/rtc/rtc-abx80x.c @@ -851,7 +851,7 @@ static int abx80x_probe(struct i2c_client *client, return err; } - return rtc_register_device(priv->rtc); + return devm_rtc_register_device(priv->rtc); } static const struct i2c_device_id abx80x_id[] = { diff --git a/drivers/rtc/rtc-ac100.c b/drivers/rtc/rtc-ac100.c index 29223931aba72..1ddbef99e38f2 100644 --- a/drivers/rtc/rtc-ac100.c +++ b/drivers/rtc/rtc-ac100.c @@ -610,7 +610,7 @@ static int ac100_rtc_probe(struct platform_device *pdev) if (ret) return ret; - return rtc_register_device(chip->rtc); + return devm_rtc_register_device(chip->rtc); } static int ac100_rtc_remove(struct platform_device *pdev) diff --git a/drivers/rtc/rtc-armada38x.c b/drivers/rtc/rtc-armada38x.c index 94d7c22fc4f3a..807a79c07f084 100644 --- a/drivers/rtc/rtc-armada38x.c +++ b/drivers/rtc/rtc-armada38x.c @@ -556,7 +556,7 @@ static __init int armada38x_rtc_probe(struct platform_device *pdev) rtc->rtc_dev->range_max = U32_MAX; - return rtc_register_device(rtc->rtc_dev); + return devm_rtc_register_device(rtc->rtc_dev); } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/rtc/rtc-aspeed.c b/drivers/rtc/rtc-aspeed.c index eacdd0637cce2..a93352ed3aec9 100644 --- a/drivers/rtc/rtc-aspeed.c +++ b/drivers/rtc/rtc-aspeed.c @@ -104,7 +104,7 @@ static int aspeed_rtc_probe(struct platform_device *pdev) rtc->rtc_dev->range_min = RTC_TIMESTAMP_BEGIN_1900; rtc->rtc_dev->range_max = 38814989399LL; /* 3199-12-31 23:59:59 */ - return rtc_register_device(rtc->rtc_dev); + return devm_rtc_register_device(rtc->rtc_dev); } static const struct of_device_id aspeed_rtc_match[] = { diff --git a/drivers/rtc/rtc-at91rm9200.c b/drivers/rtc/rtc-at91rm9200.c index da24e68adccae..fe396d27ebb71 100644 --- a/drivers/rtc/rtc-at91rm9200.c +++ b/drivers/rtc/rtc-at91rm9200.c @@ -538,7 +538,7 @@ static int __init at91_rtc_probe(struct platform_device *pdev) rtc->range_min = RTC_TIMESTAMP_BEGIN_1900; rtc->range_max = RTC_TIMESTAMP_END_2099; - ret = rtc_register_device(rtc); + ret = devm_rtc_register_device(rtc); if (ret) goto err_clk; diff --git a/drivers/rtc/rtc-at91sam9.c b/drivers/rtc/rtc-at91sam9.c index e39e89867d293..2216be429ab72 100644 --- a/drivers/rtc/rtc-at91sam9.c +++ b/drivers/rtc/rtc-at91sam9.c @@ -431,7 +431,7 @@ static int at91_rtc_probe(struct platform_device *pdev) dev_warn(&pdev->dev, "%s: SET TIME!\n", dev_name(&rtc->rtcdev->dev)); - return rtc_register_device(rtc->rtcdev); + return devm_rtc_register_device(rtc->rtcdev); err_clk: clk_disable_unprepare(rtc->sclk); diff --git a/drivers/rtc/rtc-au1xxx.c b/drivers/rtc/rtc-au1xxx.c index 791bebcb6f47f..e6428b27b5d41 100644 --- a/drivers/rtc/rtc-au1xxx.c +++ b/drivers/rtc/rtc-au1xxx.c @@ -104,7 +104,7 @@ static int au1xtoy_rtc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, rtcdev); - return rtc_register_device(rtcdev); + return devm_rtc_register_device(rtcdev); } static struct platform_driver au1xrtc_driver = { diff --git a/drivers/rtc/rtc-bd70528.c b/drivers/rtc/rtc-bd70528.c index 4492b770422c8..17cb67f5bf6e2 100644 --- a/drivers/rtc/rtc-bd70528.c +++ b/drivers/rtc/rtc-bd70528.c @@ -604,7 +604,7 @@ static int bd70528_probe(struct platform_device *pdev) } } - return rtc_register_device(rtc); + return devm_rtc_register_device(rtc); } static const struct platform_device_id bd718x7_rtc_id[] = { diff --git a/drivers/rtc/rtc-brcmstb-waketimer.c b/drivers/rtc/rtc-brcmstb-waketimer.c index 375a9987a1d68..0366e2ff04ae4 100644 --- a/drivers/rtc/rtc-brcmstb-waketimer.c +++ b/drivers/rtc/rtc-brcmstb-waketimer.c @@ -252,7 +252,7 @@ static int brcmstb_waketmr_probe(struct platform_device *pdev) timer->rtc->ops = &brcmstb_waketmr_ops; timer->rtc->range_max = U32_MAX; - ret = rtc_register_device(timer->rtc); + ret = devm_rtc_register_device(timer->rtc); if (ret) goto err_notifier; diff --git a/drivers/rtc/rtc-cadence.c b/drivers/rtc/rtc-cadence.c index 595d5d252850a..1edf7f16d73af 100644 --- a/drivers/rtc/rtc-cadence.c +++ b/drivers/rtc/rtc-cadence.c @@ -336,7 +336,7 @@ static int cdns_rtc_probe(struct platform_device *pdev) writel(0, crtc->regs + CDNS_RTC_HMR); writel(CDNS_RTC_KRTCR_KRTC, crtc->regs + CDNS_RTC_KRTCR); - ret = rtc_register_device(crtc->rtc_dev); + ret = devm_rtc_register_device(crtc->rtc_dev); if (ret) goto err_disable_wakeup; diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index 766074c04b53d..83415600185c1 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -863,7 +863,7 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq) cmos_rtc.rtc->ops = &cmos_rtc_ops_no_alarm; } - retval = rtc_register_device(cmos_rtc.rtc); + retval = devm_rtc_register_device(cmos_rtc.rtc); if (retval) goto cleanup2; diff --git a/drivers/rtc/rtc-coh901331.c b/drivers/rtc/rtc-coh901331.c index da59917c9ee84..168ced87d93ae 100644 --- a/drivers/rtc/rtc-coh901331.c +++ b/drivers/rtc/rtc-coh901331.c @@ -203,7 +203,7 @@ static int __init coh901331_probe(struct platform_device *pdev) platform_set_drvdata(pdev, rtap); - ret = rtc_register_device(rtap->rtc); + ret = devm_rtc_register_device(rtap->rtc); if (ret) goto out_no_rtc; diff --git a/drivers/rtc/rtc-cpcap.c b/drivers/rtc/rtc-cpcap.c index 38d576b0c4faa..afc8fcba8f888 100644 --- a/drivers/rtc/rtc-cpcap.c +++ b/drivers/rtc/rtc-cpcap.c @@ -301,7 +301,7 @@ static int cpcap_rtc_probe(struct platform_device *pdev) /* ignore error and continue without wakeup support */ } - return rtc_register_device(rtc->rtc_dev); + return devm_rtc_register_device(rtc->rtc_dev); } static const struct of_device_id cpcap_rtc_of_match[] = { diff --git a/drivers/rtc/rtc-cros-ec.c b/drivers/rtc/rtc-cros-ec.c index f7343c289cab7..70626793ca694 100644 --- a/drivers/rtc/rtc-cros-ec.c +++ b/drivers/rtc/rtc-cros-ec.c @@ -350,7 +350,7 @@ static int cros_ec_rtc_probe(struct platform_device *pdev) cros_ec_rtc->rtc->ops = &cros_ec_rtc_ops; cros_ec_rtc->rtc->range_max = U32_MAX; - ret = rtc_register_device(cros_ec_rtc->rtc); + ret = devm_rtc_register_device(cros_ec_rtc->rtc); if (ret) return ret; diff --git a/drivers/rtc/rtc-da9052.c b/drivers/rtc/rtc-da9052.c index 58de10da37b1a..9ca99bd357022 100644 --- a/drivers/rtc/rtc-da9052.c +++ b/drivers/rtc/rtc-da9052.c @@ -304,7 +304,7 @@ static int da9052_rtc_probe(struct platform_device *pdev) rtc->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; rtc->rtc->range_max = RTC_TIMESTAMP_END_2063; - ret = rtc_register_device(rtc->rtc); + ret = devm_rtc_register_device(rtc->rtc); if (ret) return ret; diff --git a/drivers/rtc/rtc-da9063.c b/drivers/rtc/rtc-da9063.c index 6f0a3a7111356..d4b72a9fa2ba8 100644 --- a/drivers/rtc/rtc-da9063.c +++ b/drivers/rtc/rtc-da9063.c @@ -494,7 +494,7 @@ static int da9063_rtc_probe(struct platform_device *pdev) dev_err(&pdev->dev, "Failed to request ALARM IRQ %d: %d\n", irq_alarm, ret); - return rtc_register_device(rtc->rtc_dev); + return devm_rtc_register_device(rtc->rtc_dev); } static struct platform_driver da9063_rtc_driver = { diff --git a/drivers/rtc/rtc-davinci.c b/drivers/rtc/rtc-davinci.c index 73f87a17cdf39..6bef0f2353da4 100644 --- a/drivers/rtc/rtc-davinci.c +++ b/drivers/rtc/rtc-davinci.c @@ -484,7 +484,7 @@ static int __init davinci_rtc_probe(struct platform_device *pdev) device_init_wakeup(&pdev->dev, 0); - return rtc_register_device(davinci_rtc->rtc); + return devm_rtc_register_device(davinci_rtc->rtc); } static int __exit davinci_rtc_remove(struct platform_device *pdev) diff --git a/drivers/rtc/rtc-digicolor.c b/drivers/rtc/rtc-digicolor.c index 200d85b01e8bc..4fdfa5b6feb27 100644 --- a/drivers/rtc/rtc-digicolor.c +++ b/drivers/rtc/rtc-digicolor.c @@ -202,7 +202,7 @@ static int __init dc_rtc_probe(struct platform_device *pdev) rtc->rtc_dev->ops = &dc_rtc_ops; rtc->rtc_dev->range_max = U32_MAX; - return rtc_register_device(rtc->rtc_dev); + return devm_rtc_register_device(rtc->rtc_dev); } static const struct of_device_id dc_dt_ids[] = { diff --git a/drivers/rtc/rtc-dm355evm.c b/drivers/rtc/rtc-dm355evm.c index cd947a20843b3..94fb16ac3e0fa 100644 --- a/drivers/rtc/rtc-dm355evm.c +++ b/drivers/rtc/rtc-dm355evm.c @@ -132,7 +132,7 @@ static int dm355evm_rtc_probe(struct platform_device *pdev) rtc->ops = &dm355evm_rtc_ops; rtc->range_max = U32_MAX; - return rtc_register_device(rtc); + return devm_rtc_register_device(rtc); } /* diff --git a/drivers/rtc/rtc-ds1305.c b/drivers/rtc/rtc-ds1305.c index a4e768261b431..8c2ab29c3d912 100644 --- a/drivers/rtc/rtc-ds1305.c +++ b/drivers/rtc/rtc-ds1305.c @@ -694,7 +694,7 @@ static int ds1305_probe(struct spi_device *spi) ds1305->rtc->range_max = RTC_TIMESTAMP_END_2099; ds1305_nvmem_cfg.priv = ds1305; - status = rtc_register_device(ds1305->rtc); + status = devm_rtc_register_device(ds1305->rtc); if (status) return status; diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c index 216bc5d9b716e..183cf7c01364c 100644 --- a/drivers/rtc/rtc-ds1307.c +++ b/drivers/rtc/rtc-ds1307.c @@ -2001,7 +2001,7 @@ static int ds1307_probe(struct i2c_client *client, if (err) return err; - err = rtc_register_device(ds1307->rtc); + err = devm_rtc_register_device(ds1307->rtc); if (err) return err; diff --git a/drivers/rtc/rtc-ds1343.c b/drivers/rtc/rtc-ds1343.c index ea663e24a34c4..f14ed6c96437b 100644 --- a/drivers/rtc/rtc-ds1343.c +++ b/drivers/rtc/rtc-ds1343.c @@ -408,7 +408,7 @@ static int ds1343_probe(struct spi_device *spi) dev_err(&spi->dev, "unable to create sysfs entries for rtc ds1343\n"); - res = rtc_register_device(priv->rtc); + res = devm_rtc_register_device(priv->rtc); if (res) return res; diff --git a/drivers/rtc/rtc-ds1347.c b/drivers/rtc/rtc-ds1347.c index 7025cf3fb9f8d..157bf5209ac40 100644 --- a/drivers/rtc/rtc-ds1347.c +++ b/drivers/rtc/rtc-ds1347.c @@ -166,7 +166,7 @@ static int ds1347_probe(struct spi_device *spi) rtc->range_min = RTC_TIMESTAMP_BEGIN_0000; rtc->range_max = RTC_TIMESTAMP_END_9999; - return rtc_register_device(rtc); + return devm_rtc_register_device(rtc); } static struct spi_driver ds1347_driver = { diff --git a/drivers/rtc/rtc-ds1374.c b/drivers/rtc/rtc-ds1374.c index 177d870bda0d3..fab79921a7122 100644 --- a/drivers/rtc/rtc-ds1374.c +++ b/drivers/rtc/rtc-ds1374.c @@ -508,7 +508,7 @@ static int ds1374_probe(struct i2c_client *client, ds1374->rtc->ops = &ds1374_rtc_ops; ds1374->rtc->range_max = U32_MAX; - ret = rtc_register_device(ds1374->rtc); + ret = devm_rtc_register_device(ds1374->rtc); if (ret) return ret; diff --git a/drivers/rtc/rtc-ds1511.c b/drivers/rtc/rtc-ds1511.c index d5f48216e851d..bda8843330821 100644 --- a/drivers/rtc/rtc-ds1511.c +++ b/drivers/rtc/rtc-ds1511.c @@ -466,7 +466,7 @@ static int ds1511_rtc_probe(struct platform_device *pdev) pdata->rtc->ops = &ds1511_rtc_ops; - ret = rtc_register_device(pdata->rtc); + ret = devm_rtc_register_device(pdata->rtc); if (ret) return ret; diff --git a/drivers/rtc/rtc-ds1553.c b/drivers/rtc/rtc-ds1553.c index bb40ea8b63735..dbff5b621ef5c 100644 --- a/drivers/rtc/rtc-ds1553.c +++ b/drivers/rtc/rtc-ds1553.c @@ -295,7 +295,7 @@ static int ds1553_rtc_probe(struct platform_device *pdev) pdata->rtc->ops = &ds1553_rtc_ops; - ret = rtc_register_device(pdata->rtc); + ret = devm_rtc_register_device(pdata->rtc); if (ret) return ret; diff --git a/drivers/rtc/rtc-ds1672.c b/drivers/rtc/rtc-ds1672.c index 9da84df9f1520..630493759d15d 100644 --- a/drivers/rtc/rtc-ds1672.c +++ b/drivers/rtc/rtc-ds1672.c @@ -124,7 +124,7 @@ static int ds1672_probe(struct i2c_client *client, rtc->ops = &ds1672_rtc_ops; rtc->range_max = U32_MAX; - err = rtc_register_device(rtc); + err = devm_rtc_register_device(rtc); if (err) return err; diff --git a/drivers/rtc/rtc-ds1685.c b/drivers/rtc/rtc-ds1685.c index bef588fce2660..d69c807af29b6 100644 --- a/drivers/rtc/rtc-ds1685.c +++ b/drivers/rtc/rtc-ds1685.c @@ -1321,7 +1321,7 @@ ds1685_rtc_probe(struct platform_device *pdev) if (ret) return ret; - return rtc_register_device(rtc_dev); + return devm_rtc_register_device(rtc_dev); } /** diff --git a/drivers/rtc/rtc-ds1742.c b/drivers/rtc/rtc-ds1742.c index 39c6c3a85b347..13d45c697da68 100644 --- a/drivers/rtc/rtc-ds1742.c +++ b/drivers/rtc/rtc-ds1742.c @@ -191,7 +191,7 @@ static int ds1742_rtc_probe(struct platform_device *pdev) rtc->ops = &ds1742_rtc_ops; - ret = rtc_register_device(rtc); + ret = devm_rtc_register_device(rtc); if (ret) return ret; diff --git a/drivers/rtc/rtc-ds2404.c b/drivers/rtc/rtc-ds2404.c index 9df0c44512b8a..0480f592307e9 100644 --- a/drivers/rtc/rtc-ds2404.c +++ b/drivers/rtc/rtc-ds2404.c @@ -234,7 +234,7 @@ static int rtc_probe(struct platform_device *pdev) chip->rtc->ops = &ds2404_rtc_ops; chip->rtc->range_max = U32_MAX; - retval = rtc_register_device(chip->rtc); + retval = devm_rtc_register_device(chip->rtc); if (retval) return retval; diff --git a/drivers/rtc/rtc-ep93xx.c b/drivers/rtc/rtc-ep93xx.c index 8ec9ea1ca72e1..9a5a15cbcd9b7 100644 --- a/drivers/rtc/rtc-ep93xx.c +++ b/drivers/rtc/rtc-ep93xx.c @@ -145,7 +145,7 @@ static int ep93xx_rtc_probe(struct platform_device *pdev) if (err) return err; - return rtc_register_device(ep93xx_rtc->rtc); + return devm_rtc_register_device(ep93xx_rtc->rtc); } static struct platform_driver ep93xx_rtc_driver = { diff --git a/drivers/rtc/rtc-fsl-ftm-alarm.c b/drivers/rtc/rtc-fsl-ftm-alarm.c index 48d3b38ea3480..57cc09d0a8067 100644 --- a/drivers/rtc/rtc-fsl-ftm-alarm.c +++ b/drivers/rtc/rtc-fsl-ftm-alarm.c @@ -290,7 +290,7 @@ static int ftm_rtc_probe(struct platform_device *pdev) if (ret) dev_err(&pdev->dev, "failed to enable irq wake\n"); - ret = rtc_register_device(rtc->rtc_dev); + ret = devm_rtc_register_device(rtc->rtc_dev); if (ret) { dev_err(&pdev->dev, "can't register rtc device\n"); return ret; diff --git a/drivers/rtc/rtc-ftrtc010.c b/drivers/rtc/rtc-ftrtc010.c index 0919f7dc94a34..ad3add5db4c82 100644 --- a/drivers/rtc/rtc-ftrtc010.c +++ b/drivers/rtc/rtc-ftrtc010.c @@ -176,7 +176,7 @@ static int ftrtc010_rtc_probe(struct platform_device *pdev) if (unlikely(ret)) return ret; - return rtc_register_device(rtc->rtc_dev); + return devm_rtc_register_device(rtc->rtc_dev); } static int ftrtc010_rtc_remove(struct platform_device *pdev) diff --git a/drivers/rtc/rtc-goldfish.c b/drivers/rtc/rtc-goldfish.c index 6349d2cd36805..7ab95d0526440 100644 --- a/drivers/rtc/rtc-goldfish.c +++ b/drivers/rtc/rtc-goldfish.c @@ -194,7 +194,7 @@ static int goldfish_rtc_probe(struct platform_device *pdev) if (err) return err; - return rtc_register_device(rtcdrv->rtc); + return devm_rtc_register_device(rtcdrv->rtc); } static const struct of_device_id goldfish_rtc_of_match[] = { diff --git a/drivers/rtc/rtc-imx-sc.c b/drivers/rtc/rtc-imx-sc.c index a5f59e6f862e0..cc9fbab499994 100644 --- a/drivers/rtc/rtc-imx-sc.c +++ b/drivers/rtc/rtc-imx-sc.c @@ -166,7 +166,7 @@ static int imx_sc_rtc_probe(struct platform_device *pdev) imx_sc_rtc->range_min = 0; imx_sc_rtc->range_max = U32_MAX; - ret = rtc_register_device(imx_sc_rtc); + ret = devm_rtc_register_device(imx_sc_rtc); if (ret) return ret; diff --git a/drivers/rtc/rtc-imxdi.c b/drivers/rtc/rtc-imxdi.c index 8d141d8a5490b..c2692da74e098 100644 --- a/drivers/rtc/rtc-imxdi.c +++ b/drivers/rtc/rtc-imxdi.c @@ -814,7 +814,7 @@ static int __init dryice_rtc_probe(struct platform_device *pdev) imxdi->rtc->ops = &dryice_rtc_ops; imxdi->rtc->range_max = U32_MAX; - rc = rtc_register_device(imxdi->rtc); + rc = devm_rtc_register_device(imxdi->rtc); if (rc) goto err; diff --git a/drivers/rtc/rtc-isl12026.c b/drivers/rtc/rtc-isl12026.c index fff8d82536698..1fc6627d854d3 100644 --- a/drivers/rtc/rtc-isl12026.c +++ b/drivers/rtc/rtc-isl12026.c @@ -469,7 +469,7 @@ static int isl12026_probe_new(struct i2c_client *client) if (ret) return ret; - return rtc_register_device(priv->rtc); + return devm_rtc_register_device(priv->rtc); } static int isl12026_remove(struct i2c_client *client) diff --git a/drivers/rtc/rtc-isl1208.c b/drivers/rtc/rtc-isl1208.c index 08d778b10e9ec..563a6d9c9fcfd 100644 --- a/drivers/rtc/rtc-isl1208.c +++ b/drivers/rtc/rtc-isl1208.c @@ -894,7 +894,7 @@ isl1208_probe(struct i2c_client *client, const struct i2c_device_id *id) if (rc) return rc; - return rtc_register_device(isl1208->rtc); + return devm_rtc_register_device(isl1208->rtc); } static struct i2c_driver isl1208_driver = { diff --git a/drivers/rtc/rtc-jz4740.c b/drivers/rtc/rtc-jz4740.c index 9607e6b6e0b32..6e51df72fd658 100644 --- a/drivers/rtc/rtc-jz4740.c +++ b/drivers/rtc/rtc-jz4740.c @@ -375,7 +375,7 @@ static int jz4740_rtc_probe(struct platform_device *pdev) /* Each 1 Hz pulse should happen after (rate) ticks */ jz4740_rtc_reg_write(rtc, JZ_REG_RTC_REGULATOR, rate - 1); - ret = rtc_register_device(rtc->rtc); + ret = devm_rtc_register_device(rtc->rtc); if (ret) return ret; diff --git a/drivers/rtc/rtc-lpc32xx.c b/drivers/rtc/rtc-lpc32xx.c index 15d8abda81fe5..76ad7031a13dd 100644 --- a/drivers/rtc/rtc-lpc32xx.c +++ b/drivers/rtc/rtc-lpc32xx.c @@ -239,7 +239,7 @@ static int lpc32xx_rtc_probe(struct platform_device *pdev) rtc->rtc->ops = &lpc32xx_rtc_ops; rtc->rtc->range_max = U32_MAX; - err = rtc_register_device(rtc->rtc); + err = devm_rtc_register_device(rtc->rtc); if (err) return err; diff --git a/drivers/rtc/rtc-ls1x.c b/drivers/rtc/rtc-ls1x.c index 8bd34056fea0a..5af26dc5c2a39 100644 --- a/drivers/rtc/rtc-ls1x.c +++ b/drivers/rtc/rtc-ls1x.c @@ -176,7 +176,7 @@ static int ls1x_rtc_probe(struct platform_device *pdev) rtcdev->range_min = RTC_TIMESTAMP_BEGIN_1900; rtcdev->range_max = RTC_TIMESTAMP_END_2099; - return rtc_register_device(rtcdev); + return devm_rtc_register_device(rtcdev); } static struct platform_driver ls1x_rtc_driver = { diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c index 8a89bc52b0d45..160dcf68e64eb 100644 --- a/drivers/rtc/rtc-m41t80.c +++ b/drivers/rtc/rtc-m41t80.c @@ -977,7 +977,7 @@ static int m41t80_probe(struct i2c_client *client, m41t80_sqw_register_clk(m41t80_data); #endif - rc = rtc_register_device(m41t80_data->rtc); + rc = devm_rtc_register_device(m41t80_data->rtc); if (rc) return rc; diff --git a/drivers/rtc/rtc-m48t59.c b/drivers/rtc/rtc-m48t59.c index e966a66ab2d35..5f5898d3b055c 100644 --- a/drivers/rtc/rtc-m48t59.c +++ b/drivers/rtc/rtc-m48t59.c @@ -470,7 +470,7 @@ static int m48t59_rtc_probe(struct platform_device *pdev) if (ret) return ret; - ret = rtc_register_device(m48t59->rtc); + ret = devm_rtc_register_device(m48t59->rtc); if (ret) return ret; diff --git a/drivers/rtc/rtc-m48t86.c b/drivers/rtc/rtc-m48t86.c index 182cfe59e4e04..481c9525b1ddb 100644 --- a/drivers/rtc/rtc-m48t86.c +++ b/drivers/rtc/rtc-m48t86.c @@ -255,7 +255,7 @@ static int m48t86_rtc_probe(struct platform_device *pdev) info->rtc->ops = &m48t86_rtc_ops; - err = rtc_register_device(info->rtc); + err = devm_rtc_register_device(info->rtc); if (err) return err; diff --git a/drivers/rtc/rtc-mc13xxx.c b/drivers/rtc/rtc-mc13xxx.c index d6802e6191cbe..d4234e78497e0 100644 --- a/drivers/rtc/rtc-mc13xxx.c +++ b/drivers/rtc/rtc-mc13xxx.c @@ -307,7 +307,7 @@ static int __init mc13xxx_rtc_probe(struct platform_device *pdev) mc13xxx_unlock(mc13xxx); - ret = rtc_register_device(priv->rtc); + ret = devm_rtc_register_device(priv->rtc); if (ret) { mc13xxx_lock(mc13xxx); goto err_irq_request; diff --git a/drivers/rtc/rtc-meson-vrtc.c b/drivers/rtc/rtc-meson-vrtc.c index e6bd0808a092b..1463c86215615 100644 --- a/drivers/rtc/rtc-meson-vrtc.c +++ b/drivers/rtc/rtc-meson-vrtc.c @@ -83,7 +83,7 @@ static int meson_vrtc_probe(struct platform_device *pdev) return PTR_ERR(vrtc->rtc); vrtc->rtc->ops = &meson_vrtc_ops; - return rtc_register_device(vrtc->rtc); + return devm_rtc_register_device(vrtc->rtc); } static int __maybe_unused meson_vrtc_suspend(struct device *dev) diff --git a/drivers/rtc/rtc-meson.c b/drivers/rtc/rtc-meson.c index 938267713a4dc..8642c06565ea0 100644 --- a/drivers/rtc/rtc-meson.c +++ b/drivers/rtc/rtc-meson.c @@ -369,7 +369,7 @@ static int meson_rtc_probe(struct platform_device *pdev) if (ret) goto out_disable_vdd; - ret = rtc_register_device(rtc->rtc); + ret = devm_rtc_register_device(rtc->rtc); if (ret) goto out_disable_vdd; diff --git a/drivers/rtc/rtc-mpc5121.c b/drivers/rtc/rtc-mpc5121.c index 5c2ce71aa044d..bb2ea9bc56f2c 100644 --- a/drivers/rtc/rtc-mpc5121.c +++ b/drivers/rtc/rtc-mpc5121.c @@ -371,7 +371,7 @@ static int mpc5121_rtc_probe(struct platform_device *op) rtc->rtc->range_max = U32_MAX; } - err = rtc_register_device(rtc->rtc); + err = devm_rtc_register_device(rtc->rtc); if (err) goto out_dispose2; diff --git a/drivers/rtc/rtc-mrst.c b/drivers/rtc/rtc-mrst.c index 17bf5394e1e51..421b3b6071b69 100644 --- a/drivers/rtc/rtc-mrst.c +++ b/drivers/rtc/rtc-mrst.c @@ -361,7 +361,7 @@ static int vrtc_mrst_do_probe(struct device *dev, struct resource *iomem, } } - retval = rtc_register_device(mrst_rtc.rtc); + retval = devm_rtc_register_device(mrst_rtc.rtc); if (retval) goto cleanup0; diff --git a/drivers/rtc/rtc-mt2712.c b/drivers/rtc/rtc-mt2712.c index d5f691c8a0358..cd92a9788351d 100644 --- a/drivers/rtc/rtc-mt2712.c +++ b/drivers/rtc/rtc-mt2712.c @@ -352,7 +352,7 @@ static int mt2712_rtc_probe(struct platform_device *pdev) mt2712_rtc->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; mt2712_rtc->rtc->range_max = MT2712_RTC_TIMESTAMP_END_2127; - return rtc_register_device(mt2712_rtc->rtc); + return devm_rtc_register_device(mt2712_rtc->rtc); } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/rtc/rtc-mt6397.c b/drivers/rtc/rtc-mt6397.c index 1894aded4c857..6655035e5164d 100644 --- a/drivers/rtc/rtc-mt6397.c +++ b/drivers/rtc/rtc-mt6397.c @@ -301,7 +301,7 @@ static int mtk_rtc_probe(struct platform_device *pdev) rtc->rtc_dev->ops = &mtk_rtc_ops; - return rtc_register_device(rtc->rtc_dev); + return devm_rtc_register_device(rtc->rtc_dev); } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/rtc/rtc-mv.c b/drivers/rtc/rtc-mv.c index d5f190e578e4d..f8e2ecea1d8df 100644 --- a/drivers/rtc/rtc-mv.c +++ b/drivers/rtc/rtc-mv.c @@ -278,7 +278,7 @@ static int __init mv_rtc_probe(struct platform_device *pdev) pdata->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; pdata->rtc->range_max = RTC_TIMESTAMP_END_2099; - ret = rtc_register_device(pdata->rtc); + ret = devm_rtc_register_device(pdata->rtc); if (!ret) return 0; out: diff --git a/drivers/rtc/rtc-mxc.c b/drivers/rtc/rtc-mxc.c index 0d253ce3a8f5e..65b29b0fa5480 100644 --- a/drivers/rtc/rtc-mxc.c +++ b/drivers/rtc/rtc-mxc.c @@ -408,7 +408,7 @@ static int mxc_rtc_probe(struct platform_device *pdev) dev_err(&pdev->dev, "failed to enable irq wake\n"); } - ret = rtc_register_device(rtc); + ret = devm_rtc_register_device(rtc); return ret; } diff --git a/drivers/rtc/rtc-mxc_v2.c b/drivers/rtc/rtc-mxc_v2.c index 91534560fe2a2..0d73f6f0cf9e4 100644 --- a/drivers/rtc/rtc-mxc_v2.c +++ b/drivers/rtc/rtc-mxc_v2.c @@ -354,7 +354,7 @@ static int mxc_rtc_probe(struct platform_device *pdev) return ret; } - ret = rtc_register_device(pdata->rtc); + ret = devm_rtc_register_device(pdata->rtc); if (ret < 0) clk_unprepare(pdata->clk); diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c index e65f79fc77180..dc7db2477f883 100644 --- a/drivers/rtc/rtc-omap.c +++ b/drivers/rtc/rtc-omap.c @@ -886,7 +886,7 @@ static int omap_rtc_probe(struct platform_device *pdev) goto err; } - ret = rtc_register_device(rtc->rtc); + ret = devm_rtc_register_device(rtc->rtc); if (ret) goto err; diff --git a/drivers/rtc/rtc-pcap.c b/drivers/rtc/rtc-pcap.c index 178bfb1dea214..8c7a98a5452c6 100644 --- a/drivers/rtc/rtc-pcap.c +++ b/drivers/rtc/rtc-pcap.c @@ -163,7 +163,7 @@ static int __init pcap_rtc_probe(struct platform_device *pdev) if (err) return err; - return rtc_register_device(pcap_rtc->rtc); + return devm_rtc_register_device(pcap_rtc->rtc); } static int __exit pcap_rtc_remove(struct platform_device *pdev) diff --git a/drivers/rtc/rtc-pcf2123.c b/drivers/rtc/rtc-pcf2123.c index c3691fa4210e5..534ffc91eec15 100644 --- a/drivers/rtc/rtc-pcf2123.c +++ b/drivers/rtc/rtc-pcf2123.c @@ -434,7 +434,7 @@ static int pcf2123_probe(struct spi_device *spi) rtc->range_max = RTC_TIMESTAMP_END_2099; rtc->set_start_time = true; - ret = rtc_register_device(rtc); + ret = devm_rtc_register_device(rtc); if (ret) return ret; diff --git a/drivers/rtc/rtc-pcf2127.c b/drivers/rtc/rtc-pcf2127.c index 432cd627359b3..33fa8b17b79c4 100644 --- a/drivers/rtc/rtc-pcf2127.c +++ b/drivers/rtc/rtc-pcf2127.c @@ -682,7 +682,7 @@ static int pcf2127_probe(struct device *dev, struct regmap *regmap, return ret; } - return rtc_register_device(pcf2127->rtc); + return devm_rtc_register_device(pcf2127->rtc); } #ifdef CONFIG_OF diff --git a/drivers/rtc/rtc-pcf85063.c b/drivers/rtc/rtc-pcf85063.c index c19f139e9b8db..e19cf2adbc355 100644 --- a/drivers/rtc/rtc-pcf85063.c +++ b/drivers/rtc/rtc-pcf85063.c @@ -614,7 +614,7 @@ static int pcf85063_probe(struct i2c_client *client) pcf85063_clkout_register_clk(pcf85063); #endif - return rtc_register_device(pcf85063->rtc); + return devm_rtc_register_device(pcf85063->rtc); } #ifdef CONFIG_OF diff --git a/drivers/rtc/rtc-pcf85363.c b/drivers/rtc/rtc-pcf85363.c index 23cf14ca2c96e..a574c8d15a5c3 100644 --- a/drivers/rtc/rtc-pcf85363.c +++ b/drivers/rtc/rtc-pcf85363.c @@ -418,7 +418,7 @@ static int pcf85363_probe(struct i2c_client *client, pcf85363->rtc->ops = &rtc_ops_alarm; } - ret = rtc_register_device(pcf85363->rtc); + ret = devm_rtc_register_device(pcf85363->rtc); for (i = 0; i < config->num_nvram; i++) { nvmem_cfg[i].priv = pcf85363; diff --git a/drivers/rtc/rtc-pcf8563.c b/drivers/rtc/rtc-pcf8563.c index 2dc30eafa6396..de3e6c355f2e6 100644 --- a/drivers/rtc/rtc-pcf8563.c +++ b/drivers/rtc/rtc-pcf8563.c @@ -582,7 +582,7 @@ static int pcf8563_probe(struct i2c_client *client, } } - err = rtc_register_device(pcf8563->rtc); + err = devm_rtc_register_device(pcf8563->rtc); if (err) return err; diff --git a/drivers/rtc/rtc-pic32.c b/drivers/rtc/rtc-pic32.c index 2b69467446548..7fb9145c43bd5 100644 --- a/drivers/rtc/rtc-pic32.c +++ b/drivers/rtc/rtc-pic32.c @@ -338,7 +338,7 @@ static int pic32_rtc_probe(struct platform_device *pdev) pdata->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; pdata->rtc->range_max = RTC_TIMESTAMP_END_2099; - ret = rtc_register_device(pdata->rtc); + ret = devm_rtc_register_device(pdata->rtc); if (ret) goto err_nortc; diff --git a/drivers/rtc/rtc-pl030.c b/drivers/rtc/rtc-pl030.c index ebe03eba8f5ff..5a880516f3e81 100644 --- a/drivers/rtc/rtc-pl030.c +++ b/drivers/rtc/rtc-pl030.c @@ -121,7 +121,7 @@ static int pl030_probe(struct amba_device *dev, const struct amba_id *id) if (ret) goto err_irq; - ret = rtc_register_device(rtc->rtc); + ret = devm_rtc_register_device(rtc->rtc); if (ret) goto err_reg; diff --git a/drivers/rtc/rtc-pl031.c b/drivers/rtc/rtc-pl031.c index d4b2ab7861266..224bbf0962622 100644 --- a/drivers/rtc/rtc-pl031.c +++ b/drivers/rtc/rtc-pl031.c @@ -370,7 +370,7 @@ static int pl031_probe(struct amba_device *adev, const struct amba_id *id) ldata->rtc->range_min = vendor->range_min; ldata->rtc->range_max = vendor->range_max; - ret = rtc_register_device(ldata->rtc); + ret = devm_rtc_register_device(ldata->rtc); if (ret) goto out; diff --git a/drivers/rtc/rtc-pm8xxx.c b/drivers/rtc/rtc-pm8xxx.c index b45ee2cb2c044..0d9dd6faabbaa 100644 --- a/drivers/rtc/rtc-pm8xxx.c +++ b/drivers/rtc/rtc-pm8xxx.c @@ -508,7 +508,7 @@ static int pm8xxx_rtc_probe(struct platform_device *pdev) return rc; } - return rtc_register_device(rtc_dd->rtc); + return devm_rtc_register_device(rtc_dd->rtc); } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/rtc/rtc-ps3.c b/drivers/rtc/rtc-ps3.c index f0336d691e6cf..6b098734c7155 100644 --- a/drivers/rtc/rtc-ps3.c +++ b/drivers/rtc/rtc-ps3.c @@ -56,7 +56,7 @@ static int __init ps3_rtc_probe(struct platform_device *dev) platform_set_drvdata(dev, rtc); - return rtc_register_device(rtc); + return devm_rtc_register_device(rtc); } static struct platform_driver ps3_rtc_driver = { diff --git a/drivers/rtc/rtc-r9701.c b/drivers/rtc/rtc-r9701.c index 7ceb968f0e445..60a3c3d7499b0 100644 --- a/drivers/rtc/rtc-r9701.c +++ b/drivers/rtc/rtc-r9701.c @@ -127,7 +127,7 @@ static int r9701_probe(struct spi_device *spi) rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; rtc->range_max = RTC_TIMESTAMP_END_2099; - return rtc_register_device(rtc); + return devm_rtc_register_device(rtc); } static struct spi_driver r9701_driver = { diff --git a/drivers/rtc/rtc-rc5t619.c b/drivers/rtc/rtc-rc5t619.c index dd1a20977478e..e73102a39f1b8 100644 --- a/drivers/rtc/rtc-rc5t619.c +++ b/drivers/rtc/rtc-rc5t619.c @@ -426,7 +426,7 @@ static int rc5t619_rtc_probe(struct platform_device *pdev) dev_warn(&pdev->dev, "rc5t619 interrupt is disabled\n"); } - return rtc_register_device(rtc->rtc); + return devm_rtc_register_device(rtc->rtc); } static struct platform_driver rc5t619_rtc_driver = { diff --git a/drivers/rtc/rtc-rk808.c b/drivers/rtc/rtc-rk808.c index c0334c602e88e..e920da8c08da1 100644 --- a/drivers/rtc/rtc-rk808.c +++ b/drivers/rtc/rtc-rk808.c @@ -447,7 +447,7 @@ static int rk808_rtc_probe(struct platform_device *pdev) return ret; } - return rtc_register_device(rk808_rtc->rtc); + return devm_rtc_register_device(rk808_rtc->rtc); } static struct platform_driver rk808_rtc_driver = { diff --git a/drivers/rtc/rtc-rp5c01.c b/drivers/rtc/rtc-rp5c01.c index 8bc476c0905f2..44afa6d996e74 100644 --- a/drivers/rtc/rtc-rp5c01.c +++ b/drivers/rtc/rtc-rp5c01.c @@ -259,7 +259,7 @@ static int __init rp5c01_rtc_probe(struct platform_device *dev) if (error) return error; - return rtc_register_device(rtc); + return devm_rtc_register_device(rtc); } static struct platform_driver rp5c01_rtc_driver = { diff --git a/drivers/rtc/rtc-rs5c348.c b/drivers/rtc/rtc-rs5c348.c index 47c13678449e2..fec633f80789d 100644 --- a/drivers/rtc/rtc-rs5c348.c +++ b/drivers/rtc/rtc-rs5c348.c @@ -197,7 +197,7 @@ static int rs5c348_probe(struct spi_device *spi) rtc->ops = &rs5c348_rtc_ops; - return rtc_register_device(rtc); + return devm_rtc_register_device(rtc); } static struct spi_driver rs5c348_driver = { diff --git a/drivers/rtc/rtc-rv3028.c b/drivers/rtc/rtc-rv3028.c index f788df9797504..979407a51c7a9 100644 --- a/drivers/rtc/rtc-rv3028.c +++ b/drivers/rtc/rtc-rv3028.c @@ -886,7 +886,7 @@ static int rv3028_probe(struct i2c_client *client) rv3028->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; rv3028->rtc->range_max = RTC_TIMESTAMP_END_2099; rv3028->rtc->ops = &rv3028_rtc_ops; - ret = rtc_register_device(rv3028->rtc); + ret = devm_rtc_register_device(rv3028->rtc); if (ret) return ret; diff --git a/drivers/rtc/rtc-rv3029c2.c b/drivers/rtc/rtc-rv3029c2.c index ad359b3b74b25..dc1bda62095e6 100644 --- a/drivers/rtc/rtc-rv3029c2.c +++ b/drivers/rtc/rtc-rv3029c2.c @@ -750,7 +750,7 @@ static int rv3029_probe(struct device *dev, struct regmap *regmap, int irq, rv3029->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; rv3029->rtc->range_max = RTC_TIMESTAMP_END_2079; - rc = rtc_register_device(rv3029->rtc); + rc = devm_rtc_register_device(rv3029->rtc); if (rc) return rc; diff --git a/drivers/rtc/rtc-rv3032.c b/drivers/rtc/rtc-rv3032.c index ed9cba3292e68..c9bcea7277579 100644 --- a/drivers/rtc/rtc-rv3032.c +++ b/drivers/rtc/rtc-rv3032.c @@ -885,7 +885,7 @@ static int rv3032_probe(struct i2c_client *client) rv3032->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; rv3032->rtc->range_max = RTC_TIMESTAMP_END_2099; rv3032->rtc->ops = &rv3032_rtc_ops; - ret = rtc_register_device(rv3032->rtc); + ret = devm_rtc_register_device(rv3032->rtc); if (ret) return ret; diff --git a/drivers/rtc/rtc-rv8803.c b/drivers/rtc/rtc-rv8803.c index 44e1818a751cc..d4ea6db51b263 100644 --- a/drivers/rtc/rtc-rv8803.c +++ b/drivers/rtc/rtc-rv8803.c @@ -587,7 +587,7 @@ static int rv8803_probe(struct i2c_client *client, rv8803->rtc->ops = &rv8803_rtc_ops; rv8803->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; rv8803->rtc->range_max = RTC_TIMESTAMP_END_2099; - err = rtc_register_device(rv8803->rtc); + err = devm_rtc_register_device(rv8803->rtc); if (err) return err; diff --git a/drivers/rtc/rtc-rx8010.c b/drivers/rtc/rtc-rx8010.c index dca41a2a39b23..8340ab47a0597 100644 --- a/drivers/rtc/rtc-rx8010.c +++ b/drivers/rtc/rtc-rx8010.c @@ -419,7 +419,7 @@ static int rx8010_probe(struct i2c_client *client) rx8010->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; rx8010->rtc->range_max = RTC_TIMESTAMP_END_2099; - return rtc_register_device(rx8010->rtc); + return devm_rtc_register_device(rx8010->rtc); } static struct i2c_driver rx8010_driver = { diff --git a/drivers/rtc/rtc-rx8581.c b/drivers/rtc/rtc-rx8581.c index 017f74721cc06..de109139529b4 100644 --- a/drivers/rtc/rtc-rx8581.c +++ b/drivers/rtc/rtc-rx8581.c @@ -298,7 +298,7 @@ static int rx8581_probe(struct i2c_client *client, rx8581->rtc->start_secs = 0; rx8581->rtc->set_start_time = true; - ret = rtc_register_device(rx8581->rtc); + ret = devm_rtc_register_device(rx8581->rtc); for (i = 0; i < config->num_nvram; i++) { nvmem_cfg[i].priv = rx8581; diff --git a/drivers/rtc/rtc-s35390a.c b/drivers/rtc/rtc-s35390a.c index 03672a2463569..ea15d0392bb9c 100644 --- a/drivers/rtc/rtc-s35390a.c +++ b/drivers/rtc/rtc-s35390a.c @@ -497,7 +497,7 @@ static int s35390a_probe(struct i2c_client *client, if (status1 & S35390A_FLAG_INT2) rtc_update_irq(s35390a->rtc, 1, RTC_AF); - return rtc_register_device(s35390a->rtc); + return devm_rtc_register_device(s35390a->rtc); } static struct i2c_driver s35390a_driver = { diff --git a/drivers/rtc/rtc-sa1100.c b/drivers/rtc/rtc-sa1100.c index 9ccc97cf5e095..1250887e43827 100644 --- a/drivers/rtc/rtc-sa1100.c +++ b/drivers/rtc/rtc-sa1100.c @@ -205,7 +205,7 @@ int sa1100_rtc_init(struct platform_device *pdev, struct sa1100_rtc *info) info->rtc->max_user_freq = RTC_FREQ; info->rtc->range_max = U32_MAX; - ret = rtc_register_device(info->rtc); + ret = devm_rtc_register_device(info->rtc); if (ret) { clk_disable_unprepare(info->clk); return ret; diff --git a/drivers/rtc/rtc-sc27xx.c b/drivers/rtc/rtc-sc27xx.c index a953bc0a5a5ba..187aa955b79cf 100644 --- a/drivers/rtc/rtc-sc27xx.c +++ b/drivers/rtc/rtc-sc27xx.c @@ -618,7 +618,7 @@ static int sprd_rtc_probe(struct platform_device *pdev) rtc->rtc->ops = &sprd_rtc_ops; rtc->rtc->range_min = 0; rtc->rtc->range_max = 5662310399LL; - ret = rtc_register_device(rtc->rtc); + ret = devm_rtc_register_device(rtc->rtc); if (ret) { device_init_wakeup(&pdev->dev, 0); return ret; diff --git a/drivers/rtc/rtc-sd3078.c b/drivers/rtc/rtc-sd3078.c index a7aa943c11837..f6bee69ba017c 100644 --- a/drivers/rtc/rtc-sd3078.c +++ b/drivers/rtc/rtc-sd3078.c @@ -192,7 +192,7 @@ static int sd3078_probe(struct i2c_client *client, sd3078->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; sd3078->rtc->range_max = RTC_TIMESTAMP_END_2099; - ret = rtc_register_device(sd3078->rtc); + ret = devm_rtc_register_device(sd3078->rtc); if (ret) return ret; diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c index 9167b48014a15..cd146b5741431 100644 --- a/drivers/rtc/rtc-sh.c +++ b/drivers/rtc/rtc-sh.c @@ -607,7 +607,7 @@ static int __init sh_rtc_probe(struct platform_device *pdev) rtc->rtc_dev->range_max = mktime64(2098, 12, 31, 23, 59, 59); } - ret = rtc_register_device(rtc->rtc_dev); + ret = devm_rtc_register_device(rtc->rtc_dev); if (ret) goto err_unmap; diff --git a/drivers/rtc/rtc-sirfsoc.c b/drivers/rtc/rtc-sirfsoc.c index abf19435dbad0..03a6cca23201a 100644 --- a/drivers/rtc/rtc-sirfsoc.c +++ b/drivers/rtc/rtc-sirfsoc.c @@ -356,7 +356,7 @@ static int sirfsoc_rtc_probe(struct platform_device *pdev) return err; } - return rtc_register_device(rtcdrv->rtc); + return devm_rtc_register_device(rtcdrv->rtc); } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/rtc/rtc-snvs.c b/drivers/rtc/rtc-snvs.c index a7d39a49b7486..bd929b0e7d7de 100644 --- a/drivers/rtc/rtc-snvs.c +++ b/drivers/rtc/rtc-snvs.c @@ -387,7 +387,7 @@ static int snvs_rtc_probe(struct platform_device *pdev) data->rtc->ops = &snvs_rtc_ops; data->rtc->range_max = U32_MAX; - return rtc_register_device(data->rtc); + return devm_rtc_register_device(data->rtc); } static int __maybe_unused snvs_rtc_suspend_noirq(struct device *dev) diff --git a/drivers/rtc/rtc-st-lpc.c b/drivers/rtc/rtc-st-lpc.c index 0c65448b85eef..bdb20f63254e2 100644 --- a/drivers/rtc/rtc-st-lpc.c +++ b/drivers/rtc/rtc-st-lpc.c @@ -250,7 +250,7 @@ static int st_rtc_probe(struct platform_device *pdev) rtc->rtc_dev->range_max = U64_MAX; do_div(rtc->rtc_dev->range_max, rtc->clkrate); - ret = rtc_register_device(rtc->rtc_dev); + ret = devm_rtc_register_device(rtc->rtc_dev); if (ret) { clk_disable_unprepare(rtc->clk); return ret; diff --git a/drivers/rtc/rtc-starfire.c b/drivers/rtc/rtc-starfire.c index 37a26279e1072..fbd1ed41cbf1e 100644 --- a/drivers/rtc/rtc-starfire.c +++ b/drivers/rtc/rtc-starfire.c @@ -48,7 +48,7 @@ static int __init starfire_rtc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, rtc); - return rtc_register_device(rtc); + return devm_rtc_register_device(rtc); } static struct platform_driver starfire_rtc_driver = { diff --git a/drivers/rtc/rtc-stk17ta8.c b/drivers/rtc/rtc-stk17ta8.c index ad616bce7bca3..7cb6be1b78151 100644 --- a/drivers/rtc/rtc-stk17ta8.c +++ b/drivers/rtc/rtc-stk17ta8.c @@ -317,7 +317,7 @@ static int stk17ta8_rtc_probe(struct platform_device *pdev) if (ret) return ret; - return rtc_register_device(pdata->rtc); + return devm_rtc_register_device(pdata->rtc); } /* work with hotplug and coldplug */ diff --git a/drivers/rtc/rtc-stmp3xxx.c b/drivers/rtc/rtc-stmp3xxx.c index 0a969af80af7c..40c0f7ed36e06 100644 --- a/drivers/rtc/rtc-stmp3xxx.c +++ b/drivers/rtc/rtc-stmp3xxx.c @@ -366,7 +366,7 @@ static int stmp3xxx_rtc_probe(struct platform_device *pdev) rtc_data->rtc->ops = &stmp3xxx_rtc_ops; rtc_data->rtc->range_max = U32_MAX; - err = rtc_register_device(rtc_data->rtc); + err = devm_rtc_register_device(rtc_data->rtc); if (err) return err; diff --git a/drivers/rtc/rtc-sun4v.c b/drivers/rtc/rtc-sun4v.c index 036463dfa103a..a86e27de8c06f 100644 --- a/drivers/rtc/rtc-sun4v.c +++ b/drivers/rtc/rtc-sun4v.c @@ -86,7 +86,7 @@ static int __init sun4v_rtc_probe(struct platform_device *pdev) rtc->range_max = U64_MAX; platform_set_drvdata(pdev, rtc); - return rtc_register_device(rtc); + return devm_rtc_register_device(rtc); } static struct platform_driver sun4v_rtc_driver = { diff --git a/drivers/rtc/rtc-sun6i.c b/drivers/rtc/rtc-sun6i.c index f2818cdd11d82..adec1b14a8deb 100644 --- a/drivers/rtc/rtc-sun6i.c +++ b/drivers/rtc/rtc-sun6i.c @@ -726,7 +726,7 @@ static int sun6i_rtc_probe(struct platform_device *pdev) chip->rtc->ops = &sun6i_rtc_ops; chip->rtc->range_max = 2019686399LL; /* 2033-12-31 23:59:59 */ - ret = rtc_register_device(chip->rtc); + ret = devm_rtc_register_device(chip->rtc); if (ret) return ret; diff --git a/drivers/rtc/rtc-sunxi.c b/drivers/rtc/rtc-sunxi.c index f5d7f44550ce9..5d019e3a835ad 100644 --- a/drivers/rtc/rtc-sunxi.c +++ b/drivers/rtc/rtc-sunxi.c @@ -470,7 +470,7 @@ static int sunxi_rtc_probe(struct platform_device *pdev) chip->rtc->ops = &sunxi_rtc_ops; - return rtc_register_device(chip->rtc); + return devm_rtc_register_device(chip->rtc); } static struct platform_driver sunxi_rtc_driver = { diff --git a/drivers/rtc/rtc-tegra.c b/drivers/rtc/rtc-tegra.c index 7fbb1741692f6..8925015cc698c 100644 --- a/drivers/rtc/rtc-tegra.c +++ b/drivers/rtc/rtc-tegra.c @@ -329,7 +329,7 @@ static int tegra_rtc_probe(struct platform_device *pdev) goto disable_clk; } - ret = rtc_register_device(info->rtc); + ret = devm_rtc_register_device(info->rtc); if (ret) goto disable_clk; diff --git a/drivers/rtc/rtc-test.c b/drivers/rtc/rtc-test.c index 74b3a0603b738..b092a16485139 100644 --- a/drivers/rtc/rtc-test.c +++ b/drivers/rtc/rtc-test.c @@ -139,7 +139,7 @@ static int test_probe(struct platform_device *plat_dev) timer_setup(&rtd->alarm, test_rtc_alarm_handler, 0); rtd->alarm.expires = 0; - return rtc_register_device(rtd->rtc); + return devm_rtc_register_device(rtd->rtc); } static struct platform_driver test_driver = { diff --git a/drivers/rtc/rtc-tps6586x.c b/drivers/rtc/rtc-tps6586x.c index e39af2d670510..a980337c30658 100644 --- a/drivers/rtc/rtc-tps6586x.c +++ b/drivers/rtc/rtc-tps6586x.c @@ -280,7 +280,7 @@ static int tps6586x_rtc_probe(struct platform_device *pdev) goto fail_rtc_register; } - ret = rtc_register_device(rtc->rtc); + ret = devm_rtc_register_device(rtc->rtc); if (ret) goto fail_rtc_register; diff --git a/drivers/rtc/rtc-tps65910.c b/drivers/rtc/rtc-tps65910.c index e3840386f430c..2d87b62826a83 100644 --- a/drivers/rtc/rtc-tps65910.c +++ b/drivers/rtc/rtc-tps65910.c @@ -434,7 +434,7 @@ static int tps65910_rtc_probe(struct platform_device *pdev) tps_rtc->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; tps_rtc->rtc->range_max = RTC_TIMESTAMP_END_2099; - return rtc_register_device(tps_rtc->rtc); + return devm_rtc_register_device(tps_rtc->rtc); } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/rtc/rtc-tx4939.c b/drivers/rtc/rtc-tx4939.c index 11f46272bad34..c3309db5448d8 100644 --- a/drivers/rtc/rtc-tx4939.c +++ b/drivers/rtc/rtc-tx4939.c @@ -275,7 +275,7 @@ static int __init tx4939_rtc_probe(struct platform_device *pdev) if (ret) return ret; - return rtc_register_device(rtc); + return devm_rtc_register_device(rtc); } static int __exit tx4939_rtc_remove(struct platform_device *pdev) diff --git a/drivers/rtc/rtc-vr41xx.c b/drivers/rtc/rtc-vr41xx.c index c3671043ace76..5a9f9ad86d32f 100644 --- a/drivers/rtc/rtc-vr41xx.c +++ b/drivers/rtc/rtc-vr41xx.c @@ -335,7 +335,7 @@ static int rtc_probe(struct platform_device *pdev) dev_info(&pdev->dev, "Real Time Clock of NEC VR4100 series\n"); - retval = rtc_register_device(rtc); + retval = devm_rtc_register_device(rtc); if (retval) goto err_iounmap_all; diff --git a/drivers/rtc/rtc-vt8500.c b/drivers/rtc/rtc-vt8500.c index e2588625025fd..197b649cd6293 100644 --- a/drivers/rtc/rtc-vt8500.c +++ b/drivers/rtc/rtc-vt8500.c @@ -232,7 +232,7 @@ static int vt8500_rtc_probe(struct platform_device *pdev) return ret; } - return rtc_register_device(vt8500_rtc->rtc); + return devm_rtc_register_device(vt8500_rtc->rtc); } static int vt8500_rtc_remove(struct platform_device *pdev) diff --git a/drivers/rtc/rtc-wilco-ec.c b/drivers/rtc/rtc-wilco-ec.c index ff46066a68a4c..2a205a6464521 100644 --- a/drivers/rtc/rtc-wilco-ec.c +++ b/drivers/rtc/rtc-wilco-ec.c @@ -176,7 +176,7 @@ static int wilco_ec_rtc_probe(struct platform_device *pdev) rtc->range_max = RTC_TIMESTAMP_END_2099; rtc->owner = THIS_MODULE; - return rtc_register_device(rtc); + return devm_rtc_register_device(rtc); } static struct platform_driver wilco_ec_rtc_driver = { diff --git a/drivers/rtc/rtc-wm831x.c b/drivers/rtc/rtc-wm831x.c index ccef887d26901..640833e210575 100644 --- a/drivers/rtc/rtc-wm831x.c +++ b/drivers/rtc/rtc-wm831x.c @@ -429,7 +429,7 @@ static int wm831x_rtc_probe(struct platform_device *pdev) wm831x_rtc->rtc->ops = &wm831x_rtc_ops; wm831x_rtc->rtc->range_max = U32_MAX; - ret = rtc_register_device(wm831x_rtc->rtc); + ret = devm_rtc_register_device(wm831x_rtc->rtc); if (ret) return ret; diff --git a/drivers/rtc/rtc-xgene.c b/drivers/rtc/rtc-xgene.c index 96db441f92b3e..cf68a9b1c9ebb 100644 --- a/drivers/rtc/rtc-xgene.c +++ b/drivers/rtc/rtc-xgene.c @@ -185,7 +185,7 @@ static int xgene_rtc_probe(struct platform_device *pdev) pdata->rtc->ops = &xgene_rtc_ops; pdata->rtc->range_max = U32_MAX; - ret = rtc_register_device(pdata->rtc); + ret = devm_rtc_register_device(pdata->rtc); if (ret) { clk_disable_unprepare(pdata->clk); return ret; diff --git a/drivers/rtc/rtc-zynqmp.c b/drivers/rtc/rtc-zynqmp.c index 4b1077e2f826e..f440bb52be927 100644 --- a/drivers/rtc/rtc-zynqmp.c +++ b/drivers/rtc/rtc-zynqmp.c @@ -264,7 +264,7 @@ static int xlnx_rtc_probe(struct platform_device *pdev) device_init_wakeup(&pdev->dev, 1); - return rtc_register_device(xrtcdev->rtc); + return devm_rtc_register_device(xrtcdev->rtc); } static int xlnx_rtc_remove(struct platform_device *pdev) diff --git a/drivers/rtc/sysfs.c b/drivers/rtc/sysfs.c index 950fac0d41ff9..8a957d31a1a4a 100644 --- a/drivers/rtc/sysfs.c +++ b/drivers/rtc/sysfs.c @@ -317,8 +317,6 @@ int rtc_add_groups(struct rtc_device *rtc, const struct attribute_group **grps) size_t old_cnt = 0, add_cnt = 0, new_cnt; const struct attribute_group **groups, **old; - if (rtc->registered) - return -EINVAL; if (!grps) return -EINVAL; diff --git a/include/linux/rtc.h b/include/linux/rtc.h index cbca651d8ca4d..55e7beed066c7 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -118,8 +118,6 @@ struct rtc_device { */ long set_offset_nsec; - bool registered; - time64_t range_min; timeu64_t range_max; time64_t start_secs; @@ -157,7 +155,7 @@ extern struct rtc_device *devm_rtc_device_register(struct device *dev, const struct rtc_class_ops *ops, struct module *owner); struct rtc_device *devm_rtc_allocate_device(struct device *dev); -int __rtc_register_device(struct module *owner, struct rtc_device *rtc); +int __devm_rtc_register_device(struct module *owner, struct rtc_device *rtc); extern int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm); extern int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm); @@ -234,8 +232,8 @@ static inline bool rtc_tv_nsec_ok(s64 set_offset_nsec, return false; } -#define rtc_register_device(device) \ - __rtc_register_device(THIS_MODULE, device) +#define devm_rtc_register_device(device) \ + __devm_rtc_register_device(THIS_MODULE, device) #ifdef CONFIG_RTC_HCTOSYS_DEVICE extern int rtc_hctosys_ret; -- GitLab From 1bfc485b73579bff5326ac481fd9be7e24a5d5d1 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 9 Nov 2020 17:34:09 +0100 Subject: [PATCH 0231/1894] rtc: shrink devm_rtc_allocate_device() We don't need to use devres_alloc() & devres_add() manually if all we want to manage is a single pointer. We can shrink the code by using devm_add_action_or_reset() instead. The number of allocations stays the same. Signed-off-by: Bartosz Golaszewski Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201109163409.24301-9-brgl@bgdev.pl --- drivers/rtc/class.c | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c index b8a34ee039ad9..a1b3711aaf017 100644 --- a/drivers/rtc/class.c +++ b/drivers/rtc/class.c @@ -337,48 +337,37 @@ static void devm_rtc_unregister_device(void *data) put_device(&rtc->dev); } -static void devm_rtc_release_device(struct device *dev, void *res) +static void devm_rtc_release_device(void *res) { - struct rtc_device *rtc = *(struct rtc_device **)res; + struct rtc_device *rtc = res; put_device(&rtc->dev); } struct rtc_device *devm_rtc_allocate_device(struct device *dev) { - struct rtc_device **ptr, *rtc; + struct rtc_device *rtc; int id, err; id = rtc_device_get_id(dev); if (id < 0) return ERR_PTR(id); - ptr = devres_alloc(devm_rtc_release_device, sizeof(*ptr), GFP_KERNEL); - if (!ptr) { - err = -ENOMEM; - goto exit_ida; - } - rtc = rtc_allocate_device(); if (!rtc) { - err = -ENOMEM; - goto exit_devres; + ida_simple_remove(&rtc_ida, id); + return ERR_PTR(-ENOMEM); } - *ptr = rtc; - devres_add(dev, ptr); - rtc->id = id; rtc->dev.parent = dev; dev_set_name(&rtc->dev, "rtc%d", id); - return rtc; + err = devm_add_action_or_reset(dev, devm_rtc_release_device, rtc); + if (err) + return ERR_PTR(err); -exit_devres: - devres_free(ptr); -exit_ida: - ida_simple_remove(&rtc_ida, id); - return ERR_PTR(err); + return rtc; } EXPORT_SYMBOL_GPL(devm_rtc_allocate_device); -- GitLab From 0d6d7a390b32ef23d957960d3bb8586a49d6af7c Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 10 Nov 2020 10:42:05 +0100 Subject: [PATCH 0232/1894] rtc: destroy mutex when releasing the device Not destroying mutexes doesn't lead to resource leak but it's the correct thing to do for mutex debugging accounting. Signed-off-by: Bartosz Golaszewski Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201110094205.8972-1-brgl@bgdev.pl --- drivers/rtc/class.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c index a1b3711aaf017..e6b44b7c4ad3a 100644 --- a/drivers/rtc/class.c +++ b/drivers/rtc/class.c @@ -28,6 +28,7 @@ static void rtc_device_release(struct device *dev) struct rtc_device *rtc = to_rtc_device(dev); ida_simple_remove(&rtc_ida, rtc->id); + mutex_destroy(&rtc->ops_lock); kfree(rtc); } -- GitLab From 886144058d53db85b269256922e71b5462c53c60 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Wed, 18 Nov 2020 01:27:45 +0100 Subject: [PATCH 0233/1894] rtc: pcf8523: switch to devm_rtc_allocate_device Switch to devm_rtc_allocate_device/devm_rtc_register_device, this allows for further improvement of the driver. Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201118002747.1346504-1-alexandre.belloni@bootlin.com --- drivers/rtc/rtc-pcf8523.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/rtc/rtc-pcf8523.c b/drivers/rtc/rtc-pcf8523.c index 57d351dfe2723..b525998cd70e8 100644 --- a/drivers/rtc/rtc-pcf8523.c +++ b/drivers/rtc/rtc-pcf8523.c @@ -356,12 +356,13 @@ static int pcf8523_probe(struct i2c_client *client, if (err < 0) return err; - rtc = devm_rtc_device_register(&client->dev, DRIVER_NAME, - &pcf8523_rtc_ops, THIS_MODULE); + rtc = devm_rtc_allocate_device(&client->dev); if (IS_ERR(rtc)) return PTR_ERR(rtc); - return 0; + rtc->ops = &pcf8523_rtc_ops; + + return devm_rtc_register_device(rtc); } static const struct i2c_device_id pcf8523_id[] = { -- GitLab From 219cc0f9189759cf6e22a935c20df3654331037f Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Wed, 18 Nov 2020 01:27:46 +0100 Subject: [PATCH 0234/1894] rtc: pcf8523: set range Set the th RTC range, it is a classic BCD RTC, considering 00 as a leap year. Let the core handle range checking. Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201118002747.1346504-2-alexandre.belloni@bootlin.com --- drivers/rtc/rtc-pcf8523.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/drivers/rtc/rtc-pcf8523.c b/drivers/rtc/rtc-pcf8523.c index b525998cd70e8..d5f913cb2ec9f 100644 --- a/drivers/rtc/rtc-pcf8523.c +++ b/drivers/rtc/rtc-pcf8523.c @@ -226,17 +226,6 @@ static int pcf8523_rtc_set_time(struct device *dev, struct rtc_time *tm) u8 regs[8]; int err; - /* - * The hardware can only store values between 0 and 99 in it's YEAR - * register (with 99 overflowing to 0 on increment). - * After 2100-02-28 we could start interpreting the year to be in the - * interval [2100, 2199], but there is no path to switch in a smooth way - * because the chip handles YEAR=0x00 (and the out-of-spec - * YEAR=0xa0) as a leap year, but 2100 isn't. - */ - if (tm->tm_year < 100 || tm->tm_year >= 200) - return -EINVAL; - err = pcf8523_stop_rtc(client); if (err < 0) return err; @@ -361,6 +350,8 @@ static int pcf8523_probe(struct i2c_client *client, return PTR_ERR(rtc); rtc->ops = &pcf8523_rtc_ops; + rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; + rtc->range_max = RTC_TIMESTAMP_END_2099; return devm_rtc_register_device(rtc); } -- GitLab From 673536cc5f21b34785e386dd05510659bf6d92db Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Wed, 18 Nov 2020 01:27:47 +0100 Subject: [PATCH 0235/1894] rtc: pcf8523: use BIT Use the BIT macro to define register bits. Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201118002747.1346504-3-alexandre.belloni@bootlin.com --- drivers/rtc/rtc-pcf8523.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/rtc/rtc-pcf8523.c b/drivers/rtc/rtc-pcf8523.c index d5f913cb2ec9f..5e1e7b2a8c9a1 100644 --- a/drivers/rtc/rtc-pcf8523.c +++ b/drivers/rtc/rtc-pcf8523.c @@ -12,18 +12,18 @@ #define DRIVER_NAME "rtc-pcf8523" #define REG_CONTROL1 0x00 -#define REG_CONTROL1_CAP_SEL (1 << 7) -#define REG_CONTROL1_STOP (1 << 5) +#define REG_CONTROL1_CAP_SEL BIT(7) +#define REG_CONTROL1_STOP BIT(5) #define REG_CONTROL3 0x02 -#define REG_CONTROL3_PM_BLD (1 << 7) /* battery low detection disabled */ -#define REG_CONTROL3_PM_VDD (1 << 6) /* switch-over disabled */ -#define REG_CONTROL3_PM_DSM (1 << 5) /* direct switching mode */ +#define REG_CONTROL3_PM_BLD BIT(7) /* battery low detection disabled */ +#define REG_CONTROL3_PM_VDD BIT(6) /* switch-over disabled */ +#define REG_CONTROL3_PM_DSM BIT(5) /* direct switching mode */ #define REG_CONTROL3_PM_MASK 0xe0 -#define REG_CONTROL3_BLF (1 << 2) /* battery low bit, read-only */ +#define REG_CONTROL3_BLF BIT(2) /* battery low bit, read-only */ #define REG_SECONDS 0x03 -#define REG_SECONDS_OS (1 << 7) +#define REG_SECONDS_OS BIT(7) #define REG_MINUTES 0x04 #define REG_HOURS 0x05 -- GitLab From 33dd3574f5fef57c2c6caccf98925d63aa2a8d09 Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Tue, 27 Oct 2020 10:59:43 -0700 Subject: [PATCH 0236/1894] kvm: x86/mmu: Add existing trace points to TDP MMU The TDP MMU was initially implemented without some of the usual tracepoints found in mmu.c. Correct this discrepancy by adding the missing trace points to the TDP MMU. Tested: ran the demand paging selftest on an Intel Skylake machine with all the trace points used by the TDP MMU enabled and observed them firing with expected values. This patch can be viewed in Gerrit at: https://linux-review.googlesource.com/c/virt/kvm/kvm/+/3812 Signed-off-by: Ben Gardon Message-Id: <20201027175944.1183301-1-bgardon@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/tdp_mmu.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index cffa51c6049ee..d6354f3795549 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -7,6 +7,8 @@ #include "tdp_mmu.h" #include "spte.h" +#include + #ifdef CONFIG_X86_64 static bool __read_mostly tdp_mmu_enabled = false; module_param_named(tdp_mmu, tdp_mmu_enabled, bool, 0644); @@ -108,6 +110,8 @@ static struct kvm_mmu_page *alloc_tdp_mmu_page(struct kvm_vcpu *vcpu, gfn_t gfn, sp->gfn = gfn; sp->tdp_mmu_page = true; + trace_kvm_mmu_get_page(sp, true); + return sp; } @@ -278,6 +282,8 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, pt = spte_to_child_pt(old_spte, level); sp = sptep_to_sp(pt); + trace_kvm_mmu_prepare_zap_page(sp); + list_del(&sp->link); if (sp->lpage_disallowed) @@ -480,11 +486,13 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, int write, if (unlikely(is_noslot_pfn(pfn))) { new_spte = make_mmio_spte(vcpu, iter->gfn, ACC_ALL); trace_mark_mmio_spte(iter->sptep, iter->gfn, new_spte); - } else + } else { make_spte_ret = make_spte(vcpu, ACC_ALL, iter->level, iter->gfn, pfn, iter->old_spte, prefault, true, map_writable, !shadow_accessed_mask, &new_spte); + trace_kvm_mmu_set_spte(iter->level, iter->gfn, iter->sptep); + } if (new_spte == iter->old_spte) ret = RET_PF_SPURIOUS; @@ -698,6 +706,8 @@ static int age_gfn_range(struct kvm *kvm, struct kvm_memory_slot *slot, tdp_mmu_set_spte_no_acc_track(kvm, &iter, new_spte); young = 1; + + trace_kvm_age_page(iter.gfn, iter.level, slot, young); } return young; -- GitLab From b9a98c3437e353b269ebf3567acc5c3dc757c7a5 Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Tue, 27 Oct 2020 10:59:44 -0700 Subject: [PATCH 0237/1894] kvm: x86/mmu: Add TDP MMU SPTE changed trace point Add an extremely verbose trace point to the TDP MMU to log all SPTE changes, regardless of callstack / motivation. This is useful when a complete picture of the paging structure is needed or a change cannot be explained with the other, existing trace points. Tested: ran the demand paging selftest on an Intel Skylake machine with all the trace points used by the TDP MMU enabled and observed them firing with expected values. This patch can be viewed in Gerrit at: https://linux-review.googlesource.com/c/virt/kvm/kvm/+/3813 Signed-off-by: Ben Gardon Message-Id: <20201027175944.1183301-2-bgardon@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmutrace.h | 29 +++++++++++++++++++++++++++++ arch/x86/kvm/mmu/tdp_mmu.c | 2 ++ 2 files changed, 31 insertions(+) diff --git a/arch/x86/kvm/mmu/mmutrace.h b/arch/x86/kvm/mmu/mmutrace.h index 213699b27b448..e798489b56b55 100644 --- a/arch/x86/kvm/mmu/mmutrace.h +++ b/arch/x86/kvm/mmu/mmutrace.h @@ -381,6 +381,35 @@ TRACE_EVENT( ) ); +TRACE_EVENT( + kvm_tdp_mmu_spte_changed, + TP_PROTO(int as_id, gfn_t gfn, int level, u64 old_spte, u64 new_spte), + TP_ARGS(as_id, gfn, level, old_spte, new_spte), + + TP_STRUCT__entry( + __field(u64, gfn) + __field(u64, old_spte) + __field(u64, new_spte) + /* Level cannot be larger than 5 on x86, so it fits in a u8. */ + __field(u8, level) + /* as_id can only be 0 or 1 x86, so it fits in a u8. */ + __field(u8, as_id) + ), + + TP_fast_assign( + __entry->gfn = gfn; + __entry->old_spte = old_spte; + __entry->new_spte = new_spte; + __entry->level = level; + __entry->as_id = as_id; + ), + + TP_printk("as id %d gfn %llx level %d old_spte %llx new_spte %llx", + __entry->as_id, __entry->gfn, __entry->level, + __entry->old_spte, __entry->new_spte + ) +); + #endif /* _TRACE_KVMMMU_H */ #undef TRACE_INCLUDE_PATH diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index d6354f3795549..75db27fda8f34 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -248,6 +248,8 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, if (old_spte == new_spte) return; + trace_kvm_tdp_mmu_spte_changed(as_id, gfn, level, old_spte, new_spte); + /* * The only times a SPTE should be changed from a non-present to * non-present state is when an MMIO entry is installed/modified/ -- GitLab From 6636b6dcc3db2258cd0585b8078c1c225c4b6dde Mon Sep 17 00:00:00 2001 From: Jianyong Wu Date: Wed, 23 Sep 2020 22:11:46 +0800 Subject: [PATCH 0238/1894] 9p: add refcount to p9_fid struct Fix race issue in fid contention. Eric's and Greg's patch offer a mechanism to fix open-unlink-f*syscall bug in 9p. But there is race issue in fid parallel accesses. As Greg's patch stores all of fids from opened files into according inode, so all the lookup fid ops can retrieve fid from inode preferentially. But there is no mechanism to handle the fid contention issue. For example, there are two threads get the same fid in the same time and one of them clunk the fid before the other thread ready to discard the fid. In this scenario, it will lead to some fatal problems, even kernel core dump. I introduce a mechanism to fix this race issue. A counter field introduced into p9_fid struct to store the reference counter to the fid. When a fid is allocated from the inode or dentry, the counter will increase, and will decrease at the end of its occupation. It is guaranteed that the fid won't be clunked before the reference counter go down to 0, then we can avoid the clunked fid to be used. tests: race issue test from the old test case: for file in {01..50}; do touch f.${file}; done seq 1 1000 | xargs -n 1 -P 50 -I{} cat f.* > /dev/null open-unlink-f*syscall test: I have tested for f*syscall include: ftruncate fstat fchown fchmod faccessat. Link: http://lkml.kernel.org/r/20200923141146.90046-5-jianyong.wu@arm.com Fixes: 478ba09edc1f ("fs/9p: search open fids first") Signed-off-by: Jianyong Wu Signed-off-by: Dominique Martinet --- fs/9p/fid.c | 22 ++++++++++++++++++---- fs/9p/fid.h | 10 +++++++++- fs/9p/vfs_dentry.c | 2 ++ fs/9p/vfs_dir.c | 9 +++++---- fs/9p/vfs_inode.c | 37 +++++++++++++++++++++++++++++-------- fs/9p/vfs_inode_dotl.c | 34 ++++++++++++++++++++++++++++------ fs/9p/vfs_super.c | 1 + fs/9p/xattr.c | 16 +++++++++++++--- include/net/9p/client.h | 6 ++++++ net/9p/client.c | 9 +++++---- 10 files changed, 116 insertions(+), 30 deletions(-) diff --git a/fs/9p/fid.c b/fs/9p/fid.c index 0b23b0fe6c515..89643dabcdaef 100644 --- a/fs/9p/fid.c +++ b/fs/9p/fid.c @@ -28,6 +28,7 @@ static inline void __add_fid(struct dentry *dentry, struct p9_fid *fid) { + atomic_set(&fid->count, 1); hlist_add_head(&fid->dlist, (struct hlist_head *)&dentry->d_fsdata); } @@ -60,6 +61,8 @@ static struct p9_fid *v9fs_fid_find_inode(struct inode *inode, kuid_t uid) break; } } + if (ret && !IS_ERR(ret)) + atomic_inc(&ret->count); spin_unlock(&inode->i_lock); return ret; } @@ -74,6 +77,7 @@ static struct p9_fid *v9fs_fid_find_inode(struct inode *inode, kuid_t uid) void v9fs_open_fid_add(struct inode *inode, struct p9_fid *fid) { spin_lock(&inode->i_lock); + atomic_set(&fid->count, 1); hlist_add_head(&fid->ilist, (struct hlist_head *)&inode->i_private); spin_unlock(&inode->i_lock); } @@ -106,6 +110,7 @@ static struct p9_fid *v9fs_fid_find(struct dentry *dentry, kuid_t uid, int any) hlist_for_each_entry(fid, h, dlist) { if (any || uid_eq(fid->uid, uid)) { ret = fid; + atomic_inc(&ret->count); break; } } @@ -167,7 +172,10 @@ static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry, fid = v9fs_fid_find(ds, uid, any); if (fid) { /* Found the parent fid do a lookup with that */ - fid = p9_client_walk(fid, 1, &dentry->d_name.name, 1); + struct p9_fid *ofid = fid; + + fid = p9_client_walk(ofid, 1, &dentry->d_name.name, 1); + p9_client_clunk(ofid); goto fid_out; } up_read(&v9ses->rename_sem); @@ -192,8 +200,10 @@ static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry, v9fs_fid_add(dentry->d_sb->s_root, fid); } /* If we are root ourself just return that */ - if (dentry->d_sb->s_root == dentry) + if (dentry->d_sb->s_root == dentry) { + atomic_inc(&fid->count); return fid; + } /* * Do a multipath walk with attached root. * When walking parent we need to make sure we @@ -240,6 +250,7 @@ fid_out: fid = ERR_PTR(-ENOENT); } else { __add_fid(dentry, fid); + atomic_inc(&fid->count); spin_unlock(&dentry->d_lock); } } @@ -290,11 +301,14 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry) struct p9_fid *v9fs_writeback_fid(struct dentry *dentry) { int err; - struct p9_fid *fid; + struct p9_fid *fid, *ofid; - fid = clone_fid(v9fs_fid_lookup_with_uid(dentry, GLOBAL_ROOT_UID, 0)); + ofid = v9fs_fid_lookup_with_uid(dentry, GLOBAL_ROOT_UID, 0); + if (ofid && !IS_ERR(ofid)) + fid = clone_fid(ofid); if (IS_ERR(fid)) goto error_out; + p9_client_clunk(ofid); /* * writeback fid will only be used to write back the * dirty pages. We always request for the open fid in read-write diff --git a/fs/9p/fid.h b/fs/9p/fid.h index dfa11df028182..1fed965467282 100644 --- a/fs/9p/fid.h +++ b/fs/9p/fid.h @@ -22,6 +22,14 @@ static inline struct p9_fid *clone_fid(struct p9_fid *fid) } static inline struct p9_fid *v9fs_fid_clone(struct dentry *dentry) { - return clone_fid(v9fs_fid_lookup(dentry)); + struct p9_fid *fid, *nfid; + + fid = v9fs_fid_lookup(dentry); + if (!fid || IS_ERR(fid)) + return fid; + + nfid = p9_client_walk(fid, 0, NULL, 1); + p9_client_clunk(fid); + return nfid; } #endif diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c index 7d6f69aefd45d..4b4292123b3d1 100644 --- a/fs/9p/vfs_dentry.c +++ b/fs/9p/vfs_dentry.c @@ -85,6 +85,8 @@ static int v9fs_lookup_revalidate(struct dentry *dentry, unsigned int flags) retval = v9fs_refresh_inode_dotl(fid, inode); else retval = v9fs_refresh_inode(fid, inode); + p9_client_clunk(fid); + if (retval == -ENOENT) return 0; if (retval < 0) diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index d82d8a346f866..b6a5a0be444d3 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c @@ -210,11 +210,12 @@ int v9fs_dir_release(struct inode *inode, struct file *filp) fid = filp->private_data; p9_debug(P9_DEBUG_VFS, "inode: %p filp: %p fid: %d\n", inode, filp, fid ? fid->fid : -1); - spin_lock(&inode->i_lock); - hlist_del(&fid->ilist); - spin_unlock(&inode->i_lock); - if (fid) + if (fid) { + spin_lock(&inode->i_lock); + hlist_del(&fid->ilist); + spin_unlock(&inode->i_lock); p9_client_clunk(fid); + } return 0; } diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 6b243ffcbcf06..4a937fac1acb0 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -551,6 +551,7 @@ static int v9fs_remove(struct inode *dir, struct dentry *dentry, int flags) if (v9fs_proto_dotl(v9ses)) retval = p9_client_unlinkat(dfid, dentry->d_name.name, v9fs_at_to_dotl_flags(flags)); + p9_client_clunk(dfid); if (retval == -EOPNOTSUPP) { /* Try the one based on path */ v9fid = v9fs_fid_clone(dentry); @@ -595,14 +596,12 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, { int err; const unsigned char *name; - struct p9_fid *dfid, *ofid, *fid; + struct p9_fid *dfid, *ofid = NULL, *fid = NULL; struct inode *inode; p9_debug(P9_DEBUG_VFS, "name %pd\n", dentry); err = 0; - ofid = NULL; - fid = NULL; name = dentry->d_name.name; dfid = v9fs_parent_fid(dentry); if (IS_ERR(dfid)) { @@ -616,12 +615,14 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, if (IS_ERR(ofid)) { err = PTR_ERR(ofid); p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); + p9_client_clunk(dfid); return ERR_PTR(err); } err = p9_client_fcreate(ofid, name, perm, mode, extension); if (err < 0) { p9_debug(P9_DEBUG_VFS, "p9_client_fcreate failed %d\n", err); + p9_client_clunk(dfid); goto error; } @@ -633,6 +634,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); fid = NULL; + p9_client_clunk(dfid); goto error; } /* @@ -643,11 +645,13 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, err = PTR_ERR(inode); p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n", err); + p9_client_clunk(dfid); goto error; } v9fs_fid_add(dentry, fid); d_instantiate(dentry, inode); } + p9_client_clunk(dfid); return ofid; error: if (ofid) @@ -760,6 +764,7 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, */ name = dentry->d_name.name; fid = p9_client_walk(dfid, 1, &name, 1); + p9_client_clunk(dfid); if (fid == ERR_PTR(-ENOENT)) inode = NULL; else if (IS_ERR(fid)) @@ -910,7 +915,7 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *old_inode; struct inode *new_inode; struct v9fs_session_info *v9ses; - struct p9_fid *oldfid; + struct p9_fid *oldfid, *dfid; struct p9_fid *olddirfid; struct p9_fid *newdirfid; struct p9_wstat wstat; @@ -927,13 +932,20 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (IS_ERR(oldfid)) return PTR_ERR(oldfid); - olddirfid = clone_fid(v9fs_parent_fid(old_dentry)); + dfid = v9fs_parent_fid(old_dentry); + olddirfid = clone_fid(dfid); + if (dfid && !IS_ERR(dfid)) + p9_client_clunk(dfid); + if (IS_ERR(olddirfid)) { retval = PTR_ERR(olddirfid); goto done; } - newdirfid = clone_fid(v9fs_parent_fid(new_dentry)); + dfid = v9fs_parent_fid(new_dentry); + newdirfid = clone_fid(dfid); + p9_client_clunk(dfid); + if (IS_ERR(newdirfid)) { retval = PTR_ERR(newdirfid); goto clunk_olddir; @@ -990,6 +1002,7 @@ clunk_olddir: p9_client_clunk(olddirfid); done: + p9_client_clunk(oldfid); return retval; } @@ -1022,6 +1035,7 @@ v9fs_vfs_getattr(const struct path *path, struct kstat *stat, return PTR_ERR(fid); st = p9_client_stat(fid); + p9_client_clunk(fid); if (IS_ERR(st)) return PTR_ERR(st); @@ -1042,7 +1056,7 @@ v9fs_vfs_getattr(const struct path *path, struct kstat *stat, static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr) { - int retval; + int retval, use_dentry = 0; struct v9fs_session_info *v9ses; struct p9_fid *fid = NULL; struct p9_wstat wstat; @@ -1058,8 +1072,10 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr) fid = iattr->ia_file->private_data; WARN_ON(!fid); } - if (!fid) + if (!fid) { fid = v9fs_fid_lookup(dentry); + use_dentry = 1; + } if(IS_ERR(fid)) return PTR_ERR(fid); @@ -1089,6 +1105,10 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr) filemap_write_and_wait(d_inode(dentry)->i_mapping); retval = p9_client_wstat(fid, &wstat); + + if (use_dentry) + p9_client_clunk(fid); + if (retval < 0) return retval; @@ -1213,6 +1233,7 @@ static const char *v9fs_vfs_get_link(struct dentry *dentry, return ERR_PTR(-EBADF); st = p9_client_stat(fid); + p9_client_clunk(fid); if (IS_ERR(st)) return ERR_CAST(st); diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 08f2e089fb0ec..823c2eb5f1bf6 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -296,6 +296,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, /* instantiate inode and assign the unopened fid to the dentry */ fid = p9_client_walk(dfid, 1, &name, 1); + p9_client_clunk(dfid); if (IS_ERR(fid)) { err = PTR_ERR(fid); p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); @@ -408,7 +409,6 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, err = p9_client_mkdir_dotl(dfid, name, mode, gid, &qid); if (err < 0) goto error; - fid = p9_client_walk(dfid, 1, &name, 1); if (IS_ERR(fid)) { err = PTR_ERR(fid); @@ -452,6 +452,7 @@ error: if (fid) p9_client_clunk(fid); v9fs_put_acl(dacl, pacl); + p9_client_clunk(dfid); return err; } @@ -479,6 +480,7 @@ v9fs_vfs_getattr_dotl(const struct path *path, struct kstat *stat, */ st = p9_client_getattr_dotl(fid, P9_STATS_ALL); + p9_client_clunk(fid); if (IS_ERR(st)) return PTR_ERR(st); @@ -540,7 +542,7 @@ static int v9fs_mapped_iattr_valid(int iattr_valid) int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr) { - int retval; + int retval, use_dentry = 0; struct p9_fid *fid = NULL; struct p9_iattr_dotl p9attr; struct inode *inode = d_inode(dentry); @@ -565,8 +567,10 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr) fid = iattr->ia_file->private_data; WARN_ON(!fid); } - if (!fid) + if (!fid) { fid = v9fs_fid_lookup(dentry); + use_dentry = 1; + } if (IS_ERR(fid)) return PTR_ERR(fid); @@ -575,8 +579,11 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr) filemap_write_and_wait(inode->i_mapping); retval = p9_client_setattr(fid, &p9attr); - if (retval < 0) + if (retval < 0) { + if (use_dentry) + p9_client_clunk(fid); return retval; + } if ((iattr->ia_valid & ATTR_SIZE) && iattr->ia_size != i_size_read(inode)) @@ -588,9 +595,15 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr) if (iattr->ia_valid & ATTR_MODE) { /* We also want to update ACL when we update mode bits */ retval = v9fs_acl_chmod(inode, fid); - if (retval < 0) + if (retval < 0) { + if (use_dentry) + p9_client_clunk(fid); return retval; + } } + if (use_dentry) + p9_client_clunk(fid); + return 0; } @@ -742,6 +755,7 @@ error: if (fid) p9_client_clunk(fid); + p9_client_clunk(dfid); return err; } @@ -770,11 +784,15 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir, return PTR_ERR(dfid); oldfid = v9fs_fid_lookup(old_dentry); - if (IS_ERR(oldfid)) + if (IS_ERR(oldfid)) { + p9_client_clunk(dfid); return PTR_ERR(oldfid); + } err = p9_client_link(dfid, oldfid, dentry->d_name.name); + p9_client_clunk(dfid); + p9_client_clunk(oldfid); if (err < 0) { p9_debug(P9_DEBUG_VFS, "p9_client_link failed %d\n", err); return err; @@ -789,6 +807,7 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir, return PTR_ERR(fid); v9fs_refresh_inode_dotl(fid, d_inode(old_dentry)); + p9_client_clunk(fid); } ihold(d_inode(old_dentry)); d_instantiate(dentry, d_inode(old_dentry)); @@ -887,6 +906,8 @@ error: if (fid) p9_client_clunk(fid); v9fs_put_acl(dacl, pacl); + p9_client_clunk(dfid); + return err; } @@ -915,6 +936,7 @@ v9fs_vfs_get_link_dotl(struct dentry *dentry, if (IS_ERR(fid)) return ERR_CAST(fid); retval = p9_client_readlink(fid, &target); + p9_client_clunk(fid); if (retval) return ERR_PTR(retval); set_delayed_call(done, kfree_link, target); diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 9a21269b72347..5fce6e30bc5ae 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -268,6 +268,7 @@ static int v9fs_statfs(struct dentry *dentry, struct kstatfs *buf) } res = simple_statfs(dentry, buf); done: + p9_client_clunk(fid); return res; } diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c index ac8ff8ca4c115..87217dd0433e5 100644 --- a/fs/9p/xattr.c +++ b/fs/9p/xattr.c @@ -71,14 +71,17 @@ ssize_t v9fs_xattr_get(struct dentry *dentry, const char *name, void *buffer, size_t buffer_size) { struct p9_fid *fid; + int ret; p9_debug(P9_DEBUG_VFS, "name = %s value_len = %zu\n", name, buffer_size); fid = v9fs_fid_lookup(dentry); if (IS_ERR(fid)) return PTR_ERR(fid); + ret = v9fs_fid_xattr_get(fid, name, buffer, buffer_size); + p9_client_clunk(fid); - return v9fs_fid_xattr_get(fid, name, buffer, buffer_size); + return ret; } /* @@ -96,8 +99,15 @@ ssize_t v9fs_xattr_get(struct dentry *dentry, const char *name, int v9fs_xattr_set(struct dentry *dentry, const char *name, const void *value, size_t value_len, int flags) { - struct p9_fid *fid = v9fs_fid_lookup(dentry); - return v9fs_fid_xattr_set(fid, name, value, value_len, flags); + int ret; + struct p9_fid *fid; + + fid = v9fs_fid_lookup(dentry); + if (IS_ERR(fid)) + return PTR_ERR(fid); + ret = v9fs_fid_xattr_set(fid, name, value, value_len, flags); + p9_client_clunk(fid); + return ret; } int v9fs_fid_xattr_set(struct p9_fid *fid, const char *name, diff --git a/include/net/9p/client.h b/include/net/9p/client.h index ce7882da8e860..58ed9bd306bd8 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -140,10 +140,16 @@ struct p9_client { * * TODO: This needs lots of explanation. */ +enum fid_source { + FID_FROM_OTHER, + FID_FROM_INODE, + FID_FROM_DENTRY, +}; struct p9_fid { struct p9_client *clnt; u32 fid; + atomic_t count; int mode; struct p9_qid qid; u32 iounit; diff --git a/net/9p/client.c b/net/9p/client.c index 1a3f72bf45fc1..a6c8a915e0d8e 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -901,6 +901,7 @@ static struct p9_fid *p9_fid_create(struct p9_client *clnt) fid->clnt = clnt; fid->rdir = NULL; fid->fid = 0; + atomic_set(&fid->count, 1); idr_preload(GFP_KERNEL); spin_lock_irq(&clnt->lock); @@ -908,7 +909,6 @@ static struct p9_fid *p9_fid_create(struct p9_client *clnt) GFP_NOWAIT); spin_unlock_irq(&clnt->lock); idr_preload_end(); - if (!ret) return fid; @@ -1187,7 +1187,6 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname, p9_debug(P9_DEBUG_9P, ">>> TWALK fids %d,%d nwname %ud wname[0] %s\n", oldfid->fid, fid->fid, nwname, wnames ? wnames[0] : NULL); - req = p9_client_rpc(clnt, P9_TWALK, "ddT", oldfid->fid, fid->fid, nwname, wnames); if (IS_ERR(req)) { @@ -1461,12 +1460,14 @@ int p9_client_clunk(struct p9_fid *fid) struct p9_req_t *req; int retries = 0; - if (!fid) { - pr_warn("%s (%d): Trying to clunk with NULL fid\n", + if (!fid || IS_ERR(fid)) { + pr_warn("%s (%d): Trying to clunk with invalid fid\n", __func__, task_pid_nr(current)); dump_stack(); return 0; } + if (!atomic_dec_and_test(&fid->count)) + return 0; again: p9_debug(P9_DEBUG_9P, ">>> TCLUNK fid %d (try %d)\n", fid->fid, -- GitLab From ff5e72ebef41068789c93b0666cebde80cc8bd8c Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Tue, 3 Nov 2020 09:35:57 +0100 Subject: [PATCH 0239/1894] 9p: apply review requests for fid refcounting Fix style issues in parent commit ("apply review requests for fid refcounting"), no functional change. Link: http://lkml.kernel.org/r/1605802012-31133-2-git-send-email-asmadeus@codewreck.org Fixes: 6636b6dcc3db ("9p: add refcount to p9_fid struct") Signed-off-by: Dominique Martinet --- fs/9p/fid.c | 10 ++++------ fs/9p/fid.h | 2 +- include/net/9p/client.h | 2 +- net/9p/client.c | 4 ++-- 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/fs/9p/fid.c b/fs/9p/fid.c index 89643dabcdaef..50118ec72a926 100644 --- a/fs/9p/fid.c +++ b/fs/9p/fid.c @@ -28,7 +28,6 @@ static inline void __add_fid(struct dentry *dentry, struct p9_fid *fid) { - atomic_set(&fid->count, 1); hlist_add_head(&fid->dlist, (struct hlist_head *)&dentry->d_fsdata); } @@ -62,7 +61,7 @@ static struct p9_fid *v9fs_fid_find_inode(struct inode *inode, kuid_t uid) } } if (ret && !IS_ERR(ret)) - atomic_inc(&ret->count); + refcount_inc(&ret->count); spin_unlock(&inode->i_lock); return ret; } @@ -77,7 +76,6 @@ static struct p9_fid *v9fs_fid_find_inode(struct inode *inode, kuid_t uid) void v9fs_open_fid_add(struct inode *inode, struct p9_fid *fid) { spin_lock(&inode->i_lock); - atomic_set(&fid->count, 1); hlist_add_head(&fid->ilist, (struct hlist_head *)&inode->i_private); spin_unlock(&inode->i_lock); } @@ -110,7 +108,7 @@ static struct p9_fid *v9fs_fid_find(struct dentry *dentry, kuid_t uid, int any) hlist_for_each_entry(fid, h, dlist) { if (any || uid_eq(fid->uid, uid)) { ret = fid; - atomic_inc(&ret->count); + refcount_inc(&ret->count); break; } } @@ -201,7 +199,7 @@ static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry, } /* If we are root ourself just return that */ if (dentry->d_sb->s_root == dentry) { - atomic_inc(&fid->count); + refcount_inc(&fid->count); return fid; } /* @@ -250,7 +248,7 @@ fid_out: fid = ERR_PTR(-ENOENT); } else { __add_fid(dentry, fid); - atomic_inc(&fid->count); + refcount_inc(&fid->count); spin_unlock(&dentry->d_lock); } } diff --git a/fs/9p/fid.h b/fs/9p/fid.h index 1fed965467282..f7f33509e1692 100644 --- a/fs/9p/fid.h +++ b/fs/9p/fid.h @@ -28,7 +28,7 @@ static inline struct p9_fid *v9fs_fid_clone(struct dentry *dentry) if (!fid || IS_ERR(fid)) return fid; - nfid = p9_client_walk(fid, 0, NULL, 1); + nfid = clone_fid(fid); p9_client_clunk(fid); return nfid; } diff --git a/include/net/9p/client.h b/include/net/9p/client.h index 58ed9bd306bd8..e1c308d8d288e 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -149,7 +149,7 @@ enum fid_source { struct p9_fid { struct p9_client *clnt; u32 fid; - atomic_t count; + refcount_t count; int mode; struct p9_qid qid; u32 iounit; diff --git a/net/9p/client.c b/net/9p/client.c index a6c8a915e0d8e..ba4910138c5be 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -901,7 +901,7 @@ static struct p9_fid *p9_fid_create(struct p9_client *clnt) fid->clnt = clnt; fid->rdir = NULL; fid->fid = 0; - atomic_set(&fid->count, 1); + refcount_set(&fid->count, 1); idr_preload(GFP_KERNEL); spin_lock_irq(&clnt->lock); @@ -1466,7 +1466,7 @@ int p9_client_clunk(struct p9_fid *fid) dump_stack(); return 0; } - if (!atomic_dec_and_test(&fid->count)) + if (!refcount_dec_and_test(&fid->count)) return 0; again: -- GitLab From 5bfe97d7382b5c1ec351c59a878e742c9fd73d38 Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Thu, 19 Nov 2020 15:11:04 +0100 Subject: [PATCH 0240/1894] 9p: Fix writeback fid incorrectly being attached to dentry v9fs_dir_release needs fid->ilist to have been initialized for filp's fid, not the inode's writeback fid's. With refcounting this can be improved on later but this appears to fix null deref issues. Link: http://lkml.kernel.org/r/1605802012-31133-3-git-send-email-asmadeus@codewreck.org Fixes: 6636b6dcc3db ("fs/9p: track open fids") Signed-off-by: Dominique Martinet --- fs/9p/vfs_file.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index b0ef225cecd00..c5e49c88688db 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -46,7 +46,7 @@ int v9fs_file_open(struct inode *inode, struct file *file) int err; struct v9fs_inode *v9inode; struct v9fs_session_info *v9ses; - struct p9_fid *fid; + struct p9_fid *fid, *writeback_fid; int omode; p9_debug(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, file); @@ -85,13 +85,13 @@ int v9fs_file_open(struct inode *inode, struct file *file) * because we want write after unlink usecase * to work. */ - fid = v9fs_writeback_fid(file_dentry(file)); + writeback_fid = v9fs_writeback_fid(file_dentry(file)); if (IS_ERR(fid)) { err = PTR_ERR(fid); mutex_unlock(&v9inode->v_mutex); goto out_error; } - v9inode->writeback_fid = (void *) fid; + v9inode->writeback_fid = (void *) writeback_fid; } mutex_unlock(&v9inode->v_mutex); if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) -- GitLab From 6160aca443148416994c022a35c77daeba948ea6 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 28 Oct 2020 17:48:20 -0700 Subject: [PATCH 0241/1894] clk: tegra: Do not return 0 on failure Return values from read_dt_param() will be either TRUE (1) or FALSE (0), while dfll_fetch_pwm_params() returns 0 on success or an ERR code on failure. So this patch fixes the bug of returning 0 on failure. Fixes: 36541f0499fe ("clk: tegra: dfll: support PWM regulator control") Cc: Signed-off-by: Nicolin Chen Signed-off-by: Thierry Reding --- drivers/clk/tegra/clk-dfll.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/tegra/clk-dfll.c b/drivers/clk/tegra/clk-dfll.c index cfbaa90c7adbf..a5f526bb0483e 100644 --- a/drivers/clk/tegra/clk-dfll.c +++ b/drivers/clk/tegra/clk-dfll.c @@ -1856,13 +1856,13 @@ static int dfll_fetch_pwm_params(struct tegra_dfll *td) &td->reg_init_uV); if (!ret) { dev_err(td->dev, "couldn't get initialized voltage\n"); - return ret; + return -EINVAL; } ret = read_dt_param(td, "nvidia,pwm-period-nanoseconds", &pwm_period); if (!ret) { dev_err(td->dev, "couldn't get PWM period\n"); - return ret; + return -EINVAL; } td->pwm_rate = (NSEC_PER_SEC / pwm_period) * (MAX_DFLL_VOLTAGES - 1); -- GitLab From 2dd2a1740ee19cd2636d247276cf27bfa434b0e2 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 20 Nov 2020 08:50:07 -0800 Subject: [PATCH 0242/1894] libnvdimm/namespace: Fix reaping of invalidated block-window-namespace labels A recent change to ndctl to attempt to reconfigure namespaces in place uncovered a label accounting problem in block-window-type namespaces. The ndctl "create.sh" test is able to trigger this signature: WARNING: CPU: 34 PID: 9167 at drivers/nvdimm/label.c:1100 __blk_label_update+0x9a3/0xbc0 [libnvdimm] [..] RIP: 0010:__blk_label_update+0x9a3/0xbc0 [libnvdimm] [..] Call Trace: uuid_store+0x21b/0x2f0 [libnvdimm] kernfs_fop_write+0xcf/0x1c0 vfs_write+0xcc/0x380 ksys_write+0x68/0xe0 When allocated capacity for a namespace is renamed (new UUID) the labels with the old UUID need to be deleted. The ndctl behavior to always destroy namespaces on reconfiguration hid this problem. The immediate impact of this bug is limited since block-window-type namespaces only seem to exist in the specification and not in any shipping products. However, the label handling code is being reused for other technologies like CXL region labels, so there is a benefit to making sure both vertical labels sets (block-window) and horizontal label sets (pmem) have a functional reference implementation in libnvdimm. Fixes: c4703ce11c23 ("libnvdimm/namespace: Fix label tracking error") Cc: Cc: Vishal Verma Cc: Dave Jiang Cc: Ira Weiny Signed-off-by: Dan Williams --- drivers/nvdimm/label.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c index 47a4828b8b310..6f2be7a34598c 100644 --- a/drivers/nvdimm/label.c +++ b/drivers/nvdimm/label.c @@ -980,6 +980,15 @@ static int __blk_label_update(struct nd_region *nd_region, } } + /* release slots associated with any invalidated UUIDs */ + mutex_lock(&nd_mapping->lock); + list_for_each_entry_safe(label_ent, e, &nd_mapping->labels, list) + if (test_and_clear_bit(ND_LABEL_REAP, &label_ent->flags)) { + reap_victim(nd_mapping, label_ent); + list_move(&label_ent->list, &list); + } + mutex_unlock(&nd_mapping->lock); + /* * Find the resource associated with the first label in the set * per the v1.2 namespace specification. -- GitLab From 14ebb3154b8f3d562cb18331b08ff1a22609ae59 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 15 Sep 2020 14:45:52 +0200 Subject: [PATCH 0243/1894] clk: meson: axg: add Video Clocks Add the clocks entries used in the video clock path, the clock path is doubled to permit having different synchronized clocks for different parts of the video pipeline. The AXG only has a single ENCL CTS clock and even if VCLK exist along VCLK2, only VCLK2 is used since it clocks the MIPI DSI IP directly. All dividers are flagged with CLK_GET_RATE_NOCACHE, and all gates are flagged with CLK_IGNORE_UNUSED since they are currently directly handled by the Meson DRM Driver. Once the DRM Driver is fully migrated to using the Common Clock Framework to handle the video clock tree, the CLK_GET_RATE_NOCACHE and CLK_IGNORE_UNUSED will be dropped. Signed-off-by: Neil Armstrong Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20200915124553.8056-4-narmstrong@baylibre.com --- drivers/clk/meson/axg.c | 753 ++++++++++++++++++++++++++++++++++++++++ drivers/clk/meson/axg.h | 21 +- 2 files changed, 773 insertions(+), 1 deletion(-) diff --git a/drivers/clk/meson/axg.c b/drivers/clk/meson/axg.c index 13fc0006f63d8..a4e8949297cf4 100644 --- a/drivers/clk/meson/axg.c +++ b/drivers/clk/meson/axg.c @@ -1026,6 +1026,683 @@ static struct clk_regmap axg_sd_emmc_c_clk0 = { }, }; +/* VPU Clock */ + +static const struct clk_hw *axg_vpu_parent_hws[] = { + &axg_fclk_div4.hw, + &axg_fclk_div3.hw, + &axg_fclk_div5.hw, + &axg_fclk_div7.hw, +}; + +static struct clk_regmap axg_vpu_0_sel = { + .data = &(struct clk_regmap_mux_data){ + .offset = HHI_VPU_CLK_CNTL, + .mask = 0x3, + .shift = 9, + }, + .hw.init = &(struct clk_init_data){ + .name = "vpu_0_sel", + .ops = &clk_regmap_mux_ops, + .parent_hws = axg_vpu_parent_hws, + .num_parents = ARRAY_SIZE(axg_vpu_parent_hws), + /* We need a specific parent for VPU clock source, let it be set in DT */ + .flags = CLK_SET_RATE_NO_REPARENT, + }, +}; + +static struct clk_regmap axg_vpu_0_div = { + .data = &(struct clk_regmap_div_data){ + .offset = HHI_VPU_CLK_CNTL, + .shift = 0, + .width = 7, + }, + .hw.init = &(struct clk_init_data){ + .name = "vpu_0_div", + .ops = &clk_regmap_divider_ops, + .parent_hws = (const struct clk_hw *[]) { &axg_vpu_0_sel.hw }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + }, +}; + +static struct clk_regmap axg_vpu_0 = { + .data = &(struct clk_regmap_gate_data){ + .offset = HHI_VPU_CLK_CNTL, + .bit_idx = 8, + }, + .hw.init = &(struct clk_init_data) { + .name = "vpu_0", + .ops = &clk_regmap_gate_ops, + .parent_hws = (const struct clk_hw *[]) { &axg_vpu_0_div.hw }, + .num_parents = 1, + /* + * We want to avoid CCF to disable the VPU clock if + * display has been set by Bootloader + */ + .flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED, + }, +}; + +static struct clk_regmap axg_vpu_1_sel = { + .data = &(struct clk_regmap_mux_data){ + .offset = HHI_VPU_CLK_CNTL, + .mask = 0x3, + .shift = 25, + }, + .hw.init = &(struct clk_init_data){ + .name = "vpu_1_sel", + .ops = &clk_regmap_mux_ops, + .parent_hws = axg_vpu_parent_hws, + .num_parents = ARRAY_SIZE(axg_vpu_parent_hws), + /* We need a specific parent for VPU clock source, let it be set in DT */ + .flags = CLK_SET_RATE_NO_REPARENT, + }, +}; + +static struct clk_regmap axg_vpu_1_div = { + .data = &(struct clk_regmap_div_data){ + .offset = HHI_VPU_CLK_CNTL, + .shift = 16, + .width = 7, + }, + .hw.init = &(struct clk_init_data){ + .name = "vpu_1_div", + .ops = &clk_regmap_divider_ops, + .parent_hws = (const struct clk_hw *[]) { &axg_vpu_1_sel.hw }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + }, +}; + +static struct clk_regmap axg_vpu_1 = { + .data = &(struct clk_regmap_gate_data){ + .offset = HHI_VPU_CLK_CNTL, + .bit_idx = 24, + }, + .hw.init = &(struct clk_init_data) { + .name = "vpu_1", + .ops = &clk_regmap_gate_ops, + .parent_hws = (const struct clk_hw *[]) { &axg_vpu_1_div.hw }, + .num_parents = 1, + /* + * We want to avoid CCF to disable the VPU clock if + * display has been set by Bootloader + */ + .flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED, + }, +}; + +static struct clk_regmap axg_vpu = { + .data = &(struct clk_regmap_mux_data){ + .offset = HHI_VPU_CLK_CNTL, + .mask = 1, + .shift = 31, + }, + .hw.init = &(struct clk_init_data){ + .name = "vpu", + .ops = &clk_regmap_mux_ops, + .parent_hws = (const struct clk_hw *[]) { + &axg_vpu_0.hw, + &axg_vpu_1.hw + }, + .num_parents = 2, + .flags = CLK_SET_RATE_NO_REPARENT, + }, +}; + +/* VAPB Clock */ + +static struct clk_regmap axg_vapb_0_sel = { + .data = &(struct clk_regmap_mux_data){ + .offset = HHI_VAPBCLK_CNTL, + .mask = 0x3, + .shift = 9, + }, + .hw.init = &(struct clk_init_data){ + .name = "vapb_0_sel", + .ops = &clk_regmap_mux_ops, + .parent_hws = axg_vpu_parent_hws, + .num_parents = ARRAY_SIZE(axg_vpu_parent_hws), + .flags = CLK_SET_RATE_NO_REPARENT, + }, +}; + +static struct clk_regmap axg_vapb_0_div = { + .data = &(struct clk_regmap_div_data){ + .offset = HHI_VAPBCLK_CNTL, + .shift = 0, + .width = 7, + }, + .hw.init = &(struct clk_init_data){ + .name = "vapb_0_div", + .ops = &clk_regmap_divider_ops, + .parent_hws = (const struct clk_hw *[]) { + &axg_vapb_0_sel.hw + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + }, +}; + +static struct clk_regmap axg_vapb_0 = { + .data = &(struct clk_regmap_gate_data){ + .offset = HHI_VAPBCLK_CNTL, + .bit_idx = 8, + }, + .hw.init = &(struct clk_init_data) { + .name = "vapb_0", + .ops = &clk_regmap_gate_ops, + .parent_hws = (const struct clk_hw *[]) { + &axg_vapb_0_div.hw + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED, + }, +}; + +static struct clk_regmap axg_vapb_1_sel = { + .data = &(struct clk_regmap_mux_data){ + .offset = HHI_VAPBCLK_CNTL, + .mask = 0x3, + .shift = 25, + }, + .hw.init = &(struct clk_init_data){ + .name = "vapb_1_sel", + .ops = &clk_regmap_mux_ops, + .parent_hws = axg_vpu_parent_hws, + .num_parents = ARRAY_SIZE(axg_vpu_parent_hws), + .flags = CLK_SET_RATE_NO_REPARENT, + }, +}; + +static struct clk_regmap axg_vapb_1_div = { + .data = &(struct clk_regmap_div_data){ + .offset = HHI_VAPBCLK_CNTL, + .shift = 16, + .width = 7, + }, + .hw.init = &(struct clk_init_data){ + .name = "vapb_1_div", + .ops = &clk_regmap_divider_ops, + .parent_hws = (const struct clk_hw *[]) { + &axg_vapb_1_sel.hw + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + }, +}; + +static struct clk_regmap axg_vapb_1 = { + .data = &(struct clk_regmap_gate_data){ + .offset = HHI_VAPBCLK_CNTL, + .bit_idx = 24, + }, + .hw.init = &(struct clk_init_data) { + .name = "vapb_1", + .ops = &clk_regmap_gate_ops, + .parent_hws = (const struct clk_hw *[]) { + &axg_vapb_1_div.hw + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED, + }, +}; + +static struct clk_regmap axg_vapb_sel = { + .data = &(struct clk_regmap_mux_data){ + .offset = HHI_VAPBCLK_CNTL, + .mask = 1, + .shift = 31, + }, + .hw.init = &(struct clk_init_data){ + .name = "vapb_sel", + .ops = &clk_regmap_mux_ops, + .parent_hws = (const struct clk_hw *[]) { + &axg_vapb_0.hw, + &axg_vapb_1.hw + }, + .num_parents = 2, + .flags = CLK_SET_RATE_NO_REPARENT, + }, +}; + +static struct clk_regmap axg_vapb = { + .data = &(struct clk_regmap_gate_data){ + .offset = HHI_VAPBCLK_CNTL, + .bit_idx = 30, + }, + .hw.init = &(struct clk_init_data) { + .name = "vapb", + .ops = &clk_regmap_gate_ops, + .parent_hws = (const struct clk_hw *[]) { &axg_vapb_sel.hw }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED, + }, +}; + +/* Video Clocks */ + +static const struct clk_hw *axg_vclk_parent_hws[] = { + &axg_gp0_pll.hw, + &axg_fclk_div4.hw, + &axg_fclk_div3.hw, + &axg_fclk_div5.hw, + &axg_fclk_div2.hw, + &axg_fclk_div7.hw, + &axg_mpll1.hw, +}; + +static struct clk_regmap axg_vclk_sel = { + .data = &(struct clk_regmap_mux_data){ + .offset = HHI_VID_CLK_CNTL, + .mask = 0x7, + .shift = 16, + }, + .hw.init = &(struct clk_init_data){ + .name = "vclk_sel", + .ops = &clk_regmap_mux_ops, + .parent_hws = axg_vclk_parent_hws, + .num_parents = ARRAY_SIZE(axg_vclk_parent_hws), + .flags = CLK_SET_RATE_NO_REPARENT | CLK_GET_RATE_NOCACHE, + }, +}; + +static struct clk_regmap axg_vclk2_sel = { + .data = &(struct clk_regmap_mux_data){ + .offset = HHI_VIID_CLK_CNTL, + .mask = 0x7, + .shift = 16, + }, + .hw.init = &(struct clk_init_data){ + .name = "vclk2_sel", + .ops = &clk_regmap_mux_ops, + .parent_hws = axg_vclk_parent_hws, + .num_parents = ARRAY_SIZE(axg_vclk_parent_hws), + .flags = CLK_SET_RATE_NO_REPARENT | CLK_GET_RATE_NOCACHE, + }, +}; + +static struct clk_regmap axg_vclk_input = { + .data = &(struct clk_regmap_gate_data){ + .offset = HHI_VID_CLK_DIV, + .bit_idx = 16, + }, + .hw.init = &(struct clk_init_data) { + .name = "vclk_input", + .ops = &clk_regmap_gate_ops, + .parent_hws = (const struct clk_hw *[]) { &axg_vclk_sel.hw }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED, + }, +}; + +static struct clk_regmap axg_vclk2_input = { + .data = &(struct clk_regmap_gate_data){ + .offset = HHI_VIID_CLK_DIV, + .bit_idx = 16, + }, + .hw.init = &(struct clk_init_data) { + .name = "vclk2_input", + .ops = &clk_regmap_gate_ops, + .parent_hws = (const struct clk_hw *[]) { &axg_vclk2_sel.hw }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED, + }, +}; + +static struct clk_regmap axg_vclk_div = { + .data = &(struct clk_regmap_div_data){ + .offset = HHI_VID_CLK_DIV, + .shift = 0, + .width = 8, + }, + .hw.init = &(struct clk_init_data){ + .name = "vclk_div", + .ops = &clk_regmap_divider_ops, + .parent_hws = (const struct clk_hw *[]) { + &axg_vclk_input.hw + }, + .num_parents = 1, + .flags = CLK_GET_RATE_NOCACHE, + }, +}; + +static struct clk_regmap axg_vclk2_div = { + .data = &(struct clk_regmap_div_data){ + .offset = HHI_VIID_CLK_DIV, + .shift = 0, + .width = 8, + }, + .hw.init = &(struct clk_init_data){ + .name = "vclk2_div", + .ops = &clk_regmap_divider_ops, + .parent_hws = (const struct clk_hw *[]) { + &axg_vclk2_input.hw + }, + .num_parents = 1, + .flags = CLK_GET_RATE_NOCACHE, + }, +}; + +static struct clk_regmap axg_vclk = { + .data = &(struct clk_regmap_gate_data){ + .offset = HHI_VID_CLK_CNTL, + .bit_idx = 19, + }, + .hw.init = &(struct clk_init_data) { + .name = "vclk", + .ops = &clk_regmap_gate_ops, + .parent_hws = (const struct clk_hw *[]) { &axg_vclk_div.hw }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED, + }, +}; + +static struct clk_regmap axg_vclk2 = { + .data = &(struct clk_regmap_gate_data){ + .offset = HHI_VIID_CLK_CNTL, + .bit_idx = 19, + }, + .hw.init = &(struct clk_init_data) { + .name = "vclk2", + .ops = &clk_regmap_gate_ops, + .parent_hws = (const struct clk_hw *[]) { &axg_vclk2_div.hw }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED, + }, +}; + +static struct clk_regmap axg_vclk_div1 = { + .data = &(struct clk_regmap_gate_data){ + .offset = HHI_VID_CLK_CNTL, + .bit_idx = 0, + }, + .hw.init = &(struct clk_init_data) { + .name = "vclk_div1", + .ops = &clk_regmap_gate_ops, + .parent_hws = (const struct clk_hw *[]) { &axg_vclk.hw }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED, + }, +}; + +static struct clk_regmap axg_vclk_div2_en = { + .data = &(struct clk_regmap_gate_data){ + .offset = HHI_VID_CLK_CNTL, + .bit_idx = 1, + }, + .hw.init = &(struct clk_init_data) { + .name = "vclk_div2_en", + .ops = &clk_regmap_gate_ops, + .parent_hws = (const struct clk_hw *[]) { &axg_vclk.hw }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED, + }, +}; + +static struct clk_regmap axg_vclk_div4_en = { + .data = &(struct clk_regmap_gate_data){ + .offset = HHI_VID_CLK_CNTL, + .bit_idx = 2, + }, + .hw.init = &(struct clk_init_data) { + .name = "vclk_div4_en", + .ops = &clk_regmap_gate_ops, + .parent_hws = (const struct clk_hw *[]) { &axg_vclk.hw }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED, + }, +}; + +static struct clk_regmap axg_vclk_div6_en = { + .data = &(struct clk_regmap_gate_data){ + .offset = HHI_VID_CLK_CNTL, + .bit_idx = 3, + }, + .hw.init = &(struct clk_init_data) { + .name = "vclk_div6_en", + .ops = &clk_regmap_gate_ops, + .parent_hws = (const struct clk_hw *[]) { &axg_vclk.hw }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED, + }, +}; + +static struct clk_regmap axg_vclk_div12_en = { + .data = &(struct clk_regmap_gate_data){ + .offset = HHI_VID_CLK_CNTL, + .bit_idx = 4, + }, + .hw.init = &(struct clk_init_data) { + .name = "vclk_div12_en", + .ops = &clk_regmap_gate_ops, + .parent_hws = (const struct clk_hw *[]) { &axg_vclk.hw }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED, + }, +}; + +static struct clk_regmap axg_vclk2_div1 = { + .data = &(struct clk_regmap_gate_data){ + .offset = HHI_VIID_CLK_CNTL, + .bit_idx = 0, + }, + .hw.init = &(struct clk_init_data) { + .name = "vclk2_div1", + .ops = &clk_regmap_gate_ops, + .parent_hws = (const struct clk_hw *[]) { &axg_vclk2.hw }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED, + }, +}; + +static struct clk_regmap axg_vclk2_div2_en = { + .data = &(struct clk_regmap_gate_data){ + .offset = HHI_VIID_CLK_CNTL, + .bit_idx = 1, + }, + .hw.init = &(struct clk_init_data) { + .name = "vclk2_div2_en", + .ops = &clk_regmap_gate_ops, + .parent_hws = (const struct clk_hw *[]) { &axg_vclk2.hw }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED, + }, +}; + +static struct clk_regmap axg_vclk2_div4_en = { + .data = &(struct clk_regmap_gate_data){ + .offset = HHI_VIID_CLK_CNTL, + .bit_idx = 2, + }, + .hw.init = &(struct clk_init_data) { + .name = "vclk2_div4_en", + .ops = &clk_regmap_gate_ops, + .parent_hws = (const struct clk_hw *[]) { &axg_vclk2.hw }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED, + }, +}; + +static struct clk_regmap axg_vclk2_div6_en = { + .data = &(struct clk_regmap_gate_data){ + .offset = HHI_VIID_CLK_CNTL, + .bit_idx = 3, + }, + .hw.init = &(struct clk_init_data) { + .name = "vclk2_div6_en", + .ops = &clk_regmap_gate_ops, + .parent_hws = (const struct clk_hw *[]) { &axg_vclk2.hw }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED, + }, +}; + +static struct clk_regmap axg_vclk2_div12_en = { + .data = &(struct clk_regmap_gate_data){ + .offset = HHI_VIID_CLK_CNTL, + .bit_idx = 4, + }, + .hw.init = &(struct clk_init_data) { + .name = "vclk2_div12_en", + .ops = &clk_regmap_gate_ops, + .parent_hws = (const struct clk_hw *[]) { &axg_vclk2.hw }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED, + }, +}; + +static struct clk_fixed_factor axg_vclk_div2 = { + .mult = 1, + .div = 2, + .hw.init = &(struct clk_init_data){ + .name = "vclk_div2", + .ops = &clk_fixed_factor_ops, + .parent_hws = (const struct clk_hw *[]) { + &axg_vclk_div2_en.hw + }, + .num_parents = 1, + }, +}; + +static struct clk_fixed_factor axg_vclk_div4 = { + .mult = 1, + .div = 4, + .hw.init = &(struct clk_init_data){ + .name = "vclk_div4", + .ops = &clk_fixed_factor_ops, + .parent_hws = (const struct clk_hw *[]) { + &axg_vclk_div4_en.hw + }, + .num_parents = 1, + }, +}; + +static struct clk_fixed_factor axg_vclk_div6 = { + .mult = 1, + .div = 6, + .hw.init = &(struct clk_init_data){ + .name = "vclk_div6", + .ops = &clk_fixed_factor_ops, + .parent_hws = (const struct clk_hw *[]) { + &axg_vclk_div6_en.hw + }, + .num_parents = 1, + }, +}; + +static struct clk_fixed_factor axg_vclk_div12 = { + .mult = 1, + .div = 12, + .hw.init = &(struct clk_init_data){ + .name = "vclk_div12", + .ops = &clk_fixed_factor_ops, + .parent_hws = (const struct clk_hw *[]) { + &axg_vclk_div12_en.hw + }, + .num_parents = 1, + }, +}; + +static struct clk_fixed_factor axg_vclk2_div2 = { + .mult = 1, + .div = 2, + .hw.init = &(struct clk_init_data){ + .name = "vclk2_div2", + .ops = &clk_fixed_factor_ops, + .parent_hws = (const struct clk_hw *[]) { + &axg_vclk2_div2_en.hw + }, + .num_parents = 1, + }, +}; + +static struct clk_fixed_factor axg_vclk2_div4 = { + .mult = 1, + .div = 4, + .hw.init = &(struct clk_init_data){ + .name = "vclk2_div4", + .ops = &clk_fixed_factor_ops, + .parent_hws = (const struct clk_hw *[]) { + &axg_vclk2_div4_en.hw + }, + .num_parents = 1, + }, +}; + +static struct clk_fixed_factor axg_vclk2_div6 = { + .mult = 1, + .div = 6, + .hw.init = &(struct clk_init_data){ + .name = "vclk2_div6", + .ops = &clk_fixed_factor_ops, + .parent_hws = (const struct clk_hw *[]) { + &axg_vclk2_div6_en.hw + }, + .num_parents = 1, + }, +}; + +static struct clk_fixed_factor axg_vclk2_div12 = { + .mult = 1, + .div = 12, + .hw.init = &(struct clk_init_data){ + .name = "vclk2_div12", + .ops = &clk_fixed_factor_ops, + .parent_hws = (const struct clk_hw *[]) { + &axg_vclk2_div12_en.hw + }, + .num_parents = 1, + }, +}; + +static u32 mux_table_cts_sel[] = { 0, 1, 2, 3, 4, 8, 9, 10, 11, 12 }; +static const struct clk_hw *axg_cts_parent_hws[] = { + &axg_vclk_div1.hw, + &axg_vclk_div2.hw, + &axg_vclk_div4.hw, + &axg_vclk_div6.hw, + &axg_vclk_div12.hw, + &axg_vclk2_div1.hw, + &axg_vclk2_div2.hw, + &axg_vclk2_div4.hw, + &axg_vclk2_div6.hw, + &axg_vclk2_div12.hw, +}; + +static struct clk_regmap axg_cts_encl_sel = { + .data = &(struct clk_regmap_mux_data){ + .offset = HHI_VIID_CLK_DIV, + .mask = 0xf, + .shift = 12, + .table = mux_table_cts_sel, + }, + .hw.init = &(struct clk_init_data){ + .name = "cts_encl_sel", + .ops = &clk_regmap_mux_ops, + .parent_hws = axg_cts_parent_hws, + .num_parents = ARRAY_SIZE(axg_cts_parent_hws), + .flags = CLK_SET_RATE_NO_REPARENT | CLK_GET_RATE_NOCACHE, + }, +}; + +static struct clk_regmap axg_cts_encl = { + .data = &(struct clk_regmap_gate_data){ + .offset = HHI_VID_CLK_CNTL2, + .bit_idx = 3, + }, + .hw.init = &(struct clk_init_data) { + .name = "cts_encl", + .ops = &clk_regmap_gate_ops, + .parent_hws = (const struct clk_hw *[]) { + &axg_cts_encl_sel.hw + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED, + }, +}; + static u32 mux_table_gen_clk[] = { 0, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, }; static const struct clk_parent_data gen_clk_parent_data[] = { @@ -1246,6 +1923,49 @@ static struct clk_hw_onecell_data axg_hw_onecell_data = { [CLKID_HIFI_PLL_DCO] = &axg_hifi_pll_dco.hw, [CLKID_PCIE_PLL_DCO] = &axg_pcie_pll_dco.hw, [CLKID_PCIE_PLL_OD] = &axg_pcie_pll_od.hw, + [CLKID_VPU_0_DIV] = &axg_vpu_0_div.hw, + [CLKID_VPU_0_SEL] = &axg_vpu_0_sel.hw, + [CLKID_VPU_0] = &axg_vpu_0.hw, + [CLKID_VPU_1_DIV] = &axg_vpu_1_div.hw, + [CLKID_VPU_1_SEL] = &axg_vpu_1_sel.hw, + [CLKID_VPU_1] = &axg_vpu_1.hw, + [CLKID_VPU] = &axg_vpu.hw, + [CLKID_VAPB_0_DIV] = &axg_vapb_0_div.hw, + [CLKID_VAPB_0_SEL] = &axg_vapb_0_sel.hw, + [CLKID_VAPB_0] = &axg_vapb_0.hw, + [CLKID_VAPB_1_DIV] = &axg_vapb_1_div.hw, + [CLKID_VAPB_1_SEL] = &axg_vapb_1_sel.hw, + [CLKID_VAPB_1] = &axg_vapb_1.hw, + [CLKID_VAPB_SEL] = &axg_vapb_sel.hw, + [CLKID_VAPB] = &axg_vapb.hw, + [CLKID_VCLK] = &axg_vclk.hw, + [CLKID_VCLK2] = &axg_vclk2.hw, + [CLKID_VCLK_SEL] = &axg_vclk_sel.hw, + [CLKID_VCLK2_SEL] = &axg_vclk2_sel.hw, + [CLKID_VCLK_INPUT] = &axg_vclk_input.hw, + [CLKID_VCLK2_INPUT] = &axg_vclk2_input.hw, + [CLKID_VCLK_DIV] = &axg_vclk_div.hw, + [CLKID_VCLK2_DIV] = &axg_vclk2_div.hw, + [CLKID_VCLK_DIV2_EN] = &axg_vclk_div2_en.hw, + [CLKID_VCLK_DIV4_EN] = &axg_vclk_div4_en.hw, + [CLKID_VCLK_DIV6_EN] = &axg_vclk_div6_en.hw, + [CLKID_VCLK_DIV12_EN] = &axg_vclk_div12_en.hw, + [CLKID_VCLK2_DIV2_EN] = &axg_vclk2_div2_en.hw, + [CLKID_VCLK2_DIV4_EN] = &axg_vclk2_div4_en.hw, + [CLKID_VCLK2_DIV6_EN] = &axg_vclk2_div6_en.hw, + [CLKID_VCLK2_DIV12_EN] = &axg_vclk2_div12_en.hw, + [CLKID_VCLK_DIV1] = &axg_vclk_div1.hw, + [CLKID_VCLK_DIV2] = &axg_vclk_div2.hw, + [CLKID_VCLK_DIV4] = &axg_vclk_div4.hw, + [CLKID_VCLK_DIV6] = &axg_vclk_div6.hw, + [CLKID_VCLK_DIV12] = &axg_vclk_div12.hw, + [CLKID_VCLK2_DIV1] = &axg_vclk2_div1.hw, + [CLKID_VCLK2_DIV2] = &axg_vclk2_div2.hw, + [CLKID_VCLK2_DIV4] = &axg_vclk2_div4.hw, + [CLKID_VCLK2_DIV6] = &axg_vclk2_div6.hw, + [CLKID_VCLK2_DIV12] = &axg_vclk2_div12.hw, + [CLKID_CTS_ENCL_SEL] = &axg_cts_encl_sel.hw, + [CLKID_CTS_ENCL] = &axg_cts_encl.hw, [NR_CLKS] = NULL, }, .num = NR_CLKS, @@ -1341,6 +2061,39 @@ static struct clk_regmap *const axg_clk_regmaps[] = { &axg_hifi_pll_dco, &axg_pcie_pll_dco, &axg_pcie_pll_od, + &axg_vpu_0_div, + &axg_vpu_0_sel, + &axg_vpu_0, + &axg_vpu_1_div, + &axg_vpu_1_sel, + &axg_vpu_1, + &axg_vpu, + &axg_vapb_0_div, + &axg_vapb_0_sel, + &axg_vapb_0, + &axg_vapb_1_div, + &axg_vapb_1_sel, + &axg_vapb_1, + &axg_vapb_sel, + &axg_vapb, + &axg_vclk, + &axg_vclk2, + &axg_vclk_sel, + &axg_vclk2_sel, + &axg_vclk_input, + &axg_vclk2_input, + &axg_vclk_div, + &axg_vclk2_div, + &axg_vclk_div2_en, + &axg_vclk_div4_en, + &axg_vclk_div6_en, + &axg_vclk_div12_en, + &axg_vclk2_div2_en, + &axg_vclk2_div4_en, + &axg_vclk2_div6_en, + &axg_vclk2_div12_en, + &axg_cts_encl_sel, + &axg_cts_encl, }; static const struct meson_eeclkc_data axg_clkc_data = { diff --git a/drivers/clk/meson/axg.h b/drivers/clk/meson/axg.h index 0431dabac6294..a8787b394a47e 100644 --- a/drivers/clk/meson/axg.h +++ b/drivers/clk/meson/axg.h @@ -139,8 +139,27 @@ #define CLKID_HIFI_PLL_DCO 88 #define CLKID_PCIE_PLL_DCO 89 #define CLKID_PCIE_PLL_OD 90 +#define CLKID_VPU_0_DIV 91 +#define CLKID_VPU_1_DIV 94 +#define CLKID_VAPB_0_DIV 98 +#define CLKID_VAPB_1_DIV 101 +#define CLKID_VCLK_SEL 108 +#define CLKID_VCLK2_SEL 109 +#define CLKID_VCLK_INPUT 110 +#define CLKID_VCLK2_INPUT 111 +#define CLKID_VCLK_DIV 112 +#define CLKID_VCLK2_DIV 113 +#define CLKID_VCLK_DIV2_EN 114 +#define CLKID_VCLK_DIV4_EN 115 +#define CLKID_VCLK_DIV6_EN 116 +#define CLKID_VCLK_DIV12_EN 117 +#define CLKID_VCLK2_DIV2_EN 118 +#define CLKID_VCLK2_DIV4_EN 119 +#define CLKID_VCLK2_DIV6_EN 120 +#define CLKID_VCLK2_DIV12_EN 121 +#define CLKID_CTS_ENCL_SEL 132 -#define NR_CLKS 91 +#define NR_CLKS 134 /* include the CLKIDs that have been made part of the DT binding */ #include -- GitLab From e80d8510ffef3a9d2b2ce15882f5fd004d1e1645 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 15 Sep 2020 14:45:53 +0200 Subject: [PATCH 0244/1894] clk: meson: axg: add MIPI DSI Host clock This adds the MIPI DSI Host clock, used to measure the signal timings (ENC VSYNC or DW-MIPI-DSI eDPI timings). Signed-off-by: Neil Armstrong Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20200915124553.8056-5-narmstrong@baylibre.com --- drivers/clk/meson/axg.c | 66 +++++++++++++++++++++++++++++++++++++++++ drivers/clk/meson/axg.h | 4 ++- 2 files changed, 69 insertions(+), 1 deletion(-) diff --git a/drivers/clk/meson/axg.c b/drivers/clk/meson/axg.c index a4e8949297cf4..089a81ce23856 100644 --- a/drivers/clk/meson/axg.c +++ b/drivers/clk/meson/axg.c @@ -1703,6 +1703,66 @@ static struct clk_regmap axg_cts_encl = { }, }; +/* MIPI DSI Host Clock */ + +static u32 mux_table_axg_vdin_meas[] = { 0, 1, 2, 3, 6, 7 }; +static const struct clk_parent_data axg_vdin_meas_parent_data[] = { + { .fw_name = "xtal", }, + { .hw = &axg_fclk_div4.hw }, + { .hw = &axg_fclk_div3.hw }, + { .hw = &axg_fclk_div5.hw }, + { .hw = &axg_fclk_div2.hw }, + { .hw = &axg_fclk_div7.hw }, +}; + +static struct clk_regmap axg_vdin_meas_sel = { + .data = &(struct clk_regmap_mux_data){ + .offset = HHI_VDIN_MEAS_CLK_CNTL, + .mask = 0x7, + .shift = 21, + .flags = CLK_MUX_ROUND_CLOSEST, + .table = mux_table_axg_vdin_meas, + }, + .hw.init = &(struct clk_init_data){ + .name = "vdin_meas_sel", + .ops = &clk_regmap_mux_ops, + .parent_data = axg_vdin_meas_parent_data, + .num_parents = ARRAY_SIZE(axg_vdin_meas_parent_data), + .flags = CLK_SET_RATE_PARENT, + }, +}; + +static struct clk_regmap axg_vdin_meas_div = { + .data = &(struct clk_regmap_div_data){ + .offset = HHI_VDIN_MEAS_CLK_CNTL, + .shift = 12, + .width = 7, + }, + .hw.init = &(struct clk_init_data){ + .name = "vdin_meas_div", + .ops = &clk_regmap_divider_ops, + .parent_hws = (const struct clk_hw *[]) { + &axg_vdin_meas_sel.hw }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + }, +}; + +static struct clk_regmap axg_vdin_meas = { + .data = &(struct clk_regmap_gate_data){ + .offset = HHI_VDIN_MEAS_CLK_CNTL, + .bit_idx = 20, + }, + .hw.init = &(struct clk_init_data) { + .name = "vdin_meas", + .ops = &clk_regmap_gate_ops, + .parent_hws = (const struct clk_hw *[]) { + &axg_vdin_meas_div.hw }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + }, +}; + static u32 mux_table_gen_clk[] = { 0, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, }; static const struct clk_parent_data gen_clk_parent_data[] = { @@ -1966,6 +2026,9 @@ static struct clk_hw_onecell_data axg_hw_onecell_data = { [CLKID_VCLK2_DIV12] = &axg_vclk2_div12.hw, [CLKID_CTS_ENCL_SEL] = &axg_cts_encl_sel.hw, [CLKID_CTS_ENCL] = &axg_cts_encl.hw, + [CLKID_VDIN_MEAS_SEL] = &axg_vdin_meas_sel.hw, + [CLKID_VDIN_MEAS_DIV] = &axg_vdin_meas_div.hw, + [CLKID_VDIN_MEAS] = &axg_vdin_meas.hw, [NR_CLKS] = NULL, }, .num = NR_CLKS, @@ -2094,6 +2157,9 @@ static struct clk_regmap *const axg_clk_regmaps[] = { &axg_vclk2_div12_en, &axg_cts_encl_sel, &axg_cts_encl, + &axg_vdin_meas_sel, + &axg_vdin_meas_div, + &axg_vdin_meas, }; static const struct meson_eeclkc_data axg_clkc_data = { diff --git a/drivers/clk/meson/axg.h b/drivers/clk/meson/axg.h index a8787b394a47e..481b307ea3cbb 100644 --- a/drivers/clk/meson/axg.h +++ b/drivers/clk/meson/axg.h @@ -158,8 +158,10 @@ #define CLKID_VCLK2_DIV6_EN 120 #define CLKID_VCLK2_DIV12_EN 121 #define CLKID_CTS_ENCL_SEL 132 +#define CLKID_VDIN_MEAS_SEL 134 +#define CLKID_VDIN_MEAS_DIV 135 -#define NR_CLKS 134 +#define NR_CLKS 137 /* include the CLKIDs that have been made part of the DT binding */ #include -- GitLab From bae69bfa3a586493469078ec4ca35499b754ba5c Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Wed, 18 Nov 2020 11:09:30 -0800 Subject: [PATCH 0245/1894] clk: meson: Kconfig: fix dependency for G12A When building only G12A, ensure that VID_PLL_DIV clock driver is selected, otherwise results in this build error: ERROR: modpost: "meson_vid_pll_div_ro_ops" [drivers/clk/meson/g12a.ko] undefined! Fixes: 085a4ea93d54 ("clk: meson: g12a: add peripheral clock controller") Signed-off-by: Kevin Hilman Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20201118190930.34352-1-khilman@baylibre.com --- drivers/clk/meson/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/meson/Kconfig b/drivers/clk/meson/Kconfig index 034da203e8e0e..9a8a548d839d8 100644 --- a/drivers/clk/meson/Kconfig +++ b/drivers/clk/meson/Kconfig @@ -110,6 +110,7 @@ config COMMON_CLK_G12A select COMMON_CLK_MESON_AO_CLKC select COMMON_CLK_MESON_EE_CLKC select COMMON_CLK_MESON_CPU_DYNDIV + select COMMON_CLK_MESON_VID_PLL_DIV select MFD_SYSCON help Support for the clock controller on Amlogic S905D2, S905X2 and S905Y2 -- GitLab From 20425f6319480e84f48261fc7c0e4ce61a6d333e Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Wed, 18 Nov 2020 11:14:05 -0800 Subject: [PATCH 0246/1894] clk: meson: enable building as modules Make it possible to build all clk drivers as modules, but default remains built-in. No functional changes. Signed-off-by: Kevin Hilman Signed-off-by: Jerome Brunet Acked-by: Stephen Boyd Link: https://lore.kernel.org/r/20201118191405.36798-1-khilman@baylibre.com --- drivers/clk/meson/Kconfig | 6 +++--- drivers/clk/meson/axg-aoclk.c | 5 ++++- drivers/clk/meson/axg.c | 5 ++++- drivers/clk/meson/g12a-aoclk.c | 5 ++++- drivers/clk/meson/g12a.c | 5 ++++- drivers/clk/meson/gxbb-aoclk.c | 5 ++++- drivers/clk/meson/gxbb.c | 5 ++++- drivers/clk/meson/meson-aoclk.c | 4 ++++ drivers/clk/meson/meson-eeclk.c | 3 +++ 9 files changed, 34 insertions(+), 9 deletions(-) diff --git a/drivers/clk/meson/Kconfig b/drivers/clk/meson/Kconfig index 9a8a548d839d8..fc002c155bc3e 100644 --- a/drivers/clk/meson/Kconfig +++ b/drivers/clk/meson/Kconfig @@ -58,7 +58,7 @@ config COMMON_CLK_MESON8B want peripherals and CPU frequency scaling to work. config COMMON_CLK_GXBB - bool "GXBB and GXL SoC clock controllers support" + tristate "GXBB and GXL SoC clock controllers support" depends on ARM64 default y select COMMON_CLK_MESON_REGMAP @@ -74,7 +74,7 @@ config COMMON_CLK_GXBB Say Y if you want peripherals and CPU frequency scaling to work. config COMMON_CLK_AXG - bool "AXG SoC clock controllers support" + tristate "AXG SoC clock controllers support" depends on ARM64 default y select COMMON_CLK_MESON_REGMAP @@ -100,7 +100,7 @@ config COMMON_CLK_AXG_AUDIO aka axg, Say Y if you want audio subsystem to work. config COMMON_CLK_G12A - bool "G12 and SM1 SoC clock controllers support" + tristate "G12 and SM1 SoC clock controllers support" depends on ARM64 default y select COMMON_CLK_MESON_REGMAP diff --git a/drivers/clk/meson/axg-aoclk.c b/drivers/clk/meson/axg-aoclk.c index b488b40c9d0e3..af6db437bcd82 100644 --- a/drivers/clk/meson/axg-aoclk.c +++ b/drivers/clk/meson/axg-aoclk.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "meson-aoclk.h" #include "axg-aoclk.h" @@ -326,6 +327,7 @@ static const struct of_device_id axg_aoclkc_match_table[] = { }, { } }; +MODULE_DEVICE_TABLE(of, axg_aoclkc_match_table); static struct platform_driver axg_aoclkc_driver = { .probe = meson_aoclkc_probe, @@ -335,4 +337,5 @@ static struct platform_driver axg_aoclkc_driver = { }, }; -builtin_platform_driver(axg_aoclkc_driver); +module_platform_driver(axg_aoclkc_driver); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/clk/meson/axg.c b/drivers/clk/meson/axg.c index 089a81ce23856..0e44695b8772d 100644 --- a/drivers/clk/meson/axg.c +++ b/drivers/clk/meson/axg.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "clk-regmap.h" #include "clk-pll.h" @@ -2173,6 +2174,7 @@ static const struct of_device_id clkc_match_table[] = { { .compatible = "amlogic,axg-clkc", .data = &axg_clkc_data }, {} }; +MODULE_DEVICE_TABLE(of, clkc_match_table); static struct platform_driver axg_driver = { .probe = meson_eeclkc_probe, @@ -2182,4 +2184,5 @@ static struct platform_driver axg_driver = { }, }; -builtin_platform_driver(axg_driver); +module_platform_driver(axg_driver); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/clk/meson/g12a-aoclk.c b/drivers/clk/meson/g12a-aoclk.c index 62499563e4f5a..b52990e574d2a 100644 --- a/drivers/clk/meson/g12a-aoclk.c +++ b/drivers/clk/meson/g12a-aoclk.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "meson-aoclk.h" #include "g12a-aoclk.h" @@ -461,6 +462,7 @@ static const struct of_device_id g12a_aoclkc_match_table[] = { }, { } }; +MODULE_DEVICE_TABLE(of, g12a_aoclkc_match_table); static struct platform_driver g12a_aoclkc_driver = { .probe = meson_aoclkc_probe, @@ -470,4 +472,5 @@ static struct platform_driver g12a_aoclkc_driver = { }, }; -builtin_platform_driver(g12a_aoclkc_driver); +module_platform_driver(g12a_aoclkc_driver); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/clk/meson/g12a.c b/drivers/clk/meson/g12a.c index 108e4491b1e25..3cb8196c8e29c 100644 --- a/drivers/clk/meson/g12a.c +++ b/drivers/clk/meson/g12a.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "clk-mpll.h" #include "clk-pll.h" @@ -5372,6 +5373,7 @@ static const struct of_device_id clkc_match_table[] = { }, {} }; +MODULE_DEVICE_TABLE(of, clkc_match_table); static struct platform_driver g12a_driver = { .probe = meson_g12a_probe, @@ -5381,4 +5383,5 @@ static struct platform_driver g12a_driver = { }, }; -builtin_platform_driver(g12a_driver); +module_platform_driver(g12a_driver); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/clk/meson/gxbb-aoclk.c b/drivers/clk/meson/gxbb-aoclk.c index e940861a396b5..fce95cf89836f 100644 --- a/drivers/clk/meson/gxbb-aoclk.c +++ b/drivers/clk/meson/gxbb-aoclk.c @@ -5,6 +5,7 @@ */ #include #include +#include #include "meson-aoclk.h" #include "gxbb-aoclk.h" @@ -287,6 +288,7 @@ static const struct of_device_id gxbb_aoclkc_match_table[] = { }, { } }; +MODULE_DEVICE_TABLE(of, gxbb_aoclkc_match_table); static struct platform_driver gxbb_aoclkc_driver = { .probe = meson_aoclkc_probe, @@ -295,4 +297,5 @@ static struct platform_driver gxbb_aoclkc_driver = { .of_match_table = gxbb_aoclkc_match_table, }, }; -builtin_platform_driver(gxbb_aoclkc_driver); +module_platform_driver(gxbb_aoclkc_driver); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/clk/meson/gxbb.c b/drivers/clk/meson/gxbb.c index 0a68af6eec3dd..d6eed760327d0 100644 --- a/drivers/clk/meson/gxbb.c +++ b/drivers/clk/meson/gxbb.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "gxbb.h" #include "clk-regmap.h" @@ -3519,6 +3520,7 @@ static const struct of_device_id clkc_match_table[] = { { .compatible = "amlogic,gxl-clkc", .data = &gxl_clkc_data }, {}, }; +MODULE_DEVICE_TABLE(of, clkc_match_table); static struct platform_driver gxbb_driver = { .probe = meson_eeclkc_probe, @@ -3528,4 +3530,5 @@ static struct platform_driver gxbb_driver = { }, }; -builtin_platform_driver(gxbb_driver); +module_platform_driver(gxbb_driver); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/clk/meson/meson-aoclk.c b/drivers/clk/meson/meson-aoclk.c index 3a6d84cd66012..27cd2c1f3f612 100644 --- a/drivers/clk/meson/meson-aoclk.c +++ b/drivers/clk/meson/meson-aoclk.c @@ -14,6 +14,8 @@ #include #include #include +#include + #include #include "meson-aoclk.h" @@ -84,3 +86,5 @@ int meson_aoclkc_probe(struct platform_device *pdev) return devm_of_clk_add_hw_provider(dev, of_clk_hw_onecell_get, (void *) data->hw_data); } +EXPORT_SYMBOL_GPL(meson_aoclkc_probe); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/clk/meson/meson-eeclk.c b/drivers/clk/meson/meson-eeclk.c index a7cb1e7aedc46..8d5a5dab955a8 100644 --- a/drivers/clk/meson/meson-eeclk.c +++ b/drivers/clk/meson/meson-eeclk.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "clk-regmap.h" #include "meson-eeclk.h" @@ -54,3 +55,5 @@ int meson_eeclkc_probe(struct platform_device *pdev) return devm_of_clk_add_hw_provider(dev, of_clk_hw_onecell_get, data->hw_onecell_data); } +EXPORT_SYMBOL_GPL(meson_eeclkc_probe); +MODULE_LICENSE("GPL v2"); -- GitLab From e44cdff05145b84293e3f424daa17e4f3ce0109c Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 19 Nov 2020 17:45:09 +0100 Subject: [PATCH 0247/1894] clk: samsung: Allow compile testing of Exynos, S3C64xx and S5Pv210 So far all Exynos, S3C64xx and S5Pv210 clock units were selected by respective SOC/ARCH Kconfig option. On a kernel built for selected SoCs, this allowed to build only limited set of matching clock drivers. However compile testing was not possible in such case as Makefile object depends on SOC/ARCH option. Add separate Kconfig options for each of them to be able to compile test. Link: https://lore.kernel.org/r/20201119164509.754851-1-krzk@kernel.org Signed-off-by: Krzysztof Kozlowski Acked-by: Chanwoo Choi Signed-off-by: Sylwester Nawrocki --- drivers/clk/samsung/Kconfig | 67 ++++++++++++++++++++++++++++++++++-- drivers/clk/samsung/Makefile | 22 ++++++------ include/linux/clk/samsung.h | 4 +-- 3 files changed, 78 insertions(+), 15 deletions(-) diff --git a/drivers/clk/samsung/Kconfig b/drivers/clk/samsung/Kconfig index 57d4b3f20417f..9323fcfac6cc8 100644 --- a/drivers/clk/samsung/Kconfig +++ b/drivers/clk/samsung/Kconfig @@ -2,10 +2,73 @@ # Recent Exynos platforms should just select COMMON_CLK_SAMSUNG: config COMMON_CLK_SAMSUNG bool "Samsung Exynos clock controller support" if COMPILE_TEST - # Clocks on ARM64 SoCs (e.g. Exynos5433, Exynos7) are chosen by - # EXYNOS_ARM64_COMMON_CLK to avoid building them on ARMv7: + select S3C64XX_COMMON_CLK if ARM && ARCH_S3C64XX + select S5PV210_COMMON_CLK if ARM && ARCH_S5PV210 + select EXYNOS_3250_COMMON_CLK if ARM && SOC_EXYNOS3250 + select EXYNOS_4_COMMON_CLK if ARM && ARCH_EXYNOS4 + select EXYNOS_5250_COMMON_CLK if ARM && SOC_EXYNOS5250 + select EXYNOS_5260_COMMON_CLK if ARM && SOC_EXYNOS5260 + select EXYNOS_5410_COMMON_CLK if ARM && SOC_EXYNOS5410 + select EXYNOS_5420_COMMON_CLK if ARM && SOC_EXYNOS5420 select EXYNOS_ARM64_COMMON_CLK if ARM64 && ARCH_EXYNOS +config S3C64XX_COMMON_CLK + bool "Samsung S3C64xx clock controller support" if COMPILE_TEST + depends on COMMON_CLK_SAMSUNG + help + Support for the clock controller present on the Samsung S3C64xx SoCs. + Choose Y here only if you build for this SoC. + +config S5PV210_COMMON_CLK + bool "Samsung S5Pv210 clock controller support" if COMPILE_TEST + depends on COMMON_CLK_SAMSUNG + help + Support for the clock controller present on the Samsung S5Pv210 SoCs. + Choose Y here only if you build for this SoC. + +config EXYNOS_3250_COMMON_CLK + bool "Samsung Exynos3250 clock controller support" if COMPILE_TEST + depends on COMMON_CLK_SAMSUNG + help + Support for the clock controller present on the Samsung + Exynos3250 SoCs. Choose Y here only if you build for this SoC. + +config EXYNOS_4_COMMON_CLK + bool "Samsung Exynos4 clock controller support" if COMPILE_TEST + depends on COMMON_CLK_SAMSUNG + help + Support for the clock controller present on the Samsung + Exynos4212 and Exynos4412 SoCs. Choose Y here only if you build for + this SoC. + +config EXYNOS_5250_COMMON_CLK + bool "Samsung Exynos5250 clock controller support" if COMPILE_TEST + depends on COMMON_CLK_SAMSUNG + help + Support for the clock controller present on the Samsung + Exynos5250 SoCs. Choose Y here only if you build for this SoC. + +config EXYNOS_5260_COMMON_CLK + bool "Samsung Exynos5260 clock controller support" if COMPILE_TEST + depends on COMMON_CLK_SAMSUNG + help + Support for the clock controller present on the Samsung + Exynos5260 SoCs. Choose Y here only if you build for this SoC. + +config EXYNOS_5410_COMMON_CLK + bool "Samsung Exynos5410 clock controller support" if COMPILE_TEST + depends on COMMON_CLK_SAMSUNG + help + Support for the clock controller present on the Samsung + Exynos5410 SoCs. Choose Y here only if you build for this SoC. + +config EXYNOS_5420_COMMON_CLK + bool "Samsung Exynos5420 clock controller support" if COMPILE_TEST + depends on COMMON_CLK_SAMSUNG + help + Support for the clock controller present on the Samsung + Exynos5420 SoCs. Choose Y here only if you build for this SoC. + config EXYNOS_ARM64_COMMON_CLK bool "Samsung Exynos ARMv8-family clock controller support" if COMPILE_TEST depends on COMMON_CLK_SAMSUNG diff --git a/drivers/clk/samsung/Makefile b/drivers/clk/samsung/Makefile index 1a4e6b7879783..bb1433f11c880 100644 --- a/drivers/clk/samsung/Makefile +++ b/drivers/clk/samsung/Makefile @@ -4,15 +4,15 @@ # obj-$(CONFIG_COMMON_CLK) += clk.o clk-pll.o clk-cpu.o -obj-$(CONFIG_SOC_EXYNOS3250) += clk-exynos3250.o -obj-$(CONFIG_ARCH_EXYNOS4) += clk-exynos4.o -obj-$(CONFIG_ARCH_EXYNOS4) += clk-exynos4412-isp.o -obj-$(CONFIG_SOC_EXYNOS5250) += clk-exynos5250.o -obj-$(CONFIG_SOC_EXYNOS5250) += clk-exynos5-subcmu.o -obj-$(CONFIG_SOC_EXYNOS5260) += clk-exynos5260.o -obj-$(CONFIG_SOC_EXYNOS5410) += clk-exynos5410.o -obj-$(CONFIG_SOC_EXYNOS5420) += clk-exynos5420.o -obj-$(CONFIG_SOC_EXYNOS5420) += clk-exynos5-subcmu.o +obj-$(CONFIG_EXYNOS_3250_COMMON_CLK) += clk-exynos3250.o +obj-$(CONFIG_EXYNOS_4_COMMON_CLK) += clk-exynos4.o +obj-$(CONFIG_EXYNOS_4_COMMON_CLK) += clk-exynos4412-isp.o +obj-$(CONFIG_EXYNOS_5250_COMMON_CLK) += clk-exynos5250.o +obj-$(CONFIG_EXYNOS_5250_COMMON_CLK) += clk-exynos5-subcmu.o +obj-$(CONFIG_EXYNOS_5260_COMMON_CLK) += clk-exynos5260.o +obj-$(CONFIG_EXYNOS_5410_COMMON_CLK) += clk-exynos5410.o +obj-$(CONFIG_EXYNOS_5420_COMMON_CLK) += clk-exynos5420.o +obj-$(CONFIG_EXYNOS_5420_COMMON_CLK) += clk-exynos5-subcmu.o obj-$(CONFIG_EXYNOS_ARM64_COMMON_CLK) += clk-exynos5433.o obj-$(CONFIG_EXYNOS_AUDSS_CLK_CON) += clk-exynos-audss.o obj-$(CONFIG_ARCH_EXYNOS) += clk-exynos-clkout.o @@ -21,5 +21,5 @@ obj-$(CONFIG_S3C2410_COMMON_CLK)+= clk-s3c2410.o obj-$(CONFIG_S3C2410_COMMON_DCLK)+= clk-s3c2410-dclk.o obj-$(CONFIG_S3C2412_COMMON_CLK)+= clk-s3c2412.o obj-$(CONFIG_S3C2443_COMMON_CLK)+= clk-s3c2443.o -obj-$(CONFIG_ARCH_S3C64XX) += clk-s3c64xx.o -obj-$(CONFIG_ARCH_S5PV210) += clk-s5pv210.o clk-s5pv210-audss.o +obj-$(CONFIG_S3C64XX_COMMON_CLK) += clk-s3c64xx.o +obj-$(CONFIG_S5PV210_COMMON_CLK) += clk-s5pv210.o clk-s5pv210-audss.o diff --git a/include/linux/clk/samsung.h b/include/linux/clk/samsung.h index 79097e365f7f2..38b7740017123 100644 --- a/include/linux/clk/samsung.h +++ b/include/linux/clk/samsung.h @@ -10,7 +10,7 @@ struct device_node; -#ifdef CONFIG_ARCH_S3C64XX +#ifdef CONFIG_S3C64XX_COMMON_CLK void s3c64xx_clk_init(struct device_node *np, unsigned long xtal_f, unsigned long xusbxti_f, bool s3c6400, void __iomem *base); @@ -19,7 +19,7 @@ static inline void s3c64xx_clk_init(struct device_node *np, unsigned long xtal_f, unsigned long xusbxti_f, bool s3c6400, void __iomem *base) { } -#endif /* CONFIG_ARCH_S3C64XX */ +#endif /* CONFIG_S3C64XX_COMMON_CLK */ #ifdef CONFIG_S3C2410_COMMON_CLK void s3c2410_common_clk_init(struct device_node *np, unsigned long xti_f, -- GitLab From 44a9e78f9242872c889f176782777fa2ed535650 Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Fri, 20 Nov 2020 16:57:31 +0100 Subject: [PATCH 0248/1894] clk: samsung: Prevent potential endless loop in the PLL ops The PLL status polling loops in the set_rate callbacks of some PLLs have no timeout detection and may become endless loops when something goes wrong with the PLL. For some PLLs there is already the ktime API based timeout detection, but it will not work in all conditions when .set_rate gets called. In particular, before the clocksource is initialized or when the timekeeping is suspended. This patch adds a common helper with the PLL status bit polling and timeout detection. For conditions where the timekeeping API should not be used a simple readl_relaxed/cpu_relax() busy loop is added with the iterations limit derived from measurements of readl_relaxed() execution time for various PLL types and Exynos SoCs variants. Actual PLL lock time depends on the P divider value, the VCO frequency and a constant PLL type specific LOCK_FACTOR and can be calculated as lock_time = Pdiv * LOCK_FACTOR / VCO_freq For the ktime API use cases a common timeout value of 20 ms is applied for all the PLLs with an assumption that maximum possible value of Pdiv is 64, maximum possible LOCK_FACTOR value is 3000 and minimum VCO frequency is 24 MHz. Link: https://lore.kernel.org/r/20201120155731.26898-1-s.nawrocki@samsung.com Reviewed-by: Krzysztof Kozlowski Signed-off-by: Sylwester Nawrocki --- drivers/clk/samsung/clk-pll.c | 147 ++++++++++++++++------------------ 1 file changed, 71 insertions(+), 76 deletions(-) diff --git a/drivers/clk/samsung/clk-pll.c b/drivers/clk/samsung/clk-pll.c index ac70ad785d8ed..5873a9354b507 100644 --- a/drivers/clk/samsung/clk-pll.c +++ b/drivers/clk/samsung/clk-pll.c @@ -8,14 +8,17 @@ #include #include +#include #include #include +#include #include #include #include "clk.h" #include "clk-pll.h" -#define PLL_TIMEOUT_MS 10 +#define PLL_TIMEOUT_US 20000U +#define PLL_TIMEOUT_LOOPS 1000000U struct samsung_clk_pll { struct clk_hw hw; @@ -63,6 +66,53 @@ static long samsung_pll_round_rate(struct clk_hw *hw, return rate_table[i - 1].rate; } +static bool pll_early_timeout = true; + +static int __init samsung_pll_disable_early_timeout(void) +{ + pll_early_timeout = false; + return 0; +} +arch_initcall(samsung_pll_disable_early_timeout); + +/* Wait until the PLL is locked */ +static int samsung_pll_lock_wait(struct samsung_clk_pll *pll, + unsigned int reg_mask) +{ + int i, ret; + u32 val; + + /* + * This function might be called when the timekeeping API can't be used + * to detect timeouts. One situation is when the clocksource is not yet + * initialized, another when the timekeeping is suspended. udelay() also + * cannot be used when the clocksource is not running on arm64, since + * the current timer is used as cycle counter. So a simple busy loop + * is used here in that special cases. The limit of iterations has been + * derived from experimental measurements of various PLLs on multiple + * Exynos SoC variants. Single register read time was usually in range + * 0.4...1.5 us, never less than 0.4 us. + */ + if (pll_early_timeout || timekeeping_suspended) { + i = PLL_TIMEOUT_LOOPS; + while (i-- > 0) { + if (readl_relaxed(pll->con_reg) & reg_mask) + return 0; + + cpu_relax(); + } + ret = -ETIMEDOUT; + } else { + ret = readl_relaxed_poll_timeout_atomic(pll->con_reg, val, + val & reg_mask, 0, PLL_TIMEOUT_US); + } + + if (ret < 0) + pr_err("Could not lock PLL %s\n", clk_hw_get_name(&pll->hw)); + + return ret; +} + static int samsung_pll3xxx_enable(struct clk_hw *hw) { struct samsung_clk_pll *pll = to_clk_pll(hw); @@ -72,13 +122,7 @@ static int samsung_pll3xxx_enable(struct clk_hw *hw) tmp |= BIT(pll->enable_offs); writel_relaxed(tmp, pll->con_reg); - /* wait lock time */ - do { - cpu_relax(); - tmp = readl_relaxed(pll->con_reg); - } while (!(tmp & BIT(pll->lock_offs))); - - return 0; + return samsung_pll_lock_wait(pll, BIT(pll->lock_offs)); } static void samsung_pll3xxx_disable(struct clk_hw *hw) @@ -240,13 +284,10 @@ static int samsung_pll35xx_set_rate(struct clk_hw *hw, unsigned long drate, (rate->sdiv << PLL35XX_SDIV_SHIFT); writel_relaxed(tmp, pll->con_reg); - /* Wait until the PLL is locked if it is enabled. */ - if (tmp & BIT(pll->enable_offs)) { - do { - cpu_relax(); - tmp = readl_relaxed(pll->con_reg); - } while (!(tmp & BIT(pll->lock_offs))); - } + /* Wait for PLL lock if the PLL is enabled */ + if (tmp & BIT(pll->enable_offs)) + return samsung_pll_lock_wait(pll, BIT(pll->lock_offs)); + return 0; } @@ -318,7 +359,7 @@ static int samsung_pll36xx_set_rate(struct clk_hw *hw, unsigned long drate, unsigned long parent_rate) { struct samsung_clk_pll *pll = to_clk_pll(hw); - u32 tmp, pll_con0, pll_con1; + u32 pll_con0, pll_con1; const struct samsung_pll_rate_table *rate; rate = samsung_get_pll_settings(pll, drate); @@ -356,13 +397,8 @@ static int samsung_pll36xx_set_rate(struct clk_hw *hw, unsigned long drate, pll_con1 |= rate->kdiv << PLL36XX_KDIV_SHIFT; writel_relaxed(pll_con1, pll->con_reg + 4); - /* wait_lock_time */ - if (pll_con0 & BIT(pll->enable_offs)) { - do { - cpu_relax(); - tmp = readl_relaxed(pll->con_reg); - } while (!(tmp & BIT(pll->lock_offs))); - } + if (pll_con0 & BIT(pll->enable_offs)) + return samsung_pll_lock_wait(pll, BIT(pll->lock_offs)); return 0; } @@ -437,7 +473,6 @@ static int samsung_pll45xx_set_rate(struct clk_hw *hw, unsigned long drate, struct samsung_clk_pll *pll = to_clk_pll(hw); const struct samsung_pll_rate_table *rate; u32 con0, con1; - ktime_t start; /* Get required rate settings from table */ rate = samsung_get_pll_settings(pll, drate); @@ -488,21 +523,8 @@ static int samsung_pll45xx_set_rate(struct clk_hw *hw, unsigned long drate, writel_relaxed(con1, pll->con_reg + 0x4); writel_relaxed(con0, pll->con_reg); - /* Wait for locking. */ - start = ktime_get(); - while (!(readl_relaxed(pll->con_reg) & PLL45XX_LOCKED)) { - ktime_t delta = ktime_sub(ktime_get(), start); - - if (ktime_to_ms(delta) > PLL_TIMEOUT_MS) { - pr_err("%s: could not lock PLL %s\n", - __func__, clk_hw_get_name(hw)); - return -EFAULT; - } - - cpu_relax(); - } - - return 0; + /* Wait for PLL lock */ + return samsung_pll_lock_wait(pll, PLL45XX_LOCKED); } static const struct clk_ops samsung_pll45xx_clk_ops = { @@ -588,7 +610,6 @@ static int samsung_pll46xx_set_rate(struct clk_hw *hw, unsigned long drate, struct samsung_clk_pll *pll = to_clk_pll(hw); const struct samsung_pll_rate_table *rate; u32 con0, con1, lock; - ktime_t start; /* Get required rate settings from table */ rate = samsung_get_pll_settings(pll, drate); @@ -647,21 +668,8 @@ static int samsung_pll46xx_set_rate(struct clk_hw *hw, unsigned long drate, writel_relaxed(con0, pll->con_reg); writel_relaxed(con1, pll->con_reg + 0x4); - /* Wait for locking. */ - start = ktime_get(); - while (!(readl_relaxed(pll->con_reg) & PLL46XX_LOCKED)) { - ktime_t delta = ktime_sub(ktime_get(), start); - - if (ktime_to_ms(delta) > PLL_TIMEOUT_MS) { - pr_err("%s: could not lock PLL %s\n", - __func__, clk_hw_get_name(hw)); - return -EFAULT; - } - - cpu_relax(); - } - - return 0; + /* Wait for PLL lock */ + return samsung_pll_lock_wait(pll, PLL46XX_LOCKED); } static const struct clk_ops samsung_pll46xx_clk_ops = { @@ -1035,14 +1043,9 @@ static int samsung_pll2550xx_set_rate(struct clk_hw *hw, unsigned long drate, (rate->sdiv << PLL2550XX_S_SHIFT); writel_relaxed(tmp, pll->con_reg); - /* wait_lock_time */ - do { - cpu_relax(); - tmp = readl_relaxed(pll->con_reg); - } while (!(tmp & (PLL2550XX_LOCK_STAT_MASK - << PLL2550XX_LOCK_STAT_SHIFT))); - - return 0; + /* Wait for PLL lock */ + return samsung_pll_lock_wait(pll, + PLL2550XX_LOCK_STAT_MASK << PLL2550XX_LOCK_STAT_SHIFT); } static const struct clk_ops samsung_pll2550xx_clk_ops = { @@ -1132,13 +1135,9 @@ static int samsung_pll2650x_set_rate(struct clk_hw *hw, unsigned long drate, con1 |= ((rate->kdiv & PLL2650X_K_MASK) << PLL2650X_K_SHIFT); writel_relaxed(con1, pll->con_reg + 4); - do { - cpu_relax(); - con0 = readl_relaxed(pll->con_reg); - } while (!(con0 & (PLL2650X_LOCK_STAT_MASK - << PLL2650X_LOCK_STAT_SHIFT))); - - return 0; + /* Wait for PLL lock */ + return samsung_pll_lock_wait(pll, + PLL2650X_LOCK_STAT_MASK << PLL2650X_LOCK_STAT_SHIFT); } static const struct clk_ops samsung_pll2650x_clk_ops = { @@ -1196,7 +1195,7 @@ static int samsung_pll2650xx_set_rate(struct clk_hw *hw, unsigned long drate, unsigned long parent_rate) { struct samsung_clk_pll *pll = to_clk_pll(hw); - u32 tmp, pll_con0, pll_con2; + u32 pll_con0, pll_con2; const struct samsung_pll_rate_table *rate; rate = samsung_get_pll_settings(pll, drate); @@ -1229,11 +1228,7 @@ static int samsung_pll2650xx_set_rate(struct clk_hw *hw, unsigned long drate, writel_relaxed(pll_con0, pll->con_reg); writel_relaxed(pll_con2, pll->con_reg + 8); - do { - tmp = readl_relaxed(pll->con_reg); - } while (!(tmp & (0x1 << PLL2650XX_PLL_LOCKTIME_SHIFT))); - - return 0; + return samsung_pll_lock_wait(pll, 0x1 << PLL2650XX_PLL_LOCKTIME_SHIFT); } static const struct clk_ops samsung_pll2650xx_clk_ops = { -- GitLab From 9a7e3d7f056831a6193d6d737fb7a26dfdceb04b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 23 Nov 2020 17:43:53 -0800 Subject: [PATCH 0249/1894] ACPI: NFIT: Fix input validation of bus-family Dan reports that smatch thinks userspace can craft an out-of-bound bus family number. However, nd_cmd_clear_to_send() blocks all non-zero values of bus-family since only the kernel can initiate these commands. However, in the speculation path, family is a user controlled array index value so mask it for speculation safety. Also, since the nd_cmd_clear_to_send() safety is non-obvious and possibly may change in the future include input validation as if userspace could get past the nd_cmd_clear_to_send() gatekeeper. Link: http://lore.kernel.org/r/20201111113000.GA1237157@mwanda Reported-by: Dan Carpenter Fixes: 6450ddbd5d8e ("ACPI: NFIT: Define runtime firmware activation commands") Cc: Signed-off-by: Dan Williams --- drivers/acpi/nfit/core.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index cda7b6c525049..b11b08a606843 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -479,8 +480,11 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, cmd_mask = nd_desc->cmd_mask; if (cmd == ND_CMD_CALL && call_pkg->nd_family) { family = call_pkg->nd_family; - if (!test_bit(family, &nd_desc->bus_family_mask)) + if (family > NVDIMM_BUS_FAMILY_MAX || + !test_bit(family, &nd_desc->bus_family_mask)) return -EINVAL; + family = array_index_nospec(family, + NVDIMM_BUS_FAMILY_MAX + 1); dsm_mask = acpi_desc->family_dsm_mask[family]; guid = to_nfit_bus_uuid(family); } else { -- GitLab From 8d5d3c7a5bdd32044f595575f1aa16cd3bdd93a8 Mon Sep 17 00:00:00 2001 From: Thara Gopinath Date: Thu, 19 Nov 2020 10:52:28 -0500 Subject: [PATCH 0250/1894] dt-bindings: clock: Add entry for crypto engine RPMH clock resource Add clock id forc CE clock resource which is required to bring up the crypto engine on sdm845. Reviewed-by: Bjorn Andersson Signed-off-by: Thara Gopinath Link: https://lore.kernel.org/r/20201119155233.3974286-2-thara.gopinath@linaro.org Signed-off-by: Stephen Boyd --- include/dt-bindings/clock/qcom,rpmh.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/dt-bindings/clock/qcom,rpmh.h b/include/dt-bindings/clock/qcom,rpmh.h index 2e6c54e65455c..30111c8f7fe9b 100644 --- a/include/dt-bindings/clock/qcom,rpmh.h +++ b/include/dt-bindings/clock/qcom,rpmh.h @@ -21,5 +21,6 @@ #define RPMH_IPA_CLK 12 #define RPMH_LN_BB_CLK1 13 #define RPMH_LN_BB_CLK1_A 14 +#define RPMH_CE_CLK 15 #endif -- GitLab From dba6bc51975b54b0778678d581df5b423a5a0d81 Mon Sep 17 00:00:00 2001 From: Thara Gopinath Date: Thu, 19 Nov 2020 10:52:29 -0500 Subject: [PATCH 0251/1894] clk: qcom: rpmh: Add CE clock on sdm845. Qualcomm CE clock resource that is managed by BCM is required by crypto driver to access the core clock. Reviewed-by: Bjorn Andersson Signed-off-by: Thara Gopinath Link: https://lore.kernel.org/r/20201119155233.3974286-3-thara.gopinath@linaro.org Signed-off-by: Stephen Boyd --- drivers/clk/qcom/clk-rpmh.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/qcom/clk-rpmh.c b/drivers/clk/qcom/clk-rpmh.c index e2c669b08affc..7e2a4a9b9bf6b 100644 --- a/drivers/clk/qcom/clk-rpmh.c +++ b/drivers/clk/qcom/clk-rpmh.c @@ -349,6 +349,7 @@ DEFINE_CLK_RPMH_VRM(sdm845, rf_clk2, rf_clk2_ao, "rfclka2", 1); DEFINE_CLK_RPMH_VRM(sdm845, rf_clk3, rf_clk3_ao, "rfclka3", 1); DEFINE_CLK_RPMH_VRM(sm8150, rf_clk3, rf_clk3_ao, "rfclka3", 1); DEFINE_CLK_RPMH_BCM(sdm845, ipa, "IP0"); +DEFINE_CLK_RPMH_BCM(sdm845, ce, "CE0"); static struct clk_hw *sdm845_rpmh_clocks[] = { [RPMH_CXO_CLK] = &sdm845_bi_tcxo.hw, @@ -364,6 +365,7 @@ static struct clk_hw *sdm845_rpmh_clocks[] = { [RPMH_RF_CLK3] = &sdm845_rf_clk3.hw, [RPMH_RF_CLK3_A] = &sdm845_rf_clk3_ao.hw, [RPMH_IPA_CLK] = &sdm845_ipa.hw, + [RPMH_CE_CLK] = &sdm845_ce.hw, }; static const struct clk_rpmh_desc clk_rpmh_sdm845 = { -- GitLab From 01341fbd0d8d4e717fc1231cdffe00343088ce0b Mon Sep 17 00:00:00 2001 From: Yunfeng Ye Date: Thu, 19 Nov 2020 14:21:25 +0800 Subject: [PATCH 0252/1894] workqueue: Kick a worker based on the actual activation of delayed works In realtime scenario, We do not want to have interference on the isolated cpu cores. but when invoking alloc_workqueue() for percpu wq on the housekeeping cpu, it kick a kworker on the isolated cpu. alloc_workqueue pwq_adjust_max_active wake_up_worker The comment in pwq_adjust_max_active() said: "Need to kick a worker after thawed or an unbound wq's max_active is bumped" So it is unnecessary to kick a kworker for percpu's wq when invoking alloc_workqueue(). this patch only kick a worker based on the actual activation of delayed works. Signed-off-by: Yunfeng Ye Reviewed-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 437935e7a1991..0695c7895c892 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3728,17 +3728,24 @@ static void pwq_adjust_max_active(struct pool_workqueue *pwq) * is updated and visible. */ if (!freezable || !workqueue_freezing) { + bool kick = false; + pwq->max_active = wq->saved_max_active; while (!list_empty(&pwq->delayed_works) && - pwq->nr_active < pwq->max_active) + pwq->nr_active < pwq->max_active) { pwq_activate_first_delayed(pwq); + kick = true; + } /* * Need to kick a worker after thawed or an unbound wq's - * max_active is bumped. It's a slow path. Do it always. + * max_active is bumped. In realtime scenarios, always kicking a + * worker will cause interference on the isolated cpu cores, so + * let's kick iff work items were activated. */ - wake_up_worker(pwq->pool); + if (kick) + wake_up_worker(pwq->pool); } else { pwq->max_active = 0; } -- GitLab From 58315c96651152b9f438e5e56c910994234e2c7a Mon Sep 17 00:00:00 2001 From: Bhaskar Chowdhury Date: Mon, 9 Nov 2020 16:01:11 +0530 Subject: [PATCH 0253/1894] kernel: cgroup: Mundane spelling fixes throughout the file Few spelling fixes throughout the file. Signed-off-by: Bhaskar Chowdhury Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index e41c21819ba08..690f477d537cc 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -244,7 +244,7 @@ bool cgroup_ssid_enabled(int ssid) * * The default hierarchy is the v2 interface of cgroup and this function * can be used to test whether a cgroup is on the default hierarchy for - * cases where a subsystem should behave differnetly depending on the + * cases where a subsystem should behave differently depending on the * interface version. * * List of changed behaviors: @@ -262,7 +262,7 @@ bool cgroup_ssid_enabled(int ssid) * "cgroup.procs" instead. * * - "cgroup.procs" is not sorted. pids will be unique unless they got - * recycled inbetween reads. + * recycled in-between reads. * * - "release_agent" and "notify_on_release" are removed. Replacement * notification mechanism will be implemented. @@ -345,7 +345,7 @@ static bool cgroup_is_mixable(struct cgroup *cgrp) return !cgroup_parent(cgrp); } -/* can @cgrp become a thread root? should always be true for a thread root */ +/* can @cgrp become a thread root? Should always be true for a thread root */ static bool cgroup_can_be_thread_root(struct cgroup *cgrp) { /* mixables don't care */ @@ -530,7 +530,7 @@ static struct cgroup_subsys_state *cgroup_e_css_by_mask(struct cgroup *cgrp, * the root css is returned, so this function always returns a valid css. * * The returned css is not guaranteed to be online, and therefore it is the - * callers responsiblity to tryget a reference for it. + * callers responsibility to try get a reference for it. */ struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp, struct cgroup_subsys *ss) @@ -702,7 +702,7 @@ EXPORT_SYMBOL_GPL(of_css); ; \ else -/* walk live descendants in preorder */ +/* walk live descendants in pre order */ #define cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) \ css_for_each_descendant_pre((d_css), cgroup_css((cgrp), NULL)) \ if (({ lockdep_assert_held(&cgroup_mutex); \ @@ -936,7 +936,7 @@ void put_css_set_locked(struct css_set *cset) WARN_ON_ONCE(!list_empty(&cset->threaded_csets)); - /* This css_set is dead. unlink it and release cgroup and css refs */ + /* This css_set is dead. Unlink it and release cgroup and css refs */ for_each_subsys(ss, ssid) { list_del(&cset->e_cset_node[ssid]); css_put(cset->subsys[ssid]); @@ -1061,7 +1061,7 @@ static struct css_set *find_existing_css_set(struct css_set *old_cset, /* * Build the set of subsystem state objects that we want to see in the - * new css_set. while subsystems can change globally, the entries here + * new css_set. While subsystems can change globally, the entries here * won't change, so no need for locking. */ for_each_subsys(ss, i) { @@ -1151,7 +1151,7 @@ static void link_css_set(struct list_head *tmp_links, struct css_set *cset, /* * Always add links to the tail of the lists so that the lists are - * in choronological order. + * in chronological order. */ list_move_tail(&link->cset_link, &cgrp->cset_links); list_add_tail(&link->cgrp_link, &cset->cgrp_links); @@ -4137,7 +4137,7 @@ struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos, * implies that if we observe !CSS_RELEASED on @pos in this RCU * critical section, the one pointed to by its next pointer is * guaranteed to not have finished its RCU grace period even if we - * have dropped rcu_read_lock() inbetween iterations. + * have dropped rcu_read_lock() in-between iterations. * * If @pos has CSS_RELEASED set, its next pointer can't be * dereferenced; however, as each css is given a monotonically @@ -4385,7 +4385,7 @@ static struct css_set *css_task_iter_next_css_set(struct css_task_iter *it) } /** - * css_task_iter_advance_css_set - advance a task itererator to the next css_set + * css_task_iter_advance_css_set - advance a task iterator to the next css_set * @it: the iterator to advance * * Advance @it to the next css_set to walk. @@ -6320,7 +6320,7 @@ struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss) * * Find the cgroup at @path on the default hierarchy, increment its * reference count and return it. Returns pointer to the found cgroup on - * success, ERR_PTR(-ENOENT) if @path doens't exist and ERR_PTR(-ENOTDIR) + * success, ERR_PTR(-ENOENT) if @path doesn't exist and ERR_PTR(-ENOTDIR) * if @path points to a non-directory. */ struct cgroup *cgroup_get_from_path(const char *path) -- GitLab From 5a7b5f32c5aa628841502d19a813c633ff6ecbe4 Mon Sep 17 00:00:00 2001 From: Hui Su Date: Fri, 6 Nov 2020 22:47:40 +0800 Subject: [PATCH 0254/1894] cgroup/cgroup.c: replace 'of->kn->priv' with of_cft() we have supplied the inline function: of_cft() in cgroup.h. So replace the direct use 'of->kn->priv' with inline func of_cft(), which is more readable. Signed-off-by: Hui Su Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 690f477d537cc..385f80cf7d0c4 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -3657,7 +3657,7 @@ static ssize_t cgroup_freeze_write(struct kernfs_open_file *of, static int cgroup_file_open(struct kernfs_open_file *of) { - struct cftype *cft = of->kn->priv; + struct cftype *cft = of_cft(of); if (cft->open) return cft->open(of); @@ -3666,7 +3666,7 @@ static int cgroup_file_open(struct kernfs_open_file *of) static void cgroup_file_release(struct kernfs_open_file *of) { - struct cftype *cft = of->kn->priv; + struct cftype *cft = of_cft(of); if (cft->release) cft->release(of); @@ -3677,7 +3677,7 @@ static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf, { struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; struct cgroup *cgrp = of->kn->parent->priv; - struct cftype *cft = of->kn->priv; + struct cftype *cft = of_cft(of); struct cgroup_subsys_state *css; int ret; @@ -3727,7 +3727,7 @@ static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf, static __poll_t cgroup_file_poll(struct kernfs_open_file *of, poll_table *pt) { - struct cftype *cft = of->kn->priv; + struct cftype *cft = of_cft(of); if (cft->poll) return cft->poll(of, pt); -- GitLab From 75eeaddd57f4a0ac89110547221df8f3757d5a6f Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 19 Nov 2020 23:24:26 +0800 Subject: [PATCH 0255/1894] perf arm-spe: Refactor printing string to buffer When outputs strings to the decoding buffer with function snprintf(), SPE decoder needs to detects if any error returns from snprintf() and if so needs to directly bail out. If snprintf() returns success, it needs to update buffer pointer and reduce the buffer length so can continue to output the next string into the consequent memory space. This complex logics are spreading in the function arm_spe_pkt_desc() so there has many duplicate codes for handling error detecting, increment buffer pointer and decrement buffer size. To avoid the duplicate code, this patch introduces a new helper function arm_spe_pkt_out_string() which is used to wrap up the complex logics, and it's used by the caller arm_spe_pkt_desc(). This patch moves the variable 'blen' as the function's local variable so allows to remove the unnecessary braces and improve the readability. This patch simplifies the return value for arm_spe_pkt_desc(): '0' means success and other values mean an error has occurred. To realize this, it relies on arm_spe_pkt_out_string()'s parameter 'err', the 'err' is a cumulative value, returns its final value if printing buffer is called for one time or multiple times. Finally, the error is handled in a central place, rather than directly bailing out in switch-cases, it returns error at the end of arm_spe_pkt_desc(). This patch changes the caller arm_spe_dump() to respect the updated return value semantics of arm_spe_pkt_desc(). Suggested-by: Dave Martin Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Reviewed-by: Dave Martin Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Link: https://lore.kernel.org/r/20201119152441.6972-2-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- .../arm-spe-decoder/arm-spe-pkt-decoder.c | 302 +++++++++--------- tools/perf/util/arm-spe.c | 2 +- 2 files changed, 151 insertions(+), 153 deletions(-) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index 671a4763fb47b..fbededc1bcd42 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "arm-spe-pkt-decoder.h" @@ -258,192 +259,189 @@ int arm_spe_get_packet(const unsigned char *buf, size_t len, return ret; } +static int arm_spe_pkt_out_string(int *err, char **buf_p, size_t *blen, + const char *fmt, ...) +{ + va_list ap; + int ret; + + /* Bail out if any error occurred */ + if (err && *err) + return *err; + + va_start(ap, fmt); + ret = vsnprintf(*buf_p, *blen, fmt, ap); + va_end(ap); + + if (ret < 0) { + if (err && !*err) + *err = ret; + + /* + * A return value of *blen or more means that the output was + * truncated and the buffer is overrun. + */ + } else if ((size_t)ret >= *blen) { + (*buf_p)[*blen - 1] = '\0'; + + /* + * Set *err to 'ret' to avoid overflow if tries to + * fill this buffer sequentially. + */ + if (err && !*err) + *err = ret; + } else { + *buf_p += ret; + *blen -= ret; + } + + return ret; +} + int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf, size_t buf_len) { - int ret, ns, el, idx = packet->index; + int ns, el, idx = packet->index; unsigned long long payload = packet->payload; const char *name = arm_spe_pkt_name(packet->type); + char *buf_orig = buf; + size_t blen = buf_len; + int err = 0; switch (packet->type) { case ARM_SPE_BAD: case ARM_SPE_PAD: case ARM_SPE_END: - return snprintf(buf, buf_len, "%s", name); - case ARM_SPE_EVENTS: { - size_t blen = buf_len; - - ret = 0; - ret = snprintf(buf, buf_len, "EV"); - buf += ret; - blen -= ret; - if (payload & 0x1) { - ret = snprintf(buf, buf_len, " EXCEPTION-GEN"); - buf += ret; - blen -= ret; - } - if (payload & 0x2) { - ret = snprintf(buf, buf_len, " RETIRED"); - buf += ret; - blen -= ret; - } - if (payload & 0x4) { - ret = snprintf(buf, buf_len, " L1D-ACCESS"); - buf += ret; - blen -= ret; - } - if (payload & 0x8) { - ret = snprintf(buf, buf_len, " L1D-REFILL"); - buf += ret; - blen -= ret; - } - if (payload & 0x10) { - ret = snprintf(buf, buf_len, " TLB-ACCESS"); - buf += ret; - blen -= ret; - } - if (payload & 0x20) { - ret = snprintf(buf, buf_len, " TLB-REFILL"); - buf += ret; - blen -= ret; - } - if (payload & 0x40) { - ret = snprintf(buf, buf_len, " NOT-TAKEN"); - buf += ret; - blen -= ret; - } - if (payload & 0x80) { - ret = snprintf(buf, buf_len, " MISPRED"); - buf += ret; - blen -= ret; - } + arm_spe_pkt_out_string(&err, &buf, &blen, "%s", name); + break; + case ARM_SPE_EVENTS: + arm_spe_pkt_out_string(&err, &buf, &blen, "EV"); + + if (payload & 0x1) + arm_spe_pkt_out_string(&err, &buf, &blen, " EXCEPTION-GEN"); + if (payload & 0x2) + arm_spe_pkt_out_string(&err, &buf, &blen, " RETIRED"); + if (payload & 0x4) + arm_spe_pkt_out_string(&err, &buf, &blen, " L1D-ACCESS"); + if (payload & 0x8) + arm_spe_pkt_out_string(&err, &buf, &blen, " L1D-REFILL"); + if (payload & 0x10) + arm_spe_pkt_out_string(&err, &buf, &blen, " TLB-ACCESS"); + if (payload & 0x20) + arm_spe_pkt_out_string(&err, &buf, &blen, " TLB-REFILL"); + if (payload & 0x40) + arm_spe_pkt_out_string(&err, &buf, &blen, " NOT-TAKEN"); + if (payload & 0x80) + arm_spe_pkt_out_string(&err, &buf, &blen, " MISPRED"); if (idx > 1) { - if (payload & 0x100) { - ret = snprintf(buf, buf_len, " LLC-ACCESS"); - buf += ret; - blen -= ret; - } - if (payload & 0x200) { - ret = snprintf(buf, buf_len, " LLC-REFILL"); - buf += ret; - blen -= ret; - } - if (payload & 0x400) { - ret = snprintf(buf, buf_len, " REMOTE-ACCESS"); - buf += ret; - blen -= ret; - } + if (payload & 0x100) + arm_spe_pkt_out_string(&err, &buf, &blen, " LLC-ACCESS"); + if (payload & 0x200) + arm_spe_pkt_out_string(&err, &buf, &blen, " LLC-REFILL"); + if (payload & 0x400) + arm_spe_pkt_out_string(&err, &buf, &blen, " REMOTE-ACCESS"); } - if (ret < 0) - return ret; - blen -= ret; - return buf_len - blen; - } + break; case ARM_SPE_OP_TYPE: switch (idx) { - case 0: return snprintf(buf, buf_len, "%s", payload & 0x1 ? - "COND-SELECT" : "INSN-OTHER"); - case 1: { - size_t blen = buf_len; + case 0: + arm_spe_pkt_out_string(&err, &buf, &blen, + payload & 0x1 ? "COND-SELECT" : "INSN-OTHER"); + break; + case 1: + arm_spe_pkt_out_string(&err, &buf, &blen, + payload & 0x1 ? "ST" : "LD"); - if (payload & 0x1) - ret = snprintf(buf, buf_len, "ST"); - else - ret = snprintf(buf, buf_len, "LD"); - buf += ret; - blen -= ret; if (payload & 0x2) { - if (payload & 0x4) { - ret = snprintf(buf, buf_len, " AT"); - buf += ret; - blen -= ret; - } - if (payload & 0x8) { - ret = snprintf(buf, buf_len, " EXCL"); - buf += ret; - blen -= ret; - } - if (payload & 0x10) { - ret = snprintf(buf, buf_len, " AR"); - buf += ret; - blen -= ret; - } + if (payload & 0x4) + arm_spe_pkt_out_string(&err, &buf, &blen, " AT"); + if (payload & 0x8) + arm_spe_pkt_out_string(&err, &buf, &blen, " EXCL"); + if (payload & 0x10) + arm_spe_pkt_out_string(&err, &buf, &blen, " AR"); } else if (payload & 0x4) { - ret = snprintf(buf, buf_len, " SIMD-FP"); - buf += ret; - blen -= ret; - } - if (ret < 0) - return ret; - blen -= ret; - return buf_len - blen; - } - case 2: { - size_t blen = buf_len; - - ret = snprintf(buf, buf_len, "B"); - buf += ret; - blen -= ret; - if (payload & 0x1) { - ret = snprintf(buf, buf_len, " COND"); - buf += ret; - blen -= ret; - } - if (payload & 0x2) { - ret = snprintf(buf, buf_len, " IND"); - buf += ret; - blen -= ret; - } - if (ret < 0) - return ret; - blen -= ret; - return buf_len - blen; + arm_spe_pkt_out_string(&err, &buf, &blen, " SIMD-FP"); } - default: return 0; + break; + case 2: + arm_spe_pkt_out_string(&err, &buf, &blen, "B"); + + if (payload & 0x1) + arm_spe_pkt_out_string(&err, &buf, &blen, " COND"); + if (payload & 0x2) + arm_spe_pkt_out_string(&err, &buf, &blen, " IND"); + + break; + default: + /* Unknown index */ + err = -1; + break; } + break; case ARM_SPE_DATA_SOURCE: case ARM_SPE_TIMESTAMP: - return snprintf(buf, buf_len, "%s %lld", name, payload); + arm_spe_pkt_out_string(&err, &buf, &blen, "%s %lld", name, payload); + break; case ARM_SPE_ADDRESS: switch (idx) { case 0: - case 1: ns = !!(packet->payload & NS_FLAG); + case 1: + ns = !!(packet->payload & NS_FLAG); el = (packet->payload & EL_FLAG) >> 61; payload &= ~(0xffULL << 56); - return snprintf(buf, buf_len, "%s 0x%llx el%d ns=%d", + arm_spe_pkt_out_string(&err, &buf, &blen, + "%s 0x%llx el%d ns=%d", (idx == 1) ? "TGT" : "PC", payload, el, ns); - case 2: return snprintf(buf, buf_len, "VA 0x%llx", payload); - case 3: ns = !!(packet->payload & NS_FLAG); + break; + case 2: + arm_spe_pkt_out_string(&err, &buf, &blen, + "VA 0x%llx", payload); + break; + case 3: + ns = !!(packet->payload & NS_FLAG); payload &= ~(0xffULL << 56); - return snprintf(buf, buf_len, "PA 0x%llx ns=%d", - payload, ns); - default: return 0; + arm_spe_pkt_out_string(&err, &buf, &blen, + "PA 0x%llx ns=%d", payload, ns); + break; + default: + /* Unknown index */ + err = -1; + break; } + break; case ARM_SPE_CONTEXT: - return snprintf(buf, buf_len, "%s 0x%lx el%d", name, - (unsigned long)payload, idx + 1); - case ARM_SPE_COUNTER: { - size_t blen = buf_len; - - ret = snprintf(buf, buf_len, "%s %d ", name, - (unsigned short)payload); - buf += ret; - blen -= ret; + arm_spe_pkt_out_string(&err, &buf, &blen, "%s 0x%lx el%d", + name, (unsigned long)payload, idx + 1); + break; + case ARM_SPE_COUNTER: + arm_spe_pkt_out_string(&err, &buf, &blen, "%s %d ", name, + (unsigned short)payload); switch (idx) { - case 0: ret = snprintf(buf, buf_len, "TOT"); break; - case 1: ret = snprintf(buf, buf_len, "ISSUE"); break; - case 2: ret = snprintf(buf, buf_len, "XLAT"); break; - default: ret = 0; + case 0: + arm_spe_pkt_out_string(&err, &buf, &blen, "TOT"); + break; + case 1: + arm_spe_pkt_out_string(&err, &buf, &blen, "ISSUE"); + break; + case 2: + arm_spe_pkt_out_string(&err, &buf, &blen, "XLAT"); + break; + default: + break; } - if (ret < 0) - return ret; - blen -= ret; - return buf_len - blen; - } + break; default: + /* Unknown packet type */ + err = -1; break; } - return snprintf(buf, buf_len, "%s 0x%llx (%d)", - name, payload, packet->index); + /* Output raw data if detect any error */ + if (err) { + err = 0; + arm_spe_pkt_out_string(&err, &buf_orig, &buf_len, "%s 0x%llx (%d)", + name, payload, packet->index); + } + + return err; } diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index 3882a5360ada4..8901a1656a41d 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -113,7 +113,7 @@ static void arm_spe_dump(struct arm_spe *spe __maybe_unused, if (ret > 0) { ret = arm_spe_pkt_desc(&packet, desc, ARM_SPE_PKT_DESC_MAX); - if (ret > 0) + if (!ret) color_fprintf(stdout, color, " %s\n", desc); } else { color_fprintf(stdout, color, " Bad packet!\n"); -- GitLab From 11695142e25e957dc3e56c29dc5f9daaf9530b10 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 19 Nov 2020 23:24:27 +0800 Subject: [PATCH 0256/1894] perf arm-spe: Refactor packet header parsing The packet header parsing uses the hard coded values and it uses nested if-else statements. To improve the readability, this patch refactors the macros for packet header format so it removes the hard coded values. Furthermore, based on the new mask macros it reduces the nested if-else statements and changes to use the flat conditions checking, this is directive and can easily map to the descriptions in ARMv8-a architecture reference manual (ARM DDI 0487E.a), chapter 'D10.1.5 Statistical Profiling Extension protocol packet headers'. Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Dave Martin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Link: https://lore.kernel.org/r/20201119152441.6972-3-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- .../arm-spe-decoder/arm-spe-pkt-decoder.c | 92 +++++++++---------- .../arm-spe-decoder/arm-spe-pkt-decoder.h | 20 ++++ 2 files changed, 61 insertions(+), 51 deletions(-) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index fbededc1bcd42..a769fe5a4496e 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -16,28 +16,6 @@ #define NS_FLAG BIT_ULL(63) #define EL_FLAG (BIT_ULL(62) | BIT_ULL(61)) -#define SPE_HEADER0_PAD 0x0 -#define SPE_HEADER0_END 0x1 -#define SPE_HEADER0_ADDRESS 0x30 /* address packet (short) */ -#define SPE_HEADER0_ADDRESS_MASK 0x38 -#define SPE_HEADER0_COUNTER 0x18 /* counter packet (short) */ -#define SPE_HEADER0_COUNTER_MASK 0x38 -#define SPE_HEADER0_TIMESTAMP 0x71 -#define SPE_HEADER0_TIMESTAMP 0x71 -#define SPE_HEADER0_EVENTS 0x2 -#define SPE_HEADER0_EVENTS_MASK 0xf -#define SPE_HEADER0_SOURCE 0x3 -#define SPE_HEADER0_SOURCE_MASK 0xf -#define SPE_HEADER0_CONTEXT 0x24 -#define SPE_HEADER0_CONTEXT_MASK 0x3c -#define SPE_HEADER0_OP_TYPE 0x8 -#define SPE_HEADER0_OP_TYPE_MASK 0x3c -#define SPE_HEADER1_ALIGNMENT 0x0 -#define SPE_HEADER1_ADDRESS 0xb0 /* address packet (extended) */ -#define SPE_HEADER1_ADDRESS_MASK 0xf8 -#define SPE_HEADER1_COUNTER 0x98 /* counter packet (extended) */ -#define SPE_HEADER1_COUNTER_MASK 0xf8 - #if __BYTE_ORDER == __BIG_ENDIAN #define le16_to_cpu bswap_16 #define le32_to_cpu bswap_32 @@ -200,46 +178,58 @@ static int arm_spe_get_addr(const unsigned char *buf, size_t len, static int arm_spe_do_get_packet(const unsigned char *buf, size_t len, struct arm_spe_pkt *packet) { - unsigned int byte; + unsigned int hdr; + unsigned char ext_hdr = 0; memset(packet, 0, sizeof(struct arm_spe_pkt)); if (!len) return ARM_SPE_NEED_MORE_BYTES; - byte = buf[0]; - if (byte == SPE_HEADER0_PAD) + hdr = buf[0]; + + if (hdr == SPE_HEADER0_PAD) return arm_spe_get_pad(packet); - else if (byte == SPE_HEADER0_END) /* no timestamp at end of record */ + + if (hdr == SPE_HEADER0_END) /* no timestamp at end of record */ return arm_spe_get_end(packet); - else if (byte & 0xc0 /* 0y11xxxxxx */) { - if (byte & 0x80) { - if ((byte & SPE_HEADER0_ADDRESS_MASK) == SPE_HEADER0_ADDRESS) - return arm_spe_get_addr(buf, len, 0, packet); - if ((byte & SPE_HEADER0_COUNTER_MASK) == SPE_HEADER0_COUNTER) - return arm_spe_get_counter(buf, len, 0, packet); - } else - if (byte == SPE_HEADER0_TIMESTAMP) - return arm_spe_get_timestamp(buf, len, packet); - else if ((byte & SPE_HEADER0_EVENTS_MASK) == SPE_HEADER0_EVENTS) - return arm_spe_get_events(buf, len, packet); - else if ((byte & SPE_HEADER0_SOURCE_MASK) == SPE_HEADER0_SOURCE) - return arm_spe_get_data_source(buf, len, packet); - else if ((byte & SPE_HEADER0_CONTEXT_MASK) == SPE_HEADER0_CONTEXT) - return arm_spe_get_context(buf, len, packet); - else if ((byte & SPE_HEADER0_OP_TYPE_MASK) == SPE_HEADER0_OP_TYPE) - return arm_spe_get_op_type(buf, len, packet); - } else if ((byte & 0xe0) == 0x20 /* 0y001xxxxx */) { - /* 16-bit header */ - byte = buf[1]; - if (byte == SPE_HEADER1_ALIGNMENT) + + if (hdr == SPE_HEADER0_TIMESTAMP) + return arm_spe_get_timestamp(buf, len, packet); + + if ((hdr & SPE_HEADER0_MASK1) == SPE_HEADER0_EVENTS) + return arm_spe_get_events(buf, len, packet); + + if ((hdr & SPE_HEADER0_MASK1) == SPE_HEADER0_SOURCE) + return arm_spe_get_data_source(buf, len, packet); + + if ((hdr & SPE_HEADER0_MASK2) == SPE_HEADER0_CONTEXT) + return arm_spe_get_context(buf, len, packet); + + if ((hdr & SPE_HEADER0_MASK2) == SPE_HEADER0_OP_TYPE) + return arm_spe_get_op_type(buf, len, packet); + + if ((hdr & SPE_HEADER0_MASK2) == SPE_HEADER0_EXTENDED) { + /* 16-bit extended format header */ + ext_hdr = 1; + + hdr = buf[1]; + if (hdr == SPE_HEADER1_ALIGNMENT) return arm_spe_get_alignment(buf, len, packet); - else if ((byte & SPE_HEADER1_ADDRESS_MASK) == SPE_HEADER1_ADDRESS) - return arm_spe_get_addr(buf, len, 1, packet); - else if ((byte & SPE_HEADER1_COUNTER_MASK) == SPE_HEADER1_COUNTER) - return arm_spe_get_counter(buf, len, 1, packet); } + /* + * The short format header's byte 0 or the extended format header's + * byte 1 has been assigned to 'hdr', which uses the same encoding for + * address packet and counter packet, so don't need to distinguish if + * it's short format or extended format and handle in once. + */ + if ((hdr & SPE_HEADER0_MASK3) == SPE_HEADER0_ADDRESS) + return arm_spe_get_addr(buf, len, ext_hdr, packet); + + if ((hdr & SPE_HEADER0_MASK3) == SPE_HEADER0_COUNTER) + return arm_spe_get_counter(buf, len, ext_hdr, packet); + return ARM_SPE_BAD_PACKET; } diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h index 4c870521b8eb0..129f43405eb14 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h @@ -36,6 +36,26 @@ struct arm_spe_pkt { uint64_t payload; }; +/* Short header (HEADER0) and extended header (HEADER1) */ +#define SPE_HEADER0_PAD 0x0 +#define SPE_HEADER0_END 0x1 +#define SPE_HEADER0_TIMESTAMP 0x71 +/* Mask for event & data source */ +#define SPE_HEADER0_MASK1 (GENMASK_ULL(7, 6) | GENMASK_ULL(3, 0)) +#define SPE_HEADER0_EVENTS 0x42 +#define SPE_HEADER0_SOURCE 0x43 +/* Mask for context & operation */ +#define SPE_HEADER0_MASK2 GENMASK_ULL(7, 2) +#define SPE_HEADER0_CONTEXT 0x64 +#define SPE_HEADER0_OP_TYPE 0x48 +/* Mask for extended format */ +#define SPE_HEADER0_EXTENDED 0x20 +/* Mask for address & counter */ +#define SPE_HEADER0_MASK3 GENMASK_ULL(7, 3) +#define SPE_HEADER0_ADDRESS 0xb0 +#define SPE_HEADER0_COUNTER 0x98 +#define SPE_HEADER1_ALIGNMENT 0x0 + #define SPE_ADDR_PKT_HDR_INDEX_INS (0x0) #define SPE_ADDR_PKT_HDR_INDEX_BRANCH (0x1) #define SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT (0x2) -- GitLab From ab2aa439e4aaa3ce0fdcfa0f847aed4bf13bf353 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 19 Nov 2020 23:24:28 +0800 Subject: [PATCH 0257/1894] perf arm-spe: Add new function arm_spe_pkt_desc_addr() This patch moves out the address parsing code from arm_spe_pkt_desc() and uses the new introduced function arm_spe_pkt_desc_addr() to process address packet. Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Dave Martin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Link: https://lore.kernel.org/r/20201119152441.6972-4-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- .../arm-spe-decoder/arm-spe-pkt-decoder.c | 64 +++++++++++-------- 1 file changed, 38 insertions(+), 26 deletions(-) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index a769fe5a4496e..b16d68b40bbdd 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -288,10 +288,46 @@ static int arm_spe_pkt_out_string(int *err, char **buf_p, size_t *blen, return ret; } +static int arm_spe_pkt_desc_addr(const struct arm_spe_pkt *packet, + char *buf, size_t buf_len) +{ + int ns, el, idx = packet->index; + u64 payload = packet->payload; + int err = 0; + + switch (idx) { + case 0: + case 1: + ns = !!(packet->payload & NS_FLAG); + el = (packet->payload & EL_FLAG) >> 61; + payload &= ~(0xffULL << 56); + arm_spe_pkt_out_string(&err, &buf, &buf_len, + "%s 0x%llx el%d ns=%d", + (idx == 1) ? "TGT" : "PC", payload, el, ns); + break; + case 2: + arm_spe_pkt_out_string(&err, &buf, &buf_len, + "VA 0x%llx", payload); + break; + case 3: + ns = !!(packet->payload & NS_FLAG); + payload &= ~(0xffULL << 56); + arm_spe_pkt_out_string(&err, &buf, &buf_len, + "PA 0x%llx ns=%d", payload, ns); + break; + default: + /* Unknown index */ + err = -1; + break; + } + + return err; +} + int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf, size_t buf_len) { - int ns, el, idx = packet->index; + int idx = packet->index; unsigned long long payload = packet->payload; const char *name = arm_spe_pkt_name(packet->type); char *buf_orig = buf; @@ -373,31 +409,7 @@ int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf, arm_spe_pkt_out_string(&err, &buf, &blen, "%s %lld", name, payload); break; case ARM_SPE_ADDRESS: - switch (idx) { - case 0: - case 1: - ns = !!(packet->payload & NS_FLAG); - el = (packet->payload & EL_FLAG) >> 61; - payload &= ~(0xffULL << 56); - arm_spe_pkt_out_string(&err, &buf, &blen, - "%s 0x%llx el%d ns=%d", - (idx == 1) ? "TGT" : "PC", payload, el, ns); - break; - case 2: - arm_spe_pkt_out_string(&err, &buf, &blen, - "VA 0x%llx", payload); - break; - case 3: - ns = !!(packet->payload & NS_FLAG); - payload &= ~(0xffULL << 56); - arm_spe_pkt_out_string(&err, &buf, &blen, - "PA 0x%llx ns=%d", payload, ns); - break; - default: - /* Unknown index */ - err = -1; - break; - } + err = arm_spe_pkt_desc_addr(packet, buf, buf_len); break; case ARM_SPE_CONTEXT: arm_spe_pkt_out_string(&err, &buf, &blen, "%s 0x%lx el%d", -- GitLab From 09935ca7b64cfa379b6ebf2b8cdb3126e09bffab Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 19 Nov 2020 23:24:29 +0800 Subject: [PATCH 0258/1894] perf arm-spe: Refactor address packet handling This patch is to refactor address packet handling, it defines macros for address packet's header and payload, these macros are used by decoder and the dump flow. Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Dave Martin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Link: https://lore.kernel.org/r/20201119152441.6972-5-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- .../util/arm-spe-decoder/arm-spe-decoder.c | 29 ++++++++------- .../arm-spe-decoder/arm-spe-pkt-decoder.c | 26 +++++++------- .../arm-spe-decoder/arm-spe-pkt-decoder.h | 35 ++++++++++++------- 3 files changed, 48 insertions(+), 42 deletions(-) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c index cc18a1e8c212b..776b3e6628bbf 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c @@ -24,36 +24,35 @@ static u64 arm_spe_calc_ip(int index, u64 payload) { - u8 *addr = (u8 *)&payload; - int ns, el; + u64 ns, el; /* Instruction virtual address or Branch target address */ if (index == SPE_ADDR_PKT_HDR_INDEX_INS || index == SPE_ADDR_PKT_HDR_INDEX_BRANCH) { - ns = addr[7] & SPE_ADDR_PKT_NS; - el = (addr[7] & SPE_ADDR_PKT_EL_MASK) >> SPE_ADDR_PKT_EL_OFFSET; + ns = SPE_ADDR_PKT_GET_NS(payload); + el = SPE_ADDR_PKT_GET_EL(payload); + + /* Clean highest byte */ + payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload); /* Fill highest byte for EL1 or EL2 (VHE) mode */ if (ns && (el == SPE_ADDR_PKT_EL1 || el == SPE_ADDR_PKT_EL2)) - addr[7] = 0xff; - /* Clean highest byte for other cases */ - else - addr[7] = 0x0; + payload |= 0xffULL << SPE_ADDR_PKT_ADDR_BYTE7_SHIFT; /* Data access virtual address */ } else if (index == SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT) { + /* Clean tags */ + payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload); + /* Fill highest byte if bits [48..55] is 0xff */ - if (addr[6] == 0xff) - addr[7] = 0xff; - /* Otherwise, cleanup tags */ - else - addr[7] = 0x0; + if (SPE_ADDR_PKT_ADDR_GET_BYTE_6(payload) == 0xffULL) + payload |= 0xffULL << SPE_ADDR_PKT_ADDR_BYTE7_SHIFT; /* Data access physical address */ } else if (index == SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS) { - /* Cleanup byte 7 */ - addr[7] = 0x0; + /* Clean highest byte */ + payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload); } else { pr_err("unsupported address packet index: 0x%x\n", index); } diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index b16d68b40bbdd..d37c4008adbcc 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -13,9 +13,6 @@ #include "arm-spe-pkt-decoder.h" -#define NS_FLAG BIT_ULL(63) -#define EL_FLAG (BIT_ULL(62) | BIT_ULL(61)) - #if __BYTE_ORDER == __BIG_ENDIAN #define le16_to_cpu bswap_16 #define le32_to_cpu bswap_32 @@ -167,10 +164,11 @@ static int arm_spe_get_addr(const unsigned char *buf, size_t len, const unsigned char ext_hdr, struct arm_spe_pkt *packet) { packet->type = ARM_SPE_ADDRESS; + if (ext_hdr) - packet->index = ((buf[0] & 0x3) << 3) | (buf[1] & 0x7); + packet->index = SPE_HDR_EXTENDED_INDEX(buf[0], buf[1]); else - packet->index = buf[0] & 0x7; + packet->index = SPE_HDR_SHORT_INDEX(buf[0]); return arm_spe_get_payload(buf, len, ext_hdr, packet); } @@ -296,22 +294,22 @@ static int arm_spe_pkt_desc_addr(const struct arm_spe_pkt *packet, int err = 0; switch (idx) { - case 0: - case 1: - ns = !!(packet->payload & NS_FLAG); - el = (packet->payload & EL_FLAG) >> 61; - payload &= ~(0xffULL << 56); + case SPE_ADDR_PKT_HDR_INDEX_INS: + case SPE_ADDR_PKT_HDR_INDEX_BRANCH: + ns = !!SPE_ADDR_PKT_GET_NS(payload); + el = SPE_ADDR_PKT_GET_EL(payload); + payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload); arm_spe_pkt_out_string(&err, &buf, &buf_len, "%s 0x%llx el%d ns=%d", (idx == 1) ? "TGT" : "PC", payload, el, ns); break; - case 2: + case SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT: arm_spe_pkt_out_string(&err, &buf, &buf_len, "VA 0x%llx", payload); break; - case 3: - ns = !!(packet->payload & NS_FLAG); - payload &= ~(0xffULL << 56); + case SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS: + ns = !!SPE_ADDR_PKT_GET_NS(payload); + payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload); arm_spe_pkt_out_string(&err, &buf, &buf_len, "PA 0x%llx ns=%d", payload, ns); break; diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h index 129f43405eb14..f97d6840be3aa 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h @@ -56,19 +56,28 @@ struct arm_spe_pkt { #define SPE_HEADER0_COUNTER 0x98 #define SPE_HEADER1_ALIGNMENT 0x0 -#define SPE_ADDR_PKT_HDR_INDEX_INS (0x0) -#define SPE_ADDR_PKT_HDR_INDEX_BRANCH (0x1) -#define SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT (0x2) -#define SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS (0x3) - -#define SPE_ADDR_PKT_NS BIT(7) -#define SPE_ADDR_PKT_CH BIT(6) -#define SPE_ADDR_PKT_EL_OFFSET (5) -#define SPE_ADDR_PKT_EL_MASK (0x3 << SPE_ADDR_PKT_EL_OFFSET) -#define SPE_ADDR_PKT_EL0 (0) -#define SPE_ADDR_PKT_EL1 (1) -#define SPE_ADDR_PKT_EL2 (2) -#define SPE_ADDR_PKT_EL3 (3) +#define SPE_HDR_SHORT_INDEX(h) ((h) & GENMASK_ULL(2, 0)) +#define SPE_HDR_EXTENDED_INDEX(h0, h1) (((h0) & GENMASK_ULL(1, 0)) << 3 | \ + SPE_HDR_SHORT_INDEX(h1)) + +/* Address packet header */ +#define SPE_ADDR_PKT_HDR_INDEX_INS 0x0 +#define SPE_ADDR_PKT_HDR_INDEX_BRANCH 0x1 +#define SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT 0x2 +#define SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS 0x3 + +/* Address packet payload */ +#define SPE_ADDR_PKT_ADDR_BYTE7_SHIFT 56 +#define SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(v) ((v) & GENMASK_ULL(55, 0)) +#define SPE_ADDR_PKT_ADDR_GET_BYTE_6(v) (((v) & GENMASK_ULL(55, 48)) >> 48) + +#define SPE_ADDR_PKT_GET_NS(v) (((v) & BIT_ULL(63)) >> 63) +#define SPE_ADDR_PKT_GET_EL(v) (((v) & GENMASK_ULL(62, 61)) >> 61) + +#define SPE_ADDR_PKT_EL0 0 +#define SPE_ADDR_PKT_EL1 1 +#define SPE_ADDR_PKT_EL2 2 +#define SPE_ADDR_PKT_EL3 3 const char *arm_spe_pkt_name(enum arm_spe_pkt_type); -- GitLab From 5513ddaf103c62dd1eabe9403c0a8d9f810492dc Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 19 Nov 2020 23:24:30 +0800 Subject: [PATCH 0259/1894] perf arm_spe: Fixup top byte for data virtual address To establish a valid address from the address packet payload and finally the address value can be used for parsing data symbol in DSO, current code uses 0xff to replace the tag in the top byte of data virtual address. So far the code only fixups top byte for the memory layouts with 4KB pages, it misses to support memory layouts with 64KB pages. This patch adds the conditions for checking bits [55:52] are 0xf, if detects the pattern it will fill 0xff into the top byte of the address, also adds comment to explain the fixing up. Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Dave Martin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Link: https://lore.kernel.org/r/20201119152441.6972-6-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- .../util/arm-spe-decoder/arm-spe-decoder.c | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c index 776b3e6628bbf..cac2ef79c025b 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c @@ -24,7 +24,7 @@ static u64 arm_spe_calc_ip(int index, u64 payload) { - u64 ns, el; + u64 ns, el, val; /* Instruction virtual address or Branch target address */ if (index == SPE_ADDR_PKT_HDR_INDEX_INS || @@ -45,8 +45,22 @@ static u64 arm_spe_calc_ip(int index, u64 payload) /* Clean tags */ payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload); - /* Fill highest byte if bits [48..55] is 0xff */ - if (SPE_ADDR_PKT_ADDR_GET_BYTE_6(payload) == 0xffULL) + /* + * Armv8 ARM (ARM DDI 0487F.c), chapter "D10.2.1 Address packet" + * defines the data virtual address payload format, the top byte + * (bits [63:56]) is assigned as top-byte tag; so we only can + * retrieve address value from bits [55:0]. + * + * According to Documentation/arm64/memory.rst, if detects the + * specific pattern in bits [55:52] of payload which falls in + * the kernel space, should fixup the top byte and this allows + * perf tool to parse DSO symbol for data address correctly. + * + * For this reason, if detects the bits [55:52] is 0xf, will + * fill 0xff into the top byte. + */ + val = SPE_ADDR_PKT_ADDR_GET_BYTE_6(payload); + if ((val & 0xf0ULL) == 0xf0ULL) payload |= 0xffULL << SPE_ADDR_PKT_ADDR_BYTE7_SHIFT; /* Data access physical address */ -- GitLab From 6550149e801a32b1533ed86509af76319cb75eba Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 19 Nov 2020 23:24:31 +0800 Subject: [PATCH 0260/1894] perf arm-spe: Refactor context packet handling Minor refactoring to use macro for index mask. Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Dave Martin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Link: https://lore.kernel.org/r/20201119152441.6972-7-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 2 +- tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index d37c4008adbcc..978f5551b82c9 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -136,7 +136,7 @@ static int arm_spe_get_context(const unsigned char *buf, size_t len, struct arm_spe_pkt *packet) { packet->type = ARM_SPE_CONTEXT; - packet->index = buf[0] & 0x3; + packet->index = SPE_CTX_PKT_HDR_INDEX(buf[0]); return arm_spe_get_payload(buf, len, 0, packet); } diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h index f97d6840be3aa..9bc876bffd356 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h @@ -79,6 +79,9 @@ struct arm_spe_pkt { #define SPE_ADDR_PKT_EL2 2 #define SPE_ADDR_PKT_EL3 3 +/* Context packet header */ +#define SPE_CTX_PKT_HDR_INDEX(h) ((h) & GENMASK_ULL(1, 0)) + const char *arm_spe_pkt_name(enum arm_spe_pkt_type); int arm_spe_get_packet(const unsigned char *buf, size_t len, -- GitLab From c52cfe9872132407eef6d734014d6fd7790146f5 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 19 Nov 2020 23:24:32 +0800 Subject: [PATCH 0261/1894] perf arm-spe: Add new function arm_spe_pkt_desc_counter() This patch moves out the counter packet parsing code from arm_spe_pkt_desc() to the new function arm_spe_pkt_desc_counter(). Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Dave Martin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Link: https://lore.kernel.org/r/20201119152441.6972-8-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- .../arm-spe-decoder/arm-spe-pkt-decoder.c | 43 ++++++++++++------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index 978f5551b82c9..397ade5ffdebb 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -322,6 +322,33 @@ static int arm_spe_pkt_desc_addr(const struct arm_spe_pkt *packet, return err; } +static int arm_spe_pkt_desc_counter(const struct arm_spe_pkt *packet, + char *buf, size_t buf_len) +{ + u64 payload = packet->payload; + const char *name = arm_spe_pkt_name(packet->type); + int err = 0; + + arm_spe_pkt_out_string(&err, &buf, &buf_len, "%s %d ", name, + (unsigned short)payload); + + switch (packet->index) { + case 0: + arm_spe_pkt_out_string(&err, &buf, &buf_len, "TOT"); + break; + case 1: + arm_spe_pkt_out_string(&err, &buf, &buf_len, "ISSUE"); + break; + case 2: + arm_spe_pkt_out_string(&err, &buf, &buf_len, "XLAT"); + break; + default: + break; + } + + return err; +} + int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf, size_t buf_len) { @@ -414,21 +441,7 @@ int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf, name, (unsigned long)payload, idx + 1); break; case ARM_SPE_COUNTER: - arm_spe_pkt_out_string(&err, &buf, &blen, "%s %d ", name, - (unsigned short)payload); - switch (idx) { - case 0: - arm_spe_pkt_out_string(&err, &buf, &blen, "TOT"); - break; - case 1: - arm_spe_pkt_out_string(&err, &buf, &blen, "ISSUE"); - break; - case 2: - arm_spe_pkt_out_string(&err, &buf, &blen, "XLAT"); - break; - default: - break; - } + err = arm_spe_pkt_desc_counter(packet, buf, buf_len); break; default: /* Unknown packet type */ -- GitLab From d158aa408f221756f99edb128ef35bfd4d3361d5 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 19 Nov 2020 23:24:33 +0800 Subject: [PATCH 0262/1894] perf arm-spe: Refactor counter packet handling This patch defines macros for counter packet header, and uses macros to replace hard code values in functions arm_spe_get_counter() and arm_spe_pkt_desc(). In the function arm_spe_get_counter(), adds a new line for more readable. Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Dave Martin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Link: https://lore.kernel.org/r/20201119152441.6972-9-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 11 ++++++----- tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h | 5 +++++ 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index 397ade5ffdebb..52f4339b1f0cb 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -152,10 +152,11 @@ static int arm_spe_get_counter(const unsigned char *buf, size_t len, const unsigned char ext_hdr, struct arm_spe_pkt *packet) { packet->type = ARM_SPE_COUNTER; + if (ext_hdr) - packet->index = ((buf[0] & 0x3) << 3) | (buf[1] & 0x7); + packet->index = SPE_HDR_EXTENDED_INDEX(buf[0], buf[1]); else - packet->index = buf[0] & 0x7; + packet->index = SPE_HDR_SHORT_INDEX(buf[0]); return arm_spe_get_payload(buf, len, ext_hdr, packet); } @@ -333,13 +334,13 @@ static int arm_spe_pkt_desc_counter(const struct arm_spe_pkt *packet, (unsigned short)payload); switch (packet->index) { - case 0: + case SPE_CNT_PKT_HDR_INDEX_TOTAL_LAT: arm_spe_pkt_out_string(&err, &buf, &buf_len, "TOT"); break; - case 1: + case SPE_CNT_PKT_HDR_INDEX_ISSUE_LAT: arm_spe_pkt_out_string(&err, &buf, &buf_len, "ISSUE"); break; - case 2: + case SPE_CNT_PKT_HDR_INDEX_TRANS_LAT: arm_spe_pkt_out_string(&err, &buf, &buf_len, "XLAT"); break; default: diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h index 9bc876bffd356..7d8e34e35f05f 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h @@ -82,6 +82,11 @@ struct arm_spe_pkt { /* Context packet header */ #define SPE_CTX_PKT_HDR_INDEX(h) ((h) & GENMASK_ULL(1, 0)) +/* Counter packet header */ +#define SPE_CNT_PKT_HDR_INDEX_TOTAL_LAT 0x0 +#define SPE_CNT_PKT_HDR_INDEX_ISSUE_LAT 0x1 +#define SPE_CNT_PKT_HDR_INDEX_TRANS_LAT 0x2 + const char *arm_spe_pkt_name(enum arm_spe_pkt_type); int arm_spe_get_packet(const unsigned char *buf, size_t len, -- GitLab From e66f6d75960220001ce94afe93c981826235c003 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 19 Nov 2020 23:24:34 +0800 Subject: [PATCH 0263/1894] perf arm-spe: Add new function arm_spe_pkt_desc_event() This patch moves out the event packet parsing from arm_spe_pkt_desc() to the new function arm_spe_pkt_desc_event(). Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Dave Martin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Link: https://lore.kernel.org/r/20201119152441.6972-10-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- .../arm-spe-decoder/arm-spe-pkt-decoder.c | 63 +++++++++++-------- 1 file changed, 37 insertions(+), 26 deletions(-) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index 52f4339b1f0cb..da6b9f76739c8 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -287,6 +287,42 @@ static int arm_spe_pkt_out_string(int *err, char **buf_p, size_t *blen, return ret; } +static int arm_spe_pkt_desc_event(const struct arm_spe_pkt *packet, + char *buf, size_t buf_len) +{ + u64 payload = packet->payload; + int err = 0; + + arm_spe_pkt_out_string(&err, &buf, &buf_len, "EV"); + + if (payload & 0x1) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " EXCEPTION-GEN"); + if (payload & 0x2) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " RETIRED"); + if (payload & 0x4) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " L1D-ACCESS"); + if (payload & 0x8) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " L1D-REFILL"); + if (payload & 0x10) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " TLB-ACCESS"); + if (payload & 0x20) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " TLB-REFILL"); + if (payload & 0x40) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " NOT-TAKEN"); + if (payload & 0x80) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " MISPRED"); + if (packet->index > 1) { + if (payload & 0x100) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " LLC-ACCESS"); + if (payload & 0x200) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " LLC-REFILL"); + if (payload & 0x400) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " REMOTE-ACCESS"); + } + + return err; +} + static int arm_spe_pkt_desc_addr(const struct arm_spe_pkt *packet, char *buf, size_t buf_len) { @@ -367,32 +403,7 @@ int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf, arm_spe_pkt_out_string(&err, &buf, &blen, "%s", name); break; case ARM_SPE_EVENTS: - arm_spe_pkt_out_string(&err, &buf, &blen, "EV"); - - if (payload & 0x1) - arm_spe_pkt_out_string(&err, &buf, &blen, " EXCEPTION-GEN"); - if (payload & 0x2) - arm_spe_pkt_out_string(&err, &buf, &blen, " RETIRED"); - if (payload & 0x4) - arm_spe_pkt_out_string(&err, &buf, &blen, " L1D-ACCESS"); - if (payload & 0x8) - arm_spe_pkt_out_string(&err, &buf, &blen, " L1D-REFILL"); - if (payload & 0x10) - arm_spe_pkt_out_string(&err, &buf, &blen, " TLB-ACCESS"); - if (payload & 0x20) - arm_spe_pkt_out_string(&err, &buf, &blen, " TLB-REFILL"); - if (payload & 0x40) - arm_spe_pkt_out_string(&err, &buf, &blen, " NOT-TAKEN"); - if (payload & 0x80) - arm_spe_pkt_out_string(&err, &buf, &blen, " MISPRED"); - if (idx > 1) { - if (payload & 0x100) - arm_spe_pkt_out_string(&err, &buf, &blen, " LLC-ACCESS"); - if (payload & 0x200) - arm_spe_pkt_out_string(&err, &buf, &blen, " LLC-REFILL"); - if (payload & 0x400) - arm_spe_pkt_out_string(&err, &buf, &blen, " REMOTE-ACCESS"); - } + err = arm_spe_pkt_desc_event(packet, buf, buf_len); break; case ARM_SPE_OP_TYPE: switch (idx) { -- GitLab From 889d1a675fcfe734f83c459de023a6f0a91a7a0e Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 19 Nov 2020 23:24:35 +0800 Subject: [PATCH 0264/1894] perf arm-spe: Refactor event type handling Move the enums of event types to arm-spe-pkt-decoder.h, thus function arm_spe_pkt_desc_event() can use them for bitmasks. Suggested-by: Andre Przywara Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Dave Martin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Link: https://lore.kernel.org/r/20201119152441.6972-11-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- .../util/arm-spe-decoder/arm-spe-decoder.h | 17 -------------- .../arm-spe-decoder/arm-spe-pkt-decoder.c | 22 +++++++++---------- .../arm-spe-decoder/arm-spe-pkt-decoder.h | 18 +++++++++++++++ 3 files changed, 29 insertions(+), 28 deletions(-) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h index a5111a8d43600..24727b8ca7ffa 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h @@ -13,23 +13,6 @@ #include "arm-spe-pkt-decoder.h" -enum arm_spe_events { - EV_EXCEPTION_GEN = 0, - EV_RETIRED = 1, - EV_L1D_ACCESS = 2, - EV_L1D_REFILL = 3, - EV_TLB_ACCESS = 4, - EV_TLB_WALK = 5, - EV_NOT_TAKEN = 6, - EV_MISPRED = 7, - EV_LLC_ACCESS = 8, - EV_LLC_MISS = 9, - EV_REMOTE_ACCESS = 10, - EV_ALIGNMENT = 11, - EV_PARTIAL_PREDICATE = 17, - EV_EMPTY_PREDICATE = 18, -}; - enum arm_spe_sample_type { ARM_SPE_L1D_ACCESS = 1 << 0, ARM_SPE_L1D_MISS = 1 << 1, diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index da6b9f76739c8..3f30b29377152 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -295,28 +295,28 @@ static int arm_spe_pkt_desc_event(const struct arm_spe_pkt *packet, arm_spe_pkt_out_string(&err, &buf, &buf_len, "EV"); - if (payload & 0x1) + if (payload & BIT(EV_EXCEPTION_GEN)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " EXCEPTION-GEN"); - if (payload & 0x2) + if (payload & BIT(EV_RETIRED)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " RETIRED"); - if (payload & 0x4) + if (payload & BIT(EV_L1D_ACCESS)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " L1D-ACCESS"); - if (payload & 0x8) + if (payload & BIT(EV_L1D_REFILL)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " L1D-REFILL"); - if (payload & 0x10) + if (payload & BIT(EV_TLB_ACCESS)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " TLB-ACCESS"); - if (payload & 0x20) + if (payload & BIT(EV_TLB_WALK)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " TLB-REFILL"); - if (payload & 0x40) + if (payload & BIT(EV_NOT_TAKEN)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " NOT-TAKEN"); - if (payload & 0x80) + if (payload & BIT(EV_MISPRED)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " MISPRED"); if (packet->index > 1) { - if (payload & 0x100) + if (payload & BIT(EV_LLC_ACCESS)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " LLC-ACCESS"); - if (payload & 0x200) + if (payload & BIT(EV_LLC_MISS)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " LLC-REFILL"); - if (payload & 0x400) + if (payload & BIT(EV_REMOTE_ACCESS)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " REMOTE-ACCESS"); } diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h index 7d8e34e35f05f..42ed4e61ede25 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h @@ -87,6 +87,24 @@ struct arm_spe_pkt { #define SPE_CNT_PKT_HDR_INDEX_ISSUE_LAT 0x1 #define SPE_CNT_PKT_HDR_INDEX_TRANS_LAT 0x2 +/* Event packet payload */ +enum arm_spe_events { + EV_EXCEPTION_GEN = 0, + EV_RETIRED = 1, + EV_L1D_ACCESS = 2, + EV_L1D_REFILL = 3, + EV_TLB_ACCESS = 4, + EV_TLB_WALK = 5, + EV_NOT_TAKEN = 6, + EV_MISPRED = 7, + EV_LLC_ACCESS = 8, + EV_LLC_MISS = 9, + EV_REMOTE_ACCESS = 10, + EV_ALIGNMENT = 11, + EV_PARTIAL_PREDICATE = 17, + EV_EMPTY_PREDICATE = 18, +}; + const char *arm_spe_pkt_name(enum arm_spe_pkt_type); int arm_spe_get_packet(const unsigned char *buf, size_t len, -- GitLab From 4d0f4ca273aa95bf592b8bad3c619b5766c8ecc7 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 19 Nov 2020 23:24:36 +0800 Subject: [PATCH 0265/1894] perf arm-spe: Remove size condition checking for events In the Armv8 ARM (ARM DDI 0487F.c), chapter "D10.2.6 Events packet", it describes the event bit is valid with specific payload requirement. For example, the Last Level cache access event, the bit is defined as: E[8], byte 1 bit [0], when SZ == 0b01 , when SZ == 0b10 , or when SZ == 0b11 It requires the payload size is at least 2 bytes, when byte 1 (start counting from 0) is valid, E[8] (bit 0 in byte 1) can be used for LLC access event type. For safety, the code checks the condition for payload size firstly, if meet the requirement for payload size, then continue to parse event type. If review function arm_spe_get_payload(), it has used cast, so any bytes beyond the valid size have been set to zeros. For this reason, we don't need to check payload size anymore afterwards when parse events, thus this patch removes payload size conditions. Suggested-by: Andre Przywara Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Dave Martin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Link: https://lore.kernel.org/r/20201119152441.6972-12-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/arm-spe-decoder/arm-spe-decoder.c | 9 +++------ .../util/arm-spe-decoder/arm-spe-pkt-decoder.c | 14 ++++++-------- 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c index cac2ef79c025b..90d575cee1b90 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c @@ -192,16 +192,13 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder) if (payload & BIT(EV_TLB_ACCESS)) decoder->record.type |= ARM_SPE_TLB_ACCESS; - if ((idx == 2 || idx == 4 || idx == 8) && - (payload & BIT(EV_LLC_MISS))) + if (payload & BIT(EV_LLC_MISS)) decoder->record.type |= ARM_SPE_LLC_MISS; - if ((idx == 2 || idx == 4 || idx == 8) && - (payload & BIT(EV_LLC_ACCESS))) + if (payload & BIT(EV_LLC_ACCESS)) decoder->record.type |= ARM_SPE_LLC_ACCESS; - if ((idx == 2 || idx == 4 || idx == 8) && - (payload & BIT(EV_REMOTE_ACCESS))) + if (payload & BIT(EV_REMOTE_ACCESS)) decoder->record.type |= ARM_SPE_REMOTE_ACCESS; if (payload & BIT(EV_MISPRED)) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index 3f30b29377152..88bcf7e5be768 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -311,14 +311,12 @@ static int arm_spe_pkt_desc_event(const struct arm_spe_pkt *packet, arm_spe_pkt_out_string(&err, &buf, &buf_len, " NOT-TAKEN"); if (payload & BIT(EV_MISPRED)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " MISPRED"); - if (packet->index > 1) { - if (payload & BIT(EV_LLC_ACCESS)) - arm_spe_pkt_out_string(&err, &buf, &buf_len, " LLC-ACCESS"); - if (payload & BIT(EV_LLC_MISS)) - arm_spe_pkt_out_string(&err, &buf, &buf_len, " LLC-REFILL"); - if (payload & BIT(EV_REMOTE_ACCESS)) - arm_spe_pkt_out_string(&err, &buf, &buf_len, " REMOTE-ACCESS"); - } + if (payload & BIT(EV_LLC_ACCESS)) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " LLC-ACCESS"); + if (payload & BIT(EV_LLC_MISS)) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " LLC-REFILL"); + if (payload & BIT(EV_REMOTE_ACCESS)) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " REMOTE-ACCESS"); return err; } -- GitLab From 7488ffc4d981e19feddfe36a619051bf6216c7a1 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 19 Nov 2020 23:24:37 +0800 Subject: [PATCH 0266/1894] perf arm-spe: Add new function arm_spe_pkt_desc_op_type() The operation type packet is complex and contains subclass; the parsing flow causes deep indentation; for more readable, this patch introduces a new function arm_spe_pkt_desc_op_type() which is used for operation type parsing. Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Dave Martin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Link: https://lore.kernel.org/r/20201119152441.6972-13-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- .../arm-spe-decoder/arm-spe-pkt-decoder.c | 79 +++++++++++-------- 1 file changed, 45 insertions(+), 34 deletions(-) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index 88bcf7e5be768..d6c060f119b4e 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -321,6 +321,50 @@ static int arm_spe_pkt_desc_event(const struct arm_spe_pkt *packet, return err; } +static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet, + char *buf, size_t buf_len) +{ + u64 payload = packet->payload; + int err = 0; + + switch (packet->index) { + case 0: + arm_spe_pkt_out_string(&err, &buf, &buf_len, + payload & 0x1 ? "COND-SELECT" : "INSN-OTHER"); + break; + case 1: + arm_spe_pkt_out_string(&err, &buf, &buf_len, + payload & 0x1 ? "ST" : "LD"); + + if (payload & 0x2) { + if (payload & 0x4) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " AT"); + if (payload & 0x8) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " EXCL"); + if (payload & 0x10) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " AR"); + } else if (payload & 0x4) { + arm_spe_pkt_out_string(&err, &buf, &buf_len, " SIMD-FP"); + } + break; + case 2: + arm_spe_pkt_out_string(&err, &buf, &buf_len, "B"); + + if (payload & 0x1) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " COND"); + if (payload & 0x2) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " IND"); + + break; + default: + /* Unknown index */ + err = -1; + break; + } + + return err; +} + static int arm_spe_pkt_desc_addr(const struct arm_spe_pkt *packet, char *buf, size_t buf_len) { @@ -404,40 +448,7 @@ int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf, err = arm_spe_pkt_desc_event(packet, buf, buf_len); break; case ARM_SPE_OP_TYPE: - switch (idx) { - case 0: - arm_spe_pkt_out_string(&err, &buf, &blen, - payload & 0x1 ? "COND-SELECT" : "INSN-OTHER"); - break; - case 1: - arm_spe_pkt_out_string(&err, &buf, &blen, - payload & 0x1 ? "ST" : "LD"); - - if (payload & 0x2) { - if (payload & 0x4) - arm_spe_pkt_out_string(&err, &buf, &blen, " AT"); - if (payload & 0x8) - arm_spe_pkt_out_string(&err, &buf, &blen, " EXCL"); - if (payload & 0x10) - arm_spe_pkt_out_string(&err, &buf, &blen, " AR"); - } else if (payload & 0x4) { - arm_spe_pkt_out_string(&err, &buf, &blen, " SIMD-FP"); - } - break; - case 2: - arm_spe_pkt_out_string(&err, &buf, &blen, "B"); - - if (payload & 0x1) - arm_spe_pkt_out_string(&err, &buf, &blen, " COND"); - if (payload & 0x2) - arm_spe_pkt_out_string(&err, &buf, &blen, " IND"); - - break; - default: - /* Unknown index */ - err = -1; - break; - } + err = arm_spe_pkt_desc_op_type(packet, buf, buf_len); break; case ARM_SPE_DATA_SOURCE: case ARM_SPE_TIMESTAMP: -- GitLab From e771218f32f97c0940ae46c23e20d27f3d4c05e3 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 19 Nov 2020 23:24:38 +0800 Subject: [PATCH 0267/1894] perf arm-spe: Refactor operation packet handling Defines macros for operation packet header and formats (support sub classes for 'other', 'branch', 'load and store', etc). Uses these macros for operation packet decoding and dumping. Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Dave Martin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Link: https://lore.kernel.org/r/20201119152441.6972-14-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- .../arm-spe-decoder/arm-spe-pkt-decoder.c | 26 ++++++++++--------- .../arm-spe-decoder/arm-spe-pkt-decoder.h | 23 ++++++++++++++++ 2 files changed, 37 insertions(+), 12 deletions(-) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index d6c060f119b4e..1d1354a0eef4a 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -144,7 +144,7 @@ static int arm_spe_get_op_type(const unsigned char *buf, size_t len, struct arm_spe_pkt *packet) { packet->type = ARM_SPE_OP_TYPE; - packet->index = buf[0] & 0x3; + packet->index = SPE_OP_PKT_HDR_CLASS(buf[0]); return arm_spe_get_payload(buf, len, 0, packet); } @@ -328,31 +328,33 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet, int err = 0; switch (packet->index) { - case 0: + case SPE_OP_PKT_HDR_CLASS_OTHER: arm_spe_pkt_out_string(&err, &buf, &buf_len, - payload & 0x1 ? "COND-SELECT" : "INSN-OTHER"); + payload & SPE_OP_PKT_COND ? "COND-SELECT" : "INSN-OTHER"); break; - case 1: + case SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC: arm_spe_pkt_out_string(&err, &buf, &buf_len, payload & 0x1 ? "ST" : "LD"); - if (payload & 0x2) { - if (payload & 0x4) + if (SPE_OP_PKT_IS_LDST_ATOMIC(payload)) { + if (payload & SPE_OP_PKT_AT) arm_spe_pkt_out_string(&err, &buf, &buf_len, " AT"); - if (payload & 0x8) + if (payload & SPE_OP_PKT_EXCL) arm_spe_pkt_out_string(&err, &buf, &buf_len, " EXCL"); - if (payload & 0x10) + if (payload & SPE_OP_PKT_AR) arm_spe_pkt_out_string(&err, &buf, &buf_len, " AR"); - } else if (payload & 0x4) { + } else if (SPE_OP_PKT_LDST_SUBCLASS_GET(payload) == + SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP) { arm_spe_pkt_out_string(&err, &buf, &buf_len, " SIMD-FP"); } break; - case 2: + case SPE_OP_PKT_HDR_CLASS_BR_ERET: arm_spe_pkt_out_string(&err, &buf, &buf_len, "B"); - if (payload & 0x1) + if (payload & SPE_OP_PKT_COND) arm_spe_pkt_out_string(&err, &buf, &buf_len, " COND"); - if (payload & 0x2) + + if (SPE_OP_PKT_IS_INDIRECT_BRANCH(payload)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " IND"); break; diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h index 42ed4e61ede25..7032fc141ad4b 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h @@ -105,6 +105,29 @@ enum arm_spe_events { EV_EMPTY_PREDICATE = 18, }; +/* Operation packet header */ +#define SPE_OP_PKT_HDR_CLASS(h) ((h) & GENMASK_ULL(1, 0)) +#define SPE_OP_PKT_HDR_CLASS_OTHER 0x0 +#define SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC 0x1 +#define SPE_OP_PKT_HDR_CLASS_BR_ERET 0x2 + +#define SPE_OP_PKT_COND BIT(0) + +#define SPE_OP_PKT_LDST_SUBCLASS_GET(v) ((v) & GENMASK_ULL(7, 1)) +#define SPE_OP_PKT_LDST_SUBCLASS_GP_REG 0x0 +#define SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP 0x4 +#define SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG 0x10 +#define SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG 0x30 + +#define SPE_OP_PKT_IS_LDST_ATOMIC(v) (((v) & (GENMASK_ULL(7, 5) | BIT(1))) == 0x2) + +#define SPE_OP_PKT_AR BIT(4) +#define SPE_OP_PKT_EXCL BIT(3) +#define SPE_OP_PKT_AT BIT(2) +#define SPE_OP_PKT_ST BIT(0) + +#define SPE_OP_PKT_IS_INDIRECT_BRANCH(v) (((v) & GENMASK_ULL(7, 1)) == 0x2) + const char *arm_spe_pkt_name(enum arm_spe_pkt_type); int arm_spe_get_packet(const unsigned char *buf, size_t len, -- GitLab From 3d829724b16c5d2de42e6c9601c696c93a10bc61 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 19 Nov 2020 23:24:39 +0800 Subject: [PATCH 0268/1894] perf arm-spe: Add more sub classes for operation packet For the operation type packet payload with load/store class, it misses to support these sub classes: - A load/store targeting the general-purpose registers; - A load/store targeting unspecified registers; - The ARMv8.4 nested virtualisation extension can redirect system register accesses to a memory page controlled by the hypervisor. The SPE profiling feature in newer implementations can tag those memory accesses accordingly. Add the bit pattern describing load/store sub classes, so that the perf tool can decode it properly. Inspired by Andre Przywara, refined the commit log and code for more clear description. Co-developed-by: Andre Przywara Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Dave Martin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Link: https://lore.kernel.org/r/20201119152441.6972-15-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- .../util/arm-spe-decoder/arm-spe-pkt-decoder.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index 1d1354a0eef4a..84d661aab54fc 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -343,9 +343,23 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet, arm_spe_pkt_out_string(&err, &buf, &buf_len, " EXCL"); if (payload & SPE_OP_PKT_AR) arm_spe_pkt_out_string(&err, &buf, &buf_len, " AR"); - } else if (SPE_OP_PKT_LDST_SUBCLASS_GET(payload) == - SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP) { + } + + switch (SPE_OP_PKT_LDST_SUBCLASS_GET(payload)) { + case SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP: arm_spe_pkt_out_string(&err, &buf, &buf_len, " SIMD-FP"); + break; + case SPE_OP_PKT_LDST_SUBCLASS_GP_REG: + arm_spe_pkt_out_string(&err, &buf, &buf_len, " GP-REG"); + break; + case SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG: + arm_spe_pkt_out_string(&err, &buf, &buf_len, " UNSPEC-REG"); + break; + case SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG: + arm_spe_pkt_out_string(&err, &buf, &buf_len, " NV-SYSREG"); + break; + default: + break; } break; case SPE_OP_PKT_HDR_CLASS_BR_ERET: -- GitLab From 3601e605501df289db149785e1e6a8d16e557d31 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Thu, 19 Nov 2020 23:24:40 +0800 Subject: [PATCH 0269/1894] perf arm_spe: Decode memory tagging properties When SPE records a physical address, it can additionally tag the event with information from the Memory Tagging architecture extension. Decode the two additional fields in the SPE event payload. [leoy: Refined patch to use predefined macros] Signed-off-by: Andre Przywara Signed-off-by: Leo Yan Reviewed-by: Dave Martin Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Link: https://lore.kernel.org/r/20201119152441.6972-16-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 6 +++++- tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h | 2 ++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index 84d661aab54fc..57c01ce279154 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -385,6 +385,7 @@ static int arm_spe_pkt_desc_addr(const struct arm_spe_pkt *packet, char *buf, size_t buf_len) { int ns, el, idx = packet->index; + int ch, pat; u64 payload = packet->payload; int err = 0; @@ -404,9 +405,12 @@ static int arm_spe_pkt_desc_addr(const struct arm_spe_pkt *packet, break; case SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS: ns = !!SPE_ADDR_PKT_GET_NS(payload); + ch = !!SPE_ADDR_PKT_GET_CH(payload); + pat = SPE_ADDR_PKT_GET_PAT(payload); payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload); arm_spe_pkt_out_string(&err, &buf, &buf_len, - "PA 0x%llx ns=%d", payload, ns); + "PA 0x%llx ns=%d ch=%d pat=%x", + payload, ns, ch, pat); break; default: /* Unknown index */ diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h index 7032fc141ad4b..1ad14885c2a19 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h @@ -73,6 +73,8 @@ struct arm_spe_pkt { #define SPE_ADDR_PKT_GET_NS(v) (((v) & BIT_ULL(63)) >> 63) #define SPE_ADDR_PKT_GET_EL(v) (((v) & GENMASK_ULL(62, 61)) >> 61) +#define SPE_ADDR_PKT_GET_CH(v) (((v) & BIT_ULL(62)) >> 62) +#define SPE_ADDR_PKT_GET_PAT(v) (((v) & GENMASK_ULL(59, 56)) >> 56) #define SPE_ADDR_PKT_EL0 0 #define SPE_ADDR_PKT_EL1 1 -- GitLab From 05e91e7fe26c6fb116fa16f43c1eed78020f9463 Mon Sep 17 00:00:00 2001 From: Wei Li Date: Thu, 19 Nov 2020 23:24:41 +0800 Subject: [PATCH 0270/1894] perf arm-spe: Add support for ARMv8.3-SPE This patch is to support Armv8.3 extension for SPE, it adds alignment field in the Events packet and it supports the Scalable Vector Extension (SVE) for Operation packet and Events packet with two additions: - The vector length for SVE operations in the Operation Type packet; - The incomplete predicate and empty predicate fields in the Events packet. Signed-off-by: Wei Li Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Dave Martin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20201119152441.6972-17-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- .../arm-spe-decoder/arm-spe-pkt-decoder.c | 36 +++++++++++++++++-- .../arm-spe-decoder/arm-spe-pkt-decoder.h | 16 +++++++++ 2 files changed, 50 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index 57c01ce279154..f3ac9d40cebf4 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -317,6 +317,12 @@ static int arm_spe_pkt_desc_event(const struct arm_spe_pkt *packet, arm_spe_pkt_out_string(&err, &buf, &buf_len, " LLC-REFILL"); if (payload & BIT(EV_REMOTE_ACCESS)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " REMOTE-ACCESS"); + if (payload & BIT(EV_ALIGNMENT)) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " ALIGNMENT"); + if (payload & BIT(EV_PARTIAL_PREDICATE)) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " SVE-PARTIAL-PRED"); + if (payload & BIT(EV_EMPTY_PREDICATE)) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " SVE-EMPTY-PRED"); return err; } @@ -329,8 +335,23 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet, switch (packet->index) { case SPE_OP_PKT_HDR_CLASS_OTHER: - arm_spe_pkt_out_string(&err, &buf, &buf_len, - payload & SPE_OP_PKT_COND ? "COND-SELECT" : "INSN-OTHER"); + if (SPE_OP_PKT_IS_OTHER_SVE_OP(payload)) { + arm_spe_pkt_out_string(&err, &buf, &buf_len, "SVE-OTHER"); + + /* SVE effective vector length */ + arm_spe_pkt_out_string(&err, &buf, &buf_len, " EVLEN %d", + SPE_OP_PKG_SVE_EVL(payload)); + + if (payload & SPE_OP_PKT_SVE_FP) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " FP"); + if (payload & SPE_OP_PKT_SVE_PRED) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " PRED"); + } else { + arm_spe_pkt_out_string(&err, &buf, &buf_len, "OTHER"); + arm_spe_pkt_out_string(&err, &buf, &buf_len, " %s", + payload & SPE_OP_PKT_COND ? + "COND-SELECT" : "INSN-OTHER"); + } break; case SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC: arm_spe_pkt_out_string(&err, &buf, &buf_len, @@ -361,6 +382,17 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet, default: break; } + + if (SPE_OP_PKT_IS_LDST_SVE(payload)) { + /* SVE effective vector length */ + arm_spe_pkt_out_string(&err, &buf, &buf_len, " EVLEN %d", + SPE_OP_PKG_SVE_EVL(payload)); + + if (payload & SPE_OP_PKT_SVE_PRED) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " PRED"); + if (payload & SPE_OP_PKT_SVE_SG) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " SG"); + } break; case SPE_OP_PKT_HDR_CLASS_BR_ERET: arm_spe_pkt_out_string(&err, &buf, &buf_len, "B"); diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h index 1ad14885c2a19..9b970e7bf1e21 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h @@ -113,6 +113,8 @@ enum arm_spe_events { #define SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC 0x1 #define SPE_OP_PKT_HDR_CLASS_BR_ERET 0x2 +#define SPE_OP_PKT_IS_OTHER_SVE_OP(v) (((v) & (BIT(7) | BIT(3) | BIT(0))) == 0x8) + #define SPE_OP_PKT_COND BIT(0) #define SPE_OP_PKT_LDST_SUBCLASS_GET(v) ((v) & GENMASK_ULL(7, 1)) @@ -128,6 +130,20 @@ enum arm_spe_events { #define SPE_OP_PKT_AT BIT(2) #define SPE_OP_PKT_ST BIT(0) +#define SPE_OP_PKT_IS_LDST_SVE(v) (((v) & (BIT(3) | BIT(1))) == 0x8) + +#define SPE_OP_PKT_SVE_SG BIT(7) +/* + * SVE effective vector length (EVL) is stored in byte 0 bits [6:4]; + * the length is rounded up to a power of two and use 32 as one step, + * so EVL calculation is: + * + * 32 * (2 ^ bits [6:4]) = 32 << (bits [6:4]) + */ +#define SPE_OP_PKG_SVE_EVL(v) (32 << (((v) & GENMASK_ULL(6, 4)) >> 4)) +#define SPE_OP_PKT_SVE_PRED BIT(2) +#define SPE_OP_PKT_SVE_FP BIT(1) + #define SPE_OP_PKT_IS_INDIRECT_BRANCH(v) (((v) & GENMASK_ULL(7, 1)) == 0x2) const char *arm_spe_pkt_name(enum arm_spe_pkt_type); -- GitLab From 312489790cc6c760f8b7795b8f1ded45bafc318c Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Thu, 26 Nov 2020 15:15:59 +0100 Subject: [PATCH 0271/1894] dt-bindings: clk: g12a-clkc: add DSI Pixel clock bindings This adds the MIPI DSI Host Pixel Clock bindings. Signed-off-by: Neil Armstrong Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20201126141600.2084586-2-narmstrong@baylibre.com --- include/dt-bindings/clock/g12a-clkc.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/dt-bindings/clock/g12a-clkc.h b/include/dt-bindings/clock/g12a-clkc.h index 40d49940d8a8d..a93b58c5e18ec 100644 --- a/include/dt-bindings/clock/g12a-clkc.h +++ b/include/dt-bindings/clock/g12a-clkc.h @@ -147,5 +147,7 @@ #define CLKID_SPICC1_SCLK 261 #define CLKID_NNA_AXI_CLK 264 #define CLKID_NNA_CORE_CLK 267 +#define CLKID_MIPI_DSI_PXCLK_SEL 269 +#define CLKID_MIPI_DSI_PXCLK 270 #endif /* __G12A_CLKC_H */ -- GitLab From 88b9ae600138baff18c7f4c4870622584acc6111 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Thu, 26 Nov 2020 15:16:00 +0100 Subject: [PATCH 0272/1894] clk: meson: g12a: add MIPI DSI Host Pixel Clock This adds the MIPI DSI Host Pixel Clock, unlike AXG, the pixel clock can be different from the VPU ENCL output clock to feed the DSI Host controller with a different clock rate. Signed-off-by: Neil Armstrong Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20201126141600.2084586-3-narmstrong@baylibre.com --- drivers/clk/meson/g12a.c | 74 ++++++++++++++++++++++++++++++++++++++++ drivers/clk/meson/g12a.h | 3 +- 2 files changed, 76 insertions(+), 1 deletion(-) diff --git a/drivers/clk/meson/g12a.c b/drivers/clk/meson/g12a.c index 3cb8196c8e29c..b080359b4645e 100644 --- a/drivers/clk/meson/g12a.c +++ b/drivers/clk/meson/g12a.c @@ -3658,6 +3658,68 @@ static struct clk_regmap g12a_hdmi_tx = { }, }; +/* MIPI DSI Host Clocks */ + +static const struct clk_hw *g12a_mipi_dsi_pxclk_parent_hws[] = { + &g12a_vid_pll.hw, + &g12a_gp0_pll.hw, + &g12a_hifi_pll.hw, + &g12a_mpll1.hw, + &g12a_fclk_div2.hw, + &g12a_fclk_div2p5.hw, + &g12a_fclk_div3.hw, + &g12a_fclk_div7.hw, +}; + +static struct clk_regmap g12a_mipi_dsi_pxclk_sel = { + .data = &(struct clk_regmap_mux_data){ + .offset = HHI_MIPIDSI_PHY_CLK_CNTL, + .mask = 0x7, + .shift = 12, + .flags = CLK_MUX_ROUND_CLOSEST, + }, + .hw.init = &(struct clk_init_data){ + .name = "mipi_dsi_pxclk_sel", + .ops = &clk_regmap_mux_ops, + .parent_hws = g12a_mipi_dsi_pxclk_parent_hws, + .num_parents = ARRAY_SIZE(g12a_mipi_dsi_pxclk_parent_hws), + .flags = CLK_SET_RATE_NO_REPARENT, + }, +}; + +static struct clk_regmap g12a_mipi_dsi_pxclk_div = { + .data = &(struct clk_regmap_div_data){ + .offset = HHI_MIPIDSI_PHY_CLK_CNTL, + .shift = 0, + .width = 7, + }, + .hw.init = &(struct clk_init_data){ + .name = "mipi_dsi_pxclk_div", + .ops = &clk_regmap_divider_ops, + .parent_hws = (const struct clk_hw *[]) { + &g12a_mipi_dsi_pxclk_sel.hw + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + }, +}; + +static struct clk_regmap g12a_mipi_dsi_pxclk = { + .data = &(struct clk_regmap_gate_data){ + .offset = HHI_MIPIDSI_PHY_CLK_CNTL, + .bit_idx = 8, + }, + .hw.init = &(struct clk_init_data) { + .name = "mipi_dsi_pxclk", + .ops = &clk_regmap_gate_ops, + .parent_hws = (const struct clk_hw *[]) { + &g12a_mipi_dsi_pxclk_div.hw + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + }, +}; + /* HDMI Clocks */ static const struct clk_parent_data g12a_hdmi_parent_data[] = { @@ -4403,6 +4465,9 @@ static struct clk_hw_onecell_data g12a_hw_onecell_data = { [CLKID_SPICC1_SCLK_SEL] = &g12a_spicc1_sclk_sel.hw, [CLKID_SPICC1_SCLK_DIV] = &g12a_spicc1_sclk_div.hw, [CLKID_SPICC1_SCLK] = &g12a_spicc1_sclk.hw, + [CLKID_MIPI_DSI_PXCLK_SEL] = &g12a_mipi_dsi_pxclk_sel.hw, + [CLKID_MIPI_DSI_PXCLK_DIV] = &g12a_mipi_dsi_pxclk_div.hw, + [CLKID_MIPI_DSI_PXCLK] = &g12a_mipi_dsi_pxclk.hw, [NR_CLKS] = NULL, }, .num = NR_CLKS, @@ -4658,6 +4723,9 @@ static struct clk_hw_onecell_data g12b_hw_onecell_data = { [CLKID_SPICC1_SCLK_SEL] = &g12a_spicc1_sclk_sel.hw, [CLKID_SPICC1_SCLK_DIV] = &g12a_spicc1_sclk_div.hw, [CLKID_SPICC1_SCLK] = &g12a_spicc1_sclk.hw, + [CLKID_MIPI_DSI_PXCLK_SEL] = &g12a_mipi_dsi_pxclk_sel.hw, + [CLKID_MIPI_DSI_PXCLK_DIV] = &g12a_mipi_dsi_pxclk_div.hw, + [CLKID_MIPI_DSI_PXCLK] = &g12a_mipi_dsi_pxclk.hw, [NR_CLKS] = NULL, }, .num = NR_CLKS, @@ -4904,6 +4972,9 @@ static struct clk_hw_onecell_data sm1_hw_onecell_data = { [CLKID_NNA_CORE_CLK_SEL] = &sm1_nna_core_clk_sel.hw, [CLKID_NNA_CORE_CLK_DIV] = &sm1_nna_core_clk_div.hw, [CLKID_NNA_CORE_CLK] = &sm1_nna_core_clk.hw, + [CLKID_MIPI_DSI_PXCLK_SEL] = &g12a_mipi_dsi_pxclk_sel.hw, + [CLKID_MIPI_DSI_PXCLK_DIV] = &g12a_mipi_dsi_pxclk_div.hw, + [CLKID_MIPI_DSI_PXCLK] = &g12a_mipi_dsi_pxclk.hw, [NR_CLKS] = NULL, }, .num = NR_CLKS, @@ -5151,6 +5222,9 @@ static struct clk_regmap *const g12a_clk_regmaps[] = { &sm1_nna_core_clk_sel, &sm1_nna_core_clk_div, &sm1_nna_core_clk, + &g12a_mipi_dsi_pxclk_sel, + &g12a_mipi_dsi_pxclk_div, + &g12a_mipi_dsi_pxclk, }; static const struct reg_sequence g12a_init_regs[] = { diff --git a/drivers/clk/meson/g12a.h b/drivers/clk/meson/g12a.h index 69b6a69549c77..a97613df38b34 100644 --- a/drivers/clk/meson/g12a.h +++ b/drivers/clk/meson/g12a.h @@ -264,8 +264,9 @@ #define CLKID_NNA_AXI_CLK_DIV 263 #define CLKID_NNA_CORE_CLK_SEL 265 #define CLKID_NNA_CORE_CLK_DIV 266 +#define CLKID_MIPI_DSI_PXCLK_DIV 268 -#define NR_CLKS 268 +#define NR_CLKS 271 /* include the CLKIDs that have been made part of the DT binding */ #include -- GitLab From a886c310d9fcb0e66253d4af225cba13f9bdf5d2 Mon Sep 17 00:00:00 2001 From: Sivaram Nair Date: Wed, 21 Oct 2020 13:10:54 +0300 Subject: [PATCH 0273/1894] clk: tegra: bpmp: Clamp clock rates on requests BPMP firmware ABI expects the rate inputs in int64_t. However, tegra_bpmp_clk_round_rate() and tegra_bpmp_clk_set_rate() functions directly assign 'unsigned long' inputs to a int64_t value causing unexpected rounding errors. Fix this by clipping the input rate to S64_MAX. Signed-off-by: Sivaram Nair [mperttunen: slight cleanup] Signed-off-by: Mikko Perttunen Reviewed-by: Sivaram Nair Signed-off-by: Thierry Reding --- drivers/clk/tegra/clk-bpmp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/clk/tegra/clk-bpmp.c b/drivers/clk/tegra/clk-bpmp.c index a66263b6490d3..6ecf18f71c329 100644 --- a/drivers/clk/tegra/clk-bpmp.c +++ b/drivers/clk/tegra/clk-bpmp.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2016 NVIDIA Corporation + * Copyright (C) 2016-2020 NVIDIA Corporation */ #include @@ -174,7 +174,7 @@ static long tegra_bpmp_clk_round_rate(struct clk_hw *hw, unsigned long rate, int err; memset(&request, 0, sizeof(request)); - request.rate = rate; + request.rate = min_t(u64, rate, S64_MAX); memset(&msg, 0, sizeof(msg)); msg.cmd = CMD_CLK_ROUND_RATE; @@ -256,7 +256,7 @@ static int tegra_bpmp_clk_set_rate(struct clk_hw *hw, unsigned long rate, struct tegra_bpmp_clk_message msg; memset(&request, 0, sizeof(request)); - request.rate = rate; + request.rate = min_t(u64, rate, S64_MAX); memset(&msg, 0, sizeof(msg)); msg.cmd = CMD_CLK_SET_RATE; -- GitLab From 8d8d53cf8fd028310b1189165b939cde124895d7 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 29 Oct 2020 12:52:40 +1100 Subject: [PATCH 0274/1894] dma-mapping: Allow mixing bypass and mapped DMA operation At the moment we allow bypassing DMA ops only when we can do this for the entire RAM. However there are configs with mixed type memory where we could still allow bypassing IOMMU in most cases; POWERPC with persistent memory is one example. This adds an arch hook to determine where bypass can still work and we invoke direct DMA API. The following patch checks the bus limit on POWERPC to allow or disallow direct mapping. This adds a ARCH_HAS_DMA_MAP_DIRECT config option to make the arch_xxxx hooks no-op by default. Signed-off-by: Alexey Kardashevskiy Signed-off-by: Christoph Hellwig --- include/linux/dma-map-ops.h | 14 ++++++++++++++ kernel/dma/Kconfig | 4 ++++ kernel/dma/mapping.c | 12 ++++++++---- 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index a5f89fc4d6df1..38c8a4558e08e 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -314,6 +314,20 @@ static inline void arch_dma_mark_clean(phys_addr_t paddr, size_t size) void *arch_dma_set_uncached(void *addr, size_t size); void arch_dma_clear_uncached(void *addr, size_t size); +#ifdef CONFIG_ARCH_HAS_DMA_MAP_DIRECT +bool arch_dma_map_page_direct(struct device *dev, phys_addr_t addr); +bool arch_dma_unmap_page_direct(struct device *dev, dma_addr_t dma_handle); +bool arch_dma_map_sg_direct(struct device *dev, struct scatterlist *sg, + int nents); +bool arch_dma_unmap_sg_direct(struct device *dev, struct scatterlist *sg, + int nents); +#else +#define arch_dma_map_page_direct(d, a) (false) +#define arch_dma_unmap_page_direct(d, a) (false) +#define arch_dma_map_sg_direct(d, s, n) (false) +#define arch_dma_unmap_sg_direct(d, s, n) (false) +#endif + #ifdef CONFIG_ARCH_HAS_SETUP_DMA_OPS void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, const struct iommu_ops *iommu, bool coherent); diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index c99de4a214588..43d106598e82f 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -20,6 +20,10 @@ config DMA_OPS config DMA_OPS_BYPASS bool +# Lets platform IOMMU driver choose between bypass and IOMMU +config ARCH_HAS_DMA_MAP_DIRECT + bool + config NEED_SG_DMA_LENGTH bool diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 51bb8fa8eb894..f87a89d086544 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -149,7 +149,8 @@ dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page, if (WARN_ON_ONCE(!dev->dma_mask)) return DMA_MAPPING_ERROR; - if (dma_map_direct(dev, ops)) + if (dma_map_direct(dev, ops) || + arch_dma_map_page_direct(dev, page_to_phys(page) + offset + size)) addr = dma_direct_map_page(dev, page, offset, size, dir, attrs); else addr = ops->map_page(dev, page, offset, size, dir, attrs); @@ -165,7 +166,8 @@ void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr, size_t size, const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); - if (dma_map_direct(dev, ops)) + if (dma_map_direct(dev, ops) || + arch_dma_unmap_page_direct(dev, addr + size)) dma_direct_unmap_page(dev, addr, size, dir, attrs); else if (ops->unmap_page) ops->unmap_page(dev, addr, size, dir, attrs); @@ -188,7 +190,8 @@ int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, int nents, if (WARN_ON_ONCE(!dev->dma_mask)) return 0; - if (dma_map_direct(dev, ops)) + if (dma_map_direct(dev, ops) || + arch_dma_map_sg_direct(dev, sg, nents)) ents = dma_direct_map_sg(dev, sg, nents, dir, attrs); else ents = ops->map_sg(dev, sg, nents, dir, attrs); @@ -207,7 +210,8 @@ void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg, BUG_ON(!valid_dma_direction(dir)); debug_dma_unmap_sg(dev, sg, nents, dir); - if (dma_map_direct(dev, ops)) + if (dma_map_direct(dev, ops) || + arch_dma_unmap_sg_direct(dev, sg, nents)) dma_direct_unmap_sg(dev, sg, nents, dir, attrs); else if (ops->unmap_sg) ops->unmap_sg(dev, sg, nents, dir, attrs); -- GitLab From bf6e2d562bbc4d115cf322b0bca57fe5bbd26f48 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 29 Oct 2020 12:52:41 +1100 Subject: [PATCH 0275/1894] powerpc/dma: Fallback to dma_ops when persistent memory present So far we have been using huge DMA windows to map all the RAM available. The RAM is normally mapped to the VM address space contiguously, and there is always a reasonable upper limit for possible future hot plugged RAM which makes it easy to map all RAM via IOMMU. Now there is persistent memory ("ibm,pmemory" in the FDT) which (unlike normal RAM) can map anywhere in the VM space beyond the maximum RAM size and since it can be used for DMA, it requires extending the huge window up to MAX_PHYSMEM_BITS which requires hypervisor support for: 1. huge TCE tables; 2. multilevel TCE tables; 3. huge IOMMU pages. Certain hypervisors cannot do either so the only option left is restricting the huge DMA window to include only RAM and fallback to the default DMA window for persistent memory. This defines arch_dma_map_direct/etc to allow generic DMA code perform additional checks on whether direct DMA is still possible. This checks if the system has persistent memory. If it does not, the DMA bypass mode is selected, i.e. * dev->bus_dma_limit = 0 * dev->dma_ops_bypass = true <- this avoid calling dma_ops for mapping. If there is such memory, this creates identity mapping only for RAM and sets the dev->bus_dma_limit to let the generic code decide whether to call into the direct DMA or the indirect DMA ops. This should not change the existing behaviour when no persistent memory as dev->dma_ops_bypass is expected to be set. Signed-off-by: Alexey Kardashevskiy Acked-by: Michael Ellerman Signed-off-by: Christoph Hellwig --- arch/powerpc/Kconfig | 1 + arch/powerpc/kernel/dma-iommu.c | 71 +++++++++++++++++++++++++- arch/powerpc/platforms/pseries/iommu.c | 51 ++++++++++++++---- 3 files changed, 111 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index e9f13fe084929..b2d4580acf79a 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -159,6 +159,7 @@ config PPC select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN select DMA_OPS if PPC64 select DMA_OPS_BYPASS if PPC64 + select ARCH_HAS_DMA_MAP_DIRECT if PPC64 && PPC_PSERIES select DYNAMIC_FTRACE if FUNCTION_TRACER select EDAC_ATOMIC_SCRUB select EDAC_SUPPORT diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index a1c7441940184..111249fd619de 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -10,6 +10,63 @@ #include #include +#ifdef CONFIG_ARCH_HAS_DMA_MAP_DIRECT +#define can_map_direct(dev, addr) \ + ((dev)->bus_dma_limit >= phys_to_dma((dev), (addr))) + +bool arch_dma_map_page_direct(struct device *dev, phys_addr_t addr) +{ + if (likely(!dev->bus_dma_limit)) + return false; + + return can_map_direct(dev, addr); +} + +#define is_direct_handle(dev, h) ((h) >= (dev)->archdata.dma_offset) + +bool arch_dma_unmap_page_direct(struct device *dev, dma_addr_t dma_handle) +{ + if (likely(!dev->bus_dma_limit)) + return false; + + return is_direct_handle(dev, dma_handle); +} + +bool arch_dma_map_sg_direct(struct device *dev, struct scatterlist *sg, + int nents) +{ + struct scatterlist *s; + int i; + + if (likely(!dev->bus_dma_limit)) + return false; + + for_each_sg(sg, s, nents, i) { + if (!can_map_direct(dev, sg_phys(s) + s->offset + s->length)) + return false; + } + + return true; +} + +bool arch_dma_unmap_sg_direct(struct device *dev, struct scatterlist *sg, + int nents) +{ + struct scatterlist *s; + int i; + + if (likely(!dev->bus_dma_limit)) + return false; + + for_each_sg(sg, s, nents, i) { + if (!is_direct_handle(dev, s->dma_address + s->length)) + return false; + } + + return true; +} +#endif /* CONFIG_ARCH_HAS_DMA_MAP_DIRECT */ + /* * Generic iommu implementation */ @@ -90,8 +147,18 @@ int dma_iommu_dma_supported(struct device *dev, u64 mask) struct iommu_table *tbl = get_iommu_table_base(dev); if (dev_is_pci(dev) && dma_iommu_bypass_supported(dev, mask)) { - dev->dma_ops_bypass = true; - dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n"); + /* + * dma_iommu_bypass_supported() sets dma_max when there is + * 1:1 mapping but it is somehow limited. + * ibm,pmemory is one example. + */ + dev->dma_ops_bypass = dev->bus_dma_limit == 0; + if (!dev->dma_ops_bypass) + dev_warn(dev, + "iommu: 64-bit OK but direct DMA is limited by %llx\n", + dev->bus_dma_limit); + else + dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n"); return 1; } diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index e4198700ed1a3..9fc5217f0c8e5 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -839,7 +839,7 @@ static void remove_ddw(struct device_node *np, bool remove_prop) np, ret); } -static u64 find_existing_ddw(struct device_node *pdn) +static u64 find_existing_ddw(struct device_node *pdn, int *window_shift) { struct direct_window *window; const struct dynamic_dma_window_prop *direct64; @@ -851,6 +851,7 @@ static u64 find_existing_ddw(struct device_node *pdn) if (window->device == pdn) { direct64 = window->prop; dma_addr = be64_to_cpu(direct64->dma_base); + *window_shift = be32_to_cpu(direct64->window_shift); break; } } @@ -1111,11 +1112,12 @@ static void reset_dma_window(struct pci_dev *dev, struct device_node *par_dn) */ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) { - int len, ret; + int len = 0, ret; + int max_ram_len = order_base_2(ddw_memory_hotplug_max()); struct ddw_query_response query; struct ddw_create_response create; int page_shift; - u64 dma_addr, max_addr; + u64 dma_addr; struct device_node *dn; u32 ddw_avail[DDW_APPLICABLE_SIZE]; struct direct_window *window; @@ -1123,10 +1125,15 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) struct dynamic_dma_window_prop *ddwprop; struct failed_ddw_pdn *fpdn; bool default_win_removed = false; + bool pmem_present; + + dn = of_find_node_by_type(NULL, "ibm,pmemory"); + pmem_present = dn != NULL; + of_node_put(dn); mutex_lock(&direct_window_init_mutex); - dma_addr = find_existing_ddw(pdn); + dma_addr = find_existing_ddw(pdn, &len); if (dma_addr != 0) goto out_unlock; @@ -1212,14 +1219,29 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) } /* verify the window * number of ptes will map the partition */ /* check largest block * page size > max memory hotplug addr */ - max_addr = ddw_memory_hotplug_max(); - if (query.largest_available_block < (max_addr >> page_shift)) { - dev_dbg(&dev->dev, "can't map partition max 0x%llx with %llu " - "%llu-sized pages\n", max_addr, query.largest_available_block, - 1ULL << page_shift); + /* + * The "ibm,pmemory" can appear anywhere in the address space. + * Assuming it is still backed by page structs, try MAX_PHYSMEM_BITS + * for the upper limit and fallback to max RAM otherwise but this + * disables device::dma_ops_bypass. + */ + len = max_ram_len; + if (pmem_present) { + if (query.largest_available_block >= + (1ULL << (MAX_PHYSMEM_BITS - page_shift))) + len = MAX_PHYSMEM_BITS - page_shift; + else + dev_info(&dev->dev, "Skipping ibm,pmemory"); + } + + if (query.largest_available_block < (1ULL << (len - page_shift))) { + dev_dbg(&dev->dev, + "can't map partition max 0x%llx with %llu %llu-sized pages\n", + 1ULL << len, + query.largest_available_block, + 1ULL << page_shift); goto out_failed; } - len = order_base_2(max_addr); win64 = kzalloc(sizeof(struct property), GFP_KERNEL); if (!win64) { dev_info(&dev->dev, @@ -1299,6 +1321,15 @@ out_failed: out_unlock: mutex_unlock(&direct_window_init_mutex); + + /* + * If we have persistent memory and the window size is only as big + * as RAM, then we failed to create a window to cover persistent + * memory and need to set the DMA limit. + */ + if (pmem_present && dma_addr && (len == max_ram_len)) + dev->dev.bus_dma_limit = dma_addr + (1ULL << len); + return dma_addr; } -- GitLab From 94035edcb4e3bbc9f445bee706722ef64e044095 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Sat, 7 Nov 2020 18:03:12 +0800 Subject: [PATCH 0276/1894] dma-pool: no need to check return value of debugfs_create functions When calling debugfs functions, there is no need to ever check the return value. The function can work or not, but the code logic should never do something different based on this. Signed-off-by: Tiezhu Yang Reviewed-by: Robin Murphy Signed-off-by: Christoph Hellwig --- kernel/dma/pool.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c index d4637f72239b4..5f84e6cdb78ea 100644 --- a/kernel/dma/pool.c +++ b/kernel/dma/pool.c @@ -38,9 +38,6 @@ static void __init dma_atomic_pool_debugfs_init(void) struct dentry *root; root = debugfs_create_dir("dma_pools", NULL); - if (IS_ERR_OR_NULL(root)) - return; - debugfs_create_ulong("pool_size_dma", 0400, root, &pool_size_dma); debugfs_create_ulong("pool_size_dma32", 0400, root, &pool_size_dma32); debugfs_create_ulong("pool_size_kernel", 0400, root, &pool_size_kernel); -- GitLab From 819b70ad620119d21a9e4be6ad665ece26fc0db8 Mon Sep 17 00:00:00 2001 From: tangjianqiang Date: Tue, 24 Nov 2020 18:40:19 +0800 Subject: [PATCH 0277/1894] dma-contiguous: fix a typo error in a comment Fix a typo error in cma description comment: "then" -> "than". Signed-off-by: tangjianqiang Signed-off-by: Christoph Hellwig --- kernel/dma/contiguous.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c index 16b95ff12e4df..3d63d91cba5cf 100644 --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -20,7 +20,7 @@ * coders, etc. * * Such devices often require big memory buffers (a full HD frame - * is, for instance, more then 2 mega pixels large, i.e. more than 6 + * is, for instance, more than 2 mega pixels large, i.e. more than 6 * MB of memory), which makes mechanisms such as kmalloc() or * alloc_page() ineffective. * -- GitLab From 65789daa8087e125927230ccb7e1eab13999b0cf Mon Sep 17 00:00:00 2001 From: Barry Song Date: Mon, 16 Nov 2020 19:08:47 +1300 Subject: [PATCH 0278/1894] dma-mapping: add benchmark support for streaming DMA APIs Nowadays, there are increasing requirements to benchmark the performance of dma_map and dma_unmap particually while the device is attached to an IOMMU. This patch enables the support. Users can run specified number of threads to do dma_map_page and dma_unmap_page on a specific NUMA node with the specified duration. Then dma_map_benchmark will calculate the average latency for map and unmap. A difficulity for this benchmark is that dma_map/unmap APIs must run on a particular device. Each device might have different backend of IOMMU or non-IOMMU. So we use the driver_override to bind dma_map_benchmark to a particual device by: For platform devices: echo dma_map_benchmark > /sys/bus/platform/devices/xxx/driver_override echo xxx > /sys/bus/platform/drivers/xxx/unbind echo xxx > /sys/bus/platform/drivers/dma_map_benchmark/bind For PCI devices: echo dma_map_benchmark > /sys/bus/pci/devices/0000:00:01.0/driver_override echo 0000:00:01.0 > /sys/bus/pci/drivers/xxx/unbind echo 0000:00:01.0 > /sys/bus/pci/drivers/dma_map_benchmark/bind Cc: Will Deacon Cc: Shuah Khan Cc: Christoph Hellwig Cc: Marek Szyprowski Cc: Robin Murphy Signed-off-by: Barry Song [hch: folded in two fixes from Colin Ian King ] Signed-off-by: Christoph Hellwig --- kernel/dma/Kconfig | 9 + kernel/dma/Makefile | 1 + kernel/dma/map_benchmark.c | 361 +++++++++++++++++++++++++++++++++++++ 3 files changed, 371 insertions(+) create mode 100644 kernel/dma/map_benchmark.c diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index 43d106598e82f..ba50d4588bc5f 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -229,3 +229,12 @@ config DMA_API_DEBUG_SG is technically out-of-spec. If unsure, say N. + +config DMA_MAP_BENCHMARK + bool "Enable benchmarking of streaming DMA mapping" + depends on DEBUG_FS + help + Provides /sys/kernel/debug/dma_map_benchmark that helps with testing + performance of dma_(un)map_page. + + See tools/testing/selftests/dma/dma_map_benchmark.c diff --git a/kernel/dma/Makefile b/kernel/dma/Makefile index dc755ab68aabf..7aa6b26b1348e 100644 --- a/kernel/dma/Makefile +++ b/kernel/dma/Makefile @@ -10,3 +10,4 @@ obj-$(CONFIG_DMA_API_DEBUG) += debug.o obj-$(CONFIG_SWIOTLB) += swiotlb.o obj-$(CONFIG_DMA_COHERENT_POOL) += pool.o obj-$(CONFIG_DMA_REMAP) += remap.o +obj-$(CONFIG_DMA_MAP_BENCHMARK) += map_benchmark.o diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c new file mode 100644 index 0000000000000..b1496e744c687 --- /dev/null +++ b/kernel/dma/map_benchmark.c @@ -0,0 +1,361 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 Hisilicon Limited. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DMA_MAP_BENCHMARK _IOWR('d', 1, struct map_benchmark) +#define DMA_MAP_MAX_THREADS 1024 +#define DMA_MAP_MAX_SECONDS 300 + +#define DMA_MAP_BIDIRECTIONAL 0 +#define DMA_MAP_TO_DEVICE 1 +#define DMA_MAP_FROM_DEVICE 2 + +struct map_benchmark { + __u64 avg_map_100ns; /* average map latency in 100ns */ + __u64 map_stddev; /* standard deviation of map latency */ + __u64 avg_unmap_100ns; /* as above */ + __u64 unmap_stddev; + __u32 threads; /* how many threads will do map/unmap in parallel */ + __u32 seconds; /* how long the test will last */ + __s32 node; /* which numa node this benchmark will run on */ + __u32 dma_bits; /* DMA addressing capability */ + __u32 dma_dir; /* DMA data direction */ + __u64 expansion[10]; /* For future use */ +}; + +struct map_benchmark_data { + struct map_benchmark bparam; + struct device *dev; + struct dentry *debugfs; + enum dma_data_direction dir; + atomic64_t sum_map_100ns; + atomic64_t sum_unmap_100ns; + atomic64_t sum_sq_map; + atomic64_t sum_sq_unmap; + atomic64_t loops; +}; + +static int map_benchmark_thread(void *data) +{ + void *buf; + dma_addr_t dma_addr; + struct map_benchmark_data *map = data; + int ret = 0; + + buf = (void *)__get_free_page(GFP_KERNEL); + if (!buf) + return -ENOMEM; + + while (!kthread_should_stop()) { + u64 map_100ns, unmap_100ns, map_sq, unmap_sq; + ktime_t map_stime, map_etime, unmap_stime, unmap_etime; + ktime_t map_delta, unmap_delta; + + /* + * for a non-coherent device, if we don't stain them in the + * cache, this will give an underestimate of the real-world + * overhead of BIDIRECTIONAL or TO_DEVICE mappings; + * 66 means evertything goes well! 66 is lucky. + */ + if (map->dir != DMA_FROM_DEVICE) + memset(buf, 0x66, PAGE_SIZE); + + map_stime = ktime_get(); + dma_addr = dma_map_single(map->dev, buf, PAGE_SIZE, map->dir); + if (unlikely(dma_mapping_error(map->dev, dma_addr))) { + pr_err("dma_map_single failed on %s\n", + dev_name(map->dev)); + ret = -ENOMEM; + goto out; + } + map_etime = ktime_get(); + map_delta = ktime_sub(map_etime, map_stime); + + unmap_stime = ktime_get(); + dma_unmap_single(map->dev, dma_addr, PAGE_SIZE, map->dir); + unmap_etime = ktime_get(); + unmap_delta = ktime_sub(unmap_etime, unmap_stime); + + /* calculate sum and sum of squares */ + + map_100ns = div64_ul(map_delta, 100); + unmap_100ns = div64_ul(unmap_delta, 100); + map_sq = map_100ns * map_100ns; + unmap_sq = unmap_100ns * unmap_100ns; + + atomic64_add(map_100ns, &map->sum_map_100ns); + atomic64_add(unmap_100ns, &map->sum_unmap_100ns); + atomic64_add(map_sq, &map->sum_sq_map); + atomic64_add(unmap_sq, &map->sum_sq_unmap); + atomic64_inc(&map->loops); + } + +out: + free_page((unsigned long)buf); + return ret; +} + +static int do_map_benchmark(struct map_benchmark_data *map) +{ + struct task_struct **tsk; + int threads = map->bparam.threads; + int node = map->bparam.node; + const cpumask_t *cpu_mask = cpumask_of_node(node); + u64 loops; + int ret = 0; + int i; + + tsk = kmalloc_array(threads, sizeof(*tsk), GFP_KERNEL); + if (!tsk) + return -ENOMEM; + + get_device(map->dev); + + for (i = 0; i < threads; i++) { + tsk[i] = kthread_create_on_node(map_benchmark_thread, map, + map->bparam.node, "dma-map-benchmark/%d", i); + if (IS_ERR(tsk[i])) { + pr_err("create dma_map thread failed\n"); + ret = PTR_ERR(tsk[i]); + goto out; + } + + if (node != NUMA_NO_NODE) + kthread_bind_mask(tsk[i], cpu_mask); + } + + /* clear the old value in the previous benchmark */ + atomic64_set(&map->sum_map_100ns, 0); + atomic64_set(&map->sum_unmap_100ns, 0); + atomic64_set(&map->sum_sq_map, 0); + atomic64_set(&map->sum_sq_unmap, 0); + atomic64_set(&map->loops, 0); + + for (i = 0; i < threads; i++) + wake_up_process(tsk[i]); + + msleep_interruptible(map->bparam.seconds * 1000); + + /* wait for the completion of benchmark threads */ + for (i = 0; i < threads; i++) { + ret = kthread_stop(tsk[i]); + if (ret) + goto out; + } + + loops = atomic64_read(&map->loops); + if (likely(loops > 0)) { + u64 map_variance, unmap_variance; + u64 sum_map = atomic64_read(&map->sum_map_100ns); + u64 sum_unmap = atomic64_read(&map->sum_unmap_100ns); + u64 sum_sq_map = atomic64_read(&map->sum_sq_map); + u64 sum_sq_unmap = atomic64_read(&map->sum_sq_unmap); + + /* average latency */ + map->bparam.avg_map_100ns = div64_u64(sum_map, loops); + map->bparam.avg_unmap_100ns = div64_u64(sum_unmap, loops); + + /* standard deviation of latency */ + map_variance = div64_u64(sum_sq_map, loops) - + map->bparam.avg_map_100ns * + map->bparam.avg_map_100ns; + unmap_variance = div64_u64(sum_sq_unmap, loops) - + map->bparam.avg_unmap_100ns * + map->bparam.avg_unmap_100ns; + map->bparam.map_stddev = int_sqrt64(map_variance); + map->bparam.unmap_stddev = int_sqrt64(unmap_variance); + } + +out: + put_device(map->dev); + kfree(tsk); + return ret; +} + +static long map_benchmark_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct map_benchmark_data *map = file->private_data; + void __user *argp = (void __user *)arg; + u64 old_dma_mask; + + int ret; + + if (copy_from_user(&map->bparam, argp, sizeof(map->bparam))) + return -EFAULT; + + switch (cmd) { + case DMA_MAP_BENCHMARK: + if (map->bparam.threads == 0 || + map->bparam.threads > DMA_MAP_MAX_THREADS) { + pr_err("invalid thread number\n"); + return -EINVAL; + } + + if (map->bparam.seconds == 0 || + map->bparam.seconds > DMA_MAP_MAX_SECONDS) { + pr_err("invalid duration seconds\n"); + return -EINVAL; + } + + if (map->bparam.node != NUMA_NO_NODE && + !node_possible(map->bparam.node)) { + pr_err("invalid numa node\n"); + return -EINVAL; + } + + switch (map->bparam.dma_dir) { + case DMA_MAP_BIDIRECTIONAL: + map->dir = DMA_BIDIRECTIONAL; + break; + case DMA_MAP_FROM_DEVICE: + map->dir = DMA_FROM_DEVICE; + break; + case DMA_MAP_TO_DEVICE: + map->dir = DMA_TO_DEVICE; + break; + default: + pr_err("invalid DMA direction\n"); + return -EINVAL; + } + + old_dma_mask = dma_get_mask(map->dev); + + ret = dma_set_mask(map->dev, + DMA_BIT_MASK(map->bparam.dma_bits)); + if (ret) { + pr_err("failed to set dma_mask on device %s\n", + dev_name(map->dev)); + return -EINVAL; + } + + ret = do_map_benchmark(map); + + /* + * restore the original dma_mask as many devices' dma_mask are + * set by architectures, acpi, busses. When we bind them back + * to their original drivers, those drivers shouldn't see + * dma_mask changed by benchmark + */ + dma_set_mask(map->dev, old_dma_mask); + break; + default: + return -EINVAL; + } + + if (copy_to_user(argp, &map->bparam, sizeof(map->bparam))) + return -EFAULT; + + return ret; +} + +static const struct file_operations map_benchmark_fops = { + .open = simple_open, + .unlocked_ioctl = map_benchmark_ioctl, +}; + +static void map_benchmark_remove_debugfs(void *data) +{ + struct map_benchmark_data *map = (struct map_benchmark_data *)data; + + debugfs_remove(map->debugfs); +} + +static int __map_benchmark_probe(struct device *dev) +{ + struct dentry *entry; + struct map_benchmark_data *map; + int ret; + + map = devm_kzalloc(dev, sizeof(*map), GFP_KERNEL); + if (!map) + return -ENOMEM; + map->dev = dev; + + ret = devm_add_action(dev, map_benchmark_remove_debugfs, map); + if (ret) { + pr_err("Can't add debugfs remove action\n"); + return ret; + } + + /* + * we only permit a device bound with this driver, 2nd probe + * will fail + */ + entry = debugfs_create_file("dma_map_benchmark", 0600, NULL, map, + &map_benchmark_fops); + if (IS_ERR(entry)) + return PTR_ERR(entry); + map->debugfs = entry; + + return 0; +} + +static int map_benchmark_platform_probe(struct platform_device *pdev) +{ + return __map_benchmark_probe(&pdev->dev); +} + +static struct platform_driver map_benchmark_platform_driver = { + .driver = { + .name = "dma_map_benchmark", + }, + .probe = map_benchmark_platform_probe, +}; + +static int +map_benchmark_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + return __map_benchmark_probe(&pdev->dev); +} + +static struct pci_driver map_benchmark_pci_driver = { + .name = "dma_map_benchmark", + .probe = map_benchmark_pci_probe, +}; + +static int __init map_benchmark_init(void) +{ + int ret; + + ret = pci_register_driver(&map_benchmark_pci_driver); + if (ret) + return ret; + + ret = platform_driver_register(&map_benchmark_platform_driver); + if (ret) { + pci_unregister_driver(&map_benchmark_pci_driver); + return ret; + } + + return 0; +} + +static void __exit map_benchmark_cleanup(void) +{ + platform_driver_unregister(&map_benchmark_platform_driver); + pci_unregister_driver(&map_benchmark_pci_driver); +} + +module_init(map_benchmark_init); +module_exit(map_benchmark_cleanup); + +MODULE_AUTHOR("Barry Song "); +MODULE_DESCRIPTION("dma_map benchmark driver"); +MODULE_LICENSE("GPL"); -- GitLab From 7679325702c90aecd393cd7cde685576c14489c0 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Mon, 16 Nov 2020 19:08:48 +1300 Subject: [PATCH 0279/1894] selftests/dma: add test application for DMA_MAP_BENCHMARK This patch provides the test application for DMA_MAP_BENCHMARK. Before running the test application, we need to bind a device to dma_map_ benchmark driver. For example, unbind "xxx" from its original driver and bind to dma_map_benchmark: echo dma_map_benchmark > /sys/bus/platform/devices/xxx/driver_override echo xxx > /sys/bus/platform/drivers/xxx/unbind echo xxx > /sys/bus/platform/drivers/dma_map_benchmark/bind Another example for PCI devices: echo dma_map_benchmark > /sys/bus/pci/devices/0000:00:01.0/driver_override echo 0000:00:01.0 > /sys/bus/pci/drivers/xxx/unbind echo 0000:00:01.0 > /sys/bus/pci/drivers/dma_map_benchmark/bind The below command will run 16 threads on numa node 0 for 10 seconds on the device bound to dma_map_benchmark platform_driver or pci_driver: ./dma_map_benchmark -t 16 -s 10 -n 0 dma mapping benchmark: threads:16 seconds:10 average map latency(us):1.1 standard deviation:1.9 average unmap latency(us):0.5 standard deviation:0.8 Cc: Will Deacon Cc: Shuah Khan Cc: Christoph Hellwig Cc: Marek Szyprowski Cc: Robin Murphy Signed-off-by: Barry Song Signed-off-by: Christoph Hellwig --- MAINTAINERS | 6 + tools/testing/selftests/dma/Makefile | 6 + tools/testing/selftests/dma/config | 1 + .../testing/selftests/dma/dma_map_benchmark.c | 123 ++++++++++++++++++ 4 files changed, 136 insertions(+) create mode 100644 tools/testing/selftests/dma/Makefile create mode 100644 tools/testing/selftests/dma/config create mode 100644 tools/testing/selftests/dma/dma_map_benchmark.c diff --git a/MAINTAINERS b/MAINTAINERS index a008b70f3c161..c88abd1d323b0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5237,6 +5237,12 @@ F: include/linux/dma-mapping.h F: include/linux/dma-map-ops.h F: kernel/dma/ +DMA MAPPING BENCHMARK +M: Barry Song +L: iommu@lists.linux-foundation.org +F: kernel/dma/map_benchmark.c +F: tools/testing/selftests/dma/ + DMA-BUF HEAPS FRAMEWORK M: Sumit Semwal R: Benjamin Gaignard diff --git a/tools/testing/selftests/dma/Makefile b/tools/testing/selftests/dma/Makefile new file mode 100644 index 0000000000000..aa8e8b5b3864e --- /dev/null +++ b/tools/testing/selftests/dma/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +CFLAGS += -I../../../../usr/include/ + +TEST_GEN_PROGS := dma_map_benchmark + +include ../lib.mk diff --git a/tools/testing/selftests/dma/config b/tools/testing/selftests/dma/config new file mode 100644 index 0000000000000..6102ee3c43cde --- /dev/null +++ b/tools/testing/selftests/dma/config @@ -0,0 +1 @@ +CONFIG_DMA_MAP_BENCHMARK=y diff --git a/tools/testing/selftests/dma/dma_map_benchmark.c b/tools/testing/selftests/dma/dma_map_benchmark.c new file mode 100644 index 0000000000000..7065163a83880 --- /dev/null +++ b/tools/testing/selftests/dma/dma_map_benchmark.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 Hisilicon Limited. + */ + +#include +#include +#include +#include +#include +#include +#include + +#define DMA_MAP_BENCHMARK _IOWR('d', 1, struct map_benchmark) +#define DMA_MAP_MAX_THREADS 1024 +#define DMA_MAP_MAX_SECONDS 300 + +#define DMA_MAP_BIDIRECTIONAL 0 +#define DMA_MAP_TO_DEVICE 1 +#define DMA_MAP_FROM_DEVICE 2 + +static char *directions[] = { + "BIDIRECTIONAL", + "TO_DEVICE", + "FROM_DEVICE", +}; + +struct map_benchmark { + __u64 avg_map_100ns; /* average map latency in 100ns */ + __u64 map_stddev; /* standard deviation of map latency */ + __u64 avg_unmap_100ns; /* as above */ + __u64 unmap_stddev; + __u32 threads; /* how many threads will do map/unmap in parallel */ + __u32 seconds; /* how long the test will last */ + __s32 node; /* which numa node this benchmark will run on */ + __u32 dma_bits; /* DMA addressing capability */ + __u32 dma_dir; /* DMA data direction */ + __u64 expansion[10]; /* For future use */ +}; + +int main(int argc, char **argv) +{ + struct map_benchmark map; + int fd, opt; + /* default single thread, run 20 seconds on NUMA_NO_NODE */ + int threads = 1, seconds = 20, node = -1; + /* default dma mask 32bit, bidirectional DMA */ + int bits = 32, dir = DMA_MAP_BIDIRECTIONAL; + + int cmd = DMA_MAP_BENCHMARK; + char *p; + + while ((opt = getopt(argc, argv, "t:s:n:b:d:")) != -1) { + switch (opt) { + case 't': + threads = atoi(optarg); + break; + case 's': + seconds = atoi(optarg); + break; + case 'n': + node = atoi(optarg); + break; + case 'b': + bits = atoi(optarg); + break; + case 'd': + dir = atoi(optarg); + break; + default: + return -1; + } + } + + if (threads <= 0 || threads > DMA_MAP_MAX_THREADS) { + fprintf(stderr, "invalid number of threads, must be in 1-%d\n", + DMA_MAP_MAX_THREADS); + exit(1); + } + + if (seconds <= 0 || seconds > DMA_MAP_MAX_SECONDS) { + fprintf(stderr, "invalid number of seconds, must be in 1-%d\n", + DMA_MAP_MAX_SECONDS); + exit(1); + } + + /* suppose the mininum DMA zone is 1MB in the world */ + if (bits < 20 || bits > 64) { + fprintf(stderr, "invalid dma mask bit, must be in 20-64\n"); + exit(1); + } + + if (dir != DMA_MAP_BIDIRECTIONAL && dir != DMA_MAP_TO_DEVICE && + dir != DMA_MAP_FROM_DEVICE) { + fprintf(stderr, "invalid dma direction\n"); + exit(1); + } + + fd = open("/sys/kernel/debug/dma_map_benchmark", O_RDWR); + if (fd == -1) { + perror("open"); + exit(1); + } + + map.seconds = seconds; + map.threads = threads; + map.node = node; + map.dma_bits = bits; + map.dma_dir = dir; + if (ioctl(fd, cmd, &map)) { + perror("ioctl"); + exit(1); + } + + printf("dma mapping benchmark: threads:%d seconds:%d node:%d dir:%s\n", + threads, seconds, node, dir[directions]); + printf("average map latency(us):%.1f standard deviation:%.1f\n", + map.avg_map_100ns/10.0, map.map_stddev/10.0); + printf("average unmap latency(us):%.1f standard deviation:%.1f\n", + map.avg_unmap_100ns/10.0, map.unmap_stddev/10.0); + + return 0; +} -- GitLab From 68b824e428c5fb5c3dc5ef80b1543e767534b58e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 24 Oct 2020 16:33:38 +0100 Subject: [PATCH 0280/1894] KVM: arm64: Patch kimage_voffset instead of loading the EL1 value Directly using the kimage_voffset variable is fine for now, but will become more problematic as we start distrusting EL1. Instead, patch the kimage_voffset into the HYP text, ensuring we don't have to load an untrusted value later on. Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_mmu.h | 22 ++++++++++++++++++++++ arch/arm64/kernel/image-vars.h | 4 +--- arch/arm64/kvm/hyp/nvhe/host.S | 5 +---- arch/arm64/kvm/va_layout.c | 6 ++++++ 4 files changed, 30 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 608c3a83e740d..5633c5a27aad3 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -72,6 +72,28 @@ alternative_cb kvm_update_va_mask alternative_cb_end .endm +/* + * Convert a kernel image address to a PA + * reg: kernel address to be converted in place + * tmp: temporary register + * + * The actual code generation takes place in kvm_get_kimage_voffset, and + * the instructions below are only there to reserve the space and + * perform the register allocation (kvm_get_kimage_voffset uses the + * specific registers encoded in the instructions). + */ +.macro kimg_pa reg, tmp +alternative_cb kvm_get_kimage_voffset + movz \tmp, #0 + movk \tmp, #0, lsl #16 + movk \tmp, #0, lsl #32 + movk \tmp, #0, lsl #48 +alternative_cb_end + + /* reg = __pa(reg) */ + sub \reg, \reg, \tmp +.endm + #else #include diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index e8c194f8de887..4b32588918d90 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -65,13 +65,11 @@ __efistub__ctype = _ctype; KVM_NVHE_ALIAS(kvm_patch_vector_branch); KVM_NVHE_ALIAS(kvm_update_va_mask); KVM_NVHE_ALIAS(kvm_update_kimg_phys_offset); +KVM_NVHE_ALIAS(kvm_get_kimage_voffset); /* Global kernel state accessed by nVHE hyp code. */ KVM_NVHE_ALIAS(kvm_vgic_global_state); -/* Kernel constant needed to compute idmap addresses. */ -KVM_NVHE_ALIAS(kimage_voffset); - /* Kernel symbols used to call panic() from nVHE hyp code (via ERET). */ KVM_NVHE_ALIAS(__hyp_panic_string); KVM_NVHE_ALIAS(panic); diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S index ed27f06a31ba2..4e207c1c51267 100644 --- a/arch/arm64/kvm/hyp/nvhe/host.S +++ b/arch/arm64/kvm/hyp/nvhe/host.S @@ -115,10 +115,7 @@ SYM_FUNC_END(__hyp_do_panic) * Preserve x0-x4, which may contain stub parameters. */ ldr x5, =__kvm_handle_stub_hvc - ldr_l x6, kimage_voffset - - /* x5 = __pa(x5) */ - sub x5, x5, x6 + kimg_pa x5, x6 br x5 .L__vect_end\@: .if ((.L__vect_end\@ - .L__vect_start\@) > 0x80) diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c index 1d00d2cb93fd5..d61117805de06 100644 --- a/arch/arm64/kvm/va_layout.c +++ b/arch/arm64/kvm/va_layout.c @@ -251,3 +251,9 @@ void kvm_update_kimg_phys_offset(struct alt_instr *alt, { generate_mov_q(kimage_voffset + PHYS_OFFSET, origptr, updptr, nr_inst); } + +void kvm_get_kimage_voffset(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst) +{ + generate_mov_q(kimage_voffset, origptr, updptr, nr_inst); +} -- GitLab From 29052f1b92f2bcb0419c544459f4c919bfb20898 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 21 Oct 2020 21:52:54 +0100 Subject: [PATCH 0281/1894] KVM: arm64: Simplify __kvm_enable_ssbs() Move the setting of SSBS directly into the HVC handler, using the C helpers rather than the inline asssembly code. Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_asm.h | 2 -- arch/arm64/include/asm/sysreg.h | 1 + arch/arm64/kvm/hyp/nvhe/hyp-main.c | 6 +++++- arch/arm64/kvm/hyp/nvhe/sysreg-sr.c | 11 ----------- 4 files changed, 6 insertions(+), 14 deletions(-) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 54387ccd1ab26..a542c422a036f 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -189,8 +189,6 @@ extern void __kvm_timer_set_cntvoff(u64 cntvoff); extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); -extern void __kvm_enable_ssbs(void); - extern u64 __vgic_v3_get_ich_vtr_el2(void); extern u64 __vgic_v3_read_vmcr(void); extern void __vgic_v3_write_vmcr(u32 vmcr); diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 174817ba119cb..153faa15d4909 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -461,6 +461,7 @@ #define SYS_PMCCFILTR_EL0 sys_reg(3, 3, 14, 15, 7) +#define SYS_SCTLR_EL2 sys_reg(3, 4, 1, 0, 0) #define SYS_ZCR_EL2 sys_reg(3, 4, 1, 2, 0) #define SYS_DACR32_EL2 sys_reg(3, 4, 3, 0, 0) #define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index c0543b2e760ea..82df7fc247606 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -58,7 +58,11 @@ static void handle___kvm_timer_set_cntvoff(struct kvm_cpu_context *host_ctxt) static void handle___kvm_enable_ssbs(struct kvm_cpu_context *host_ctxt) { - __kvm_enable_ssbs(); + u64 tmp; + + tmp = read_sysreg_el2(SYS_SCTLR); + tmp |= SCTLR_ELx_DSSBS; + write_sysreg_el2(tmp, SYS_SCTLR); } static void handle___vgic_v3_get_ich_vtr_el2(struct kvm_cpu_context *host_ctxt) diff --git a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c index 88a25fc8fcd36..29305022bc048 100644 --- a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c @@ -33,14 +33,3 @@ void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt) __sysreg_restore_user_state(ctxt); __sysreg_restore_el2_return_state(ctxt); } - -void __kvm_enable_ssbs(void) -{ - u64 tmp; - - asm volatile( - "mrs %0, sctlr_el2\n" - "orr %0, %0, %1\n" - "msr sctlr_el2, %0" - : "=&r" (tmp) : "L" (SCTLR_ELx_DSSBS)); -} -- GitLab From 83fa381f66ccb025f9e182d0b2ef9cd53c1f33ab Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 24 Oct 2020 10:56:55 +0100 Subject: [PATCH 0282/1894] KVM: arm64: Avoid repetitive stack access on host EL1 to EL2 exception Registers x0/x1 get repeateadly pushed and poped during a host HVC call. Instead, leave the registers on the stack, trading a store instruction on the fast path for an add on the slow path. Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/host.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S index 4e207c1c51267..fe2740b224cf0 100644 --- a/arch/arm64/kvm/hyp/nvhe/host.S +++ b/arch/arm64/kvm/hyp/nvhe/host.S @@ -13,8 +13,6 @@ .text SYM_FUNC_START(__host_exit) - stp x0, x1, [sp, #-16]! - get_host_ctxt x0, x1 /* Store the host regs x2 and x3 */ @@ -99,13 +97,15 @@ SYM_FUNC_END(__hyp_do_panic) mrs x0, esr_el2 lsr x0, x0, #ESR_ELx_EC_SHIFT cmp x0, #ESR_ELx_EC_HVC64 - ldp x0, x1, [sp], #16 b.ne __host_exit + ldp x0, x1, [sp] // Don't fixup the stack yet + /* Check for a stub HVC call */ cmp x0, #HVC_STUB_HCALL_NR b.hs __host_exit + add sp, sp, #16 /* * Compute the idmap address of __kvm_handle_stub_hvc and * jump there. Since we use kimage_voffset, do not use the -- GitLab From b3e453272d436aab8adbe810c6d7043670281487 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 26 Nov 2020 18:00:06 +0100 Subject: [PATCH 0283/1894] tools lib: Adopt memchr_inv() from kernel We'll use it to check for undefined/zero data. Signed-off-by: Jiri Olsa Suggested-by: Arnaldo Carvalho de Melo Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Andi Kleen Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201126170026.2619053-6-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/string.h | 1 + tools/lib/string.c | 58 ++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) diff --git a/tools/include/linux/string.h b/tools/include/linux/string.h index 5e9e781905edc..db5c99318c799 100644 --- a/tools/include/linux/string.h +++ b/tools/include/linux/string.h @@ -46,4 +46,5 @@ extern char * __must_check skip_spaces(const char *); extern char *strim(char *); +extern void *memchr_inv(const void *start, int c, size_t bytes); #endif /* _TOOLS_LINUX_STRING_H_ */ diff --git a/tools/lib/string.c b/tools/lib/string.c index f645343815de6..8b6892f959abd 100644 --- a/tools/lib/string.c +++ b/tools/lib/string.c @@ -168,3 +168,61 @@ char *strreplace(char *s, char old, char new) *s = new; return s; } + +static void *check_bytes8(const u8 *start, u8 value, unsigned int bytes) +{ + while (bytes) { + if (*start != value) + return (void *)start; + start++; + bytes--; + } + return NULL; +} + +/** + * memchr_inv - Find an unmatching character in an area of memory. + * @start: The memory area + * @c: Find a character other than c + * @bytes: The size of the area. + * + * returns the address of the first character other than @c, or %NULL + * if the whole buffer contains just @c. + */ +void *memchr_inv(const void *start, int c, size_t bytes) +{ + u8 value = c; + u64 value64; + unsigned int words, prefix; + + if (bytes <= 16) + return check_bytes8(start, value, bytes); + + value64 = value; + value64 |= value64 << 8; + value64 |= value64 << 16; + value64 |= value64 << 32; + + prefix = (unsigned long)start % 8; + if (prefix) { + u8 *r; + + prefix = 8 - prefix; + r = check_bytes8(start, value, prefix); + if (r) + return r; + start += prefix; + bytes -= prefix; + } + + words = bytes / 8; + + while (words) { + if (*(u64 *)start != value64) + return check_bytes8(start, value, 8); + start += 8; + words--; + } + + return check_bytes8(start, value, bytes % 8); +} -- GitLab From f45edd86b23a7dc576b881b3da53936ac9f8dffb Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 26 Nov 2020 18:00:08 +0100 Subject: [PATCH 0284/1894] perf tools: Add build_id__is_defined function Adding build_id__is_defined helper to check build id is defined and is != zero build id. Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Andi Kleen Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201126170026.2619053-8-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/build-id.c | 6 ++++++ tools/perf/util/build-id.h | 1 + 2 files changed, 7 insertions(+) diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 6b410c3d52dca..2aacc8b29f7e1 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -37,6 +37,7 @@ #include #include +#include #include static bool no_buildid_cache; @@ -912,3 +913,8 @@ void build_id__init(struct build_id *bid, const u8 *data, size_t size) memcpy(bid->data, data, size); bid->size = size; } + +bool build_id__is_defined(const struct build_id *bid) +{ + return bid && bid->size ? !!memchr_inv(bid->data, 0, bid->size) : false; +} diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index f293f99d5dba2..d53415feaf69a 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -21,6 +21,7 @@ struct feat_fd; void build_id__init(struct build_id *bid, const u8 *data, size_t size); int build_id__sprintf(const struct build_id *build_id, char *bf); +bool build_id__is_defined(const struct build_id *bid); int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id); int filename__sprintf_build_id(const char *pathname, char *sbuild_id); char *build_id_cache__kallsyms_path(const char *sbuild_id, char *bf, -- GitLab From 7ac22b088afe26a42978ff7576730ca419da76aa Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 26 Nov 2020 18:00:09 +0100 Subject: [PATCH 0285/1894] perf tools: Add filename__decompress function Factor filename__decompress from decompress_kmodule function. It can decompress files with compressions supported in perf - xz and gz, the support needs to be compiled in. It will to be used in following changes to get build id out of compressed elf objects. Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Andi Kleen Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201126170026.2619053-9-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dso.c | 31 +++++++++++++++++++------------ tools/perf/util/dso.h | 2 ++ 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 89b5fd2b5de3b..d786cf6b0cfa6 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -279,18 +279,12 @@ bool dso__needs_decompress(struct dso *dso) dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE_COMP; } -static int decompress_kmodule(struct dso *dso, const char *name, - char *pathname, size_t len) +int filename__decompress(const char *name, char *pathname, + size_t len, int comp, int *err) { char tmpbuf[] = KMOD_DECOMP_NAME; int fd = -1; - if (!dso__needs_decompress(dso)) - return -1; - - if (dso->comp == COMP_ID__NONE) - return -1; - /* * We have proper compression id for DSO and yet the file * behind the 'name' can still be plain uncompressed object. @@ -304,17 +298,17 @@ static int decompress_kmodule(struct dso *dso, const char *name, * To keep this transparent, we detect this and return the file * descriptor to the uncompressed file. */ - if (!compressions[dso->comp].is_compressed(name)) + if (!compressions[comp].is_compressed(name)) return open(name, O_RDONLY); fd = mkstemp(tmpbuf); if (fd < 0) { - dso->load_errno = errno; + *err = errno; return -1; } - if (compressions[dso->comp].decompress(name, fd)) { - dso->load_errno = DSO_LOAD_ERRNO__DECOMPRESSION_FAILURE; + if (compressions[comp].decompress(name, fd)) { + *err = DSO_LOAD_ERRNO__DECOMPRESSION_FAILURE; close(fd); fd = -1; } @@ -328,6 +322,19 @@ static int decompress_kmodule(struct dso *dso, const char *name, return fd; } +static int decompress_kmodule(struct dso *dso, const char *name, + char *pathname, size_t len) +{ + if (!dso__needs_decompress(dso)) + return -1; + + if (dso->comp == COMP_ID__NONE) + return -1; + + return filename__decompress(name, pathname, len, dso->comp, + &dso->load_errno); +} + int dso__decompress_kmodule_fd(struct dso *dso, const char *name) { return decompress_kmodule(dso, name, NULL, 0); diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index d8cb4f5680a4a..cd2fe64a3c5de 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -274,6 +274,8 @@ bool dso__needs_decompress(struct dso *dso); int dso__decompress_kmodule_fd(struct dso *dso, const char *name); int dso__decompress_kmodule_path(struct dso *dso, const char *name, char *pathname, size_t len); +int filename__decompress(const char *name, char *pathname, + size_t len, int comp, int *err); #define KMOD_DECOMP_NAME "/tmp/perf-kmod-XXXXXX" #define KMOD_DECOMP_LEN sizeof(KMOD_DECOMP_NAME) -- GitLab From af21c579c860d10da1b0620e3d5d14abdc0b5fff Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 26 Nov 2020 18:00:11 +0100 Subject: [PATCH 0286/1894] perf build-id: Add check for existing link in buildid dir When adding new build id link we fail if the link is already there. Adding check for existing link and output debug message that the build id is already linked. Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Andi Kleen Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201126170026.2619053-11-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/build-id.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 2aacc8b29f7e1..4a391f13f40d0 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -755,8 +755,25 @@ int build_id_cache__add_s(const char *sbuild_id, const char *name, tmp = dir_name + strlen(buildid_dir) - 5; memcpy(tmp, "../..", 5); - if (symlink(tmp, linkname) == 0) + if (symlink(tmp, linkname) == 0) { err = 0; + } else if (errno == EEXIST) { + char path[PATH_MAX]; + ssize_t len; + + len = readlink(linkname, path, sizeof(path) - 1); + if (len <= 0) { + pr_err("Cant read link: %s\n", linkname); + goto out_free; + } + path[len] = '\0'; + + if (strcmp(tmp, path)) { + pr_debug("build <%s> already linked to %s\n", + sbuild_id, linkname); + } + err = 0; + } /* Update SDT cache : error is just warned */ if (realname && -- GitLab From 031f112f8dc0f211b59b1b33032671f035edc25d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 26 Nov 2020 18:00:12 +0100 Subject: [PATCH 0287/1894] perf tools: Use struct extra_kernel_map in machine__process_kernel_mmap_event Using struct extra_kernel_map in machine__process_kernel_mmap_event, to pass mmap details. This way we can used single function for all 3 mmap versions. Signed-off-by: Jiri Olsa Acked-by: Ian Rogers Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Andi Kleen Cc: Mark Rutland Cc: Michael Petlan Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201126170026.2619053-12-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 62 +++++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 29 deletions(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 15385ea00190f..1ae32a81639c6 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1581,32 +1581,25 @@ static bool machine__uses_kcore(struct machine *machine) } static bool perf_event__is_extra_kernel_mmap(struct machine *machine, - union perf_event *event) + struct extra_kernel_map *xm) { return machine__is(machine, "x86_64") && - is_entry_trampoline(event->mmap.filename); + is_entry_trampoline(xm->name); } static int machine__process_extra_kernel_map(struct machine *machine, - union perf_event *event) + struct extra_kernel_map *xm) { struct dso *kernel = machine__kernel_dso(machine); - struct extra_kernel_map xm = { - .start = event->mmap.start, - .end = event->mmap.start + event->mmap.len, - .pgoff = event->mmap.pgoff, - }; if (kernel == NULL) return -1; - strlcpy(xm.name, event->mmap.filename, KMAP_NAME_LEN); - - return machine__create_extra_kernel_map(machine, kernel, &xm); + return machine__create_extra_kernel_map(machine, kernel, xm); } static int machine__process_kernel_mmap_event(struct machine *machine, - union perf_event *event) + struct extra_kernel_map *xm) { struct map *map; enum dso_space_type dso_space; @@ -1621,20 +1614,18 @@ static int machine__process_kernel_mmap_event(struct machine *machine, else dso_space = DSO_SPACE__KERNEL_GUEST; - is_kernel_mmap = memcmp(event->mmap.filename, - machine->mmap_name, + is_kernel_mmap = memcmp(xm->name, machine->mmap_name, strlen(machine->mmap_name) - 1) == 0; - if (event->mmap.filename[0] == '/' || - (!is_kernel_mmap && event->mmap.filename[0] == '[')) { - map = machine__addnew_module_map(machine, event->mmap.start, - event->mmap.filename); + if (xm->name[0] == '/' || + (!is_kernel_mmap && xm->name[0] == '[')) { + map = machine__addnew_module_map(machine, xm->start, + xm->name); if (map == NULL) goto out_problem; - map->end = map->start + event->mmap.len; + map->end = map->start + xm->end - xm->start; } else if (is_kernel_mmap) { - const char *symbol_name = (event->mmap.filename + - strlen(machine->mmap_name)); + const char *symbol_name = (xm->name + strlen(machine->mmap_name)); /* * Should be there already, from the build-id table in * the header. @@ -1688,18 +1679,17 @@ static int machine__process_kernel_mmap_event(struct machine *machine, if (strstr(kernel->long_name, "vmlinux")) dso__set_short_name(kernel, "[kernel.vmlinux]", false); - machine__update_kernel_mmap(machine, event->mmap.start, - event->mmap.start + event->mmap.len); + machine__update_kernel_mmap(machine, xm->start, xm->end); /* * Avoid using a zero address (kptr_restrict) for the ref reloc * symbol. Effectively having zero here means that at record * time /proc/sys/kernel/kptr_restrict was non zero. */ - if (event->mmap.pgoff != 0) { + if (xm->pgoff != 0) { map__set_kallsyms_ref_reloc_sym(machine->vmlinux_map, symbol_name, - event->mmap.pgoff); + xm->pgoff); } if (machine__is_default_guest(machine)) { @@ -1708,8 +1698,8 @@ static int machine__process_kernel_mmap_event(struct machine *machine, */ dso__load(kernel, machine__kernel_map(machine)); } - } else if (perf_event__is_extra_kernel_mmap(machine, event)) { - return machine__process_extra_kernel_map(machine, event); + } else if (perf_event__is_extra_kernel_mmap(machine, xm)) { + return machine__process_extra_kernel_map(machine, xm); } return 0; out_problem: @@ -1735,7 +1725,14 @@ int machine__process_mmap2_event(struct machine *machine, if (sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL || sample->cpumode == PERF_RECORD_MISC_KERNEL) { - ret = machine__process_kernel_mmap_event(machine, event); + struct extra_kernel_map xm = { + .start = event->mmap2.start, + .end = event->mmap2.start + event->mmap2.len, + .pgoff = event->mmap2.pgoff, + }; + + strlcpy(xm.name, event->mmap2.filename, KMAP_NAME_LEN); + ret = machine__process_kernel_mmap_event(machine, &xm); if (ret < 0) goto out_problem; return 0; @@ -1785,7 +1782,14 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event if (sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL || sample->cpumode == PERF_RECORD_MISC_KERNEL) { - ret = machine__process_kernel_mmap_event(machine, event); + struct extra_kernel_map xm = { + .start = event->mmap.start, + .end = event->mmap.start + event->mmap.len, + .pgoff = event->mmap.pgoff, + }; + + strlcpy(xm.name, event->mmap.filename, KMAP_NAME_LEN); + ret = machine__process_kernel_mmap_event(machine, &xm); if (ret < 0) goto out_problem; return 0; -- GitLab From ca8ea73ae109900cec4c3a1f0d3486a01a0e4434 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 26 Nov 2020 18:00:13 +0100 Subject: [PATCH 0288/1894] perf symbols: Try to load vmlinux from buildid database Currently we don't check on kernel's vmlinux the same way as we do for normal binaries, but we either look for kallsyms file in build id database or check on known vmlinux locations (plus some other optional paths). This patch adds the check for standard build id binary location, so we are ready once we start to store it there from debuginfod in following changes. Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Andi Kleen Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201126170026.2619053-13-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/build-id.c | 13 ++++++++++--- tools/perf/util/build-id.h | 2 ++ tools/perf/util/symbol.c | 16 ++++++++++++++++ 3 files changed, 28 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 4a391f13f40d0..1fd58703c2d49 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -261,10 +261,9 @@ static const char *build_id_cache__basename(bool is_kallsyms, bool is_vdso, "debug" : "elf")); } -char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size, - bool is_debug) +char *__dso__build_id_filename(const struct dso *dso, char *bf, size_t size, + bool is_debug, bool is_kallsyms) { - bool is_kallsyms = dso__is_kallsyms((struct dso *)dso); bool is_vdso = dso__is_vdso((struct dso *)dso); char sbuild_id[SBUILD_ID_SIZE]; char *linkname; @@ -293,6 +292,14 @@ char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size, return bf; } +char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size, + bool is_debug) +{ + bool is_kallsyms = dso__is_kallsyms((struct dso *)dso); + + return __dso__build_id_filename(dso, bf, size, is_debug, is_kallsyms); +} + #define dsos__for_each_with_build_id(pos, head) \ list_for_each_entry(pos, head, node) \ if (!pos->has_build_id) \ diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index d53415feaf69a..f1a2f67df6e48 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -29,6 +29,8 @@ char *build_id_cache__kallsyms_path(const char *sbuild_id, char *bf, char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size, bool is_debug); +char *__dso__build_id_filename(const struct dso *dso, char *bf, size_t size, + bool is_debug, bool is_kallsyms); int build_id__mark_dso_hit(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct evsel *evsel, diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 0d14abdf3d722..64a039cbba1b5 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -2189,6 +2189,8 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map) int err; const char *kallsyms_filename = NULL; char *kallsyms_allocated_filename = NULL; + char *filename = NULL; + /* * Step 1: if the user specified a kallsyms or vmlinux filename, use * it and only it, reporting errors to the user if it cannot be used. @@ -2213,6 +2215,20 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map) return dso__load_vmlinux(dso, map, symbol_conf.vmlinux_name, false); } + /* + * Before checking on common vmlinux locations, check if it's + * stored as standard build id binary (not kallsyms) under + * .debug cache. + */ + if (!symbol_conf.ignore_vmlinux_buildid) + filename = __dso__build_id_filename(dso, NULL, 0, false, false); + if (filename != NULL) { + err = dso__load_vmlinux(dso, map, filename, true); + if (err > 0) + return err; + free(filename); + } + if (!symbol_conf.ignore_vmlinux && vmlinux_path != NULL) { err = dso__load_vmlinux_path(dso, map); if (err > 0) -- GitLab From 058f15113042bc2fa03b0b134bfc7fb8cd156878 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 26 Nov 2020 18:00:21 +0100 Subject: [PATCH 0289/1894] perf data: Add is_perf_data function Adding is_perf_data function that returns true if the given path is perf data file. It will be used in following patches. Signed-off-by: Jiri Olsa Acked-by: Ian Rogers Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Andi Kleen Cc: Mark Rutland Cc: Michael Petlan Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201126170026.2619053-21-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/data.c | 19 +++++++++++++++++++ tools/perf/util/data.h | 1 + 2 files changed, 20 insertions(+) diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index 05bbcb663c41c..f29af4fc3d093 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -492,3 +492,22 @@ char *perf_data__kallsyms_name(struct perf_data *data) return kallsyms_name; } + +bool is_perf_data(const char *path) +{ + bool ret = false; + FILE *file; + u64 magic; + + file = fopen(path, "r"); + if (!file) + return false; + + if (fread(&magic, 1, 8, file) < 8) + goto out; + + ret = is_perf_magic(magic); +out: + fclose(file); + return ret; +} diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h index c563fcbb0288c..62a3e66fbee8a 100644 --- a/tools/perf/util/data.h +++ b/tools/perf/util/data.h @@ -98,4 +98,5 @@ int perf_data__update_dir(struct perf_data *data); unsigned long perf_data__size(struct perf_data *data); int perf_data__make_kcore_dir(struct perf_data *data, char *buf, size_t buf_sz); char *perf_data__kallsyms_name(struct perf_data *data); +bool is_perf_data(const char *path); #endif /* __PERF_DATA_H */ -- GitLab From 0b7b9e83c76ccffb994da8266110592e5e767718 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 26 Nov 2020 18:00:19 +0100 Subject: [PATCH 0290/1894] perf build-id: Use machine__for_each_dso in perf_session__cache_build_ids Using machine__for_each_dso in perf_session__cache_build_ids, so we can reuse perf_session__cache_build_ids with different callback in following changes. Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Andi Kleen Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201126170026.2619053-19-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/build-id.c | 41 +++++++++++++++----------------------- 1 file changed, 16 insertions(+), 25 deletions(-) diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 1fd58703c2d49..948a7f48d6681 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -859,12 +859,16 @@ out_free: return err; } -static int dso__cache_build_id(struct dso *dso, struct machine *machine) +static int dso__cache_build_id(struct dso *dso, struct machine *machine, + void *priv __maybe_unused) { bool is_kallsyms = dso__is_kallsyms(dso); bool is_vdso = dso__is_vdso(dso); const char *name = dso->long_name; + if (!dso->has_build_id) + return 0; + if (dso__is_kcore(dso)) { is_kallsyms = true; name = machine->mmap_name; @@ -873,43 +877,30 @@ static int dso__cache_build_id(struct dso *dso, struct machine *machine) is_kallsyms, is_vdso); } -static int __dsos__cache_build_ids(struct list_head *head, - struct machine *machine) +static int +machines__for_each_dso(struct machines *machines, machine__dso_t fn, void *priv) { - struct dso *pos; - int err = 0; - - dsos__for_each_with_build_id(pos, head) - if (dso__cache_build_id(pos, machine)) - err = -1; + int ret = machine__for_each_dso(&machines->host, fn, priv); + struct rb_node *nd; - return err; -} + for (nd = rb_first_cached(&machines->guests); nd; + nd = rb_next(nd)) { + struct machine *pos = rb_entry(nd, struct machine, rb_node); -static int machine__cache_build_ids(struct machine *machine) -{ - return __dsos__cache_build_ids(&machine->dsos.head, machine); + ret |= machine__for_each_dso(pos, fn, priv); + } + return ret ? -1 : 0; } int perf_session__cache_build_ids(struct perf_session *session) { - struct rb_node *nd; - int ret; - if (no_buildid_cache) return 0; if (mkdir(buildid_dir, 0755) != 0 && errno != EEXIST) return -1; - ret = machine__cache_build_ids(&session->machines.host); - - for (nd = rb_first_cached(&session->machines.guests); nd; - nd = rb_next(nd)) { - struct machine *pos = rb_entry(nd, struct machine, rb_node); - ret |= machine__cache_build_ids(pos); - } - return ret ? -1 : 0; + return machines__for_each_dso(&session->machines, dso__cache_build_id, NULL) ? -1 : 0; } static bool machine__read_build_ids(struct machine *machine, bool with_hits) -- GitLab From 75fb2af68e358324c2bdcb61be8376cffcb2d034 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 26 Nov 2020 18:00:20 +0100 Subject: [PATCH 0291/1894] perf build-id: Add __perf_session__cache_build_ids function Adding __perf_session__cache_build_ids function as an interface for caching sessions build ids with callback function and its data pointer. Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Andi Kleen Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201126170026.2619053-20-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/build-id.c | 10 ++++++++-- tools/perf/util/build-id.h | 3 +++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 948a7f48d6681..6bf3cc79c5d5d 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -892,7 +892,8 @@ machines__for_each_dso(struct machines *machines, machine__dso_t fn, void *priv) return ret ? -1 : 0; } -int perf_session__cache_build_ids(struct perf_session *session) +int __perf_session__cache_build_ids(struct perf_session *session, + machine__dso_t fn, void *priv) { if (no_buildid_cache) return 0; @@ -900,7 +901,12 @@ int perf_session__cache_build_ids(struct perf_session *session) if (mkdir(buildid_dir, 0755) != 0 && errno != EEXIST) return -1; - return machines__for_each_dso(&session->machines, dso__cache_build_id, NULL) ? -1 : 0; + return machines__for_each_dso(&session->machines, fn, priv) ? -1 : 0; +} + +int perf_session__cache_build_ids(struct perf_session *session) +{ + return __perf_session__cache_build_ids(session, dso__cache_build_id, NULL); } static bool machine__read_build_ids(struct machine *machine, bool with_hits) diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index f1a2f67df6e48..c6f10e3d21523 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -5,6 +5,7 @@ #define BUILD_ID_SIZE 20 #define SBUILD_ID_SIZE (BUILD_ID_SIZE * 2 + 1) +#include "machine.h" #include "tool.h" #include @@ -46,6 +47,8 @@ bool perf_session__read_build_ids(struct perf_session *session, bool with_hits); int perf_session__write_buildid_table(struct perf_session *session, struct feat_fd *fd); int perf_session__cache_build_ids(struct perf_session *session); +int __perf_session__cache_build_ids(struct perf_session *session, + machine__dso_t fn, void *priv); char *build_id_cache__origname(const char *sbuild_id); char *build_id_cache__linkname(const char *sbuild_id, char *bf, size_t size); -- GitLab From fd4ebb457c9ca90d10a74aeb85d54e27b08d5e76 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 26 Nov 2020 18:00:22 +0100 Subject: [PATCH 0292/1894] perf build-id: Add build_id_cache__add function Adding build_id_cache__add function as core function that adds file into build id database. It will be set from another callers in following changes. Signed-off-by: Jiri Olsa Acked-by: Ian Rogers Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Andi Kleen Cc: Mark Rutland Cc: Michael Petlan Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201126170026.2619053-22-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/build-id.c | 42 ++++++++++++++++++++++++-------------- tools/perf/util/build-id.h | 2 ++ 2 files changed, 29 insertions(+), 15 deletions(-) diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 6bf3cc79c5d5d..02df36b30ac54 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -671,24 +671,15 @@ out: return realname; } -int build_id_cache__add_s(const char *sbuild_id, const char *name, - struct nsinfo *nsi, bool is_kallsyms, bool is_vdso) +int +build_id_cache__add(const char *sbuild_id, const char *name, const char *realname, + struct nsinfo *nsi, bool is_kallsyms, bool is_vdso) { const size_t size = PATH_MAX; - char *realname = NULL, *filename = NULL, *dir_name = NULL, - *linkname = zalloc(size), *tmp; + char *filename = NULL, *dir_name = NULL, *linkname = zalloc(size), *tmp; char *debugfile = NULL; int err = -1; - if (!is_kallsyms) { - if (!is_vdso) - realname = nsinfo__realpath(name, nsi); - else - realname = realpath(name, NULL); - if (!realname) - goto out_free; - } - dir_name = build_id_cache__cachedir(sbuild_id, name, nsi, is_kallsyms, is_vdso); if (!dir_name) @@ -788,8 +779,6 @@ int build_id_cache__add_s(const char *sbuild_id, const char *name, pr_debug4("Failed to update/scan SDT cache for %s\n", realname); out_free: - if (!is_kallsyms) - free(realname); free(filename); free(debugfile); free(dir_name); @@ -797,6 +786,29 @@ out_free: return err; } +int build_id_cache__add_s(const char *sbuild_id, const char *name, + struct nsinfo *nsi, bool is_kallsyms, bool is_vdso) +{ + char *realname = NULL; + int err = -1; + + if (!is_kallsyms) { + if (!is_vdso) + realname = nsinfo__realpath(name, nsi); + else + realname = realpath(name, NULL); + if (!realname) + goto out_free; + } + + err = build_id_cache__add(sbuild_id, name, realname, nsi, is_kallsyms, is_vdso); + +out_free: + if (!is_kallsyms) + free(realname); + return err; +} + static int build_id_cache__add_b(const struct build_id *bid, const char *name, struct nsinfo *nsi, bool is_kallsyms, bool is_vdso) diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index c6f10e3d21523..02613f4b2c291 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -63,6 +63,8 @@ char *build_id_cache__complement(const char *incomplete_sbuild_id); int build_id_cache__list_build_ids(const char *pathname, struct nsinfo *nsi, struct strlist **result); bool build_id_cache__cached(const char *sbuild_id); +int build_id_cache__add(const char *sbuild_id, const char *name, const char *realname, + struct nsinfo *nsi, bool is_kallsyms, bool is_vdso); int build_id_cache__add_s(const char *sbuild_id, const char *name, struct nsinfo *nsi, bool is_kallsyms, bool is_vdso); -- GitLab From 14bda7a927336055d7c0deb1483f9cdb687c2080 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 13 Nov 2020 16:39:44 +0000 Subject: [PATCH 0293/1894] KVM: arm64: Add kvm_vcpu_has_pmu() helper There are a number of places where we check for the KVM_ARM_VCPU_PMU_V3 feature. Wrap this check into a new kvm_vcpu_has_pmu(), and use it at the existing locations. No functional change. Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 3 +++ arch/arm64/kvm/pmu-emul.c | 8 +++----- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 709f892f7a142..8c681d621a820 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -731,4 +731,7 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu); #define kvm_arm_vcpu_sve_finalized(vcpu) \ ((vcpu)->arch.flags & KVM_ARM64_VCPU_SVE_FINALIZED) +#define kvm_vcpu_has_pmu(vcpu) \ + (test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features)) + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 2ed5ef8f274b1..e7e3b46298640 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -913,8 +913,7 @@ static bool pmu_irq_is_valid(struct kvm *kvm, int irq) int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { - if (!kvm_arm_support_pmu_v3() || - !test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features)) + if (!kvm_arm_support_pmu_v3() || !kvm_vcpu_has_pmu(vcpu)) return -ENODEV; if (vcpu->arch.pmu.created) @@ -1015,7 +1014,7 @@ int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) if (!irqchip_in_kernel(vcpu->kvm)) return -EINVAL; - if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features)) + if (!kvm_vcpu_has_pmu(vcpu)) return -ENODEV; if (!kvm_arm_pmu_irq_initialized(vcpu)) @@ -1035,8 +1034,7 @@ int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) case KVM_ARM_VCPU_PMU_V3_IRQ: case KVM_ARM_VCPU_PMU_V3_INIT: case KVM_ARM_VCPU_PMU_V3_FILTER: - if (kvm_arm_support_pmu_v3() && - test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features)) + if (kvm_arm_support_pmu_v3() && kvm_vcpu_has_pmu(vcpu)) return 0; } -- GitLab From 9bbfa4b565379eeb2fb8fdbcc9979549ae0e48d9 Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Thu, 26 Nov 2020 14:49:16 +0000 Subject: [PATCH 0294/1894] KVM: arm64: Refuse to run VCPU if PMU is not initialized When enabling the PMU in kvm_arm_pmu_v3_enable(), KVM returns early if the PMU flag created is false and skips any other checks. Because PMU emulation is gated only on the VCPU feature being set, this makes it possible for userspace to get away with setting the VCPU feature but not doing any initialization for the PMU. Fix it by returning an error when trying to run the VCPU if the PMU hasn't been initialized correctly. The PMU is marked as created only if the interrupt ID has been set when using an in-kernel irqchip. This means the same check in kvm_arm_pmu_v3_enable() is redundant, remove it. Signed-off-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201126144916.164075-1-alexandru.elisei@arm.com --- arch/arm64/kvm/pmu-emul.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index e7e3b46298640..c640d16d1bbdb 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -825,9 +825,12 @@ bool kvm_arm_support_pmu_v3(void) int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu) { - if (!vcpu->arch.pmu.created) + if (!kvm_vcpu_has_pmu(vcpu)) return 0; + if (!vcpu->arch.pmu.created) + return -EINVAL; + /* * A valid interrupt configuration for the PMU is either to have a * properly configured interrupt number and using an in-kernel @@ -835,9 +838,6 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu) */ if (irqchip_in_kernel(vcpu->kvm)) { int irq = vcpu->arch.pmu.irq_num; - if (!kvm_arm_pmu_irq_initialized(vcpu)) - return -EINVAL; - /* * If we are using an in-kernel vgic, at this point we know * the vgic will be initialized, so we can check the PMU irq -- GitLab From 04355e41a60338206d6498fe463a86131d5ca06b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 12 Nov 2020 18:00:30 +0000 Subject: [PATCH 0295/1894] KVM: arm64: Set ID_AA64DFR0_EL1.PMUVer to 0 when no PMU support We always expose the HW view of PMU in ID_AA64FDR0_EL1.PMUver, even when the PMU feature is disabled, while the architecture says that FEAT_PMUv3 not being implemented should result in this field being zero. Let's follow the architecture's guidance. Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index d2e1d745f0676..6629cfde28384 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1070,10 +1070,15 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, (0xfUL << ID_AA64ISAR1_GPA_SHIFT) | (0xfUL << ID_AA64ISAR1_GPI_SHIFT)); } else if (id == SYS_ID_AA64DFR0_EL1) { + u64 cap = 0; + /* Limit guests to PMUv3 for ARMv8.1 */ + if (kvm_vcpu_has_pmu(vcpu)) + cap = ID_AA64DFR0_PMUVER_8_1; + val = cpuid_feature_cap_perfmon_field(val, ID_AA64DFR0_PMUVER_SHIFT, - ID_AA64DFR0_PMUVER_8_1); + cap); } else if (id == SYS_ID_DFR0_EL1) { /* Limit guests to PMUv3 for ARMv8.1 */ val = cpuid_feature_cap_perfmon_field(val, -- GitLab From 77da43039ab5cfc9631159fd87fe38d4c34cdaf5 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 12 Nov 2020 18:13:27 +0000 Subject: [PATCH 0296/1894] KVM: arm64: Refuse illegal KVM_ARM_VCPU_PMU_V3 at reset time We accept to configure a PMU when a vcpu is created, even if the HW (or the host) doesn't support it. This results in failures when attributes get set, which is a bit odd as we should have failed the vcpu creation the first place. Move the check to the point where we check the vcpu feature set, and fail early if we cannot support a PMU. This further simplifies the attribute handling. Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier --- arch/arm64/kvm/pmu-emul.c | 4 ++-- arch/arm64/kvm/reset.c | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index c640d16d1bbdb..812495e915e47 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -913,7 +913,7 @@ static bool pmu_irq_is_valid(struct kvm *kvm, int irq) int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { - if (!kvm_arm_support_pmu_v3() || !kvm_vcpu_has_pmu(vcpu)) + if (!kvm_vcpu_has_pmu(vcpu)) return -ENODEV; if (vcpu->arch.pmu.created) @@ -1034,7 +1034,7 @@ int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) case KVM_ARM_VCPU_PMU_V3_IRQ: case KVM_ARM_VCPU_PMU_V3_INIT: case KVM_ARM_VCPU_PMU_V3_FILTER: - if (kvm_arm_support_pmu_v3() && kvm_vcpu_has_pmu(vcpu)) + if (kvm_vcpu_has_pmu(vcpu)) return 0; } diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 74ce92a4988c1..3e772ea4e066d 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -285,6 +285,10 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) pstate = VCPU_RESET_PSTATE_EL1; } + if (kvm_vcpu_has_pmu(vcpu) && !kvm_arm_support_pmu_v3()) { + ret = -EINVAL; + goto out; + } break; } -- GitLab From b0737e999ec0af007b10ac0b7db97932394a248f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 12 Nov 2020 18:49:28 +0000 Subject: [PATCH 0297/1894] KVM: arm64: Inject UNDEF on PMU access when no PMU configured The ARMv8 architecture says that in the absence of FEAT_PMUv3, all the PMU-related register generate an UNDEF. Let's make sure that all our PMU handers catch this case by hooking into check_pmu_access_disabled(), and add checks in a couple of other places. Note that we still cannot deliver an exception into the guest as the offending cases are already caught by the RAZ/WI handling. But this puts us one step away to architectural compliance. Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 6629cfde28384..b098d667bb427 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -609,8 +609,9 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) static bool check_pmu_access_disabled(struct kvm_vcpu *vcpu, u64 flags) { u64 reg = __vcpu_sys_reg(vcpu, PMUSERENR_EL0); - bool enabled = (reg & flags) || vcpu_mode_priv(vcpu); + bool enabled = kvm_vcpu_has_pmu(vcpu); + enabled &= (reg & flags) || vcpu_mode_priv(vcpu); if (!enabled) kvm_inject_undefined(vcpu); @@ -857,10 +858,8 @@ static bool access_pminten(struct kvm_vcpu *vcpu, struct sys_reg_params *p, if (!kvm_arm_pmu_v3_ready(vcpu)) return trap_raz_wi(vcpu, p, r); - if (!vcpu_mode_priv(vcpu)) { - kvm_inject_undefined(vcpu); + if (check_pmu_access_disabled(vcpu, 0)) return false; - } if (p->is_write) { u64 val = p->regval & mask; @@ -928,6 +927,11 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, if (!kvm_arm_pmu_v3_ready(vcpu)) return trap_raz_wi(vcpu, p, r); + if (!kvm_vcpu_has_pmu(vcpu)) { + kvm_inject_undefined(vcpu); + return false; + } + if (p->is_write) { if (!vcpu_mode_priv(vcpu)) { kvm_inject_undefined(vcpu); -- GitLab From f975ccb08d6530e58bac660c7a938f98bae5a651 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 13 Nov 2020 14:12:53 +0000 Subject: [PATCH 0298/1894] KVM: arm64: Remove PMU RAZ/WI handling There is no RAZ/WI handling allowed for the PMU registers in the ARMv8 architecture. Nobody can remember how we cam to the conclusion that we could do this, but the ARMv8 ARM is pretty clear that we cannot. Remove the RAZ/WI handling of the PMU system registers when it is not configured. Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index b098d667bb427..3bd4cc40536b6 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -643,9 +643,6 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, { u64 val; - if (!kvm_arm_pmu_v3_ready(vcpu)) - return trap_raz_wi(vcpu, p, r); - if (pmu_access_el0_disabled(vcpu)) return false; @@ -672,9 +669,6 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, static bool access_pmselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { - if (!kvm_arm_pmu_v3_ready(vcpu)) - return trap_raz_wi(vcpu, p, r); - if (pmu_access_event_counter_el0_disabled(vcpu)) return false; @@ -693,9 +687,6 @@ static bool access_pmceid(struct kvm_vcpu *vcpu, struct sys_reg_params *p, { u64 pmceid; - if (!kvm_arm_pmu_v3_ready(vcpu)) - return trap_raz_wi(vcpu, p, r); - BUG_ON(p->is_write); if (pmu_access_el0_disabled(vcpu)) @@ -728,9 +719,6 @@ static bool access_pmu_evcntr(struct kvm_vcpu *vcpu, { u64 idx; - if (!kvm_arm_pmu_v3_ready(vcpu)) - return trap_raz_wi(vcpu, p, r); - if (r->CRn == 9 && r->CRm == 13) { if (r->Op2 == 2) { /* PMXEVCNTR_EL0 */ @@ -784,9 +772,6 @@ static bool access_pmu_evtyper(struct kvm_vcpu *vcpu, struct sys_reg_params *p, { u64 idx, reg; - if (!kvm_arm_pmu_v3_ready(vcpu)) - return trap_raz_wi(vcpu, p, r); - if (pmu_access_el0_disabled(vcpu)) return false; @@ -824,9 +809,6 @@ static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p, { u64 val, mask; - if (!kvm_arm_pmu_v3_ready(vcpu)) - return trap_raz_wi(vcpu, p, r); - if (pmu_access_el0_disabled(vcpu)) return false; @@ -855,9 +837,6 @@ static bool access_pminten(struct kvm_vcpu *vcpu, struct sys_reg_params *p, { u64 mask = kvm_pmu_valid_counter_mask(vcpu); - if (!kvm_arm_pmu_v3_ready(vcpu)) - return trap_raz_wi(vcpu, p, r); - if (check_pmu_access_disabled(vcpu, 0)) return false; @@ -882,9 +861,6 @@ static bool access_pmovs(struct kvm_vcpu *vcpu, struct sys_reg_params *p, { u64 mask = kvm_pmu_valid_counter_mask(vcpu); - if (!kvm_arm_pmu_v3_ready(vcpu)) - return trap_raz_wi(vcpu, p, r); - if (pmu_access_el0_disabled(vcpu)) return false; @@ -907,9 +883,6 @@ static bool access_pmswinc(struct kvm_vcpu *vcpu, struct sys_reg_params *p, { u64 mask; - if (!kvm_arm_pmu_v3_ready(vcpu)) - return trap_raz_wi(vcpu, p, r); - if (!p->is_write) return read_from_write_only(vcpu, p, r); @@ -924,9 +897,6 @@ static bool access_pmswinc(struct kvm_vcpu *vcpu, struct sys_reg_params *p, static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { - if (!kvm_arm_pmu_v3_ready(vcpu)) - return trap_raz_wi(vcpu, p, r); - if (!kvm_vcpu_has_pmu(vcpu)) { kvm_inject_undefined(vcpu); return false; -- GitLab From a3da93580202ac9075d4e96f73c8435b9d7262c1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 12 Nov 2020 18:50:06 +0000 Subject: [PATCH 0299/1894] KVM: arm64: Remove dead PMU sysreg decoding code The handling of traps in access_pmu_evcntr() has a couple of omminous "else return false;" statements that don't make any sense: the decoding tree coverse all the registers that trap to this handler, and returning false implies that we change PC, which we don't. Get rid of what is evidently dead code. Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 3bd4cc40536b6..dd7a73468286d 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -717,7 +717,7 @@ static bool access_pmu_evcntr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { - u64 idx; + u64 idx = ~0UL; if (r->CRn == 9 && r->CRm == 13) { if (r->Op2 == 2) { @@ -733,8 +733,6 @@ static bool access_pmu_evcntr(struct kvm_vcpu *vcpu, return false; idx = ARMV8_PMU_CYCLE_IDX; - } else { - return false; } } else if (r->CRn == 0 && r->CRm == 9) { /* PMCCNTR */ @@ -748,10 +746,11 @@ static bool access_pmu_evcntr(struct kvm_vcpu *vcpu, return false; idx = ((r->CRm & 3) << 3) | (r->Op2 & 7); - } else { - return false; } + /* Catch any decoding mistake */ + WARN_ON(idx == ~0UL); + if (!pmu_counter_idx_valid(vcpu, idx)) return false; -- GitLab From 46acf89de499b2db07e120c62a796e8a0efbad8d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 13 Nov 2020 16:41:40 +0000 Subject: [PATCH 0300/1894] KVM: arm64: Gate kvm_pmu_update_state() on the PMU feature We currently gate the update of the PMU state on the PMU being "ready". The "ready" state is only set to true when the first vcpu run is successful, and if it isn't, we never reach the update code. So the "ready" state is never the right thing to check for, and it should instead be the presence of the PMU feature, which makes a bit more sense. Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier --- arch/arm64/kvm/pmu-emul.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 812495e915e47..5ad900c609ee4 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -384,7 +384,7 @@ static void kvm_pmu_update_state(struct kvm_vcpu *vcpu) struct kvm_pmu *pmu = &vcpu->arch.pmu; bool overflow; - if (!kvm_arm_pmu_v3_ready(vcpu)) + if (!kvm_vcpu_has_pmu(vcpu)) return; overflow = !!kvm_pmu_overflow_status(vcpu); -- GitLab From 7521c3a9e63041602d531e36c07a340f188dc1fa Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 13 Nov 2020 16:42:08 +0000 Subject: [PATCH 0301/1894] KVM: arm64: Get rid of the PMU ready state The PMU ready state has no user left. Goodbye. Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier --- arch/arm64/kvm/pmu-emul.c | 1 - include/kvm/arm_pmu.h | 3 --- 2 files changed, 4 deletions(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 5ad900c609ee4..398f6df1bbe40 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -851,7 +851,6 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu) } kvm_pmu_vcpu_reset(vcpu); - vcpu->arch.pmu.ready = true; return 0; } diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 1d94acd0bc85a..fc85f50fa0e96 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -24,13 +24,11 @@ struct kvm_pmu { int irq_num; struct kvm_pmc pmc[ARMV8_PMU_MAX_COUNTERS]; DECLARE_BITMAP(chained, ARMV8_PMU_MAX_COUNTER_PAIRS); - bool ready; bool created; bool irq_level; struct irq_work overflow_work; }; -#define kvm_arm_pmu_v3_ready(v) ((v)->arch.pmu.ready) #define kvm_arm_pmu_irq_initialized(v) ((v)->arch.pmu.irq_num >= VGIC_NR_SGIS) u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx); void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val); @@ -61,7 +59,6 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu); struct kvm_pmu { }; -#define kvm_arm_pmu_v3_ready(v) (false) #define kvm_arm_pmu_irq_initialized(v) (false) static inline u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) -- GitLab From 8cce12b3c82717df72afb955ce74c769b0eb2b4f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 27 Nov 2020 12:46:36 -0500 Subject: [PATCH 0302/1894] KVM: nSVM: set fixed bits by hand SVM generally ignores fixed-1 bits. Set them manually so that we do not end up by mistake without those bits set in struct kvm_vcpu; it is part of userspace API that KVM always returns value with the bits set. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index b0f37183e8f55..b0b667456b2e7 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -381,7 +381,7 @@ static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *vmcb12) svm->vmcb->save.ds = vmcb12->save.ds; svm->vmcb->save.gdtr = vmcb12->save.gdtr; svm->vmcb->save.idtr = vmcb12->save.idtr; - kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags); + kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED); svm_set_efer(&svm->vcpu, vmcb12->save.efer); svm_set_cr0(&svm->vcpu, vmcb12->save.cr0); svm_set_cr4(&svm->vcpu, vmcb12->save.cr4); @@ -394,8 +394,8 @@ static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *vmcb12) svm->vmcb->save.rax = vmcb12->save.rax; svm->vmcb->save.rsp = vmcb12->save.rsp; svm->vmcb->save.rip = vmcb12->save.rip; - svm->vmcb->save.dr7 = vmcb12->save.dr7; - svm->vcpu.arch.dr6 = vmcb12->save.dr6; + svm->vmcb->save.dr7 = vmcb12->save.dr7 | DR7_FIXED_1; + svm->vcpu.arch.dr6 = vmcb12->save.dr6 | DR6_FIXED_1 | DR6_RTM; svm->vmcb->save.cpl = vmcb12->save.cpl; } @@ -660,13 +660,14 @@ int nested_svm_vmexit(struct vcpu_svm *svm) svm->vmcb->save.gdtr = hsave->save.gdtr; svm->vmcb->save.idtr = hsave->save.idtr; kvm_set_rflags(&svm->vcpu, hsave->save.rflags); + kvm_set_rflags(&svm->vcpu, hsave->save.rflags | X86_EFLAGS_FIXED); svm_set_efer(&svm->vcpu, hsave->save.efer); svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE); svm_set_cr4(&svm->vcpu, hsave->save.cr4); kvm_rax_write(&svm->vcpu, hsave->save.rax); kvm_rsp_write(&svm->vcpu, hsave->save.rsp); kvm_rip_write(&svm->vcpu, hsave->save.rip); - svm->vmcb->save.dr7 = 0; + svm->vmcb->save.dr7 = DR7_FIXED_1; svm->vmcb->save.cpl = 0; svm->vmcb->control.exit_int_info = 0; -- GitLab From 8d14797b53f044fda3ed42b5b6357c7622b8af58 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 18 Nov 2020 19:44:00 +0000 Subject: [PATCH 0303/1894] KVM: arm64: Move 'struct kvm_arch_memory_slot' out of uapi/ 'struct kvm_arch_memory_slot' isn't part of the user ABI, so move it out of the uapi/ headers in case we start using it in future and accidentally back ourselves into a corner. Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201118194402.2892-2-will@kernel.org --- arch/arm64/include/asm/kvm_host.h | 3 +++ arch/arm64/include/uapi/asm/kvm.h | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 781d029b8aa85..32db7196504f7 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -89,6 +89,9 @@ struct kvm_s2_mmu { struct kvm *kvm; }; +struct kvm_arch_memory_slot { +}; + struct kvm_arch { struct kvm_s2_mmu mmu; diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 1c17c3a24411d..24223adae150c 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -156,9 +156,6 @@ struct kvm_sync_regs { __u64 device_irq_level; }; -struct kvm_arch_memory_slot { -}; - /* * PMU filter structure. Describe a range of events with a particular * action. To be used with KVM_ARM_VCPU_PMU_V3_FILTER. -- GitLab From 36fb4cd55f626dff0f6e76bed14707fa00147b7f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 18 Nov 2020 19:44:01 +0000 Subject: [PATCH 0304/1894] KVM: arm64: Remove kvm_arch_vm_ioctl_check_extension() kvm_arch_vm_ioctl_check_extension() is only called from kvm_vm_ioctl_check_extension(), so we can inline it and remove the extra function. Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Cc: Marc Zyngier Link: https://lore.kernel.org/r/20201118194402.2892-3-will@kernel.org --- arch/arm64/include/asm/cpufeature.h | 5 +++ arch/arm64/include/asm/kvm_host.h | 1 - arch/arm64/kvm/arm.c | 31 +++++++++++++++-- arch/arm64/kvm/reset.c | 52 ----------------------------- 4 files changed, 34 insertions(+), 55 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 97244d4feca9c..04ab88db4b438 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -697,6 +697,11 @@ static inline bool system_supports_generic_auth(void) cpus_have_const_cap(ARM64_HAS_GENERIC_AUTH); } +static inline bool system_has_full_ptr_auth(void) +{ + return system_supports_address_auth() && system_supports_generic_auth(); +} + static __always_inline bool system_uses_irq_prio_masking(void) { return IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 32db7196504f7..6691043d89307 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -58,7 +58,6 @@ int kvm_arm_init_sve(void); int __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu); -int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext); void __extended_idmap_trampoline(phys_addr_t boot_pgd, phys_addr_t idmap_start); struct kvm_vmid { diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 5750ec34960e9..97a9332f12cc5 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -182,6 +182,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ARM_IRQ_LINE_LAYOUT_2: case KVM_CAP_ARM_NISV_TO_USER: case KVM_CAP_ARM_INJECT_EXT_DABT: + case KVM_CAP_SET_GUEST_DEBUG: + case KVM_CAP_VCPU_ATTRIBUTES: r = 1; break; case KVM_CAP_ARM_SET_DEVICE_ADDR: @@ -213,10 +215,35 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_STEAL_TIME: r = kvm_arm_pvtime_supported(); break; - default: - r = kvm_arch_vm_ioctl_check_extension(kvm, ext); + case KVM_CAP_ARM_EL1_32BIT: + r = cpus_have_const_cap(ARM64_HAS_32BIT_EL1); + break; + case KVM_CAP_GUEST_DEBUG_HW_BPS: + r = get_num_brps(); + break; + case KVM_CAP_GUEST_DEBUG_HW_WPS: + r = get_num_wrps(); + break; + case KVM_CAP_ARM_PMU_V3: + r = kvm_arm_support_pmu_v3(); + break; + case KVM_CAP_ARM_INJECT_SERROR_ESR: + r = cpus_have_const_cap(ARM64_HAS_RAS_EXTN); + break; + case KVM_CAP_ARM_VM_IPA_SIZE: + r = get_kvm_ipa_limit(); break; + case KVM_CAP_ARM_SVE: + r = system_supports_sve(); + break; + case KVM_CAP_ARM_PTRAUTH_ADDRESS: + case KVM_CAP_ARM_PTRAUTH_GENERIC: + r = system_has_full_ptr_auth(); + break; + default: + r = 0; } + return r; } diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index f32490229a4c7..c7985f4607a9d 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -42,58 +42,6 @@ static u32 kvm_ipa_limit; #define VCPU_RESET_PSTATE_SVC (PSR_AA32_MODE_SVC | PSR_AA32_A_BIT | \ PSR_AA32_I_BIT | PSR_AA32_F_BIT) -static bool system_has_full_ptr_auth(void) -{ - return system_supports_address_auth() && system_supports_generic_auth(); -} - -/** - * kvm_arch_vm_ioctl_check_extension - * - * We currently assume that the number of HW registers is uniform - * across all CPUs (see cpuinfo_sanity_check). - */ -int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext) -{ - int r; - - switch (ext) { - case KVM_CAP_ARM_EL1_32BIT: - r = cpus_have_const_cap(ARM64_HAS_32BIT_EL1); - break; - case KVM_CAP_GUEST_DEBUG_HW_BPS: - r = get_num_brps(); - break; - case KVM_CAP_GUEST_DEBUG_HW_WPS: - r = get_num_wrps(); - break; - case KVM_CAP_ARM_PMU_V3: - r = kvm_arm_support_pmu_v3(); - break; - case KVM_CAP_ARM_INJECT_SERROR_ESR: - r = cpus_have_const_cap(ARM64_HAS_RAS_EXTN); - break; - case KVM_CAP_SET_GUEST_DEBUG: - case KVM_CAP_VCPU_ATTRIBUTES: - r = 1; - break; - case KVM_CAP_ARM_VM_IPA_SIZE: - r = kvm_ipa_limit; - break; - case KVM_CAP_ARM_SVE: - r = system_supports_sve(); - break; - case KVM_CAP_ARM_PTRAUTH_ADDRESS: - case KVM_CAP_ARM_PTRAUTH_GENERIC: - r = system_has_full_ptr_auth(); - break; - default: - r = 0; - } - - return r; -} - unsigned int kvm_sve_max_vl; int kvm_arm_init_sve(void) -- GitLab From bf118a5cb7e6d17e7ec9492e4dc676e7e7b69d01 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 18 Nov 2020 19:44:02 +0000 Subject: [PATCH 0305/1894] KVM: arm64: Remove unused __extended_idmap_trampoline() prototype __extended_idmap_trampoline() was removed a long time ago by 3421e9d88d7a ("arm64: KVM: Simplify HYP init/teardown") so remove the unused function prototype. Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Cc: Marc Zyngier Link: https://lore.kernel.org/r/20201118194402.2892-4-will@kernel.org --- arch/arm64/include/asm/kvm_host.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 6691043d89307..724c74cfacdd2 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -58,7 +58,6 @@ int kvm_arm_init_sve(void); int __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu); -void __extended_idmap_trampoline(phys_addr_t boot_pgd, phys_addr_t idmap_start); struct kvm_vmid { /* The VMID generation used for the virt. memory system */ -- GitLab From c73a44161776f6e60d933717f3b34084b0a0eba0 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Thu, 26 Nov 2020 14:46:40 +0100 Subject: [PATCH 0306/1894] KVM: arm64: CSSELR_EL1 max is 13 Not counting TnD, which KVM doesn't currently consider, CSSELR_EL1 can have a maximum value of 0b1101 (13), which corresponds to an instruction cache at level 7. With CSSELR_MAX set to 12 we can only select up to cache level 6. Change it to 14. Signed-off-by: Andrew Jones Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201126134641.35231-2-drjones@redhat.com --- arch/arm64/kvm/sys_regs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index c1fac9836af1a..ef453f7827fae 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -169,7 +169,7 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) static u32 cache_levels; /* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */ -#define CSSELR_MAX 12 +#define CSSELR_MAX 14 /* Which cache CCSIDR represents depends on CSSELR value. */ static u32 get_ccsidr(u32 csselr) -- GitLab From c6232bd40b2eda3819d108e6e3f621ec604e15d8 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Thu, 26 Nov 2020 14:46:41 +0100 Subject: [PATCH 0307/1894] KVM: arm64: selftests: Filter out DEMUX registers DEMUX register presence depends on the host's hardware (the CLIDR_EL1 register to be precise). This means there's no set of them that we can bless and that it's possible to encounter new ones when running on different hardware (which would generate "Consider adding them ..." messages, but we'll never want to add them.) Remove the ones we have in the blessed list and filter them out of the new list, but also provide a new command line switch to list them if one so desires. Signed-off-by: Andrew Jones Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201126134641.35231-3-drjones@redhat.com --- .../selftests/kvm/aarch64/get-reg-list.c | 39 ++++++++++++++----- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c index 33218a395d9f6..486932164cf21 100644 --- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c @@ -42,12 +42,16 @@ #define for_each_reg(i) \ for ((i) = 0; (i) < reg_list->n; ++(i)) +#define for_each_reg_filtered(i) \ + for_each_reg(i) \ + if (!filter_reg(reg_list->reg[i])) + #define for_each_missing_reg(i) \ for ((i) = 0; (i) < blessed_n; ++(i)) \ if (!find_reg(reg_list->reg, reg_list->n, blessed_reg[i])) #define for_each_new_reg(i) \ - for ((i) = 0; (i) < reg_list->n; ++(i)) \ + for_each_reg_filtered(i) \ if (!find_reg(blessed_reg, blessed_n, reg_list->reg[i])) @@ -57,6 +61,18 @@ static __u64 base_regs[], vregs[], sve_regs[], rejects_set[]; static __u64 base_regs_n, vregs_n, sve_regs_n, rejects_set_n; static __u64 *blessed_reg, blessed_n; +static bool filter_reg(__u64 reg) +{ + /* + * DEMUX register presence depends on the host's CLIDR_EL1. + * This means there's no set of them that we can bless. + */ + if ((reg & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX) + return true; + + return false; +} + static bool find_reg(__u64 regs[], __u64 nr_regs, __u64 reg) { int i; @@ -325,7 +341,7 @@ int main(int ac, char **av) struct kvm_vcpu_init init = { .target = -1, }; int new_regs = 0, missing_regs = 0, i; int failed_get = 0, failed_set = 0, failed_reject = 0; - bool print_list = false, fixup_core_regs = false; + bool print_list = false, print_filtered = false, fixup_core_regs = false; struct kvm_vm *vm; __u64 *vec_regs; @@ -336,8 +352,10 @@ int main(int ac, char **av) fixup_core_regs = true; else if (strcmp(av[i], "--list") == 0) print_list = true; + else if (strcmp(av[i], "--list-filtered") == 0) + print_filtered = true; else - fprintf(stderr, "Ignoring unknown option: %s\n", av[i]); + TEST_FAIL("Unknown option: %s\n", av[i]); } vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR); @@ -350,10 +368,14 @@ int main(int ac, char **av) if (fixup_core_regs) core_reg_fixup(); - if (print_list) { + if (print_list || print_filtered) { putchar('\n'); - for_each_reg(i) - print_reg(reg_list->reg[i]); + for_each_reg(i) { + __u64 id = reg_list->reg[i]; + if ((print_list && !filter_reg(id)) || + (print_filtered && filter_reg(id))) + print_reg(id); + } putchar('\n'); return 0; } @@ -458,6 +480,8 @@ int main(int ac, char **av) /* * The current blessed list was primed with the output of kernel version * v4.15 with --core-reg-fixup and then later updated with new registers. + * + * The blessed list is up to date with kernel version v5.10-rc5 */ static __u64 base_regs[] = { KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[0]), @@ -736,9 +760,6 @@ static __u64 base_regs[] = { ARM64_SYS_REG(3, 4, 3, 0, 0), /* DACR32_EL2 */ ARM64_SYS_REG(3, 4, 5, 0, 1), /* IFSR32_EL2 */ ARM64_SYS_REG(3, 4, 5, 3, 0), /* FPEXC32_EL2 */ - KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 0, - KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 1, - KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 2, }; static __u64 base_regs_n = ARRAY_SIZE(base_regs); -- GitLab From 02d8e879e4101c2baa1f7a99c1a266742872a049 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 26 Nov 2020 23:24:00 +0000 Subject: [PATCH 0308/1894] clk: qcom: Kconfig: Fix spelling mistake "dyanmic" -> "dynamic" There is a spelling mistake in the Kconfig help text. Fix it. Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20201126232400.15011-1-colin.king@canonical.com Signed-off-by: Stephen Boyd --- drivers/clk/qcom/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/qcom/Kconfig b/drivers/clk/qcom/Kconfig index 48c624a1eff12..8c8b568609d4b 100644 --- a/drivers/clk/qcom/Kconfig +++ b/drivers/clk/qcom/Kconfig @@ -44,7 +44,7 @@ config QCOM_CLK_APCC_MSM8996 help Support for the CPU clock controller on msm8996 devices. Say Y if you want to support CPU clock scaling using CPUfreq - drivers for dyanmic power management. + drivers for dynamic power management. config QCOM_CLK_RPM tristate "RPM based Clock Controller" -- GitLab From 7f43c2014fa03bb8718569ae628acf2089683bff Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 26 Nov 2020 17:25:30 +0000 Subject: [PATCH 0309/1894] arm64: Make the Meltdown mitigation state available Our Meltdown mitigation state isn't exposed outside of cpufeature.c, contrary to the rest of the Spectre mitigation state. As we are going to use it in KVM, expose a arm64_get_meltdown_state() helper which returns the same possible values as arm64_get_spectre_v?_state(). Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/spectre.h | 2 ++ arch/arm64/kernel/cpufeature.c | 20 +++++++++++++++++--- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h index fcdfbce302bdf..52e788981f4a2 100644 --- a/arch/arm64/include/asm/spectre.h +++ b/arch/arm64/include/asm/spectre.h @@ -29,4 +29,6 @@ bool has_spectre_v4(const struct arm64_cpu_capabilities *cap, int scope); void spectre_v4_enable_mitigation(const struct arm64_cpu_capabilities *__unused); void spectre_v4_enable_task_mitigation(struct task_struct *tsk); +enum mitigation_state arm64_get_meltdown_state(void); + #endif /* __ASM_SPECTRE_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 6f36c4f62f694..280b10762f6b7 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2846,14 +2846,28 @@ static int __init enable_mrs_emulation(void) core_initcall(enable_mrs_emulation); +enum mitigation_state arm64_get_meltdown_state(void) +{ + if (__meltdown_safe) + return SPECTRE_UNAFFECTED; + + if (arm64_kernel_unmapped_at_el0()) + return SPECTRE_MITIGATED; + + return SPECTRE_VULNERABLE; +} + ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) { - if (__meltdown_safe) + switch (arm64_get_meltdown_state()) { + case SPECTRE_UNAFFECTED: return sprintf(buf, "Not affected\n"); - if (arm64_kernel_unmapped_at_el0()) + case SPECTRE_MITIGATED: return sprintf(buf, "Mitigation: PTI\n"); - return sprintf(buf, "Vulnerable\n"); + default: + return sprintf(buf, "Vulnerable\n"); + } } -- GitLab From 7f5b57a095f3b9532793d143655e83433bb448af Mon Sep 17 00:00:00 2001 From: Xu Wang Date: Fri, 27 Nov 2020 09:05:51 +0000 Subject: [PATCH 0310/1894] clk: rockchip: Remove redundant null check before clk_prepare_enable Because clk_prepare_enable() already checked NULL clock parameter, so the additional check is unnecessary, just remove it. Signed-off-by: Xu Wang Acked-by: Stephen Boyd Link: https://lore.kernel.org/r/20201127090551.50254-1-vulab@iscas.ac.cn Signed-off-by: Heiko Stuebner --- drivers/clk/rockchip/clk.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/clk/rockchip/clk.c b/drivers/clk/rockchip/clk.c index b443169dd408d..336481bc6cc72 100644 --- a/drivers/clk/rockchip/clk.c +++ b/drivers/clk/rockchip/clk.c @@ -603,8 +603,7 @@ void rockchip_clk_protect_critical(const char *const clocks[], for (i = 0; i < nclocks; i++) { struct clk *clk = __clk_lookup(clocks[i]); - if (clk) - clk_prepare_enable(clk); + clk_prepare_enable(clk); } } EXPORT_SYMBOL_GPL(rockchip_clk_protect_critical); -- GitLab From 5868491e1257786628fdd2457dfb77609f49f91d Mon Sep 17 00:00:00 2001 From: Johan Jonker Date: Wed, 18 Nov 2020 14:58:16 +0100 Subject: [PATCH 0311/1894] clk: rockchip: add CLK_SET_RATE_PARENT to sclk for rk3066a i2s and uart clocks Add CLK_SET_RATE_PARENT to sclk for rk3066a i2s and uart clocks, so that the parent COMPOSITE_FRACMUX and COMPOSITE_NOMUX also update. Signed-off-by: Johan Jonker Link: https://lore.kernel.org/r/20201118135822.9582-2-jbx6244@gmail.com Signed-off-by: Heiko Stuebner --- drivers/clk/rockchip/clk-rk3188.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/clk/rockchip/clk-rk3188.c b/drivers/clk/rockchip/clk-rk3188.c index 730020fcc7fed..db8c588139dec 100644 --- a/drivers/clk/rockchip/clk-rk3188.c +++ b/drivers/clk/rockchip/clk-rk3188.c @@ -255,19 +255,19 @@ static struct rockchip_clk_branch common_spdif_fracmux __initdata = RK2928_CLKSEL_CON(5), 8, 2, MFLAGS); static struct rockchip_clk_branch common_uart0_fracmux __initdata = - MUX(SCLK_UART0, "sclk_uart0", mux_sclk_uart0_p, 0, + MUX(SCLK_UART0, "sclk_uart0", mux_sclk_uart0_p, CLK_SET_RATE_PARENT, RK2928_CLKSEL_CON(13), 8, 2, MFLAGS); static struct rockchip_clk_branch common_uart1_fracmux __initdata = - MUX(SCLK_UART1, "sclk_uart1", mux_sclk_uart1_p, 0, + MUX(SCLK_UART1, "sclk_uart1", mux_sclk_uart1_p, CLK_SET_RATE_PARENT, RK2928_CLKSEL_CON(14), 8, 2, MFLAGS); static struct rockchip_clk_branch common_uart2_fracmux __initdata = - MUX(SCLK_UART2, "sclk_uart2", mux_sclk_uart2_p, 0, + MUX(SCLK_UART2, "sclk_uart2", mux_sclk_uart2_p, CLK_SET_RATE_PARENT, RK2928_CLKSEL_CON(15), 8, 2, MFLAGS); static struct rockchip_clk_branch common_uart3_fracmux __initdata = - MUX(SCLK_UART3, "sclk_uart3", mux_sclk_uart3_p, 0, + MUX(SCLK_UART3, "sclk_uart3", mux_sclk_uart3_p, CLK_SET_RATE_PARENT, RK2928_CLKSEL_CON(16), 8, 2, MFLAGS); static struct rockchip_clk_branch common_clk_branches[] __initdata = { @@ -408,28 +408,28 @@ static struct rockchip_clk_branch common_clk_branches[] __initdata = { COMPOSITE_NOMUX(0, "uart0_pre", "uart_src", 0, RK2928_CLKSEL_CON(13), 0, 7, DFLAGS, RK2928_CLKGATE_CON(1), 8, GFLAGS), - COMPOSITE_FRACMUX(0, "uart0_frac", "uart0_pre", 0, + COMPOSITE_FRACMUX(0, "uart0_frac", "uart0_pre", CLK_SET_RATE_PARENT, RK2928_CLKSEL_CON(17), 0, RK2928_CLKGATE_CON(1), 9, GFLAGS, &common_uart0_fracmux), COMPOSITE_NOMUX(0, "uart1_pre", "uart_src", 0, RK2928_CLKSEL_CON(14), 0, 7, DFLAGS, RK2928_CLKGATE_CON(1), 10, GFLAGS), - COMPOSITE_FRACMUX(0, "uart1_frac", "uart1_pre", 0, + COMPOSITE_FRACMUX(0, "uart1_frac", "uart1_pre", CLK_SET_RATE_PARENT, RK2928_CLKSEL_CON(18), 0, RK2928_CLKGATE_CON(1), 11, GFLAGS, &common_uart1_fracmux), COMPOSITE_NOMUX(0, "uart2_pre", "uart_src", 0, RK2928_CLKSEL_CON(15), 0, 7, DFLAGS, RK2928_CLKGATE_CON(1), 12, GFLAGS), - COMPOSITE_FRACMUX(0, "uart2_frac", "uart2_pre", 0, + COMPOSITE_FRACMUX(0, "uart2_frac", "uart2_pre", CLK_SET_RATE_PARENT, RK2928_CLKSEL_CON(19), 0, RK2928_CLKGATE_CON(1), 13, GFLAGS, &common_uart2_fracmux), COMPOSITE_NOMUX(0, "uart3_pre", "uart_src", 0, RK2928_CLKSEL_CON(16), 0, 7, DFLAGS, RK2928_CLKGATE_CON(1), 14, GFLAGS), - COMPOSITE_FRACMUX(0, "uart3_frac", "uart3_pre", 0, + COMPOSITE_FRACMUX(0, "uart3_frac", "uart3_pre", CLK_SET_RATE_PARENT, RK2928_CLKSEL_CON(20), 0, RK2928_CLKGATE_CON(1), 15, GFLAGS, &common_uart3_fracmux), @@ -543,15 +543,15 @@ static struct clk_div_table div_aclk_cpu_t[] = { }; static struct rockchip_clk_branch rk3066a_i2s0_fracmux __initdata = - MUX(SCLK_I2S0, "sclk_i2s0", mux_sclk_i2s0_p, 0, + MUX(SCLK_I2S0, "sclk_i2s0", mux_sclk_i2s0_p, CLK_SET_RATE_PARENT, RK2928_CLKSEL_CON(2), 8, 2, MFLAGS); static struct rockchip_clk_branch rk3066a_i2s1_fracmux __initdata = - MUX(SCLK_I2S1, "sclk_i2s1", mux_sclk_i2s1_p, 0, + MUX(SCLK_I2S1, "sclk_i2s1", mux_sclk_i2s1_p, CLK_SET_RATE_PARENT, RK2928_CLKSEL_CON(3), 8, 2, MFLAGS); static struct rockchip_clk_branch rk3066a_i2s2_fracmux __initdata = - MUX(SCLK_I2S2, "sclk_i2s2", mux_sclk_i2s2_p, 0, + MUX(SCLK_I2S2, "sclk_i2s2", mux_sclk_i2s2_p, CLK_SET_RATE_PARENT, RK2928_CLKSEL_CON(4), 8, 2, MFLAGS); static struct rockchip_clk_branch rk3066a_clk_branches[] __initdata = { @@ -615,21 +615,21 @@ static struct rockchip_clk_branch rk3066a_clk_branches[] __initdata = { COMPOSITE_NOMUX(0, "i2s0_pre", "i2s_src", 0, RK2928_CLKSEL_CON(2), 0, 7, DFLAGS, RK2928_CLKGATE_CON(0), 7, GFLAGS), - COMPOSITE_FRACMUX(0, "i2s0_frac", "i2s0_pre", 0, + COMPOSITE_FRACMUX(0, "i2s0_frac", "i2s0_pre", CLK_SET_RATE_PARENT, RK2928_CLKSEL_CON(6), 0, RK2928_CLKGATE_CON(0), 8, GFLAGS, &rk3066a_i2s0_fracmux), COMPOSITE_NOMUX(0, "i2s1_pre", "i2s_src", 0, RK2928_CLKSEL_CON(3), 0, 7, DFLAGS, RK2928_CLKGATE_CON(0), 9, GFLAGS), - COMPOSITE_FRACMUX(0, "i2s1_frac", "i2s1_pre", 0, + COMPOSITE_FRACMUX(0, "i2s1_frac", "i2s1_pre", CLK_SET_RATE_PARENT, RK2928_CLKSEL_CON(7), 0, RK2928_CLKGATE_CON(0), 10, GFLAGS, &rk3066a_i2s1_fracmux), COMPOSITE_NOMUX(0, "i2s2_pre", "i2s_src", 0, RK2928_CLKSEL_CON(4), 0, 7, DFLAGS, RK2928_CLKGATE_CON(0), 11, GFLAGS), - COMPOSITE_FRACMUX(0, "i2s2_frac", "i2s2_pre", 0, + COMPOSITE_FRACMUX(0, "i2s2_frac", "i2s2_pre", CLK_SET_RATE_PARENT, RK2928_CLKSEL_CON(8), 0, RK2928_CLKGATE_CON(0), 12, GFLAGS, &rk3066a_i2s2_fracmux), -- GitLab From caa2fd752ecb80faf7a2e1cdadc737187934675e Mon Sep 17 00:00:00 2001 From: Johan Jonker Date: Wed, 18 Nov 2020 14:58:17 +0100 Subject: [PATCH 0312/1894] clk: rockchip: fix i2s gate bits on rk3066 and rk3188 The Rockchip PX2/RK3066 uses these bits in CRU_CLKGATE7_CON: hclk_i2s_8ch_gate_en bit 4 (dtsi: i2s0) hclk_i2s0_2ch_gate_en bit 2 (dtsi: i2s1) hclk_i2s1_2ch_gate_en bit 3 (dtsi: i2s2) The Rockchip PX3/RK3188 uses this bit in CRU_CLKGATE7_CON: hclk_i2s_2ch_gate_en bit 2 (dtsi: i2s0) The bits got somehow mixed up in the clk-rk3188.c file. The labels in the dtsi files are not suppose to change. The sclk and hclk names should match for "trace_event=clk_disable,clk_enable", so remove GATE HCLK_I2S0 from the common clock tree and fix the bits in the rk3066 and rk3188 clock tree. Signed-off-by: Johan Jonker Link: https://lore.kernel.org/r/20201118135822.9582-3-jbx6244@gmail.com Signed-off-by: Heiko Stuebner --- drivers/clk/rockchip/clk-rk3188.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/clk/rockchip/clk-rk3188.c b/drivers/clk/rockchip/clk-rk3188.c index db8c588139dec..0b76ad34de000 100644 --- a/drivers/clk/rockchip/clk-rk3188.c +++ b/drivers/clk/rockchip/clk-rk3188.c @@ -449,7 +449,6 @@ static struct rockchip_clk_branch common_clk_branches[] __initdata = { /* hclk_cpu gates */ GATE(HCLK_ROM, "hclk_rom", "hclk_cpu", 0, RK2928_CLKGATE_CON(5), 6, GFLAGS), - GATE(HCLK_I2S0, "hclk_i2s0", "hclk_cpu", 0, RK2928_CLKGATE_CON(7), 2, GFLAGS), GATE(HCLK_SPDIF, "hclk_spdif", "hclk_cpu", 0, RK2928_CLKGATE_CON(7), 1, GFLAGS), GATE(0, "hclk_cpubus", "hclk_cpu", 0, RK2928_CLKGATE_CON(4), 8, GFLAGS), /* hclk_ahb2apb is part of a clk branch */ @@ -634,8 +633,9 @@ static struct rockchip_clk_branch rk3066a_clk_branches[] __initdata = { RK2928_CLKGATE_CON(0), 12, GFLAGS, &rk3066a_i2s2_fracmux), - GATE(HCLK_I2S1, "hclk_i2s1", "hclk_cpu", 0, RK2928_CLKGATE_CON(7), 3, GFLAGS), - GATE(HCLK_I2S2, "hclk_i2s2", "hclk_cpu", 0, RK2928_CLKGATE_CON(7), 4, GFLAGS), + GATE(HCLK_I2S0, "hclk_i2s0", "hclk_cpu", 0, RK2928_CLKGATE_CON(7), 4, GFLAGS), + GATE(HCLK_I2S1, "hclk_i2s1", "hclk_cpu", 0, RK2928_CLKGATE_CON(7), 2, GFLAGS), + GATE(HCLK_I2S2, "hclk_i2s2", "hclk_cpu", 0, RK2928_CLKGATE_CON(7), 3, GFLAGS), GATE(HCLK_CIF1, "hclk_cif1", "hclk_cpu", 0, RK2928_CLKGATE_CON(6), 6, GFLAGS), GATE(HCLK_HDMI, "hclk_hdmi", "hclk_cpu", 0, RK2928_CLKGATE_CON(4), 14, GFLAGS), @@ -728,6 +728,7 @@ static struct rockchip_clk_branch rk3188_clk_branches[] __initdata = { RK2928_CLKGATE_CON(0), 10, GFLAGS, &rk3188_i2s0_fracmux), + GATE(HCLK_I2S0, "hclk_i2s0", "hclk_cpu", 0, RK2928_CLKGATE_CON(7), 2, GFLAGS), GATE(0, "hclk_imem0", "hclk_cpu", 0, RK2928_CLKGATE_CON(4), 14, GFLAGS), GATE(0, "hclk_imem1", "hclk_cpu", 0, RK2928_CLKGATE_CON(4), 15, GFLAGS), -- GitLab From 57e3cebd022fbc035dcf190ac789fd2ffc747f5b Mon Sep 17 00:00:00 2001 From: Shenming Lu Date: Sat, 28 Nov 2020 22:18:57 +0800 Subject: [PATCH 0313/1894] KVM: arm64: Delay the polling of the GICR_VPENDBASER.Dirty bit In order to reduce the impact of the VPT parsing happening on the GIC, we can split the vcpu reseidency in two phases: - programming GICR_VPENDBASER: this still happens in vcpu_load() - checking for the VPT parsing to be complete: this can happen on vcpu entry (in kvm_vgic_flush_hwstate()) This allows the GIC and the CPU to work in parallel, rewmoving some of the entry overhead. Suggested-by: Marc Zyngier Signed-off-by: Shenming Lu Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201128141857.983-3-lushenming@huawei.com --- arch/arm64/kvm/vgic/vgic-v4.c | 12 ++++++++++++ arch/arm64/kvm/vgic/vgic.c | 3 +++ drivers/irqchip/irq-gic-v3-its.c | 12 ++++++++---- drivers/irqchip/irq-gic-v4.c | 19 +++++++++++++++++++ include/kvm/arm_vgic.h | 1 + include/linux/irqchip/arm-gic-v4.h | 4 ++++ 6 files changed, 47 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c index b5fa73c9fd355..66508b03094f2 100644 --- a/arch/arm64/kvm/vgic/vgic-v4.c +++ b/arch/arm64/kvm/vgic/vgic-v4.c @@ -353,6 +353,18 @@ int vgic_v4_load(struct kvm_vcpu *vcpu) return err; } +void vgic_v4_commit(struct kvm_vcpu *vcpu) +{ + struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe; + + /* + * No need to wait for the vPE to be ready across a shallow guest + * exit, as only a vcpu_put will invalidate it. + */ + if (!vpe->ready) + its_commit_vpe(vpe); +} + static struct vgic_its *vgic_get_its(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *irq_entry) { diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index c3643b7f101b7..1c597c9885fa7 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -915,6 +915,9 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) if (can_access_vgic_from_kernel()) vgic_restore_state(vcpu); + + if (vgic_supports_direct_msis(vcpu->kvm)) + vgic_v4_commit(vcpu); } void kvm_vgic_load(struct kvm_vcpu *vcpu) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 0fec31931e115..7db602434ac51 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -3842,8 +3842,6 @@ static void its_vpe_schedule(struct its_vpe *vpe) val |= vpe->idai ? GICR_VPENDBASER_IDAI : 0; val |= GICR_VPENDBASER_Valid; gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER); - - its_wait_vpt_parse_complete(); } static void its_vpe_deschedule(struct its_vpe *vpe) @@ -3891,6 +3889,10 @@ static int its_vpe_set_vcpu_affinity(struct irq_data *d, void *vcpu_info) its_vpe_deschedule(vpe); return 0; + case COMMIT_VPE: + its_wait_vpt_parse_complete(); + return 0; + case INVALL_VPE: its_vpe_invall(vpe); return 0; @@ -4052,8 +4054,6 @@ static void its_vpe_4_1_schedule(struct its_vpe *vpe, val |= FIELD_PREP(GICR_VPENDBASER_4_1_VPEID, vpe->vpe_id); gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER); - - its_wait_vpt_parse_complete(); } static void its_vpe_4_1_deschedule(struct its_vpe *vpe, @@ -4128,6 +4128,10 @@ static int its_vpe_4_1_set_vcpu_affinity(struct irq_data *d, void *vcpu_info) its_vpe_4_1_deschedule(vpe, info); return 0; + case COMMIT_VPE: + its_wait_vpt_parse_complete(); + return 0; + case INVALL_VPE: its_vpe_4_1_invall(vpe); return 0; diff --git a/drivers/irqchip/irq-gic-v4.c b/drivers/irqchip/irq-gic-v4.c index 0c18714ae13ec..5d1dc9915272b 100644 --- a/drivers/irqchip/irq-gic-v4.c +++ b/drivers/irqchip/irq-gic-v4.c @@ -232,6 +232,8 @@ int its_make_vpe_non_resident(struct its_vpe *vpe, bool db) if (!ret) vpe->resident = false; + vpe->ready = false; + return ret; } @@ -258,6 +260,23 @@ int its_make_vpe_resident(struct its_vpe *vpe, bool g0en, bool g1en) return ret; } +int its_commit_vpe(struct its_vpe *vpe) +{ + struct its_cmd_info info = { + .cmd_type = COMMIT_VPE, + }; + int ret; + + WARN_ON(preemptible()); + + ret = its_send_vpe_cmd(vpe, &info); + if (!ret) + vpe->ready = true; + + return ret; +} + + int its_invall_vpe(struct its_vpe *vpe) { struct its_cmd_info info = { diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index a8d8fdcd37230..3d74f1060bd18 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -402,6 +402,7 @@ int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int irq, struct kvm_kernel_irq_routing_entry *irq_entry); int vgic_v4_load(struct kvm_vcpu *vcpu); +void vgic_v4_commit(struct kvm_vcpu *vcpu); int vgic_v4_put(struct kvm_vcpu *vcpu, bool need_db); #endif /* __KVM_ARM_VGIC_H */ diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h index 6976b8331b604..943c3411ca101 100644 --- a/include/linux/irqchip/arm-gic-v4.h +++ b/include/linux/irqchip/arm-gic-v4.h @@ -39,6 +39,8 @@ struct its_vpe { irq_hw_number_t vpe_db_lpi; /* VPE resident */ bool resident; + /* VPT parse complete */ + bool ready; union { /* GICv4.0 implementations */ struct { @@ -104,6 +106,7 @@ enum its_vcpu_info_cmd_type { PROP_UPDATE_AND_INV_VLPI, SCHEDULE_VPE, DESCHEDULE_VPE, + COMMIT_VPE, INVALL_VPE, PROP_UPDATE_VSGI, }; @@ -129,6 +132,7 @@ int its_alloc_vcpu_irqs(struct its_vm *vm); void its_free_vcpu_irqs(struct its_vm *vm); int its_make_vpe_resident(struct its_vpe *vpe, bool g0en, bool g1en); int its_make_vpe_non_resident(struct its_vpe *vpe, bool db); +int its_commit_vpe(struct its_vpe *vpe); int its_invall_vpe(struct its_vpe *vpe); int its_map_vlpi(int irq, struct its_vlpi_map *map); int its_get_vlpi(int irq, struct its_vlpi_map *map); -- GitLab From 94b69c615e4e2b04d1392d1193c72406ff9fd73e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 27 Nov 2020 13:14:04 +0900 Subject: [PATCH 0314/1894] perf test: Add shadow stat test It calculates IPC from the cycles and instruction counts and compares it with the shadow stat for both global aggregation (default) and no aggregation mode. $ perf stat -a -A -e cycles,instructions sleep 1 Performance counter stats for 'system wide': CPU0 39,580,880 cycles CPU1 45,426,945 cycles CPU2 31,151,685 cycles CPU3 55,167,421 cycles CPU0 17,073,564 instructions # 0.43 insn per cycle CPU1 34,955,764 instructions # 0.77 insn per cycle CPU2 15,688,459 instructions # 0.50 insn per cycle CPU3 34,699,217 instructions # 0.63 insn per cycle 1.003275495 seconds time elapsed In this example, the 'insn per cycle' should be matched to the number for each cpu. For CPU2, 0.50 = 15,688,459 / 31,151,685 . Committer testing: # perf test shadow 78: perf stat metrics (shadow stat) test : Ok # Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ian Rogers Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201127041404.390276-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/stat+shadow_stat.sh | 80 ++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100755 tools/perf/tests/shell/stat+shadow_stat.sh diff --git a/tools/perf/tests/shell/stat+shadow_stat.sh b/tools/perf/tests/shell/stat+shadow_stat.sh new file mode 100755 index 0000000000000..249dfe48cf6ad --- /dev/null +++ b/tools/perf/tests/shell/stat+shadow_stat.sh @@ -0,0 +1,80 @@ +#!/bin/sh +# perf stat metrics (shadow stat) test +# SPDX-License-Identifier: GPL-2.0 + +set -e + +# skip if system-wide mode is forbidden +perf stat -a true > /dev/null 2>&1 || exit 2 + +test_global_aggr() +{ + local cyc + + perf stat -a --no-big-num -e cycles,instructions sleep 1 2>&1 | \ + grep -e cycles -e instructions | \ + while read num evt hash ipc rest + do + # skip not counted events + if [[ $num == "&1 | \ + grep ^CPU | \ + while read cpu num evt hash ipc rest + do + # skip not counted events + if [[ $num == " Date: Mon, 30 Nov 2020 09:08:24 -0300 Subject: [PATCH 0315/1894] perf evsel: Convert last 'struct evsel' methods to the right evsel__ prefix As 'perf_evsel__' means its a function in tools/lib/perf/. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/evsel-tp-sched.c | 25 ++++++++++--------- tools/perf/ui/browsers/hists.c | 28 ++++++++-------------- tools/perf/util/evsel.c | 40 ++++++++++++------------------- tools/perf/util/header.c | 5 ++-- tools/perf/util/stat.c | 4 ++-- 5 files changed, 41 insertions(+), 61 deletions(-) diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c index 0e224a0a55d9a..f9e34bd26cf33 100644 --- a/tools/perf/tests/evsel-tp-sched.c +++ b/tools/perf/tests/evsel-tp-sched.c @@ -5,8 +5,7 @@ #include "tests.h" #include "debug.h" -static int perf_evsel__test_field(struct evsel *evsel, const char *name, - int size, bool should_be_signed) +static int evsel__test_field(struct evsel *evsel, const char *name, int size, bool should_be_signed) { struct tep_format_field *field = evsel__field(evsel, name); int is_signed; @@ -43,25 +42,25 @@ int test__perf_evsel__tp_sched_test(struct test *test __maybe_unused, int subtes return -1; } - if (perf_evsel__test_field(evsel, "prev_comm", 16, false)) + if (evsel__test_field(evsel, "prev_comm", 16, false)) ret = -1; - if (perf_evsel__test_field(evsel, "prev_pid", 4, true)) + if (evsel__test_field(evsel, "prev_pid", 4, true)) ret = -1; - if (perf_evsel__test_field(evsel, "prev_prio", 4, true)) + if (evsel__test_field(evsel, "prev_prio", 4, true)) ret = -1; - if (perf_evsel__test_field(evsel, "prev_state", sizeof(long), true)) + if (evsel__test_field(evsel, "prev_state", sizeof(long), true)) ret = -1; - if (perf_evsel__test_field(evsel, "next_comm", 16, false)) + if (evsel__test_field(evsel, "next_comm", 16, false)) ret = -1; - if (perf_evsel__test_field(evsel, "next_pid", 4, true)) + if (evsel__test_field(evsel, "next_pid", 4, true)) ret = -1; - if (perf_evsel__test_field(evsel, "next_prio", 4, true)) + if (evsel__test_field(evsel, "next_prio", 4, true)) ret = -1; evsel__delete(evsel); @@ -73,16 +72,16 @@ int test__perf_evsel__tp_sched_test(struct test *test __maybe_unused, int subtes return -1; } - if (perf_evsel__test_field(evsel, "comm", 16, false)) + if (evsel__test_field(evsel, "comm", 16, false)) ret = -1; - if (perf_evsel__test_field(evsel, "pid", 4, true)) + if (evsel__test_field(evsel, "pid", 4, true)) ret = -1; - if (perf_evsel__test_field(evsel, "prio", 4, true)) + if (evsel__test_field(evsel, "prio", 4, true)) ret = -1; - if (perf_evsel__test_field(evsel, "target_cpu", 4, true)) + if (evsel__test_field(evsel, "target_cpu", 4, true)) ret = -1; evsel__delete(evsel); diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index b0e1880cf992b..e735d7e37da91 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2946,14 +2946,10 @@ next: } } -static int perf_evsel__hists_browse(struct evsel *evsel, int nr_events, - const char *helpline, - bool left_exits, - struct hist_browser_timer *hbt, - float min_pcnt, - struct perf_env *env, - bool warn_lost_event, - struct annotation_options *annotation_opts) +static int evsel__hists_browse(struct evsel *evsel, int nr_events, const char *helpline, + bool left_exits, struct hist_browser_timer *hbt, float min_pcnt, + struct perf_env *env, bool warn_lost_event, + struct annotation_options *annotation_opts) { struct hists *hists = evsel__hists(evsel); struct hist_browser *browser = perf_evsel_browser__new(evsel, hbt, env, annotation_opts); @@ -3505,12 +3501,10 @@ browse_hists: */ if (hbt) hbt->timer(hbt->arg); - key = perf_evsel__hists_browse(pos, nr_events, help, - true, hbt, - menu->min_pcnt, - menu->env, - warn_lost_event, - menu->annotation_opts); + key = evsel__hists_browse(pos, nr_events, help, true, hbt, + menu->min_pcnt, menu->env, + warn_lost_event, + menu->annotation_opts); ui_browser__show_title(&menu->b, title); switch (key) { case K_TAB: @@ -3633,10 +3627,8 @@ int perf_evlist__tui_browse_hists(struct evlist *evlist, const char *help, single_entry: { struct evsel *first = evlist__first(evlist); - return perf_evsel__hists_browse(first, nr_entries, help, - false, hbt, min_pcnt, - env, warn_lost_event, - annotation_opts); + return evsel__hists_browse(first, nr_entries, help, false, hbt, min_pcnt, + env, warn_lost_event, annotation_opts); } } diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 1cad6051d8b08..9005cc974f43f 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -497,7 +497,7 @@ static const char *__evsel__hw_name(u64 config) return "unknown-hardware"; } -static int perf_evsel__add_modifiers(struct evsel *evsel, char *bf, size_t size) +static int evsel__add_modifiers(struct evsel *evsel, char *bf, size_t size) { int colon = 0, r = 0; struct perf_event_attr *attr = &evsel->core.attr; @@ -536,7 +536,7 @@ static int perf_evsel__add_modifiers(struct evsel *evsel, char *bf, size_t size) static int evsel__hw_name(struct evsel *evsel, char *bf, size_t size) { int r = scnprintf(bf, size, "%s", __evsel__hw_name(evsel->core.attr.config)); - return r + perf_evsel__add_modifiers(evsel, bf + r, size - r); + return r + evsel__add_modifiers(evsel, bf + r, size - r); } const char *evsel__sw_names[PERF_COUNT_SW_MAX] = { @@ -562,7 +562,7 @@ static const char *__evsel__sw_name(u64 config) static int evsel__sw_name(struct evsel *evsel, char *bf, size_t size) { int r = scnprintf(bf, size, "%s", __evsel__sw_name(evsel->core.attr.config)); - return r + perf_evsel__add_modifiers(evsel, bf + r, size - r); + return r + evsel__add_modifiers(evsel, bf + r, size - r); } static int __evsel__bp_name(char *bf, size_t size, u64 addr, u64 type) @@ -587,7 +587,7 @@ static int evsel__bp_name(struct evsel *evsel, char *bf, size_t size) { struct perf_event_attr *attr = &evsel->core.attr; int r = __evsel__bp_name(bf, size, attr->bp_addr, attr->bp_type); - return r + perf_evsel__add_modifiers(evsel, bf + r, size - r); + return r + evsel__add_modifiers(evsel, bf + r, size - r); } const char *evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX][EVSEL__MAX_ALIASES] = { @@ -682,13 +682,13 @@ out_err: static int evsel__hw_cache_name(struct evsel *evsel, char *bf, size_t size) { int ret = __evsel__hw_cache_name(evsel->core.attr.config, bf, size); - return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret); + return ret + evsel__add_modifiers(evsel, bf + ret, size - ret); } static int evsel__raw_name(struct evsel *evsel, char *bf, size_t size) { int ret = scnprintf(bf, size, "raw 0x%" PRIx64, evsel->core.attr.config); - return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret); + return ret + evsel__add_modifiers(evsel, bf + ret, size - ret); } static int evsel__tool_name(char *bf, size_t size) @@ -850,9 +850,7 @@ void evsel__config_callchain(struct evsel *evsel, struct record_opts *opts, return __evsel__config_callchain(evsel, opts, param); } -static void -perf_evsel__reset_callgraph(struct evsel *evsel, - struct callchain_param *param) +static void evsel__reset_callgraph(struct evsel *evsel, struct callchain_param *param) { struct perf_event_attr *attr = &evsel->core.attr; @@ -988,7 +986,7 @@ static void evsel__apply_config_terms(struct evsel *evsel, /* If global callgraph set, clear it */ if (callchain_param.enabled) - perf_evsel__reset_callgraph(evsel, &callchain_param); + evsel__reset_callgraph(evsel, &callchain_param); /* set perf-event callgraph */ if (param.enabled) { @@ -1434,9 +1432,7 @@ static int evsel__read_one(struct evsel *evsel, int cpu, int thread) return perf_evsel__read(&evsel->core, cpu, thread, count); } -static void -perf_evsel__set_count(struct evsel *counter, int cpu, int thread, - u64 val, u64 ena, u64 run) +static void evsel__set_count(struct evsel *counter, int cpu, int thread, u64 val, u64 ena, u64 run) { struct perf_counts_values *count; @@ -1449,9 +1445,7 @@ perf_evsel__set_count(struct evsel *counter, int cpu, int thread, perf_counts__set_loaded(counter->counts, cpu, thread, true); } -static int -perf_evsel__process_group_data(struct evsel *leader, - int cpu, int thread, u64 *data) +static int evsel__process_group_data(struct evsel *leader, int cpu, int thread, u64 *data) { u64 read_format = leader->core.attr.read_format; struct sample_read_value *v; @@ -1470,8 +1464,7 @@ perf_evsel__process_group_data(struct evsel *leader, v = (struct sample_read_value *) data; - perf_evsel__set_count(leader, cpu, thread, - v[0].value, ena, run); + evsel__set_count(leader, cpu, thread, v[0].value, ena, run); for (i = 1; i < nr; i++) { struct evsel *counter; @@ -1480,8 +1473,7 @@ perf_evsel__process_group_data(struct evsel *leader, if (!counter) return -EINVAL; - perf_evsel__set_count(counter, cpu, thread, - v[i].value, ena, run); + evsel__set_count(counter, cpu, thread, v[i].value, ena, run); } return 0; @@ -1514,7 +1506,7 @@ static int evsel__read_group(struct evsel *leader, int cpu, int thread) if (readn(FD(leader, cpu, thread), data, size) <= 0) return -errno; - return perf_evsel__process_group_data(leader, cpu, thread, data); + return evsel__process_group_data(leader, cpu, thread, data); } int evsel__read_counter(struct evsel *evsel, int cpu, int thread) @@ -1567,9 +1559,7 @@ static int get_group_fd(struct evsel *evsel, int cpu, int thread) return fd; } -static void perf_evsel__remove_fd(struct evsel *pos, - int nr_cpus, int nr_threads, - int thread_idx) +static void evsel__remove_fd(struct evsel *pos, int nr_cpus, int nr_threads, int thread_idx) { for (int cpu = 0; cpu < nr_cpus; cpu++) for (int thread = thread_idx; thread < nr_threads - 1; thread++) @@ -1588,7 +1578,7 @@ static int update_fds(struct evsel *evsel, evlist__for_each_entry(evsel->evlist, pos) { nr_cpus = pos != evsel ? nr_cpus : cpu_idx; - perf_evsel__remove_fd(pos, nr_cpus, nr_threads, thread_idx); + evsel__remove_fd(pos, nr_cpus, nr_threads, thread_idx); /* * Since fds for next evsel has not been created, diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index b9171bd11fe69..b65c8f7ce36a0 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -3735,8 +3735,7 @@ static int read_attr(int fd, struct perf_header *ph, return ret <= 0 ? -1 : 0; } -static int perf_evsel__prepare_tracepoint_event(struct evsel *evsel, - struct tep_handle *pevent) +static int evsel__prepare_tracepoint_event(struct evsel *evsel, struct tep_handle *pevent) { struct tep_event *event; char bf[128]; @@ -3774,7 +3773,7 @@ static int perf_evlist__prepare_tracepoint_events(struct evlist *evlist, evlist__for_each_entry(evlist, pos) { if (pos->core.attr.type == PERF_TYPE_TRACEPOINT && - perf_evsel__prepare_tracepoint_event(pos, pevent)) + evsel__prepare_tracepoint_event(pos, pevent)) return -1; } diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index bd0decd6d7535..5e98d591cb38d 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -229,7 +229,7 @@ void perf_evlist__reset_prev_raw_counts(struct evlist *evlist) evsel__reset_prev_raw_counts(evsel); } -static void perf_evsel__copy_prev_raw_counts(struct evsel *evsel) +static void evsel__copy_prev_raw_counts(struct evsel *evsel) { int ncpus = evsel__nr_cpus(evsel); int nthreads = perf_thread_map__nr(evsel->core.threads); @@ -250,7 +250,7 @@ void perf_evlist__copy_prev_raw_counts(struct evlist *evlist) struct evsel *evsel; evlist__for_each_entry(evlist, evsel) - perf_evsel__copy_prev_raw_counts(evsel); + evsel__copy_prev_raw_counts(evsel); } void perf_evlist__save_aggr_prev_raw_counts(struct evlist *evlist) -- GitLab From a622eafa1a54043c2eaedfccdd1b1ee5ffeb9d06 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 09:22:07 -0300 Subject: [PATCH 0316/1894] perf evlist: Use the right prefix for 'struct evlist' methods: evlist__set_leader() perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 2 +- tools/perf/util/evlist.c | 8 ++++---- tools/perf/util/evlist.h | 4 ++-- tools/perf/util/parse-events.c | 2 +- tools/perf/util/python.c | 2 +- tools/perf/util/record.c | 2 +- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index f15b2f8aa14d8..2dcfd6d3f879a 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -733,7 +733,7 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) } if (group) - perf_evlist__set_leader(evsel_list); + evlist__set_leader(evsel_list); if (affinity__setup(&affinity) < 0) return -1; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 8bdf3d2c907cb..655691eebe851 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -212,7 +212,7 @@ out: return err; } -void __perf_evlist__set_leader(struct list_head *list) +void __evlist__set_leader(struct list_head *list) { struct evsel *evsel, *leader; @@ -226,11 +226,11 @@ void __perf_evlist__set_leader(struct list_head *list) } } -void perf_evlist__set_leader(struct evlist *evlist) +void evlist__set_leader(struct evlist *evlist) { if (evlist->core.nr_entries) { evlist->nr_groups = evlist->core.nr_entries > 1 ? 1 : 0; - __perf_evlist__set_leader(&evlist->core.entries); + __evlist__set_leader(&evlist->core.entries); } } @@ -1694,7 +1694,7 @@ void perf_evlist__force_leader(struct evlist *evlist) if (!evlist->nr_groups) { struct evsel *leader = evlist__first(evlist); - perf_evlist__set_leader(evlist); + evlist__set_leader(evlist); leader->forced_leader = true; } } diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index e1a450322bc5b..c155f13640775 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -220,8 +220,8 @@ void perf_evlist__set_selected(struct evlist *evlist, int perf_evlist__create_maps(struct evlist *evlist, struct target *target); int perf_evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel); -void __perf_evlist__set_leader(struct list_head *list); -void perf_evlist__set_leader(struct evlist *evlist); +void __evlist__set_leader(struct list_head *list); +void evlist__set_leader(struct evlist *evlist); u64 __evlist__combined_sample_type(struct evlist *evlist); u64 evlist__combined_sample_type(struct evlist *evlist); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 3b581d7b32131..8fa87f60133ff 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1769,7 +1769,7 @@ void parse_events__set_leader(char *name, struct list_head *list, if (parse_events__set_leader_for_uncore_aliase(name, list, parse_state)) return; - __perf_evlist__set_leader(list); + __evlist__set_leader(list); leader = list_entry(list->next, struct evsel, core.node); leader->group_name = name ? strdup(name) : NULL; } diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index ae8edde7c50ef..3d3b465148630 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -1089,7 +1089,7 @@ static PyObject *pyrf_evlist__open(struct pyrf_evlist *pevlist, return NULL; if (group) - perf_evlist__set_leader(evlist); + evlist__set_leader(evlist); if (evlist__open(evlist) < 0) { PyErr_SetFromErrno(PyExc_OSError); diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index 07e4b96a6625a..7330407db4ba1 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -102,7 +102,7 @@ void perf_evlist__config(struct evlist *evlist, struct record_opts *opts, * since some might depend on this info. */ if (opts->group) - perf_evlist__set_leader(evlist); + evlist__set_leader(evlist); if (evlist->core.cpus->map[0] < 0) opts->no_inherit = true; -- GitLab From 7b392ef04ef570c15de8fc0d36171f9bc80dd539 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 09:26:54 -0300 Subject: [PATCH 0317/1894] perf evlist: Use the right prefix for 'struct evlist' 'workload' methods perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-ftrace.c | 9 ++++----- tools/perf/builtin-record.c | 11 +++++------ tools/perf/builtin-stat.c | 7 +++---- tools/perf/builtin-trace.c | 7 +++---- tools/perf/tests/event-times.c | 4 ++-- tools/perf/tests/perf-record.c | 8 ++++---- tools/perf/tests/task-exit.c | 9 ++++----- tools/perf/util/evlist.c | 9 ++++----- tools/perf/util/evlist.h | 10 ++++------ 9 files changed, 33 insertions(+), 41 deletions(-) diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 9366fad591dcc..bf4d8e9ab8f5c 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -67,7 +67,7 @@ static void sig_handler(int sig __maybe_unused) } /* - * perf_evlist__prepare_workload will send a SIGUSR1 if the fork fails, since + * evlist__prepare_workload will send a SIGUSR1 if the fork fails, since * we asked by setting its exec_error to the function below, * ftrace__workload_exec_failed_signal. * @@ -600,9 +600,8 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) if (write_tracing_file("trace", "0") < 0) goto out; - if (argc && perf_evlist__prepare_workload(ftrace->evlist, - &ftrace->target, argv, false, - ftrace__workload_exec_failed_signal) < 0) { + if (argc && evlist__prepare_workload(ftrace->evlist, &ftrace->target, argv, false, + ftrace__workload_exec_failed_signal) < 0) { goto out; } @@ -644,7 +643,7 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) } } - perf_evlist__start_workload(ftrace->evlist); + evlist__start_workload(ftrace->evlist); if (ftrace->initial_delay) { usleep(ftrace->initial_delay * 1000); diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index adf311d15d3d2..4302f32c80c59 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1333,7 +1333,7 @@ record__switch_output(struct record *rec, bool at_exit) static volatile int workload_exec_errno; /* - * perf_evlist__prepare_workload will send a SIGUSR1 + * evlist__prepare_workload will send a SIGUSR1 * if the fork fails, since we asked by setting its * want_signal to true. */ @@ -1689,9 +1689,8 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) record__init_features(rec); if (forks) { - err = perf_evlist__prepare_workload(rec->evlist, &opts->target, - argv, data->is_pipe, - workload_exec_failed_signal); + err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe, + workload_exec_failed_signal); if (err < 0) { pr_err("Couldn't run the workload!\n"); status = err; @@ -1835,7 +1834,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) machine); free(event); - perf_evlist__start_workload(rec->evlist); + evlist__start_workload(rec->evlist); } if (evlist__initialize_ctlfd(rec->evlist, opts->ctl_fd, opts->ctl_fd_ack)) @@ -2413,7 +2412,7 @@ static bool dry_run; * XXX Will stay a global variable till we fix builtin-script.c to stop messing * with it and switch to use the library functions in perf_evlist that came * from builtin-record.c, i.e. use record_opts, - * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record', + * evlist__prepare_workload, etc instead of fork+exec'in 'perf record', * using pipes, etc. */ static struct option __record_options[] = { diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 2dcfd6d3f879a..460e9b206533a 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -534,7 +534,7 @@ static void disable_counters(void) static volatile int workload_exec_errno; /* - * perf_evlist__prepare_workload will send a SIGUSR1 + * evlist__prepare_workload will send a SIGUSR1 * if the fork fails, since we asked by setting its * want_signal to true. */ @@ -724,8 +724,7 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) bool second_pass = false; if (forks) { - if (perf_evlist__prepare_workload(evsel_list, &target, argv, is_pipe, - workload_exec_failed_signal) < 0) { + if (evlist__prepare_workload(evsel_list, &target, argv, is_pipe, workload_exec_failed_signal) < 0) { perror("failed to prepare workload"); return -1; } @@ -876,7 +875,7 @@ try_again_reset: clock_gettime(CLOCK_MONOTONIC, &ref_time); if (forks) { - perf_evlist__start_workload(evsel_list); + evlist__start_workload(evsel_list); enable_counters(); if (interval || timeout || evlist__ctlfd_initialized(evsel_list)) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index de80534473afa..80af0bf37d76b 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3680,7 +3680,7 @@ static int trace__set_filter_pids(struct trace *trace) * Better not use !target__has_task() here because we need to cover the * case where no threads were specified in the command line, but a * workload was, and in that case we will fill in the thread_map when - * we fork the workload in perf_evlist__prepare_workload. + * we fork the workload in evlist__prepare_workload. */ if (trace->filter_pids.nr > 0) { err = perf_evlist__append_tp_filter_pids(trace->evlist, trace->filter_pids.nr, @@ -3969,8 +3969,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) signal(SIGINT, sig_handler); if (forks) { - err = perf_evlist__prepare_workload(evlist, &trace->opts.target, - argv, false, NULL); + err = evlist__prepare_workload(evlist, &trace->opts.target, argv, false, NULL); if (err < 0) { fprintf(trace->output, "Couldn't run the workload!\n"); goto out_delete_evlist; @@ -4043,7 +4042,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) evlist__enable(evlist); if (forks) - perf_evlist__start_workload(evlist); + evlist__start_workload(evlist); if (trace->opts.initial_delay) { usleep(trace->opts.initial_delay * 1000); diff --git a/tools/perf/tests/event-times.c b/tools/perf/tests/event-times.c index db68894a6f40a..9da2d4f58b8e7 100644 --- a/tools/perf/tests/event-times.c +++ b/tools/perf/tests/event-times.c @@ -32,7 +32,7 @@ static int attach__enable_on_exec(struct evlist *evlist) return err; } - err = perf_evlist__prepare_workload(evlist, &target, argv, false, NULL); + err = evlist__prepare_workload(evlist, &target, argv, false, NULL); if (err < 0) { pr_debug("Couldn't run the workload!\n"); return err; @@ -47,7 +47,7 @@ static int attach__enable_on_exec(struct evlist *evlist) return err; } - return perf_evlist__start_workload(evlist) == 1 ? TEST_OK : TEST_FAIL; + return evlist__start_workload(evlist) == 1 ? TEST_OK : TEST_FAIL; } static int detach__enable_on_exec(struct evlist *evlist) diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index 67d3f5aad0167..8745bf5f93e66 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -81,7 +81,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus /* * Create maps of threads and cpus to monitor. In this case * we start with all threads and cpus (-1, -1) but then in - * perf_evlist__prepare_workload we'll fill in the only thread + * evlist__prepare_workload we'll fill in the only thread * we're monitoring, the one forked there. */ err = perf_evlist__create_maps(evlist, &opts.target); @@ -92,11 +92,11 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus /* * Prepare the workload in argv[] to run, it'll fork it, and then wait - * for perf_evlist__start_workload() to exec it. This is done this way + * for evlist__start_workload() to exec it. This is done this way * so that we have time to open the evlist (calling sys_perf_event_open * on all the fds) and then mmap them. */ - err = perf_evlist__prepare_workload(evlist, &opts.target, argv, false, NULL); + err = evlist__prepare_workload(evlist, &opts.target, argv, false, NULL); if (err < 0) { pr_debug("Couldn't run the workload!\n"); goto out_delete_evlist; @@ -161,7 +161,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus /* * Now! */ - perf_evlist__start_workload(evlist); + evlist__start_workload(evlist); while (1) { int before = total_events; diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index adaff90443319..452d51048cc18 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -23,7 +23,7 @@ static void sig_handler(int sig __maybe_unused) } /* - * perf_evlist__prepare_workload will send a SIGUSR1 if the fork fails, since + * evlist__prepare_workload will send a SIGUSR1 if the fork fails, since * we asked by setting its exec_error to this handler. */ static void workload_exec_failed_signal(int signo __maybe_unused, @@ -67,7 +67,7 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused /* * Create maps of threads and cpus to monitor. In this case * we start with all threads and cpus (-1, -1) but then in - * perf_evlist__prepare_workload we'll fill in the only thread + * evlist__prepare_workload we'll fill in the only thread * we're monitoring, the one forked there. */ cpus = perf_cpu_map__dummy_new(); @@ -83,8 +83,7 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused cpus = NULL; threads = NULL; - err = perf_evlist__prepare_workload(evlist, &target, argv, false, - workload_exec_failed_signal); + err = evlist__prepare_workload(evlist, &target, argv, false, workload_exec_failed_signal); if (err < 0) { pr_debug("Couldn't run the workload!\n"); goto out_delete_evlist; @@ -116,7 +115,7 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused goto out_delete_evlist; } - perf_evlist__start_workload(evlist); + evlist__start_workload(evlist); retry: md = &evlist->mmap[0]; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 655691eebe851..198d4ef038844 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1315,9 +1315,8 @@ out_err: return err; } -int perf_evlist__prepare_workload(struct evlist *evlist, struct target *target, - const char *argv[], bool pipe_output, - void (*exec_error)(int signo, siginfo_t *info, void *ucontext)) +int evlist__prepare_workload(struct evlist *evlist, struct target *target, const char *argv[], + bool pipe_output, void (*exec_error)(int signo, siginfo_t *info, void *ucontext)) { int child_ready_pipe[2], go_pipe[2]; char bf; @@ -1362,7 +1361,7 @@ int perf_evlist__prepare_workload(struct evlist *evlist, struct target *target, /* * The parent will ask for the execvp() to be performed by * writing exactly one byte, in workload.cork_fd, usually via - * perf_evlist__start_workload(). + * evlist__start_workload(). * * For cancelling the workload without actually running it, * the parent will just close workload.cork_fd, without writing @@ -1429,7 +1428,7 @@ out_close_ready_pipe: return -1; } -int perf_evlist__start_workload(struct evlist *evlist) +int evlist__start_workload(struct evlist *evlist) { if (evlist->workload.cork_fd > 0) { char bf = 0; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index c155f13640775..a0a53f33a2724 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -182,12 +182,10 @@ void perf_evlist__config(struct evlist *evlist, struct record_opts *opts, struct callchain_param *callchain); int record_opts__config(struct record_opts *opts); -int perf_evlist__prepare_workload(struct evlist *evlist, - struct target *target, - const char *argv[], bool pipe_output, - void (*exec_error)(int signo, siginfo_t *info, - void *ucontext)); -int perf_evlist__start_workload(struct evlist *evlist); +int evlist__prepare_workload(struct evlist *evlist, struct target *target, + const char *argv[], bool pipe_output, + void (*exec_error)(int signo, siginfo_t *info, void *ucontext)); +int evlist__start_workload(struct evlist *evlist); struct option; -- GitLab From 53f5e9084d0195209bfc7e5fa547fd35bbaadbee Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 09:31:04 -0300 Subject: [PATCH 0318/1894] perf evlist: Use the right prefix for 'struct evlist' stats methods perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 6 +++--- tools/perf/builtin-stat.c | 16 ++++++++-------- tools/perf/tests/parse-metric.c | 4 ++-- tools/perf/util/stat.c | 14 +++++++------- tools/perf/util/stat.h | 12 ++++++------ 5 files changed, 26 insertions(+), 26 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 48588ccf902eb..983c101965ab6 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1847,7 +1847,7 @@ static void perf_sample__fprint_metric(struct perf_script *script, u64 val; if (!evsel->stats) - perf_evlist__alloc_stats(script->session->evlist, false); + evlist__alloc_stats(script->session->evlist, false); if (evsel_script(evsel->leader)->gnum++ == 0) perf_stat__reset_shadow_stats(); val = sample->period * evsel->scale; @@ -3308,7 +3308,7 @@ static int set_maps(struct perf_script *script) perf_evlist__set_maps(&evlist->core, script->cpus, script->threads); - if (perf_evlist__alloc_stats(evlist, true)) + if (evlist__alloc_stats(evlist, true)) return -ENOMEM; script->allocated = true; @@ -3935,7 +3935,7 @@ out_delete: zfree(&script.ptime_range); } - perf_evlist__free_stats(session->evlist); + evlist__free_stats(session->evlist); perf_session__delete(session); if (script_started) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 460e9b206533a..b81e64017808f 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -270,7 +270,7 @@ static void perf_stat__reset_stats(void) { int i; - perf_evlist__reset_stats(evsel_list); + evlist__reset_stats(evsel_list); perf_stat__reset_shadow_stats(); for (i = 0; i < stat_config.stats_num; i++) @@ -913,10 +913,10 @@ try_again_reset: update_stats(&walltime_nsecs_stats, t1 - t0); if (stat_config.aggr_mode == AGGR_GLOBAL) - perf_evlist__save_aggr_prev_raw_counts(evsel_list); + evlist__save_aggr_prev_raw_counts(evsel_list); - perf_evlist__copy_prev_raw_counts(evsel_list); - perf_evlist__reset_prev_raw_counts(evsel_list); + evlist__copy_prev_raw_counts(evsel_list); + evlist__reset_prev_raw_counts(evsel_list); runtime_stat_reset(&stat_config); perf_stat__reset_shadow_per_stat(&rt_stat); } else @@ -1907,7 +1907,7 @@ static int set_maps(struct perf_stat *st) perf_evlist__set_maps(&evsel_list->core, st->cpus, st->threads); - if (perf_evlist__alloc_stats(evsel_list, true)) + if (evlist__alloc_stats(evsel_list, true)) return -ENOMEM; st->maps_allocated = true; @@ -2309,7 +2309,7 @@ int cmd_stat(int argc, const char **argv) goto out; } - if (perf_evlist__alloc_stats(evsel_list, interval)) + if (evlist__alloc_stats(evsel_list, interval)) goto out; if (perf_stat_init_aggr_mode()) @@ -2349,7 +2349,7 @@ int cmd_stat(int argc, const char **argv) run_idx + 1); if (run_idx != 0) - perf_evlist__reset_prev_raw_counts(evsel_list); + evlist__reset_prev_raw_counts(evsel_list); status = run_perf_stat(argc, argv, run_idx); if (forever && status != -1 && !interval) { @@ -2400,7 +2400,7 @@ int cmd_stat(int argc, const char **argv) } perf_stat__exit_aggr_mode(); - perf_evlist__free_stats(evsel_list); + evlist__free_stats(evsel_list); out: zfree(&stat_config.walltime_run); diff --git a/tools/perf/tests/parse-metric.c b/tools/perf/tests/parse-metric.c index 7c1bde01cb500..ce7be37f0d88a 100644 --- a/tools/perf/tests/parse-metric.c +++ b/tools/perf/tests/parse-metric.c @@ -166,7 +166,7 @@ static int __compute_metric(const char *name, struct value *vals, if (err) goto out; - err = perf_evlist__alloc_stats(evlist, false); + err = evlist__alloc_stats(evlist, false); if (err) goto out; @@ -183,7 +183,7 @@ out: /* ... clenup. */ metricgroup__rblist_exit(&metric_events); runtime_stat__exit(&st); - perf_evlist__free_stats(evlist); + evlist__free_stats(evlist); perf_cpu_map__put(cpus); evlist__delete(evlist); return err; diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 5e98d591cb38d..8be9c3da9e564 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -184,7 +184,7 @@ static int evsel__alloc_stats(struct evsel *evsel, bool alloc_raw) return 0; } -int perf_evlist__alloc_stats(struct evlist *evlist, bool alloc_raw) +int evlist__alloc_stats(struct evlist *evlist, bool alloc_raw) { struct evsel *evsel; @@ -196,11 +196,11 @@ int perf_evlist__alloc_stats(struct evlist *evlist, bool alloc_raw) return 0; out_free: - perf_evlist__free_stats(evlist); + evlist__free_stats(evlist); return -1; } -void perf_evlist__free_stats(struct evlist *evlist) +void evlist__free_stats(struct evlist *evlist) { struct evsel *evsel; @@ -211,7 +211,7 @@ void perf_evlist__free_stats(struct evlist *evlist) } } -void perf_evlist__reset_stats(struct evlist *evlist) +void evlist__reset_stats(struct evlist *evlist) { struct evsel *evsel; @@ -221,7 +221,7 @@ void perf_evlist__reset_stats(struct evlist *evlist) } } -void perf_evlist__reset_prev_raw_counts(struct evlist *evlist) +void evlist__reset_prev_raw_counts(struct evlist *evlist) { struct evsel *evsel; @@ -245,7 +245,7 @@ static void evsel__copy_prev_raw_counts(struct evsel *evsel) evsel->counts->aggr = evsel->prev_raw_counts->aggr; } -void perf_evlist__copy_prev_raw_counts(struct evlist *evlist) +void evlist__copy_prev_raw_counts(struct evlist *evlist) { struct evsel *evsel; @@ -253,7 +253,7 @@ void perf_evlist__copy_prev_raw_counts(struct evlist *evlist) evsel__copy_prev_raw_counts(evsel); } -void perf_evlist__save_aggr_prev_raw_counts(struct evlist *evlist) +void evlist__save_aggr_prev_raw_counts(struct evlist *evlist) { struct evsel *evsel; diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 05adf81650259..4cba4b106e45c 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -213,12 +213,12 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, struct runtime_stat *st); void perf_stat__collect_metric_expr(struct evlist *); -int perf_evlist__alloc_stats(struct evlist *evlist, bool alloc_raw); -void perf_evlist__free_stats(struct evlist *evlist); -void perf_evlist__reset_stats(struct evlist *evlist); -void perf_evlist__reset_prev_raw_counts(struct evlist *evlist); -void perf_evlist__copy_prev_raw_counts(struct evlist *evlist); -void perf_evlist__save_aggr_prev_raw_counts(struct evlist *evlist); +int evlist__alloc_stats(struct evlist *evlist, bool alloc_raw); +void evlist__free_stats(struct evlist *evlist); +void evlist__reset_stats(struct evlist *evlist); +void evlist__reset_prev_raw_counts(struct evlist *evlist); +void evlist__copy_prev_raw_counts(struct evlist *evlist); +void evlist__save_aggr_prev_raw_counts(struct evlist *evlist); int perf_stat_process_counter(struct perf_stat_config *config, struct evsel *counter); -- GitLab From ade9d208d6f054c0cd69af16c0a23af62b3da3b8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 09:33:55 -0300 Subject: [PATCH 0319/1894] perf evlist: Use the right prefix for 'struct evlist' 'toggle' methods perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 8 ++++---- tools/perf/builtin-top.c | 6 +++--- tools/perf/ui/browsers/hists.c | 2 +- tools/perf/util/evlist.c | 7 +++---- tools/perf/util/evlist.h | 4 ++-- 5 files changed, 13 insertions(+), 14 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 4302f32c80c59..d0e3dd57c108e 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1166,7 +1166,7 @@ static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist, rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event)); if (overwrite) - perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY); + evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY); out: return rc; } @@ -1860,11 +1860,11 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) * BKW_MMAP_EMPTY here: when done == true and * hits != rec->samples in previous round. * - * perf_evlist__toggle_bkw_mmap ensure we never + * evlist__toggle_bkw_mmap ensure we never * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING. */ if (trigger_is_hit(&switch_output_trigger) || done || draining) - perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING); + evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING); if (record__mmap_read_all(rec, false) < 0) { trigger_error(&auxtrace_snapshot_trigger); @@ -1903,7 +1903,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) * record__mmap_read_all(): we should have collected * data from it. */ - perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING); + evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING); if (!quiet) fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n", diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 7c64134472c77..b7da09376c728 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -918,14 +918,14 @@ static void perf_top__mmap_read(struct perf_top *top) int i; if (overwrite) - perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_DATA_PENDING); + evlist__toggle_bkw_mmap(evlist, BKW_MMAP_DATA_PENDING); for (i = 0; i < top->evlist->core.nr_mmaps; i++) perf_top__mmap_read_idx(top, i); if (overwrite) { - perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY); - perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING); + evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY); + evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING); } } diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index e735d7e37da91..48814efb98e8c 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -3264,7 +3264,7 @@ do_hotkey: // key came straight from options ui__popup_menu() if (!is_report_browser(hbt)) { struct perf_top *top = hbt->arg; - perf_evlist__toggle_enable(top->evlist); + evlist__toggle_enable(top->evlist); /* * No need to refresh, resort/decay histogram * entries if we are not collecting samples: diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 198d4ef038844..3ea3c649ab044 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -447,7 +447,7 @@ void evlist__enable(struct evlist *evlist) evlist->enabled = true; } -void perf_evlist__toggle_enable(struct evlist *evlist) +void evlist__toggle_enable(struct evlist *evlist) { (evlist->enabled ? evlist__disable : evlist__enable)(evlist); } @@ -727,7 +727,7 @@ perf_evlist__mmap_cb_get(struct perf_evlist *_evlist, bool overwrite, int idx) if (overwrite) { evlist->overwrite_mmap = maps; if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY) - perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING); + evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING); } else { evlist->mmap = maps; } @@ -1613,8 +1613,7 @@ perf_evlist__find_evsel_by_str(struct evlist *evlist, return NULL; } -void perf_evlist__toggle_bkw_mmap(struct evlist *evlist, - enum bkw_mmap_state state) +void evlist__toggle_bkw_mmap(struct evlist *evlist, enum bkw_mmap_state state) { enum bkw_mmap_state old_state = evlist->bkw_mmap_state; enum action { diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index a0a53f33a2724..e5a4de1a89153 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -168,7 +168,7 @@ struct evsel *perf_evlist__id2evsel_strict(struct evlist *evlist, struct perf_sample_id *perf_evlist__id2sid(struct evlist *evlist, u64 id); -void perf_evlist__toggle_bkw_mmap(struct evlist *evlist, enum bkw_mmap_state state); +void evlist__toggle_bkw_mmap(struct evlist *evlist, enum bkw_mmap_state state); void evlist__mmap_consume(struct evlist *evlist, int idx); @@ -207,7 +207,7 @@ size_t evlist__mmap_size(unsigned long pages); void evlist__disable(struct evlist *evlist); void evlist__enable(struct evlist *evlist); -void perf_evlist__toggle_enable(struct evlist *evlist); +void evlist__toggle_enable(struct evlist *evlist); int perf_evlist__enable_event_idx(struct evlist *evlist, struct evsel *evsel, int idx); -- GitLab From 24bf91a7540bc0d14c389dd4f612eea57c06dc93 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 09:38:02 -0300 Subject: [PATCH 0320/1894] perf evlist: Use the right prefix for 'struct evlist' 'filter' methods perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 2 +- tools/perf/builtin-stat.c | 2 +- tools/perf/builtin-trace.c | 8 ++++---- tools/perf/util/evlist.c | 22 +++++++++++----------- tools/perf/util/evlist.h | 14 +++++++------- 5 files changed, 24 insertions(+), 24 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index d0e3dd57c108e..d53396421a022 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -946,7 +946,7 @@ try_again: "even with a suitable vmlinux or kallsyms file.\n\n"); } - if (perf_evlist__apply_filters(evlist, &pos)) { + if (evlist__apply_filters(evlist, &pos)) { pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n", pos->filter, evsel__name(pos), errno, str_error_r(errno, msg, sizeof(msg))); diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index b81e64017808f..29f7d4752e848 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -842,7 +842,7 @@ try_again_reset: return -1; } - if (perf_evlist__apply_filters(evsel_list, &counter)) { + if (evlist__apply_filters(evsel_list, &counter)) { pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n", counter->filter, evsel__name(counter), errno, str_error_r(errno, msg, sizeof(msg))); diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 80af0bf37d76b..9f6f0f873cbc2 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3666,7 +3666,7 @@ static int trace__set_filter_loop_pids(struct trace *trace) thread = parent; } - err = perf_evlist__append_tp_filter_pids(trace->evlist, nr, pids); + err = evlist__append_tp_filter_pids(trace->evlist, nr, pids); if (!err && trace->filter_pids.map) err = bpf_map__set_filter_pids(trace->filter_pids.map, nr, pids); @@ -3683,8 +3683,8 @@ static int trace__set_filter_pids(struct trace *trace) * we fork the workload in evlist__prepare_workload. */ if (trace->filter_pids.nr > 0) { - err = perf_evlist__append_tp_filter_pids(trace->evlist, trace->filter_pids.nr, - trace->filter_pids.entries); + err = evlist__append_tp_filter_pids(trace->evlist, trace->filter_pids.nr, + trace->filter_pids.entries); if (!err && trace->filter_pids.map) { err = bpf_map__set_filter_pids(trace->filter_pids.map, trace->filter_pids.nr, trace->filter_pids.entries); @@ -4027,7 +4027,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) err = trace__expand_filters(trace, &evsel); if (err) goto out_delete_evlist; - err = perf_evlist__apply_filters(evlist, &evsel); + err = evlist__apply_filters(evlist, &evsel); if (err < 0) goto out_error_apply_filters; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 3ea3c649ab044..725f9f58bdd1e 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -975,7 +975,7 @@ void __perf_evlist__reset_sample_bit(struct evlist *evlist, __evsel__reset_sample_bit(evsel, bit); } -int perf_evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel) +int evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel) { struct evsel *evsel; int err = 0; @@ -998,7 +998,7 @@ int perf_evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel) return err; } -int perf_evlist__set_tp_filter(struct evlist *evlist, const char *filter) +int evlist__set_tp_filter(struct evlist *evlist, const char *filter) { struct evsel *evsel; int err = 0; @@ -1018,7 +1018,7 @@ int perf_evlist__set_tp_filter(struct evlist *evlist, const char *filter) return err; } -int perf_evlist__append_tp_filter(struct evlist *evlist, const char *filter) +int evlist__append_tp_filter(struct evlist *evlist, const char *filter) { struct evsel *evsel; int err = 0; @@ -1064,32 +1064,32 @@ out_free: return NULL; } -int perf_evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids) +int evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids) { char *filter = asprintf__tp_filter_pids(npids, pids); - int ret = perf_evlist__set_tp_filter(evlist, filter); + int ret = evlist__set_tp_filter(evlist, filter); free(filter); return ret; } -int perf_evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid) +int evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid) { - return perf_evlist__set_tp_filter_pids(evlist, 1, &pid); + return evlist__set_tp_filter_pids(evlist, 1, &pid); } -int perf_evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids) +int evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids) { char *filter = asprintf__tp_filter_pids(npids, pids); - int ret = perf_evlist__append_tp_filter(evlist, filter); + int ret = evlist__append_tp_filter(evlist, filter); free(filter); return ret; } -int perf_evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid) +int evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid) { - return perf_evlist__append_tp_filter_pids(evlist, 1, &pid); + return evlist__append_tp_filter_pids(evlist, 1, &pid); } bool evlist__valid_sample_type(struct evlist *evlist) diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index e5a4de1a89153..3b1acf70b5dda 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -141,14 +141,14 @@ void __perf_evlist__reset_sample_bit(struct evlist *evlist, #define perf_evlist__reset_sample_bit(evlist, bit) \ __perf_evlist__reset_sample_bit(evlist, PERF_SAMPLE_##bit) -int perf_evlist__set_tp_filter(struct evlist *evlist, const char *filter); -int perf_evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid); -int perf_evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids); +int evlist__set_tp_filter(struct evlist *evlist, const char *filter); +int evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid); +int evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids); -int perf_evlist__append_tp_filter(struct evlist *evlist, const char *filter); +int evlist__append_tp_filter(struct evlist *evlist, const char *filter); -int perf_evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid); -int perf_evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids); +int evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid); +int evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids); struct evsel * perf_evlist__find_tracepoint_by_id(struct evlist *evlist, int id); @@ -216,7 +216,7 @@ void perf_evlist__set_selected(struct evlist *evlist, struct evsel *evsel); int perf_evlist__create_maps(struct evlist *evlist, struct target *target); -int perf_evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel); +int evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel); void __evlist__set_leader(struct list_head *list); void evlist__set_leader(struct evlist *evlist); -- GitLab From 08c83997ca87f9e162563a59ea43eabadc9e4231 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 09:40:10 -0300 Subject: [PATCH 0321/1894] perf evlist: Use the right prefix for 'struct evlist' sideband thread methods perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 4 ++-- tools/perf/builtin-top.c | 4 ++-- tools/perf/util/bpf-event.c | 2 +- tools/perf/util/evlist.h | 11 ++++------- tools/perf/util/sideband_evlist.c | 8 ++++---- 5 files changed, 13 insertions(+), 16 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index d53396421a022..04946bd3d090f 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1548,7 +1548,7 @@ static int record__setup_sb_evlist(struct record *rec) } } #endif - if (perf_evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) { + if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) { pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n"); opts->no_bpf_event = true; } @@ -2065,7 +2065,7 @@ out_delete_session: perf_session__delete(session); if (!opts->no_bpf_event) - perf_evlist__stop_sb_thread(rec->sb_evlist); + evlist__stop_sb_thread(rec->sb_evlist); return status; } diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index b7da09376c728..50dd42da66eec 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1762,7 +1762,7 @@ int cmd_top(int argc, const char **argv) } #endif - if (perf_evlist__start_sb_thread(top.sb_evlist, target)) { + if (evlist__start_sb_thread(top.sb_evlist, target)) { pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n"); opts->no_bpf_event = true; } @@ -1770,7 +1770,7 @@ int cmd_top(int argc, const char **argv) status = __cmd_top(&top); if (!opts->no_bpf_event) - perf_evlist__stop_sb_thread(top.sb_evlist); + evlist__stop_sb_thread(top.sb_evlist); out_delete_evlist: evlist__delete(top.evlist); diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 3742511a08d15..57d58c81a5f8e 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -526,7 +526,7 @@ int evlist__add_bpf_sb_event(struct evlist *evlist, struct perf_env *env) */ attr.wakeup_watermark = 1; - return perf_evlist__add_sb_event(evlist, &attr, bpf_event__sb_cb, env); + return evlist__add_sb_event(evlist, &attr, bpf_event__sb_cb, env); } void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 3b1acf70b5dda..736cb639df84a 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -112,14 +112,11 @@ int __evlist__add_default_attrs(struct evlist *evlist, int evlist__add_dummy(struct evlist *evlist); -int perf_evlist__add_sb_event(struct evlist *evlist, - struct perf_event_attr *attr, - evsel__sb_cb_t cb, - void *data); +int evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr, + evsel__sb_cb_t cb, void *data); void evlist__set_cb(struct evlist *evlist, evsel__sb_cb_t cb, void *data); -int perf_evlist__start_sb_thread(struct evlist *evlist, - struct target *target); -void perf_evlist__stop_sb_thread(struct evlist *evlist); +int evlist__start_sb_thread(struct evlist *evlist, struct target *target); +void evlist__stop_sb_thread(struct evlist *evlist); int evlist__add_newtp(struct evlist *evlist, const char *sys, const char *name, void *handler); diff --git a/tools/perf/util/sideband_evlist.c b/tools/perf/util/sideband_evlist.c index ded9ced027973..90ed016bb3489 100644 --- a/tools/perf/util/sideband_evlist.c +++ b/tools/perf/util/sideband_evlist.c @@ -12,8 +12,8 @@ #include #include -int perf_evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr, - evsel__sb_cb_t cb, void *data) +int evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr, + evsel__sb_cb_t cb, void *data) { struct evsel *evsel; @@ -94,7 +94,7 @@ void evlist__set_cb(struct evlist *evlist, evsel__sb_cb_t cb, void *data) } } -int perf_evlist__start_sb_thread(struct evlist *evlist, struct target *target) +int evlist__start_sb_thread(struct evlist *evlist, struct target *target) { struct evsel *counter; @@ -138,7 +138,7 @@ out_delete_evlist: return -1; } -void perf_evlist__stop_sb_thread(struct evlist *evlist) +void evlist__stop_sb_thread(struct evlist *evlist) { if (!evlist) return; -- GitLab From 2a6599cd5e093b3c607a39288f14a618c03a0e24 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 09:43:07 -0300 Subject: [PATCH 0322/1894] perf evlist: Use the right prefix for 'struct evlist' sample parsing methods perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-kvm.c | 2 +- tools/perf/builtin-top.c | 4 ++-- tools/perf/builtin-trace.c | 5 ++--- tools/perf/tests/code-reading.c | 4 ++-- tools/perf/tests/mmap-basic.c | 2 +- tools/perf/tests/parse-no-sample-id-all.c | 4 ++-- tools/perf/tests/perf-record.c | 2 +- tools/perf/tests/sw-clock.c | 2 +- tools/perf/tests/switch-tracking.c | 8 ++++---- tools/perf/util/auxtrace.c | 2 +- tools/perf/util/evlist.c | 7 ++----- tools/perf/util/evlist.h | 8 ++------ tools/perf/util/s390-cpumsf.c | 4 ++-- tools/perf/util/session.c | 7 +++---- 14 files changed, 26 insertions(+), 35 deletions(-) diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 460945ded6dd2..e1b60aad13cbe 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -764,7 +764,7 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx, return (err == -EAGAIN) ? 0 : -1; while ((event = perf_mmap__read_event(&md->core)) != NULL) { - err = perf_evlist__parse_sample_timestamp(evlist, event, ×tamp); + err = evlist__parse_sample_timestamp(evlist, event, ×tamp); if (err) { perf_mmap__consume(&md->core); pr_err("Failed to parse sample\n"); diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 50dd42da66eec..fff1d5ef5c493 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -890,7 +890,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) while ((event = perf_mmap__read_event(&md->core)) != NULL) { int ret; - ret = perf_evlist__parse_sample_timestamp(evlist, event, &last_timestamp); + ret = evlist__parse_sample_timestamp(evlist, event, &last_timestamp); if (ret && ret != -1) break; @@ -1153,7 +1153,7 @@ static int deliver_event(struct ordered_events *qe, return 0; } - ret = perf_evlist__parse_sample(evlist, event, &sample); + ret = evlist__parse_sample(evlist, event, &sample); if (ret) { pr_err("Can't parse sample, err = %d\n", ret); goto next_event; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 9f6f0f873cbc2..5638eae36ec8a 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3700,9 +3700,8 @@ static int __trace__deliver_event(struct trace *trace, union perf_event *event) { struct evlist *evlist = trace->evlist; struct perf_sample sample; - int err; + int err = evlist__parse_sample(evlist, event, &sample); - err = perf_evlist__parse_sample(evlist, event, &sample); if (err) fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err); else @@ -3735,7 +3734,7 @@ static int trace__deliver_event(struct trace *trace, union perf_event *event) if (!trace->sort_events) return __trace__deliver_event(trace, event); - err = perf_evlist__parse_sample_timestamp(trace->evlist, event, &trace->oe.last); + err = evlist__parse_sample_timestamp(trace->evlist, event, &trace->oe.last); if (err && err != -1) return err; diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 035c9123549a9..339d432b1bf1c 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -378,8 +378,8 @@ static int process_sample_event(struct machine *machine, struct thread *thread; int ret; - if (perf_evlist__parse_sample(evlist, event, &sample)) { - pr_debug("perf_evlist__parse_sample failed\n"); + if (evlist__parse_sample(evlist, event, &sample)) { + pr_debug("evlist__parse_sample failed\n"); return -1; } diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 7b0dbfc0e17d4..d826153adbf84 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -126,7 +126,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse goto out_delete_evlist; } - err = perf_evlist__parse_sample(evlist, event, &sample); + err = evlist__parse_sample(evlist, event, &sample); if (err) { pr_err("Can't parse sample, err = %d\n", err); goto out_delete_evlist; diff --git a/tools/perf/tests/parse-no-sample-id-all.c b/tools/perf/tests/parse-no-sample-id-all.c index adf3c9c4a4163..471273676701b 100644 --- a/tools/perf/tests/parse-no-sample-id-all.c +++ b/tools/perf/tests/parse-no-sample-id-all.c @@ -27,8 +27,8 @@ static int process_event(struct evlist **pevlist, union perf_event *event) if (!*pevlist) return -1; - if (perf_evlist__parse_sample(*pevlist, event, &sample)) { - pr_debug("perf_evlist__parse_sample failed\n"); + if (evlist__parse_sample(*pevlist, event, &sample)) { + pr_debug("evlist__parse_sample failed\n"); return -1; } diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index 8745bf5f93e66..f0b2066c7ea97 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -182,7 +182,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus if (type < PERF_RECORD_MAX) nr_events[type]++; - err = perf_evlist__parse_sample(evlist, event, &sample); + err = evlist__parse_sample(evlist, event, &sample); if (err < 0) { if (verbose > 0) perf_event__fprintf(event, NULL, stderr); diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c index 4b9b731977c8c..a49c9e23053bd 100644 --- a/tools/perf/tests/sw-clock.c +++ b/tools/perf/tests/sw-clock.c @@ -109,7 +109,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) if (event->header.type != PERF_RECORD_SAMPLE) goto next_event; - err = perf_evlist__parse_sample(evlist, event, &sample); + err = evlist__parse_sample(evlist, event, &sample); if (err < 0) { pr_debug("Error during parse sample\n"); goto out_delete_evlist; diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index db5e1f70053a8..b8cdbe6f324f2 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -128,8 +128,8 @@ static int process_sample_event(struct evlist *evlist, pid_t next_tid, prev_tid; int cpu, err; - if (perf_evlist__parse_sample(evlist, event, &sample)) { - pr_debug("perf_evlist__parse_sample failed\n"); + if (evlist__parse_sample(evlist, event, &sample)) { + pr_debug("evlist__parse_sample failed\n"); return -1; } @@ -223,8 +223,8 @@ static int add_event(struct evlist *evlist, struct list_head *events, node->event = event; list_add(&node->list, events); - if (perf_evlist__parse_sample(evlist, event, &sample)) { - pr_debug("perf_evlist__parse_sample failed\n"); + if (evlist__parse_sample(evlist, event, &sample)) { + pr_debug("evlist__parse_sample failed\n"); return -1; } diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 62e7f6c5f8b5d..647afdcb26995 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -1082,7 +1082,7 @@ static int auxtrace_queue_data_cb(struct perf_session *session, if (!qd->samples || event->header.type != PERF_RECORD_SAMPLE) return 0; - err = perf_evlist__parse_sample(session->evlist, event, &sample); + err = evlist__parse_sample(session->evlist, event, &sample); if (err) return err; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 725f9f58bdd1e..943262aa6505b 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1447,8 +1447,7 @@ int evlist__start_workload(struct evlist *evlist) return 0; } -int perf_evlist__parse_sample(struct evlist *evlist, union perf_event *event, - struct perf_sample *sample) +int evlist__parse_sample(struct evlist *evlist, union perf_event *event, struct perf_sample *sample) { struct evsel *evsel = perf_evlist__event2evsel(evlist, event); @@ -1457,9 +1456,7 @@ int perf_evlist__parse_sample(struct evlist *evlist, union perf_event *event, return evsel__parse_sample(evsel, event, sample); } -int perf_evlist__parse_sample_timestamp(struct evlist *evlist, - union perf_event *event, - u64 *timestamp) +int evlist__parse_sample_timestamp(struct evlist *evlist, union perf_event *event, u64 *timestamp) { struct evsel *evsel = perf_evlist__event2evsel(evlist, event); diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 736cb639df84a..c59a08cf9f25a 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -224,12 +224,8 @@ u64 evlist__combined_branch_type(struct evlist *evlist); bool evlist__sample_id_all(struct evlist *evlist); u16 perf_evlist__id_hdr_size(struct evlist *evlist); -int perf_evlist__parse_sample(struct evlist *evlist, union perf_event *event, - struct perf_sample *sample); - -int perf_evlist__parse_sample_timestamp(struct evlist *evlist, - union perf_event *event, - u64 *timestamp); +int evlist__parse_sample(struct evlist *evlist, union perf_event *event, struct perf_sample *sample); +int evlist__parse_sample_timestamp(struct evlist *evlist, union perf_event *event, u64 *timestamp); bool evlist__valid_sample_type(struct evlist *evlist); bool evlist__valid_sample_id_all(struct evlist *evlist); diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c index f8861998e5bd2..959e9890bccce 100644 --- a/tools/perf/util/s390-cpumsf.c +++ b/tools/perf/util/s390-cpumsf.c @@ -96,9 +96,9 @@ * | than PERF_RECORD_USER_TYPE_START) are handled by * | perf_session__process_user_event(see below) * | - Those generated by the kernel are handled by - * | perf_evlist__parse_sample_timestamp() + * | evlist__parse_sample_timestamp() * | - * perf_evlist__parse_sample_timestamp() + * evlist__parse_sample_timestamp() * | Extract time stamp from sample data. * | * perf_session__queue_event() diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 5cc722b6fe7c0..61e4b3cc73135 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1523,9 +1523,8 @@ static int perf_session__deliver_event(struct perf_session *session, u64 file_offset) { struct perf_sample sample; - int ret; + int ret = evlist__parse_sample(session->evlist, event, &sample); - ret = perf_evlist__parse_sample(session->evlist, event, &sample); if (ret) { pr_err("Can't parse sample, err = %d\n", ret); return ret; @@ -1697,7 +1696,7 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset, out_parse_sample: if (sample && event->header.type < PERF_RECORD_USER_TYPE_START && - perf_evlist__parse_sample(session->evlist, event, sample)) + evlist__parse_sample(session->evlist, event, sample)) return -1; *event_ptr = event; @@ -1754,7 +1753,7 @@ static s64 perf_session__process_event(struct perf_session *session, if (tool->ordered_events) { u64 timestamp = -1ULL; - ret = perf_evlist__parse_sample_timestamp(evlist, event, ×tamp); + ret = evlist__parse_sample_timestamp(evlist, event, ×tamp); if (ret && ret != -1) return ret; -- GitLab From b02736f776d5f50bb13ff85eb34efaed0c3f5ffa Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 09:48:07 -0300 Subject: [PATCH 0323/1894] perf evlist: Use the right prefix for 'struct evlist' 'find' methods perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-kmem.c | 7 ++----- tools/perf/builtin-sched.c | 6 ++---- tools/perf/builtin-trace.c | 14 +++++--------- tools/perf/util/bpf-loader.c | 3 +-- tools/perf/util/evlist.c | 14 ++++---------- tools/perf/util/evlist.h | 11 +++-------- tools/perf/util/evswitch.c | 4 ++-- tools/perf/util/header.c | 5 ++--- 8 files changed, 21 insertions(+), 43 deletions(-) diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index a50dae2c4ae9e..0062445e8ead8 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -1960,18 +1960,15 @@ int cmd_kmem(int argc, const char **argv) ret = -1; if (kmem_slab) { - if (!perf_evlist__find_tracepoint_by_name(session->evlist, - "kmem:kmalloc")) { + if (!evlist__find_tracepoint_by_name(session->evlist, "kmem:kmalloc")) { pr_err(errmsg, "slab", "slab"); goto out_delete; } } if (kmem_page) { - struct evsel *evsel; + struct evsel *evsel = evlist__find_tracepoint_by_name(session->evlist, "kmem:mm_page_alloc"); - evsel = perf_evlist__find_tracepoint_by_name(session->evlist, - "kmem:mm_page_alloc"); if (evsel == NULL) { pr_err(errmsg, "page", "page"); goto out_delete; diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 0e16f9d5a9471..69c769b04a61f 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -3036,8 +3036,7 @@ static int perf_sched__timehist(struct perf_sched *sched) setup_pager(); /* prefer sched_waking if it is captured */ - if (perf_evlist__find_tracepoint_by_name(session->evlist, - "sched:sched_waking")) + if (evlist__find_tracepoint_by_name(session->evlist, "sched:sched_waking")) handlers[1].handler = timehist_sched_wakeup_ignore; /* setup per-evsel handlers */ @@ -3045,8 +3044,7 @@ static int perf_sched__timehist(struct perf_sched *sched) goto out; /* sched_switch event at a minimum needs to exist */ - if (!perf_evlist__find_tracepoint_by_name(session->evlist, - "sched:sched_switch")) { + if (!evlist__find_tracepoint_by_name(session->evlist, "sched:sched_switch")) { pr_err("No sched_switch events found. Have you run 'perf sched record'?\n"); goto out; } diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 5638eae36ec8a..6e47b19a1dcba 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -4227,12 +4227,10 @@ static int trace__replay(struct trace *trace) if (err) goto out; - evsel = perf_evlist__find_tracepoint_by_name(session->evlist, - "raw_syscalls:sys_enter"); + evsel = evlist__find_tracepoint_by_name(session->evlist, "raw_syscalls:sys_enter"); /* older kernels have syscalls tp versus raw_syscalls */ if (evsel == NULL) - evsel = perf_evlist__find_tracepoint_by_name(session->evlist, - "syscalls:sys_enter"); + evsel = evlist__find_tracepoint_by_name(session->evlist, "syscalls:sys_enter"); if (evsel && (evsel__init_raw_syscall_tp(evsel, trace__sys_enter) < 0 || @@ -4241,11 +4239,9 @@ static int trace__replay(struct trace *trace) goto out; } - evsel = perf_evlist__find_tracepoint_by_name(session->evlist, - "raw_syscalls:sys_exit"); + evsel = evlist__find_tracepoint_by_name(session->evlist, "raw_syscalls:sys_exit"); if (evsel == NULL) - evsel = perf_evlist__find_tracepoint_by_name(session->evlist, - "syscalls:sys_exit"); + evsel = evlist__find_tracepoint_by_name(session->evlist, "syscalls:sys_exit"); if (evsel && (evsel__init_raw_syscall_tp(evsel, trace__sys_exit) < 0 || perf_evsel__init_sc_tp_uint_field(evsel, ret))) { @@ -4905,7 +4901,7 @@ int cmd_trace(int argc, const char **argv) if (evsel) { trace.syscalls.events.augmented = evsel; - evsel = perf_evlist__find_tracepoint_by_name(trace.evlist, "raw_syscalls:sys_enter"); + evsel = evlist__find_tracepoint_by_name(trace.evlist, "raw_syscalls:sys_enter"); if (evsel == NULL) { pr_err("ERROR: raw_syscalls:sys_enter not found in the augmented BPF object\n"); goto out; diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index 0374adcb223c7..9087f1bffd3d6 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -1058,12 +1058,11 @@ __bpf_map__config_event(struct bpf_map *map, struct parse_events_term *term, struct evlist *evlist) { - struct evsel *evsel; const struct bpf_map_def *def; struct bpf_map_op *op; const char *map_name = bpf_map__name(map); + struct evsel *evsel = evlist__find_evsel_by_str(evlist, term->val.str); - evsel = perf_evlist__find_evsel_by_str(evlist, term->val.str); if (!evsel) { pr_debug("Event (for '%s') '%s' doesn't exist\n", map_name, term->val.str); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 943262aa6505b..e6b2715ad31d1 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -191,13 +191,12 @@ void perf_evlist__splice_list_tail(struct evlist *evlist, int __evlist__set_tracepoints_handlers(struct evlist *evlist, const struct evsel_str_handler *assocs, size_t nr_assocs) { - struct evsel *evsel; size_t i; int err; for (i = 0; i < nr_assocs; i++) { // Adding a handler for an event not in this evlist, just ignore it. - evsel = perf_evlist__find_tracepoint_by_name(evlist, assocs[i].name); + struct evsel *evsel = evlist__find_tracepoint_by_name(evlist, assocs[i].name); if (evsel == NULL) continue; @@ -294,8 +293,7 @@ int __evlist__add_default_attrs(struct evlist *evlist, struct perf_event_attr *a return evlist__add_attrs(evlist, attrs, nr_attrs); } -struct evsel * -perf_evlist__find_tracepoint_by_id(struct evlist *evlist, int id) +struct evsel *evlist__find_tracepoint_by_id(struct evlist *evlist, int id) { struct evsel *evsel; @@ -308,9 +306,7 @@ perf_evlist__find_tracepoint_by_id(struct evlist *evlist, int id) return NULL; } -struct evsel * -perf_evlist__find_tracepoint_by_name(struct evlist *evlist, - const char *name) +struct evsel *evlist__find_tracepoint_by_name(struct evlist *evlist, const char *name) { struct evsel *evsel; @@ -1594,9 +1590,7 @@ void perf_evlist__set_tracking_event(struct evlist *evlist, tracking_evsel->tracking = true; } -struct evsel * -perf_evlist__find_evsel_by_str(struct evlist *evlist, - const char *str) +struct evsel *evlist__find_evsel_by_str(struct evlist *evlist, const char *str) { struct evsel *evsel; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index c59a08cf9f25a..aa9b3f9431e76 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -147,12 +147,8 @@ int evlist__append_tp_filter(struct evlist *evlist, const char *filter); int evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid); int evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids); -struct evsel * -perf_evlist__find_tracepoint_by_id(struct evlist *evlist, int id); - -struct evsel * -perf_evlist__find_tracepoint_by_name(struct evlist *evlist, - const char *name); +struct evsel *evlist__find_tracepoint_by_id(struct evlist *evlist, int id); +struct evsel *evlist__find_tracepoint_by_name(struct evlist *evlist, const char *name); int evlist__add_pollfd(struct evlist *evlist, int fd); int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask); @@ -338,8 +334,7 @@ void evlist__cpu_iter_start(struct evlist *evlist); bool evsel__cpu_iter_skip(struct evsel *ev, int cpu); bool evsel__cpu_iter_skip_no_inc(struct evsel *ev, int cpu); -struct evsel * -perf_evlist__find_evsel_by_str(struct evlist *evlist, const char *str); +struct evsel *evlist__find_evsel_by_str(struct evlist *evlist, const char *str); struct evsel *perf_evlist__event2evsel(struct evlist *evlist, union perf_event *event); diff --git a/tools/perf/util/evswitch.c b/tools/perf/util/evswitch.c index 3ba72f743d3c9..40cb56a9347da 100644 --- a/tools/perf/util/evswitch.c +++ b/tools/perf/util/evswitch.c @@ -41,7 +41,7 @@ static int evswitch__fprintf_enoent(FILE *fp, const char *evtype, const char *ev int evswitch__init(struct evswitch *evswitch, struct evlist *evlist, FILE *fp) { if (evswitch->on_name) { - evswitch->on = perf_evlist__find_evsel_by_str(evlist, evswitch->on_name); + evswitch->on = evlist__find_evsel_by_str(evlist, evswitch->on_name); if (evswitch->on == NULL) { evswitch__fprintf_enoent(fp, "on", evswitch->on_name); return -ENOENT; @@ -50,7 +50,7 @@ int evswitch__init(struct evswitch *evswitch, struct evlist *evlist, FILE *fp) } if (evswitch->off_name) { - evswitch->off = perf_evlist__find_evsel_by_str(evlist, evswitch->off_name); + evswitch->off = evlist__find_evsel_by_str(evlist, evswitch->off_name); if (evswitch->off == NULL) { evswitch__fprintf_enoent(fp, "off", evswitch->off_name); return -ENOENT; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index b65c8f7ce36a0..64a3b83b30901 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2263,8 +2263,7 @@ static int process_total_mem(struct feat_fd *ff, void *data __maybe_unused) return 0; } -static struct evsel * -perf_evlist__find_by_index(struct evlist *evlist, int idx) +static struct evsel *evlist__find_by_index(struct evlist *evlist, int idx) { struct evsel *evsel; @@ -2285,7 +2284,7 @@ perf_evlist__set_event_name(struct evlist *evlist, if (!event->name) return; - evsel = perf_evlist__find_by_index(evlist, event->idx); + evsel = evlist__find_by_index(evlist, event->idx); if (!evsel) return; -- GitLab From fd643db5a8797dde0fe8d6f2fd01f36971d43fe0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 09:54:08 -0300 Subject: [PATCH 0324/1894] perf evlist: Ditch unused set/reset sample_bit methods Not used anymore, ditch them. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 18 ------------------ tools/perf/util/evlist.h | 11 ----------- 2 files changed, 29 deletions(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index e6b2715ad31d1..86172ee394d0d 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -953,24 +953,6 @@ out_delete_threads: return -1; } -void __perf_evlist__set_sample_bit(struct evlist *evlist, - enum perf_event_sample_format bit) -{ - struct evsel *evsel; - - evlist__for_each_entry(evlist, evsel) - __evsel__set_sample_bit(evsel, bit); -} - -void __perf_evlist__reset_sample_bit(struct evlist *evlist, - enum perf_event_sample_format bit) -{ - struct evsel *evsel; - - evlist__for_each_entry(evlist, evsel) - __evsel__reset_sample_bit(evsel, bit); -} - int evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel) { struct evsel *evsel; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index aa9b3f9431e76..a1288b35717c8 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -127,17 +127,6 @@ int __evlist__set_tracepoints_handlers(struct evlist *evlist, #define evlist__set_tracepoints_handlers(evlist, array) \ __evlist__set_tracepoints_handlers(evlist, array, ARRAY_SIZE(array)) -void __perf_evlist__set_sample_bit(struct evlist *evlist, - enum perf_event_sample_format bit); -void __perf_evlist__reset_sample_bit(struct evlist *evlist, - enum perf_event_sample_format bit); - -#define perf_evlist__set_sample_bit(evlist, bit) \ - __perf_evlist__set_sample_bit(evlist, PERF_SAMPLE_##bit) - -#define perf_evlist__reset_sample_bit(evlist, bit) \ - __perf_evlist__reset_sample_bit(evlist, PERF_SAMPLE_##bit) - int evlist__set_tp_filter(struct evlist *evlist, const char *filter); int evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid); int evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids); -- GitLab From 43d2479687c93ed9b93774ef9b46b37de5b3efcc Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Thu, 19 Nov 2020 19:43:02 +0800 Subject: [PATCH 0325/1894] clk: imx: scu: remove the calling of device_is_bound The device_is_bound() is invisible to drivers when built as modules. It's also not aimed to be used by drivers according to Greg K.H. Let's remove it from clk-scu driver and find another way to do proper driver loading sequence. Cc: Shawn Guo Cc: Sascha Hauer Cc: Stephen Boyd Fixes: 77d8f3068c63 ("clk: imx: scu: add two cells binding support") Reported-by: Sudip Mukherjee Signed-off-by: Dong Aisheng Signed-off-by: Shawn Guo --- drivers/clk/imx/clk-scu.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/drivers/clk/imx/clk-scu.c b/drivers/clk/imx/clk-scu.c index d10f60e48ece8..1f5518b7ab39e 100644 --- a/drivers/clk/imx/clk-scu.c +++ b/drivers/clk/imx/clk-scu.c @@ -153,7 +153,6 @@ static inline struct clk_scu *to_clk_scu(struct clk_hw *hw) int imx_clk_scu_init(struct device_node *np) { - struct platform_device *pd_dev; u32 clk_cells; int ret, i; @@ -166,17 +165,11 @@ int imx_clk_scu_init(struct device_node *np) if (clk_cells == 2) { for (i = 0; i < IMX_SC_R_LAST; i++) INIT_LIST_HEAD(&imx_scu_clks[i]); - /* - * Note: SCU clock driver depends on SCU power domain to be ready - * first. As there're no power domains under scu clock node in dts, - * we can't use PROBE_DEFER automatically. - */ + + /* pd_np will be used to attach power domains later */ pd_np = of_find_compatible_node(NULL, NULL, "fsl,scu-pd"); - pd_dev = of_find_device_by_node(pd_np); - if (!pd_dev || !device_is_bound(&pd_dev->dev)) { - of_node_put(pd_np); - return -EPROBE_DEFER; - } + if (!pd_np) + return -EINVAL; } return platform_driver_register(&imx_clk_scu_driver); -- GitLab From 4f1df628d4ec22b04f67e068e6d02538d3dd557b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 26 Nov 2020 17:27:13 +0000 Subject: [PATCH 0326/1894] KVM: arm64: Advertise ID_AA64PFR0_EL1.CSV3=1 if the CPUs are Meltdown-safe Cores that predate the introduction of ID_AA64PFR0_EL1.CSV3 to the ARMv8 architecture have this field set to 0, even of some of them are not affected by the vulnerability. The kernel maintains a list of unaffected cores (A53, A55 and a few others) so that it doesn't impose an expensive mitigation uncessarily. As we do for CSV2, let's expose the CSV3 property to guests that run on HW that is effectively not vulnerable. This can be reset to zero by writing to the ID register from userspace, ensuring that VMs can be migrated despite the new property being set. Reported-by: Will Deacon Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/kvm/arm.c | 6 ++++-- arch/arm64/kvm/sys_regs.c | 16 +++++++++++++--- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 0cd9f0f75c135..147347028a208 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -120,6 +120,7 @@ struct kvm_arch { unsigned int pmuver; u8 pfr0_csv2; + u8 pfr0_csv3; }; struct kvm_vcpu_fault_info { diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index c0ffb019ca8be..dc3fa6a0f9e5c 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -102,7 +102,7 @@ static int kvm_arm_default_max_vcpus(void) return vgic_present ? kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS; } -static void set_default_csv2(struct kvm *kvm) +static void set_default_spectre(struct kvm *kvm) { /* * The default is to expose CSV2 == 1 if the HW isn't affected. @@ -114,6 +114,8 @@ static void set_default_csv2(struct kvm *kvm) */ if (arm64_get_spectre_v2_state() == SPECTRE_UNAFFECTED) kvm->arch.pfr0_csv2 = 1; + if (arm64_get_meltdown_state() == SPECTRE_UNAFFECTED) + kvm->arch.pfr0_csv3 = 1; } /** @@ -141,7 +143,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) /* The maximum number of VCPUs is limited by the host's GIC model */ kvm->arch.max_vcpus = kvm_arm_default_max_vcpus(); - set_default_csv2(kvm); + set_default_spectre(kvm); return ret; out_free_stage2_pgd: diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index c1fac9836af1a..6f653c0f60eca 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1122,6 +1122,8 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, val &= ~(0xfUL << ID_AA64PFR0_AMU_SHIFT); val &= ~(0xfUL << ID_AA64PFR0_CSV2_SHIFT); val |= ((u64)vcpu->kvm->arch.pfr0_csv2 << ID_AA64PFR0_CSV2_SHIFT); + val &= ~(0xfUL << ID_AA64PFR0_CSV3_SHIFT); + val |= ((u64)vcpu->kvm->arch.pfr0_csv3 << ID_AA64PFR0_CSV3_SHIFT); } else if (id == SYS_ID_AA64PFR1_EL1) { val &= ~(0xfUL << ID_AA64PFR1_MTE_SHIFT); } else if (id == SYS_ID_AA64ISAR1_EL1 && !vcpu_has_ptrauth(vcpu)) { @@ -1209,9 +1211,9 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg, void __user *uaddr) { const u64 id = sys_reg_to_index(rd); + u8 csv2, csv3; int err; u64 val; - u8 csv2; err = reg_from_user(&val, uaddr, id); if (err) @@ -1227,13 +1229,21 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu, (csv2 && arm64_get_spectre_v2_state() != SPECTRE_UNAFFECTED)) return -EINVAL; - /* We can only differ with CSV2, and anything else is an error */ + /* Same thing for CSV3 */ + csv3 = cpuid_feature_extract_unsigned_field(val, ID_AA64PFR0_CSV3_SHIFT); + if (csv3 > 1 || + (csv3 && arm64_get_meltdown_state() != SPECTRE_UNAFFECTED)) + return -EINVAL; + + /* We can only differ with CSV[23], and anything else is an error */ val ^= read_id_reg(vcpu, rd, false); - val &= ~(0xFUL << ID_AA64PFR0_CSV2_SHIFT); + val &= ~((0xFUL << ID_AA64PFR0_CSV2_SHIFT) | + (0xFUL << ID_AA64PFR0_CSV3_SHIFT)); if (val) return -EINVAL; vcpu->kvm->arch.pfr0_csv2 = csv2; + vcpu->kvm->arch.pfr0_csv3 = csv3 ; return 0; } -- GitLab From 3ccf8a7b66b6bff69a7be62f2d5a2a61328ebe91 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 14:17:57 -0300 Subject: [PATCH 0327/1894] perf evlist: Use the right prefix for 'struct evlist' sample id lookup methods perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 2 +- tools/perf/builtin-top.c | 2 +- tools/perf/builtin-trace.c | 2 +- tools/perf/tests/mmap-basic.c | 2 +- tools/perf/tests/switch-tracking.c | 2 +- tools/perf/util/auxtrace.c | 4 ++-- tools/perf/util/evlist.c | 23 ++++++++++------------- tools/perf/util/evlist.h | 10 ++++------ tools/perf/util/evsel.c | 2 +- tools/perf/util/header.c | 2 +- tools/perf/util/intel-pt.c | 3 +-- tools/perf/util/python.c | 2 +- tools/perf/util/s390-cpumsf.c | 2 +- tools/perf/util/s390-sample-raw.c | 2 +- tools/perf/util/session.c | 6 +++--- tools/perf/util/sideband_evlist.c | 2 +- tools/perf/util/stat.c | 2 +- tools/perf/util/synthetic-events.c | 2 +- 18 files changed, 33 insertions(+), 39 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 983c101965ab6..1c322c1291855 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2224,7 +2224,7 @@ static int print_event_with_time(struct perf_tool *tool, { struct perf_script *script = container_of(tool, struct perf_script, tool); struct perf_session *session = script->session; - struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); + struct evsel *evsel = evlist__id2evsel(session->evlist, sample->id); struct thread *thread = NULL; if (evsel && !evsel->core.attr.sample_id_all) { diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index fff1d5ef5c493..88bd1ac5c8cf6 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1159,7 +1159,7 @@ static int deliver_event(struct ordered_events *qe, goto next_event; } - evsel = perf_evlist__id2evsel(session->evlist, sample.id); + evsel = evlist__id2evsel(session->evlist, sample.id); assert(evsel != NULL); if (event->header.type == PERF_RECORD_SAMPLE) { diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 6e47b19a1dcba..a463ce8fb8e09 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3105,7 +3105,7 @@ static void trace__handle_event(struct trace *trace, union perf_event *event, st return; } - evsel = perf_evlist__id2evsel(trace->evlist, sample->id); + evsel = evlist__id2evsel(trace->evlist, sample->id); if (evsel == NULL) { fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id); return; diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index d826153adbf84..c01d7e12d2419 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -133,7 +133,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse } err = -1; - evsel = perf_evlist__id2evsel(evlist, sample.id); + evsel = evlist__id2evsel(evlist, sample.id); if (evsel == NULL) { pr_debug("event with id %" PRIu64 " doesn't map to an evsel\n", sample.id); diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index b8cdbe6f324f2..3c0bc3f1fd0e1 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -133,7 +133,7 @@ static int process_sample_event(struct evlist *evlist, return -1; } - evsel = perf_evlist__id2evsel(evlist, sample.id); + evsel = evlist__id2evsel(evlist, sample.id); if (evsel == switch_tracking->switch_evsel) { next_tid = evsel__intval(evsel, &sample, "next_pid"); prev_tid = evsel__intval(evsel, &sample, "prev_pid"); diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 647afdcb26995..b538b9093567c 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -1017,7 +1017,7 @@ struct auxtrace_queue *auxtrace_queues__sample_queue(struct auxtrace_queues *que if (!id) return NULL; - sid = perf_evlist__id2sid(session->evlist, id); + sid = evlist__id2sid(session->evlist, id); if (!sid) return NULL; @@ -1047,7 +1047,7 @@ int auxtrace_queues__add_sample(struct auxtrace_queues *queues, if (!id) return -EINVAL; - sid = perf_evlist__id2sid(session->evlist, id); + sid = evlist__id2sid(session->evlist, id); if (!sid) return -ENOENT; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 86172ee394d0d..51775ff2979f2 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -509,7 +509,7 @@ int evlist__poll(struct evlist *evlist, int timeout) return perf_evlist__poll(&evlist->core, timeout); } -struct perf_sample_id *perf_evlist__id2sid(struct evlist *evlist, u64 id) +struct perf_sample_id *evlist__id2sid(struct evlist *evlist, u64 id) { struct hlist_head *head; struct perf_sample_id *sid; @@ -525,14 +525,14 @@ struct perf_sample_id *perf_evlist__id2sid(struct evlist *evlist, u64 id) return NULL; } -struct evsel *perf_evlist__id2evsel(struct evlist *evlist, u64 id) +struct evsel *evlist__id2evsel(struct evlist *evlist, u64 id) { struct perf_sample_id *sid; if (evlist->core.nr_entries == 1 || !id) return evlist__first(evlist); - sid = perf_evlist__id2sid(evlist, id); + sid = evlist__id2sid(evlist, id); if (sid) return container_of(sid->evsel, struct evsel, core); @@ -542,23 +542,21 @@ struct evsel *perf_evlist__id2evsel(struct evlist *evlist, u64 id) return NULL; } -struct evsel *perf_evlist__id2evsel_strict(struct evlist *evlist, - u64 id) +struct evsel *evlist__id2evsel_strict(struct evlist *evlist, u64 id) { struct perf_sample_id *sid; if (!id) return NULL; - sid = perf_evlist__id2sid(evlist, id); + sid = evlist__id2sid(evlist, id); if (sid) return container_of(sid->evsel, struct evsel, core); return NULL; } -static int perf_evlist__event2id(struct evlist *evlist, - union perf_event *event, u64 *id) +static int evlist__event2id(struct evlist *evlist, union perf_event *event, u64 *id) { const __u64 *array = event->sample.array; ssize_t n; @@ -578,8 +576,7 @@ static int perf_evlist__event2id(struct evlist *evlist, return 0; } -struct evsel *perf_evlist__event2evsel(struct evlist *evlist, - union perf_event *event) +struct evsel *evlist__event2evsel(struct evlist *evlist, union perf_event *event) { struct evsel *first = evlist__first(evlist); struct hlist_head *head; @@ -594,7 +591,7 @@ struct evsel *perf_evlist__event2evsel(struct evlist *evlist, event->header.type != PERF_RECORD_SAMPLE) return first; - if (perf_evlist__event2id(evlist, event, &id)) + if (evlist__event2id(evlist, event, &id)) return NULL; /* Synthesized events have an id of zero */ @@ -1427,7 +1424,7 @@ int evlist__start_workload(struct evlist *evlist) int evlist__parse_sample(struct evlist *evlist, union perf_event *event, struct perf_sample *sample) { - struct evsel *evsel = perf_evlist__event2evsel(evlist, event); + struct evsel *evsel = evlist__event2evsel(evlist, event); if (!evsel) return -EFAULT; @@ -1436,7 +1433,7 @@ int evlist__parse_sample(struct evlist *evlist, union perf_event *event, struct int evlist__parse_sample_timestamp(struct evlist *evlist, union perf_event *event, u64 *timestamp) { - struct evsel *evsel = perf_evlist__event2evsel(evlist, event); + struct evsel *evsel = evlist__event2evsel(evlist, event); if (!evsel) return -EFAULT; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index a1288b35717c8..90f1127fecf68 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -144,11 +144,10 @@ int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask); int evlist__poll(struct evlist *evlist, int timeout); -struct evsel *perf_evlist__id2evsel(struct evlist *evlist, u64 id); -struct evsel *perf_evlist__id2evsel_strict(struct evlist *evlist, - u64 id); +struct evsel *evlist__id2evsel(struct evlist *evlist, u64 id); +struct evsel *evlist__id2evsel_strict(struct evlist *evlist, u64 id); -struct perf_sample_id *perf_evlist__id2sid(struct evlist *evlist, u64 id); +struct perf_sample_id *evlist__id2sid(struct evlist *evlist, u64 id); void evlist__toggle_bkw_mmap(struct evlist *evlist, enum bkw_mmap_state state); @@ -325,8 +324,7 @@ bool evsel__cpu_iter_skip_no_inc(struct evsel *ev, int cpu); struct evsel *evlist__find_evsel_by_str(struct evlist *evlist, const char *str); -struct evsel *perf_evlist__event2evsel(struct evlist *evlist, - union perf_event *event); +struct evsel *evlist__event2evsel(struct evlist *evlist, union perf_event *event); bool perf_evlist__exclude_kernel(struct evlist *evlist); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 9005cc974f43f..3dd0eae9810db 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1469,7 +1469,7 @@ static int evsel__process_group_data(struct evsel *leader, int cpu, int thread, for (i = 1; i < nr; i++) { struct evsel *counter; - counter = perf_evlist__id2evsel(leader->evlist, v[i].id); + counter = evlist__id2evsel(leader->evlist, v[i].id); if (!counter) return -EINVAL; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 64a3b83b30901..be219051119ce 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -4030,7 +4030,7 @@ int perf_event__process_event_update(struct perf_tool *tool __maybe_unused, evlist = *pevlist; - evsel = perf_evlist__id2evsel(evlist, ev->id); + evsel = evlist__id2evsel(evlist, ev->id); if (evsel == NULL) return -EINVAL; diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 3a0348caec7d6..60214de42f31b 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -2520,11 +2520,10 @@ static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid, static int intel_pt_process_switch(struct intel_pt *pt, struct perf_sample *sample) { - struct evsel *evsel; pid_t tid; int cpu, ret; + struct evsel *evsel = evlist__id2evsel(pt->session->evlist, sample->id); - evsel = perf_evlist__id2evsel(pt->session->evlist, sample->id); if (evsel != pt->switch_evsel) return 0; diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 3d3b465148630..cc5ade85a33fc 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -1055,7 +1055,7 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist, if (pyevent == NULL) return PyErr_NoMemory(); - evsel = perf_evlist__event2evsel(evlist, event); + evsel = evlist__event2evsel(evlist, event); if (!evsel) { Py_INCREF(Py_None); return Py_None; diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c index 959e9890bccce..078a717735654 100644 --- a/tools/perf/util/s390-cpumsf.c +++ b/tools/perf/util/s390-cpumsf.c @@ -932,7 +932,7 @@ s390_cpumsf_process_event(struct perf_session *session, if (event->header.type == PERF_RECORD_SAMPLE && sample->raw_size) { /* Handle event with raw data */ - ev_bc000 = perf_evlist__event2evsel(session->evlist, event); + ev_bc000 = evlist__event2evsel(session->evlist, event); if (ev_bc000 && ev_bc000->core.attr.config == PERF_EVENT_CPUM_CF_DIAG) err = s390_cpumcf_dumpctr(sf, sample); diff --git a/tools/perf/util/s390-sample-raw.c b/tools/perf/util/s390-sample-raw.c index 05b43ab4eeefc..d177e61798395 100644 --- a/tools/perf/util/s390-sample-raw.c +++ b/tools/perf/util/s390-sample-raw.c @@ -205,7 +205,7 @@ void perf_evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event if (event->header.type != PERF_RECORD_SAMPLE) return; - ev_bc000 = perf_evlist__event2evsel(evlist, event); + ev_bc000 = evlist__event2evsel(evlist, event); if (ev_bc000 == NULL || ev_bc000->core.attr.config != PERF_EVENT_CPUM_CF_DIAG) return; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 61e4b3cc73135..ce381b3dca066 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1364,7 +1364,7 @@ static int deliver_sample_value(struct evlist *evlist, struct sample_read_value *v, struct machine *machine) { - struct perf_sample_id *sid = perf_evlist__id2sid(evlist, v->id); + struct perf_sample_id *sid = evlist__id2sid(evlist, v->id); struct evsel *evsel; if (sid) { @@ -1445,7 +1445,7 @@ static int machines__deliver_event(struct machines *machines, dump_event(evlist, event, file_offset, sample); - evsel = perf_evlist__id2evsel(evlist, sample->id); + evsel = evlist__id2evsel(evlist, sample->id); machine = machines__find_for_cpumode(machines, event, sample); @@ -2476,7 +2476,7 @@ int perf_event__process_id_index(struct perf_session *session, fprintf(stdout, " tid: %"PRI_ld64"\n", e->tid); } - sid = perf_evlist__id2sid(evlist, e->id); + sid = evlist__id2sid(evlist, e->id); if (!sid) return -ENOENT; sid->idx = e->idx; diff --git a/tools/perf/util/sideband_evlist.c b/tools/perf/util/sideband_evlist.c index 90ed016bb3489..9c04c71dbf51f 100644 --- a/tools/perf/util/sideband_evlist.c +++ b/tools/perf/util/sideband_evlist.c @@ -62,7 +62,7 @@ static void *perf_evlist__poll_thread(void *arg) if (perf_mmap__read_init(&map->core)) continue; while ((event = perf_mmap__read_event(&map->core)) != NULL) { - struct evsel *evsel = perf_evlist__event2evsel(evlist, event); + struct evsel *evsel = evlist__event2evsel(evlist, event); if (evsel && evsel->side_band.cb) evsel->side_band.cb(event, evsel->side_band.data); diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 8be9c3da9e564..1e125e39ff84e 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -458,7 +458,7 @@ int perf_event__process_stat_event(struct perf_session *session, count.ena = st->ena; count.run = st->run; - counter = perf_evlist__id2evsel(session->evlist, st->id); + counter = evlist__id2evsel(session->evlist, st->id); if (!counter) { pr_err("Failed to resolve counter for stat event.\n"); return -EINVAL; diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index d9c624377da73..297987c6960b5 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -1643,7 +1643,7 @@ int perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_ e->id = evsel->core.id[j]; - sid = perf_evlist__id2sid(evlist, e->id); + sid = evlist__id2sid(evlist, e->id); if (!sid) { free(ev); return -ENOENT; -- GitLab From f4bd0b4a9b21c609ede28cee2dcd16824c0489a8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 14:23:35 -0300 Subject: [PATCH 0328/1894] perf evlist: Use the right prefix for 'struct evlist' browser methods perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 19 +++++++------------ tools/perf/builtin-top.c | 9 +++------ tools/perf/ui/browsers/hists.c | 25 ++++++++----------------- tools/perf/ui/gtk/gtk.h | 5 ++--- tools/perf/ui/gtk/hists.c | 6 ++---- tools/perf/util/hist.h | 23 ++++++++++------------- 6 files changed, 32 insertions(+), 55 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 3c74c9c0f3c38..11f13aafde440 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -493,8 +493,7 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report return ret + fprintf(fp, "\n#\n"); } -static int perf_evlist__tui_block_hists_browse(struct evlist *evlist, - struct report *rep) +static int evlist__tui_block_hists_browse(struct evlist *evlist, struct report *rep) { struct evsel *pos; int i = 0, ret; @@ -511,9 +510,7 @@ static int perf_evlist__tui_block_hists_browse(struct evlist *evlist, return 0; } -static int perf_evlist__tty_browse_hists(struct evlist *evlist, - struct report *rep, - const char *help) +static int evlist__tty_browse_hists(struct evlist *evlist, struct report *rep, const char *help) { struct evsel *pos; int i = 0; @@ -595,7 +592,7 @@ static int report__gtk_browse_hists(struct report *rep, const char *help) int (*hist_browser)(struct evlist *evlist, const char *help, struct hist_browser_timer *timer, float min_pcnt); - hist_browser = dlsym(perf_gtk_handle, "perf_evlist__gtk_browse_hists"); + hist_browser = dlsym(perf_gtk_handle, "evlist__gtk_browse_hists"); if (hist_browser == NULL) { ui__error("GTK browser not found!\n"); @@ -622,14 +619,12 @@ static int report__browse_hists(struct report *rep) switch (use_browser) { case 1: if (rep->total_cycles_mode) { - ret = perf_evlist__tui_block_hists_browse(evlist, rep); + ret = evlist__tui_block_hists_browse(evlist, rep); break; } - ret = perf_evlist__tui_browse_hists(evlist, help, NULL, - rep->min_percent, - &session->header.env, - true, &rep->annotation_opts); + ret = evlist__tui_browse_hists(evlist, help, NULL, rep->min_percent, + &session->header.env, true, &rep->annotation_opts); /* * Usually "ret" is the last pressed key, and we only * care if the key notifies us to switch data file. @@ -641,7 +636,7 @@ static int report__browse_hists(struct report *rep) ret = report__gtk_browse_hists(rep, help); break; default: - ret = perf_evlist__tty_browse_hists(evlist, rep, help); + ret = evlist__tty_browse_hists(evlist, rep, help); break; } diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 88bd1ac5c8cf6..5601a35a5ba42 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -641,12 +641,9 @@ repeat: hists->uid_filter_str = top->record_opts.target.uid_str; } - ret = perf_evlist__tui_browse_hists(top->evlist, help, &hbt, - top->min_percent, - &top->session->header.env, - !top->record_opts.overwrite, - &top->annotation_opts); - + ret = evlist__tui_browse_hists(top->evlist, help, &hbt, top->min_percent, + &top->session->header.env, !top->record_opts.overwrite, + &top->annotation_opts); if (ret == K_RELOAD) { top->zero = true; goto repeat; diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 48814efb98e8c..2f3ce505eec5b 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -3559,13 +3559,9 @@ static bool filter_group_entries(struct ui_browser *browser __maybe_unused, return false; } -static int __perf_evlist__tui_browse_hists(struct evlist *evlist, - int nr_entries, const char *help, - struct hist_browser_timer *hbt, - float min_pcnt, - struct perf_env *env, - bool warn_lost_event, - struct annotation_options *annotation_opts) +static int __evlist__tui_browse_hists(struct evlist *evlist, int nr_entries, const char *help, + struct hist_browser_timer *hbt, float min_pcnt, struct perf_env *env, + bool warn_lost_event, struct annotation_options *annotation_opts) { struct evsel *pos; struct evsel_menu menu = { @@ -3614,12 +3610,9 @@ static bool perf_evlist__single_entry(struct evlist *evlist) return false; } -int perf_evlist__tui_browse_hists(struct evlist *evlist, const char *help, - struct hist_browser_timer *hbt, - float min_pcnt, - struct perf_env *env, - bool warn_lost_event, - struct annotation_options *annotation_opts) +int evlist__tui_browse_hists(struct evlist *evlist, const char *help, struct hist_browser_timer *hbt, + float min_pcnt, struct perf_env *env, bool warn_lost_event, + struct annotation_options *annotation_opts) { int nr_entries = evlist->core.nr_entries; @@ -3645,10 +3638,8 @@ single_entry: { goto single_entry; } - return __perf_evlist__tui_browse_hists(evlist, nr_entries, help, - hbt, min_pcnt, env, - warn_lost_event, - annotation_opts); + return __evlist__tui_browse_hists(evlist, nr_entries, help, hbt, min_pcnt, env, + warn_lost_event, annotation_opts); } static int block_hists_browser__title(struct hist_browser *browser, char *bf, diff --git a/tools/perf/ui/gtk/gtk.h b/tools/perf/ui/gtk/gtk.h index a9563932fa04e..a2b497f03fd6e 100644 --- a/tools/perf/ui/gtk/gtk.h +++ b/tools/perf/ui/gtk/gtk.h @@ -57,9 +57,8 @@ struct evlist; struct hist_entry; struct hist_browser_timer; -int perf_evlist__gtk_browse_hists(struct evlist *evlist, const char *help, - struct hist_browser_timer *hbt, - float min_pcnt); +int evlist__gtk_browse_hists(struct evlist *evlist, const char *help, + struct hist_browser_timer *hbt, float min_pcnt); int hist_entry__gtk_annotate(struct hist_entry *he, struct evsel *evsel, struct hist_browser_timer *hbt); diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c index 53ef71a1b15dc..c83be2d57f7eb 100644 --- a/tools/perf/ui/gtk/hists.c +++ b/tools/perf/ui/gtk/hists.c @@ -590,10 +590,8 @@ static void perf_gtk__show_hierarchy(GtkWidget *window, struct hists *hists, gtk_container_add(GTK_CONTAINER(window), view); } -int perf_evlist__gtk_browse_hists(struct evlist *evlist, - const char *help, - struct hist_browser_timer *hbt __maybe_unused, - float min_pcnt) +int evlist__gtk_browse_hists(struct evlist *evlist, const char *help, + struct hist_browser_timer *hbt __maybe_unused, float min_pcnt) { struct evsel *pos; GtkWidget *vbox; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 96b1c13bbccc5..278b49e915b07 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -464,12 +464,9 @@ int hist_entry__tui_annotate(struct hist_entry *he, struct evsel *evsel, struct hist_browser_timer *hbt, struct annotation_options *annotation_opts); -int perf_evlist__tui_browse_hists(struct evlist *evlist, const char *help, - struct hist_browser_timer *hbt, - float min_pcnt, - struct perf_env *env, - bool warn_lost_event, - struct annotation_options *annotation_options); +int evlist__tui_browse_hists(struct evlist *evlist, const char *help, struct hist_browser_timer *hbt, + float min_pcnt, struct perf_env *env, bool warn_lost_event, + struct annotation_options *annotation_options); int script_browse(const char *script_opt, struct evsel *evsel); @@ -483,13 +480,13 @@ int block_hists_tui_browse(struct block_hist *bh, struct evsel *evsel, struct annotation_options *annotation_opts); #else static inline -int perf_evlist__tui_browse_hists(struct evlist *evlist __maybe_unused, - const char *help __maybe_unused, - struct hist_browser_timer *hbt __maybe_unused, - float min_pcnt __maybe_unused, - struct perf_env *env __maybe_unused, - bool warn_lost_event __maybe_unused, - struct annotation_options *annotation_options __maybe_unused) +int evlist__tui_browse_hists(struct evlist *evlist __maybe_unused, + const char *help __maybe_unused, + struct hist_browser_timer *hbt __maybe_unused, + float min_pcnt __maybe_unused, + struct perf_env *env __maybe_unused, + bool warn_lost_event __maybe_unused, + struct annotation_options *annotation_options __maybe_unused) { return 0; } -- GitLab From e80db255525a014a78af414b346413142e9142da Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 14:39:41 -0300 Subject: [PATCH 0329/1894] perf evlist: Use the right prefix for 'struct evlist' tracking event methods perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm/util/cs-etm.c | 2 +- tools/perf/arch/arm64/util/arm-spe.c | 2 +- tools/perf/arch/x86/util/intel-bts.c | 2 +- tools/perf/arch/x86/util/intel-pt.c | 2 +- tools/perf/builtin-record.c | 4 ++-- tools/perf/tests/switch-tracking.c | 2 +- tools/perf/util/evlist.c | 5 ++--- tools/perf/util/evlist.h | 5 ++--- 8 files changed, 11 insertions(+), 13 deletions(-) diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index cad7bf7834138..bc0afeb903c26 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -420,7 +420,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, goto out; tracking_evsel = evlist__last(evlist); - perf_evlist__set_tracking_event(evlist, tracking_evsel); + evlist__set_tracking_event(evlist, tracking_evsel); tracking_evsel->core.attr.freq = 0; tracking_evsel->core.attr.sample_period = 1; diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c index e3593063b3d17..beccf1b36654e 100644 --- a/tools/perf/arch/arm64/util/arm-spe.c +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -130,7 +130,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, return err; tracking_evsel = evlist__last(evlist); - perf_evlist__set_tracking_event(evlist, tracking_evsel); + evlist__set_tracking_event(evlist, tracking_evsel); tracking_evsel->core.attr.freq = 0; tracking_evsel->core.attr.sample_period = 1; diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c index 0dc09b5809c12..66ca98df48b0f 100644 --- a/tools/perf/arch/x86/util/intel-bts.c +++ b/tools/perf/arch/x86/util/intel-bts.c @@ -238,7 +238,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr, tracking_evsel = evlist__last(evlist); - perf_evlist__set_tracking_event(evlist, tracking_evsel); + evlist__set_tracking_event(evlist, tracking_evsel); tracking_evsel->core.attr.freq = 0; tracking_evsel->core.attr.sample_period = 1; diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index 082e5f2a415a3..f1b5380a74013 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -865,7 +865,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, tracking_evsel = evlist__last(evlist); - perf_evlist__set_tracking_event(evlist, tracking_evsel); + evlist__set_tracking_event(evlist, tracking_evsel); tracking_evsel->core.attr.freq = 0; tracking_evsel->core.attr.sample_period = 1; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 04946bd3d090f..2d74a410d008f 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -891,13 +891,13 @@ static int record__open(struct record *rec) * event synthesis. */ if (opts->initial_delay || target__has_cpu(&opts->target)) { - pos = perf_evlist__get_tracking_event(evlist); + pos = evlist__get_tracking_event(evlist); if (!evsel__is_dummy_event(pos)) { /* Set up dummy event. */ if (evlist__add_dummy(evlist)) return -ENOMEM; pos = evlist__last(evlist); - perf_evlist__set_tracking_event(evlist, pos); + evlist__set_tracking_event(evlist, pos); } /* diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index 3c0bc3f1fd0e1..8bf484a0f35d6 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -424,7 +424,7 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_ tracking_evsel = evlist__last(evlist); - perf_evlist__set_tracking_event(evlist, tracking_evsel); + evlist__set_tracking_event(evlist, tracking_evsel); tracking_evsel->core.attr.freq = 0; tracking_evsel->core.attr.sample_period = 1; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 51775ff2979f2..b5a53e118564e 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1541,7 +1541,7 @@ void perf_evlist__to_front(struct evlist *evlist, list_splice(&move, &evlist->core.entries); } -struct evsel *perf_evlist__get_tracking_event(struct evlist *evlist) +struct evsel *evlist__get_tracking_event(struct evlist *evlist) { struct evsel *evsel; @@ -1553,8 +1553,7 @@ struct evsel *perf_evlist__get_tracking_event(struct evlist *evlist) return evlist__first(evlist); } -void perf_evlist__set_tracking_event(struct evlist *evlist, - struct evsel *tracking_evsel) +void evlist__set_tracking_event(struct evlist *evlist, struct evsel *tracking_evsel) { struct evsel *evsel; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 90f1127fecf68..14bf7e2f9c0ef 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -314,9 +314,8 @@ void perf_evlist__to_front(struct evlist *evlist, evlist__cpu_iter_start(evlist); \ perf_cpu_map__for_each_cpu (cpu, index, (evlist)->core.all_cpus) -struct evsel *perf_evlist__get_tracking_event(struct evlist *evlist); -void perf_evlist__set_tracking_event(struct evlist *evlist, - struct evsel *tracking_evsel); +struct evsel *evlist__get_tracking_event(struct evlist *evlist); +void evlist__set_tracking_event(struct evlist *evlist, struct evsel *tracking_evsel); void evlist__cpu_iter_start(struct evlist *evlist); bool evsel__cpu_iter_skip(struct evsel *ev, int cpu); -- GitLab From 2a99ff822dfa4a88d54b2c4f17d33748bcedd899 Mon Sep 17 00:00:00 2001 From: Alexandre Truong Date: Fri, 27 Nov 2020 15:39:23 +0000 Subject: [PATCH 0330/1894] perf tools: Add aarch64 registers to --user-regs Previously, this command returns no help message on aarch64: -> ./perf record --user-regs=? available registers: Usage: perf record [] [] or: perf record [] -- [] With this change, the registers are listed. -> ./perf record --user-regs=? available registers: x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 x11 x12 x13 x14 x15 x16 x17 x18 x19 x20 x21 x22 x23 x24 x25 x26 x27 x28 x29 lr sp pc It's also now possible to record subsets of registers on aarch64: -> ./perf record --user-regs=x4,x5 ls -> ./perf report --dump-raw-trace 12801163749305260 0xc70 [0x40]: PERF_RECORD_SAMPLE(IP, 0x2): 51956/51956: 0xffffaa6571f0 period: 145785 addr: 0 ... user regs: mask 0x30 ABI 64-bit .... x4 0x000000000000006c .... x5 0x0000001001000001 ... thread: ls:51956 ...... dso: /usr/lib64/ld-2.17.so Signed-off-by: Alexandre Truong Tested-by: James Clark Acked-by: John Garry Cc: Jiri Olsa Cc: John Garry Cc: Leo Yan Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/20201127153923.26717-1-alexandre.truong@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm64/util/perf_regs.c | 33 ++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/tools/perf/arch/arm64/util/perf_regs.c b/tools/perf/arch/arm64/util/perf_regs.c index 2833e101a7c64..54efa12fdbea3 100644 --- a/tools/perf/arch/arm64/util/perf_regs.c +++ b/tools/perf/arch/arm64/util/perf_regs.c @@ -2,5 +2,38 @@ #include "../../../util/perf_regs.h" const struct sample_reg sample_reg_masks[] = { + SMPL_REG(x0, PERF_REG_ARM64_X0), + SMPL_REG(x1, PERF_REG_ARM64_X1), + SMPL_REG(x2, PERF_REG_ARM64_X2), + SMPL_REG(x3, PERF_REG_ARM64_X3), + SMPL_REG(x4, PERF_REG_ARM64_X4), + SMPL_REG(x5, PERF_REG_ARM64_X5), + SMPL_REG(x6, PERF_REG_ARM64_X6), + SMPL_REG(x7, PERF_REG_ARM64_X7), + SMPL_REG(x8, PERF_REG_ARM64_X8), + SMPL_REG(x9, PERF_REG_ARM64_X9), + SMPL_REG(x10, PERF_REG_ARM64_X10), + SMPL_REG(x11, PERF_REG_ARM64_X11), + SMPL_REG(x12, PERF_REG_ARM64_X12), + SMPL_REG(x13, PERF_REG_ARM64_X13), + SMPL_REG(x14, PERF_REG_ARM64_X14), + SMPL_REG(x15, PERF_REG_ARM64_X15), + SMPL_REG(x16, PERF_REG_ARM64_X16), + SMPL_REG(x17, PERF_REG_ARM64_X17), + SMPL_REG(x18, PERF_REG_ARM64_X18), + SMPL_REG(x19, PERF_REG_ARM64_X19), + SMPL_REG(x20, PERF_REG_ARM64_X20), + SMPL_REG(x21, PERF_REG_ARM64_X21), + SMPL_REG(x22, PERF_REG_ARM64_X22), + SMPL_REG(x23, PERF_REG_ARM64_X23), + SMPL_REG(x24, PERF_REG_ARM64_X24), + SMPL_REG(x25, PERF_REG_ARM64_X25), + SMPL_REG(x26, PERF_REG_ARM64_X26), + SMPL_REG(x27, PERF_REG_ARM64_X27), + SMPL_REG(x28, PERF_REG_ARM64_X28), + SMPL_REG(x29, PERF_REG_ARM64_X29), + SMPL_REG(lr, PERF_REG_ARM64_LR), + SMPL_REG(sp, PERF_REG_ARM64_SP), + SMPL_REG(pc, PERF_REG_ARM64_PC), SMPL_REG_END }; -- GitLab From 0a7e7ec90e601d98cc5914626b78fd043598b85b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 14:44:40 -0300 Subject: [PATCH 0331/1894] perf evlist: Use the right prefix for 'struct evlist' id_pos methods perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 12 ++++++------ tools/perf/util/evlist.h | 2 +- tools/perf/util/record.c | 2 +- tools/perf/util/sideband_evlist.c | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index b5a53e118564e..26c436abac77d 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -103,13 +103,13 @@ struct evlist *perf_evlist__new_dummy(void) } /** - * perf_evlist__set_id_pos - set the positions of event ids. + * evlist__set_id_pos - set the positions of event ids. * @evlist: selected event list * * Events with compatible sample types all have the same id_pos * and is_pos. For convenience, put a copy on evlist. */ -void perf_evlist__set_id_pos(struct evlist *evlist) +void evlist__set_id_pos(struct evlist *evlist) { struct evsel *first = evlist__first(evlist); @@ -117,14 +117,14 @@ void perf_evlist__set_id_pos(struct evlist *evlist) evlist->is_pos = first->is_pos; } -static void perf_evlist__update_id_pos(struct evlist *evlist) +static void evlist__update_id_pos(struct evlist *evlist) { struct evsel *evsel; evlist__for_each_entry(evlist, evsel) evsel__calc_id_pos(evsel); - perf_evlist__set_id_pos(evlist); + evlist__set_id_pos(evlist); } static void evlist__purge(struct evlist *evlist) @@ -168,7 +168,7 @@ void evlist__add(struct evlist *evlist, struct evsel *entry) perf_evlist__add(&evlist->core, &entry->core); if (evlist->core.nr_entries == 1) - perf_evlist__set_id_pos(evlist); + evlist__set_id_pos(evlist); } void evlist__remove(struct evlist *evlist, struct evsel *evsel) @@ -1275,7 +1275,7 @@ int evlist__open(struct evlist *evlist) goto out_err; } - perf_evlist__update_id_pos(evlist); + evlist__update_id_pos(evlist); evlist__for_each_entry(evlist, evsel) { err = evsel__open(evsel, evsel->core.cpus, evsel->core.threads); diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 14bf7e2f9c0ef..85da0c443ae61 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -158,7 +158,7 @@ void evlist__close(struct evlist *evlist); struct callchain_param; -void perf_evlist__set_id_pos(struct evlist *evlist); +void evlist__set_id_pos(struct evlist *evlist); void perf_evlist__config(struct evlist *evlist, struct record_opts *opts, struct callchain_param *callchain); int record_opts__config(struct record_opts *opts); diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index 7330407db4ba1..220b9910427bb 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -144,7 +144,7 @@ void perf_evlist__config(struct evlist *evlist, struct record_opts *opts, evsel__set_sample_id(evsel, use_sample_identifier); } - perf_evlist__set_id_pos(evlist); + evlist__set_id_pos(evlist); } static int get_max_rate(unsigned int *rate) diff --git a/tools/perf/util/sideband_evlist.c b/tools/perf/util/sideband_evlist.c index 9c04c71dbf51f..13f387b764565 100644 --- a/tools/perf/util/sideband_evlist.c +++ b/tools/perf/util/sideband_evlist.c @@ -110,7 +110,7 @@ int evlist__start_sb_thread(struct evlist *evlist, struct target *target) evlist__for_each_entry(evlist, counter) evsel__set_sample_id(counter, can_sample_identifier); - perf_evlist__set_id_pos(evlist); + evlist__set_id_pos(evlist); } evlist__for_each_entry(evlist, counter) { -- GitLab From 37b01abe2a63db1b6a69af32257cb50795c725f8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 14:47:05 -0300 Subject: [PATCH 0332/1894] perf evlist: Use the right prefix for 'struct evlist' enable event methods perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/auxtrace.c | 3 +-- tools/perf/util/evlist.c | 16 ++++++---------- tools/perf/util/evlist.h | 3 +-- 3 files changed, 8 insertions(+), 14 deletions(-) diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index b538b9093567c..8c94e21db2aa7 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -658,8 +658,7 @@ int auxtrace_record__read_finish(struct auxtrace_record *itr, int idx) if (evsel->core.attr.type == itr->pmu->type) { if (evsel->disabled) return 0; - return perf_evlist__enable_event_idx(itr->evlist, evsel, - idx); + return evlist__enable_event_idx(itr->evlist, evsel, idx); } } return -EINVAL; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 26c436abac77d..1c7b6d4a76bea 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -448,8 +448,7 @@ void evlist__toggle_enable(struct evlist *evlist) (evlist->enabled ? evlist__disable : evlist__enable)(evlist); } -static int perf_evlist__enable_event_cpu(struct evlist *evlist, - struct evsel *evsel, int cpu) +static int evlist__enable_event_cpu(struct evlist *evlist, struct evsel *evsel, int cpu) { int thread; int nr_threads = perf_evlist__nr_threads(evlist, evsel); @@ -465,9 +464,7 @@ static int perf_evlist__enable_event_cpu(struct evlist *evlist, return 0; } -static int perf_evlist__enable_event_thread(struct evlist *evlist, - struct evsel *evsel, - int thread) +static int evlist__enable_event_thread(struct evlist *evlist, struct evsel *evsel, int thread) { int cpu; int nr_cpus = perf_cpu_map__nr(evlist->core.cpus); @@ -483,15 +480,14 @@ static int perf_evlist__enable_event_thread(struct evlist *evlist, return 0; } -int perf_evlist__enable_event_idx(struct evlist *evlist, - struct evsel *evsel, int idx) +int evlist__enable_event_idx(struct evlist *evlist, struct evsel *evsel, int idx) { bool per_cpu_mmaps = !perf_cpu_map__empty(evlist->core.cpus); if (per_cpu_mmaps) - return perf_evlist__enable_event_cpu(evlist, evsel, idx); - else - return perf_evlist__enable_event_thread(evlist, evsel, idx); + return evlist__enable_event_cpu(evlist, evsel, idx); + + return evlist__enable_event_thread(evlist, evsel, idx); } int evlist__add_pollfd(struct evlist *evlist, int fd) diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 85da0c443ae61..f563e546b0553 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -190,8 +190,7 @@ void evlist__disable(struct evlist *evlist); void evlist__enable(struct evlist *evlist); void evlist__toggle_enable(struct evlist *evlist); -int perf_evlist__enable_event_idx(struct evlist *evlist, - struct evsel *evsel, int idx); +int evlist__enable_event_idx(struct evlist *evlist, struct evsel *evsel, int idx); void perf_evlist__set_selected(struct evlist *evlist, struct evsel *evsel); -- GitLab From 0a60b339475970213f2685d0da55a26d5f4f22f9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 14:49:05 -0300 Subject: [PATCH 0333/1894] perf evlist: Use the right prefix for 'struct evlist' pause/resume methods perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 1c7b6d4a76bea..3ca211e533bac 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -604,7 +604,7 @@ struct evsel *evlist__event2evsel(struct evlist *evlist, union perf_event *event return NULL; } -static int perf_evlist__set_paused(struct evlist *evlist, bool value) +static int evlist__set_paused(struct evlist *evlist, bool value) { int i; @@ -624,14 +624,14 @@ static int perf_evlist__set_paused(struct evlist *evlist, bool value) return 0; } -static int perf_evlist__pause(struct evlist *evlist) +static int evlist__pause(struct evlist *evlist) { - return perf_evlist__set_paused(evlist, true); + return evlist__set_paused(evlist, true); } -static int perf_evlist__resume(struct evlist *evlist) +static int evlist__resume(struct evlist *evlist) { - return perf_evlist__set_paused(evlist, false); + return evlist__set_paused(evlist, false); } static void evlist__munmap_nofree(struct evlist *evlist) @@ -1621,10 +1621,10 @@ void evlist__toggle_bkw_mmap(struct evlist *evlist, enum bkw_mmap_state state) switch (action) { case PAUSE: - perf_evlist__pause(evlist); + evlist__pause(evlist); break; case RESUME: - perf_evlist__resume(evlist); + evlist__resume(evlist); break; case NONE: default: -- GitLab From e414fd1a3f709984a03f0fa287e39df6a7218e22 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 14:52:44 -0300 Subject: [PATCH 0334/1894] perf evlist: Use the right prefix for 'struct evlist' evsel list methods perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm/util/cs-etm.c | 2 +- tools/perf/arch/arm64/util/arm-spe.c | 2 +- tools/perf/arch/x86/util/intel-bts.c | 2 +- tools/perf/arch/x86/util/intel-pt.c | 2 +- tools/perf/tests/bpf.c | 2 +- tools/perf/tests/expand-cgroup.c | 4 ++-- tools/perf/tests/switch-tracking.c | 2 +- tools/perf/ui/browsers/hists.c | 4 ++-- tools/perf/util/cgroup.c | 4 ++-- tools/perf/util/evlist.c | 8 +++----- tools/perf/util/evlist.h | 8 +++----- tools/perf/util/parse-events.c | 2 +- tools/perf/util/sort.c | 2 +- 13 files changed, 20 insertions(+), 24 deletions(-) diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index bc0afeb903c26..bd446aba64f72 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -395,7 +395,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, * To obtain the auxtrace buffer file descriptor, the auxtrace * event must come first. */ - perf_evlist__to_front(evlist, cs_etm_evsel); + evlist__to_front(evlist, cs_etm_evsel); /* * In the case of per-cpu mmaps, we need the CPU on the diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c index beccf1b36654e..414c8a5584b13 100644 --- a/tools/perf/arch/arm64/util/arm-spe.c +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -118,7 +118,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, * To obtain the auxtrace buffer file descriptor, the auxtrace event * must come first. */ - perf_evlist__to_front(evlist, arm_spe_evsel); + evlist__to_front(evlist, arm_spe_evsel); evsel__set_sample_bit(arm_spe_evsel, CPU); evsel__set_sample_bit(arm_spe_evsel, TIME); diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c index 66ca98df48b0f..4a76d49d25d60 100644 --- a/tools/perf/arch/x86/util/intel-bts.c +++ b/tools/perf/arch/x86/util/intel-bts.c @@ -218,7 +218,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr, * To obtain the auxtrace buffer file descriptor, the auxtrace event * must come first. */ - perf_evlist__to_front(evlist, intel_bts_evsel); + evlist__to_front(evlist, intel_bts_evsel); /* * In the case of per-cpu mmaps, we need the CPU on the * AUX event. diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index f1b5380a74013..cb9c0c85dc8fc 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -846,7 +846,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, * To obtain the auxtrace buffer file descriptor, the auxtrace * event must come first. */ - perf_evlist__to_front(evlist, intel_pt_evsel); + evlist__to_front(evlist, intel_pt_evsel); /* * In the case of per-cpu mmaps, we need the CPU on the * AUX event. diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index d880c588a951f..7ed284a00ea28 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -157,7 +157,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void), goto out_delete_evlist; } - perf_evlist__splice_list_tail(evlist, &parse_state.list); + evlist__splice_list_tail(evlist, &parse_state.list); evlist->nr_groups = parse_state.nr_groups; perf_evlist__config(evlist, &opts, NULL); diff --git a/tools/perf/tests/expand-cgroup.c b/tools/perf/tests/expand-cgroup.c index 4c59f3ae438fc..e8187f37fe1e7 100644 --- a/tools/perf/tests/expand-cgroup.c +++ b/tools/perf/tests/expand-cgroup.c @@ -26,7 +26,7 @@ static int test_expand_events(struct evlist *evlist, char **ev_name; struct evsel *evsel; - TEST_ASSERT_VAL("evlist is empty", !perf_evlist__empty(evlist)); + TEST_ASSERT_VAL("evlist is empty", !evlist__empty(evlist)); nr_events = evlist->core.nr_entries; ev_name = calloc(nr_events, sizeof(*ev_name)); @@ -161,7 +161,7 @@ static int expand_libpfm_events(void) event_str, ret); goto out; } - if (perf_evlist__empty(evlist)) { + if (evlist__empty(evlist)) { pr_debug("libpfm was not enabled\n"); goto out; } diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index 8bf484a0f35d6..e07fe84bb3040 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -406,7 +406,7 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_ pr_debug("cycles event already at front"); goto out_err; } - perf_evlist__to_front(evlist, cycles_evsel); + evlist__to_front(evlist, cycles_evsel); if (cycles_evsel != evlist__first(evlist)) { pr_debug("Failed to move cycles event to front"); goto out_err; diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 2f3ce505eec5b..766be504abca4 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -3593,7 +3593,7 @@ static int __evlist__tui_browse_hists(struct evlist *evlist, int nr_entries, con hbt, warn_lost_event); } -static bool perf_evlist__single_entry(struct evlist *evlist) +static bool evlist__single_entry(struct evlist *evlist) { int nr_entries = evlist->core.nr_entries; @@ -3616,7 +3616,7 @@ int evlist__tui_browse_hists(struct evlist *evlist, const char *help, struct his { int nr_entries = evlist->core.nr_entries; - if (perf_evlist__single_entry(evlist)) { + if (evlist__single_entry(evlist)) { single_entry: { struct evsel *first = evlist__first(evlist); diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index 704333748549f..5dff7e489921b 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -371,7 +371,7 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str, } /* save original events and init evlist */ - perf_evlist__splice_list_tail(orig_list, &evlist->core.entries); + evlist__splice_list_tail(orig_list, &evlist->core.entries); evlist->core.nr_entries = 0; if (metric_events) { @@ -430,7 +430,7 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str, goto out_err; } - perf_evlist__splice_list_tail(evlist, &tmp_list->core.entries); + evlist__splice_list_tail(evlist, &tmp_list->core.entries); tmp_list->core.nr_entries = 0; } diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 3ca211e533bac..28b4d347d4ba3 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -177,8 +177,7 @@ void evlist__remove(struct evlist *evlist, struct evsel *evsel) perf_evlist__remove(&evlist->core, &evsel->core); } -void perf_evlist__splice_list_tail(struct evlist *evlist, - struct list_head *list) +void evlist__splice_list_tail(struct evlist *evlist, struct list_head *list) { struct evsel *evsel, *temp; @@ -273,7 +272,7 @@ static int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attr list_add_tail(&evsel->core.node, &head); } - perf_evlist__splice_list_tail(evlist, &head); + evlist__splice_list_tail(evlist, &head); return 0; @@ -1520,8 +1519,7 @@ int evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size return 0; } -void perf_evlist__to_front(struct evlist *evlist, - struct evsel *move_evsel) +void evlist__to_front(struct evlist *evlist, struct evsel *move_evsel) { struct evsel *evsel, *n; LIST_HEAD(move); diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index f563e546b0553..8d3fdeab2f22c 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -214,10 +214,9 @@ bool evlist__valid_sample_type(struct evlist *evlist); bool evlist__valid_sample_id_all(struct evlist *evlist); bool perf_evlist__valid_read_format(struct evlist *evlist); -void perf_evlist__splice_list_tail(struct evlist *evlist, - struct list_head *list); +void evlist__splice_list_tail(struct evlist *evlist, struct list_head *list); -static inline bool perf_evlist__empty(struct evlist *evlist) +static inline bool evlist__empty(struct evlist *evlist) { return list_empty(&evlist->core.entries); } @@ -240,8 +239,7 @@ int evlist__strerror_open(struct evlist *evlist, int err, char *buf, size_t size int evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size); bool perf_evlist__can_select_event(struct evlist *evlist, const char *str); -void perf_evlist__to_front(struct evlist *evlist, - struct evsel *move_evsel); +void evlist__to_front(struct evlist *evlist, struct evsel *move_evsel); /** * __evlist__for_each_entry - iterate thru all the evsels diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 8fa87f60133ff..42c84adeb2fbe 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -2183,7 +2183,7 @@ int __parse_events(struct evlist *evlist, const char *str, /* * Add list to the evlist even with errors to allow callers to clean up. */ - perf_evlist__splice_list_tail(evlist, &parse_state.list); + evlist__splice_list_tail(evlist, &parse_state.list); if (!ret) { struct evsel *last; diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index d42339df20f8d..7d87bfcffb3f6 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -2756,7 +2756,7 @@ static const char *get_default_sort_order(struct evlist *evlist) BUG_ON(sort__mode >= ARRAY_SIZE(default_sort_orders)); - if (evlist == NULL || perf_evlist__empty(evlist)) + if (evlist == NULL || evlist__empty(evlist)) goto out_no_evlist; evlist__for_each_entry(evlist, evsel) { -- GitLab From 712737241980476a277a4108e3121240a29de968 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 14:55:12 -0300 Subject: [PATCH 0335/1894] perf evlist: Use the right prefix for 'struct evlist' print methods perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-annotate.c | 2 +- tools/perf/builtin-report.c | 2 +- tools/perf/builtin-stat.c | 3 +-- tools/perf/util/hist.c | 2 +- tools/perf/util/hist.h | 2 +- tools/perf/util/session.c | 6 ++---- tools/perf/util/stat-display.c | 8 ++------ tools/perf/util/stat.h | 8 ++------ 8 files changed, 11 insertions(+), 22 deletions(-) diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 4940d10074c3e..d6a81a6e1041c 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -412,7 +412,7 @@ static int __cmd_annotate(struct perf_annotate *ann) if (dump_trace) { perf_session__fprintf_nr_events(session, stdout); - perf_evlist__fprintf_nr_events(session->evlist, stdout); + evlist__fprintf_nr_events(session->evlist, stdout); goto out; } diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 11f13aafde440..0a335bef64d0c 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -928,7 +928,7 @@ static int __cmd_report(struct report *rep) if (dump_trace) { perf_session__fprintf_nr_events(session, stdout); - perf_evlist__fprintf_nr_events(session->evlist, stdout); + evlist__fprintf_nr_events(session->evlist, stdout); return 0; } } diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 29f7d4752e848..d69378f135e6b 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -974,8 +974,7 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) if (stat_config.quiet) return; - perf_evlist__print_counters(evsel_list, &stat_config, &target, - ts, argc, argv); + evlist__print_counters(evsel_list, &stat_config, &target, ts, argc, argv); } static volatile int signr = -1; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 8a793e4c9400a..7feeaa07c7776 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -2654,7 +2654,7 @@ void hist__account_cycles(struct branch_stack *bs, struct addr_location *al, } } -size_t perf_evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp) +size_t evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp) { struct evsel *pos; size_t ret = 0; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 278b49e915b07..df6c6eea09605 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -196,7 +196,7 @@ void hists__inc_nr_samples(struct hists *hists, bool filtered); size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, int max_cols, float min_pcnt, FILE *fp, bool ignore_callchains); -size_t perf_evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp); +size_t evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp); void hists__filter_by_dso(struct hists *hists); void hists__filter_by_thread(struct hists *hists); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index ce381b3dca066..6707a01b7ef8d 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1191,9 +1191,7 @@ static void stack_user__printf(struct stack_dump *dump) dump->size, dump->offset); } -static void perf_evlist__print_tstamp(struct evlist *evlist, - union perf_event *event, - struct perf_sample *sample) +static void evlist__print_tstamp(struct evlist *evlist, union perf_event *event, struct perf_sample *sample) { u64 sample_type = __evlist__combined_sample_type(evlist); @@ -1254,7 +1252,7 @@ static void dump_event(struct evlist *evlist, union perf_event *event, evlist->trace_event_sample_raw(evlist, event, sample); if (sample) - perf_evlist__print_tstamp(evlist, event, sample); + evlist__print_tstamp(evlist, event, sample); printf("%#" PRIx64 " [%#x]: PERF_RECORD_%s", file_offset, event->header.size, perf_event__name(event->header.type)); diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index a963b5b8eb724..fee7543843a8d 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -1184,12 +1184,8 @@ static void print_percore(struct perf_stat_config *config, fputc('\n', output); } -void -perf_evlist__print_counters(struct evlist *evlist, - struct perf_stat_config *config, - struct target *_target, - struct timespec *ts, - int argc, const char **argv) +void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config, + struct target *_target, struct timespec *ts, int argc, const char **argv) { bool metric_only = config->metric_only; int interval = config->interval; diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 4cba4b106e45c..9979b4b100f21 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -238,12 +238,8 @@ int create_perf_stat_counter(struct evsel *evsel, struct perf_stat_config *config, struct target *target, int cpu); -void -perf_evlist__print_counters(struct evlist *evlist, - struct perf_stat_config *config, - struct target *_target, - struct timespec *ts, - int argc, const char **argv); +void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config, + struct target *_target, struct timespec *ts, int argc, const char **argv); struct metric_expr; double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_stat *st); -- GitLab From 7748bb7175ccad5ee29e7355134b0061d8edf3d2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 14:56:52 -0300 Subject: [PATCH 0336/1894] perf evlist: Use the right prefix for 'struct evlist' create maps methods perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-ftrace.c | 2 +- tools/perf/builtin-kvm.c | 2 +- tools/perf/builtin-record.c | 2 +- tools/perf/builtin-stat.c | 2 +- tools/perf/builtin-top.c | 2 +- tools/perf/builtin-trace.c | 2 +- tools/perf/tests/backward-ring-buffer.c | 2 +- tools/perf/tests/bpf.c | 2 +- tools/perf/tests/event-times.c | 2 +- tools/perf/tests/openat-syscall-tp-fields.c | 4 ++-- tools/perf/tests/perf-record.c | 2 +- tools/perf/util/evlist.c | 6 +++--- tools/perf/util/evlist.h | 2 +- tools/perf/util/sideband_evlist.c | 2 +- 14 files changed, 17 insertions(+), 17 deletions(-) diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index bf4d8e9ab8f5c..d49448a1060c9 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -957,7 +957,7 @@ int cmd_ftrace(int argc, const char **argv) goto out_delete_filters; } - ret = perf_evlist__create_maps(ftrace.evlist, &ftrace.target); + ret = evlist__create_maps(ftrace.evlist, &ftrace.target); if (ret < 0) goto out_delete_evlist; diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index e1b60aad13cbe..7560d6b1d6a30 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -1442,7 +1442,7 @@ static int kvm_events_live(struct perf_kvm_stat *kvm, goto out; } - if (perf_evlist__create_maps(kvm->evlist, &kvm->opts.target) < 0) + if (evlist__create_maps(kvm->evlist, &kvm->opts.target) < 0) usage_with_options(live_usage, live_options); /* diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 2d74a410d008f..e956348c90811 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -2792,7 +2792,7 @@ int cmd_record(int argc, const char **argv) rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid; err = -ENOMEM; - if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0) + if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) usage_with_options(record_usage, record_options); err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts); diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index d69378f135e6b..1e967c32db7c0 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -2249,7 +2249,7 @@ int cmd_stat(int argc, const char **argv) if ((stat_config.aggr_mode == AGGR_THREAD) && (target.system_wide)) target.per_thread = true; - if (perf_evlist__create_maps(evsel_list, &target) < 0) { + if (evlist__create_maps(evsel_list, &target) < 0) { if (target__has_task(&target)) { pr_err("Problems finding threads of monitor\n"); parse_options_usage(stat_usage, stat_options, "p", 1); diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 5601a35a5ba42..51600f6567714 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1694,7 +1694,7 @@ int cmd_top(int argc, const char **argv) if (target__none(target)) target->system_wide = true; - if (perf_evlist__create_maps(top.evlist, target) < 0) { + if (evlist__create_maps(top.evlist, target) < 0) { ui__error("Couldn't create thread/CPU maps: %s\n", errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf))); goto out_delete_evlist; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index a463ce8fb8e09..ebf4fe944525e 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3950,7 +3950,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) if (trace->cgroup) evlist__set_default_cgroup(trace->evlist, trace->cgroup); - err = perf_evlist__create_maps(evlist, &trace->opts.target); + err = evlist__create_maps(evlist, &trace->opts.target); if (err < 0) { fprintf(trace->output, "Problems parsing the target to trace, check your options!\n"); goto out_delete_evlist; diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c index 15cea518f5add..f00f7f34efbdb 100644 --- a/tools/perf/tests/backward-ring-buffer.c +++ b/tools/perf/tests/backward-ring-buffer.c @@ -109,7 +109,7 @@ int test__backward_ring_buffer(struct test *test __maybe_unused, int subtest __m return TEST_FAIL; } - err = perf_evlist__create_maps(evlist, &opts.target); + err = evlist__create_maps(evlist, &opts.target); if (err < 0) { pr_debug("Not enough memory to create thread/cpu maps\n"); goto out_delete_evlist; diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 7ed284a00ea28..d4b232fe94488 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -151,7 +151,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void), return TEST_FAIL; } - err = perf_evlist__create_maps(evlist, &opts.target); + err = evlist__create_maps(evlist, &opts.target); if (err < 0) { pr_debug("Not enough memory to create thread/cpu maps\n"); goto out_delete_evlist; diff --git a/tools/perf/tests/event-times.c b/tools/perf/tests/event-times.c index 9da2d4f58b8e7..04ce4401f7752 100644 --- a/tools/perf/tests/event-times.c +++ b/tools/perf/tests/event-times.c @@ -26,7 +26,7 @@ static int attach__enable_on_exec(struct evlist *evlist) pr_debug("attaching to spawned child, enable on exec\n"); - err = perf_evlist__create_maps(evlist, &target); + err = evlist__create_maps(evlist, &target); if (err < 0) { pr_debug("Not enough memory to create thread/cpu maps\n"); return err; diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c index 1f5f5e79ae253..78367b17f1bb0 100644 --- a/tools/perf/tests/openat-syscall-tp-fields.c +++ b/tools/perf/tests/openat-syscall-tp-fields.c @@ -54,9 +54,9 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest evlist__add(evlist, evsel); - err = perf_evlist__create_maps(evlist, &opts.target); + err = evlist__create_maps(evlist, &opts.target); if (err < 0) { - pr_debug("%s: perf_evlist__create_maps\n", __func__); + pr_debug("%s: evlist__create_maps\n", __func__); goto out_delete_evlist; } diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index f0b2066c7ea97..01c9282c7a3bc 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -84,7 +84,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus * evlist__prepare_workload we'll fill in the only thread * we're monitoring, the one forked there. */ - err = perf_evlist__create_maps(evlist, &opts.target); + err = evlist__create_maps(evlist, &opts.target); if (err < 0) { pr_debug("Not enough memory to create thread/cpu maps\n"); goto out_delete_evlist; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 28b4d347d4ba3..e68e5a40b49ab 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -892,7 +892,7 @@ int evlist__mmap(struct evlist *evlist, unsigned int pages) return evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1, 0); } -int perf_evlist__create_maps(struct evlist *evlist, struct target *target) +int evlist__create_maps(struct evlist *evlist, struct target *target) { bool all_threads = (target->per_thread && target->system_wide); struct perf_cpu_map *cpus; @@ -1223,7 +1223,7 @@ void evlist__close(struct evlist *evlist) } } -static int perf_evlist__create_syswide_maps(struct evlist *evlist) +static int evlist__create_syswide_maps(struct evlist *evlist) { struct perf_cpu_map *cpus; struct perf_thread_map *threads; @@ -1265,7 +1265,7 @@ int evlist__open(struct evlist *evlist) * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL */ if (evlist->core.threads == NULL && evlist->core.cpus == NULL) { - err = perf_evlist__create_syswide_maps(evlist); + err = evlist__create_syswide_maps(evlist); if (err < 0) goto out_err; } diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 8d3fdeab2f22c..3b3bf3d282045 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -195,7 +195,7 @@ int evlist__enable_event_idx(struct evlist *evlist, struct evsel *evsel, int idx void perf_evlist__set_selected(struct evlist *evlist, struct evsel *evsel); -int perf_evlist__create_maps(struct evlist *evlist, struct target *target); +int evlist__create_maps(struct evlist *evlist, struct target *target); int evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel); void __evlist__set_leader(struct list_head *list); diff --git a/tools/perf/util/sideband_evlist.c b/tools/perf/util/sideband_evlist.c index 13f387b764565..748371ac22bec 100644 --- a/tools/perf/util/sideband_evlist.c +++ b/tools/perf/util/sideband_evlist.c @@ -101,7 +101,7 @@ int evlist__start_sb_thread(struct evlist *evlist, struct target *target) if (!evlist) return 0; - if (perf_evlist__create_maps(evlist, target)) + if (evlist__create_maps(evlist, target)) goto out_delete_evlist; if (evlist->core.nr_entries > 1) { -- GitLab From 64b4778b863b6fa84e36e043fb34bde6b847fa96 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 14:58:32 -0300 Subject: [PATCH 0337/1894] perf evlist: Use the right prefix for 'struct evlist' event group methods perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-annotate.c | 2 +- tools/perf/builtin-record.c | 2 +- tools/perf/builtin-report.c | 2 +- tools/perf/builtin-stat.c | 2 +- tools/perf/util/auxtrace.c | 6 ++---- tools/perf/util/evlist.c | 6 ++---- tools/perf/util/evlist.h | 7 +++---- 7 files changed, 11 insertions(+), 16 deletions(-) diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index d6a81a6e1041c..a23ba6bb99b6e 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -598,7 +598,7 @@ int cmd_annotate(int argc, const char **argv) HEADER_BRANCH_STACK); if (annotate.group_set) - perf_evlist__force_leader(annotate.session->evlist); + evlist__force_leader(annotate.session->evlist); ret = symbol__annotation_init(); if (ret < 0) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index e956348c90811..412717406dc9a 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -923,7 +923,7 @@ try_again: if ((errno == EINVAL || errno == EBADF) && pos->leader != pos && pos->weak_group) { - pos = perf_evlist__reset_weak_group(evlist, pos, true); + pos = evlist__reset_weak_group(evlist, pos, true); goto try_again; } rc = -errno; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 0a335bef64d0c..e78614ab7a16b 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -211,7 +211,7 @@ static void setup_forced_leader(struct report *report, struct evlist *evlist) { if (report->group_set) - perf_evlist__force_leader(evlist); + evlist__force_leader(evlist); } static int process_feature_event(struct perf_session *session, diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 1e967c32db7c0..89c32692f40c7 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -759,7 +759,7 @@ try_again: if ((errno == EINVAL || errno == EBADF) && counter->leader != counter && counter->weak_group) { - perf_evlist__reset_weak_group(evsel_list, counter, false); + evlist__reset_weak_group(evsel_list, counter, false); assert(counter->reset_group); second_pass = true; continue; diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 8c94e21db2aa7..a608784981399 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -62,9 +62,7 @@ * Make a group from 'leader' to 'last', requiring that the events were not * already grouped to a different leader. */ -static int perf_evlist__regroup(struct evlist *evlist, - struct evsel *leader, - struct evsel *last) +static int evlist__regroup(struct evlist *evlist, struct evsel *leader, struct evsel *last) { struct evsel *evsel; bool grp; @@ -775,7 +773,7 @@ no_opt: evsel->core.attr.aux_sample_size = term->val.aux_sample_size; /* If possible, group with the AUX event */ if (aux_evsel && evsel->core.attr.aux_sample_size) - perf_evlist__regroup(evlist, aux_evsel, evsel); + evlist__regroup(evlist, aux_evsel, evsel); } } diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index e68e5a40b49ab..869195b6f21c4 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1650,7 +1650,7 @@ bool perf_evlist__exclude_kernel(struct evlist *evlist) * the group display. Set the artificial group and set the leader's * forced_leader flag to notify the display code. */ -void perf_evlist__force_leader(struct evlist *evlist) +void evlist__force_leader(struct evlist *evlist) { if (!evlist->nr_groups) { struct evsel *leader = evlist__first(evlist); @@ -1660,9 +1660,7 @@ void perf_evlist__force_leader(struct evlist *evlist) } } -struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list, - struct evsel *evsel, - bool close) +struct evsel *evlist__reset_weak_group(struct evlist *evsel_list, struct evsel *evsel, bool close) { struct evsel *c2, *leader; bool is_open = true; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 3b3bf3d282045..d09970e7c838e 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -324,11 +324,10 @@ struct evsel *evlist__event2evsel(struct evlist *evlist, union perf_event *event bool perf_evlist__exclude_kernel(struct evlist *evlist); -void perf_evlist__force_leader(struct evlist *evlist); +void evlist__force_leader(struct evlist *evlist); + +struct evsel *evlist__reset_weak_group(struct evlist *evlist, struct evsel *evsel, bool close); -struct evsel *perf_evlist__reset_weak_group(struct evlist *evlist, - struct evsel *evsel, - bool close); #define EVLIST_CTL_CMD_ENABLE_TAG "enable" #define EVLIST_CTL_CMD_DISABLE_TAG "disable" #define EVLIST_CTL_CMD_ACK_TAG "ack\n" -- GitLab From 900c8ead5b0b21d73236ffbc4bc2f47a506d8297 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 15:01:08 -0300 Subject: [PATCH 0338/1894] perf evlist: Use the right prefix for 'struct evlist' event selection methods perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/intel-pt.c | 2 +- tools/perf/tests/switch-tracking.c | 2 +- tools/perf/ui/browsers/hists.c | 2 +- tools/perf/util/evlist.c | 3 +-- tools/perf/util/evlist.h | 5 ++--- tools/perf/util/record.c | 2 +- 6 files changed, 7 insertions(+), 9 deletions(-) diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index cb9c0c85dc8fc..a6420c6479597 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -416,7 +416,7 @@ static int intel_pt_track_switches(struct evlist *evlist) struct evsel *evsel; int err; - if (!perf_evlist__can_select_event(evlist, sched_switch)) + if (!evlist__can_select_event(evlist, sched_switch)) return -EPERM; err = parse_events(evlist, sched_switch, NULL); diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index e07fe84bb3040..a4cf5ee22c28a 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -380,7 +380,7 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_ cycles_evsel = evlist__last(evlist); /* Third event */ - if (!perf_evlist__can_select_event(evlist, sched_switch)) { + if (!evlist__can_select_event(evlist, sched_switch)) { pr_debug("No sched_switch\n"); err = 0; goto out; diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 766be504abca4..3b9818ee9546a 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -3494,7 +3494,7 @@ static int perf_evsel_menu__run(struct evsel_menu *menu, continue; pos = menu->selection; browse_hists: - perf_evlist__set_selected(evlist, pos); + evlist__set_selected(evlist, pos); /* * Give the calling tool a chance to populate the non * default evsel resorted hists tree. diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 869195b6f21c4..a11364d947208 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1183,8 +1183,7 @@ bool evlist__sample_id_all(struct evlist *evlist) return first->core.attr.sample_id_all; } -void perf_evlist__set_selected(struct evlist *evlist, - struct evsel *evsel) +void evlist__set_selected(struct evlist *evlist, struct evsel *evsel) { evlist->selected = evsel; } diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index d09970e7c838e..e858a351b0142 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -192,8 +192,7 @@ void evlist__toggle_enable(struct evlist *evlist); int evlist__enable_event_idx(struct evlist *evlist, struct evsel *evsel, int idx); -void perf_evlist__set_selected(struct evlist *evlist, - struct evsel *evsel); +void evlist__set_selected(struct evlist *evlist, struct evsel *evsel); int evlist__create_maps(struct evlist *evlist, struct target *target); int evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel); @@ -238,7 +237,7 @@ static inline struct evsel *evlist__last(struct evlist *evlist) int evlist__strerror_open(struct evlist *evlist, int err, char *buf, size_t size); int evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size); -bool perf_evlist__can_select_event(struct evlist *evlist, const char *str); +bool evlist__can_select_event(struct evlist *evlist, const char *str); void evlist__to_front(struct evlist *evlist, struct evsel *move_evsel); /** diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index 220b9910427bb..f72c8e05e15ce 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -217,7 +217,7 @@ int record_opts__config(struct record_opts *opts) return record_opts__config_freq(opts); } -bool perf_evlist__can_select_event(struct evlist *evlist, const char *str) +bool evlist__can_select_event(struct evlist *evlist, const char *str) { struct evlist *temp_evlist; struct evsel *evsel; -- GitLab From 606e2c29334556797e1639115bd198aedb331f07 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 15:04:05 -0300 Subject: [PATCH 0339/1894] perf evlist: Use the right prefix for alternative 'struct evlist' constructors perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/tests/intel-cqm.c | 2 +- tools/perf/tests/bpf.c | 2 +- tools/perf/tests/code-reading.c | 2 +- tools/perf/tests/event_update.c | 3 +-- tools/perf/tests/expand-cgroup.c | 3 +-- tools/perf/tests/mmap-basic.c | 2 +- tools/perf/tests/openat-syscall-tp-fields.c | 2 +- tools/perf/tests/perf-record.c | 4 ++-- tools/perf/tests/task-exit.c | 4 ++-- tools/perf/tests/topology.c | 2 +- tools/perf/util/evlist.c | 4 ++-- tools/perf/util/evlist.h | 4 ++-- 12 files changed, 16 insertions(+), 18 deletions(-) diff --git a/tools/perf/arch/x86/tests/intel-cqm.c b/tools/perf/arch/x86/tests/intel-cqm.c index 3ec562a2aabaa..27dd8cf9e0609 100644 --- a/tools/perf/arch/x86/tests/intel-cqm.c +++ b/tools/perf/arch/x86/tests/intel-cqm.c @@ -52,7 +52,7 @@ int test__intel_cqm_count_nmi_context(struct test *test __maybe_unused, int subt evlist = evlist__new(); if (!evlist) { - pr_debug("perf_evlist__new failed\n"); + pr_debug("evlist__new failed\n"); return TEST_FAIL; } diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index d4b232fe94488..22d7f567c41c6 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -144,7 +144,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void), pid[sizeof(pid) - 1] = '\0'; opts.target.tid = opts.target.pid = pid; - /* Instead of perf_evlist__new_default, don't add default events */ + /* Instead of evlist__new_default, don't add default events */ evlist = evlist__new(); if (!evlist) { pr_debug("Not enough memory to create evlist\n"); diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 339d432b1bf1c..0c494aa90a8df 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -637,7 +637,7 @@ static int do_test_code_reading(bool try_kcore) evlist = evlist__new(); if (!evlist) { - pr_debug("perf_evlist__new failed\n"); + pr_debug("evlist__new failed\n"); goto out_put; } diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c index bdcf032f85162..656218179222c 100644 --- a/tools/perf/tests/event_update.c +++ b/tools/perf/tests/event_update.c @@ -85,11 +85,10 @@ static int process_event_cpus(struct perf_tool *tool __maybe_unused, int test__event_update(struct test *test __maybe_unused, int subtest __maybe_unused) { - struct evlist *evlist; struct evsel *evsel; struct event_name tmp; + struct evlist *evlist = evlist__new_default(); - evlist = perf_evlist__new_default(); TEST_ASSERT_VAL("failed to get evlist", evlist); evsel = evlist__first(evlist); diff --git a/tools/perf/tests/expand-cgroup.c b/tools/perf/tests/expand-cgroup.c index e8187f37fe1e7..0e46aeb843ce4 100644 --- a/tools/perf/tests/expand-cgroup.c +++ b/tools/perf/tests/expand-cgroup.c @@ -100,10 +100,9 @@ out: for (i = 0; i < nr_events; i++) static int expand_default_events(void) { int ret; - struct evlist *evlist; struct rblist metric_events; + struct evlist *evlist = evlist__new_default(); - evlist = perf_evlist__new_default(); TEST_ASSERT_VAL("failed to get evlist", evlist); rblist__init(&metric_events); diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index c01d7e12d2419..57093aeacc6f4 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -69,7 +69,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse evlist = evlist__new(); if (evlist == NULL) { - pr_debug("perf_evlist__new\n"); + pr_debug("evlist__new\n"); goto out_free_cpus; } diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c index 78367b17f1bb0..5e4af2f0f14ae 100644 --- a/tools/perf/tests/openat-syscall-tp-fields.c +++ b/tools/perf/tests/openat-syscall-tp-fields.c @@ -42,7 +42,7 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest char sbuf[STRERR_BUFSIZE]; if (evlist == NULL) { - pr_debug("%s: perf_evlist__new\n", __func__); + pr_debug("%s: evlist__new\n", __func__); goto out; } diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index 01c9282c7a3bc..61d1ff30253c9 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -53,7 +53,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus }; cpu_set_t cpu_mask; size_t cpu_mask_size = sizeof(cpu_mask); - struct evlist *evlist = perf_evlist__new_dummy(); + struct evlist *evlist = evlist__new_dummy(); struct evsel *evsel; struct perf_sample sample; const char *cmd = "sleep"; @@ -71,7 +71,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus char sbuf[STRERR_BUFSIZE]; if (evlist == NULL) /* Fallback for kernels lacking PERF_COUNT_SW_DUMMY */ - evlist = perf_evlist__new_default(); + evlist = evlist__new_default(); if (evlist == NULL) { pr_debug("Not enough memory to create evlist\n"); diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index 452d51048cc18..bbf94e4aa1450 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -58,9 +58,9 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused signal(SIGCHLD, sig_handler); - evlist = perf_evlist__new_default(); + evlist = evlist__new_default(); if (evlist == NULL) { - pr_debug("perf_evlist__new_default\n"); + pr_debug("evlist__new_default\n"); return -1; } diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index 22daf2bdf5faf..165feedc78634 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -40,7 +40,7 @@ static int session_write_header(char *path) session = perf_session__new(&data, false, NULL); TEST_ASSERT_VAL("can't get session", !IS_ERR(session)); - session->evlist = perf_evlist__new_default(); + session->evlist = evlist__new_default(); TEST_ASSERT_VAL("can't get evlist", session->evlist); perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index a11364d947208..70b5b6e506c8b 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -78,7 +78,7 @@ struct evlist *evlist__new(void) return evlist; } -struct evlist *perf_evlist__new_default(void) +struct evlist *evlist__new_default(void) { struct evlist *evlist = evlist__new(); @@ -90,7 +90,7 @@ struct evlist *perf_evlist__new_default(void) return evlist; } -struct evlist *perf_evlist__new_dummy(void) +struct evlist *evlist__new_dummy(void) { struct evlist *evlist = evlist__new(); diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index e858a351b0142..9e73d4ae16d9e 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -87,8 +87,8 @@ struct evsel_str_handler { }; struct evlist *evlist__new(void); -struct evlist *perf_evlist__new_default(void); -struct evlist *perf_evlist__new_dummy(void); +struct evlist *evlist__new_default(void); +struct evlist *evlist__new_dummy(void); void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus, struct perf_thread_map *threads); void evlist__exit(struct evlist *evlist); -- GitLab From 78e1bc25786656c490befc6d44d265f263cb8861 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 15:07:49 -0300 Subject: [PATCH 0340/1894] perf evlist: Use the right prefix for 'struct evlist' event attribute config methods perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-kvm.c | 2 +- tools/perf/builtin-record.c | 6 +++--- tools/perf/builtin-report.c | 2 +- tools/perf/builtin-top.c | 4 ++-- tools/perf/builtin-trace.c | 2 +- tools/perf/tests/backward-ring-buffer.c | 2 +- tools/perf/tests/bpf.c | 2 +- tools/perf/tests/code-reading.c | 2 +- tools/perf/tests/keep-tracking.c | 2 +- tools/perf/tests/parse-events.c | 10 ++++------ tools/perf/tests/perf-record.c | 2 +- tools/perf/tests/perf-time-to-tsc.c | 2 +- tools/perf/tests/switch-tracking.c | 2 +- tools/perf/util/evlist.c | 4 ++-- tools/perf/util/evlist.h | 7 +++---- tools/perf/util/record.c | 3 +-- tools/perf/util/session.c | 2 +- 17 files changed, 26 insertions(+), 30 deletions(-) diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 7560d6b1d6a30..616125160b58d 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -1022,7 +1022,7 @@ static int kvm_live_open_events(struct perf_kvm_stat *kvm) struct evlist *evlist = kvm->evlist; char sbuf[STRERR_BUFSIZE]; - perf_evlist__config(evlist, &kvm->opts, NULL); + evlist__config(evlist, &kvm->opts, NULL); /* * Note: exclude_{guest,host} do not apply here. diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 412717406dc9a..2a732414ae998 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -910,7 +910,7 @@ static int record__open(struct record *rec) pos->immediate = 1; } - perf_evlist__config(evlist, opts, &callchain_param); + evlist__config(evlist, opts, &callchain_param); evlist__for_each_entry(evlist, pos) { try_again: @@ -935,7 +935,7 @@ try_again: pos->supported = true; } - if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(evlist)) { + if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) { pr_warning( "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n" "check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n" @@ -1444,7 +1444,7 @@ static int record__synthesize(struct record *rec, bool tail) goto out; } - if (!perf_evlist__exclude_kernel(rec->evlist)) { + if (!evlist__exclude_kernel(rec->evlist)) { err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, machine); WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n" diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index e78614ab7a16b..5efbd0602f175 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -563,7 +563,7 @@ static void report__warn_kptr_restrict(const struct report *rep) struct map *kernel_map = machine__kernel_map(&rep->session->machines.host); struct kmap *kernel_kmap = kernel_map ? map__kmap(kernel_map) : NULL; - if (perf_evlist__exclude_kernel(rep->session->evlist)) + if (evlist__exclude_kernel(rep->session->evlist)) return; if (kernel_map == NULL || diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 51600f6567714..f65aa1c22c2a8 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -779,7 +779,7 @@ static void perf_event__process_sample(struct perf_tool *tool, if (!machine->kptr_restrict_warned && symbol_conf.kptr_restrict && al.cpumode == PERF_RECORD_MISC_KERNEL) { - if (!perf_evlist__exclude_kernel(top->session->evlist)) { + if (!evlist__exclude_kernel(top->session->evlist)) { ui__warning( "Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n" "Check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n" @@ -1022,7 +1022,7 @@ static int perf_top__start_counters(struct perf_top *top) goto out_err; } - perf_evlist__config(evlist, opts, &callchain_param); + evlist__config(evlist, opts, &callchain_param); evlist__for_each_entry(evlist, counter) { try_again: diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index ebf4fe944525e..fa7d57fc53da4 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3962,7 +3962,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) goto out_delete_evlist; } - perf_evlist__config(evlist, &trace->opts, &callchain_param); + evlist__config(evlist, &trace->opts, &callchain_param); signal(SIGCHLD, sig_handler); signal(SIGINT, sig_handler); diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c index f00f7f34efbdb..b4b9a9488d516 100644 --- a/tools/perf/tests/backward-ring-buffer.c +++ b/tools/perf/tests/backward-ring-buffer.c @@ -127,7 +127,7 @@ int test__backward_ring_buffer(struct test *test __maybe_unused, int subtest __m goto out_delete_evlist; } - perf_evlist__config(evlist, &opts, NULL); + evlist__config(evlist, &opts, NULL); err = evlist__open(evlist); if (err < 0) { diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 22d7f567c41c6..f57e075b0ed2f 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -160,7 +160,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void), evlist__splice_list_tail(evlist, &parse_state.list); evlist->nr_groups = parse_state.nr_groups; - perf_evlist__config(evlist, &opts, NULL); + evlist__config(evlist, &opts, NULL); err = evlist__open(evlist); if (err < 0) { diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 0c494aa90a8df..7c098d49c77e5 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -651,7 +651,7 @@ static int do_test_code_reading(bool try_kcore) goto out_put; } - perf_evlist__config(evlist, &opts, NULL); + evlist__config(evlist, &opts, NULL); evsel = evlist__first(evlist); diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c index 50a0c9fcde7da..e6f1b2a38e032 100644 --- a/tools/perf/tests/keep-tracking.c +++ b/tools/perf/tests/keep-tracking.c @@ -92,7 +92,7 @@ int test__keep_tracking(struct test *test __maybe_unused, int subtest __maybe_un CHECK__(parse_events(evlist, "dummy:u", NULL)); CHECK__(parse_events(evlist, "cycles:u", NULL)); - perf_evlist__config(evlist, &opts, NULL); + evlist__config(evlist, &opts, NULL); evsel = evlist__first(evlist); diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 611512f22b344..a7f6661e6112d 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -115,7 +115,7 @@ static int test__checkevent_symbolic_name_config(struct evlist *evlist) TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); /* - * The period value gets configured within perf_evlist__config, + * The period value gets configured within evlist__config, * while this test executes only parse events method. */ TEST_ASSERT_VAL("wrong period", @@ -443,7 +443,7 @@ static int test__checkevent_pmu(struct evlist *evlist) TEST_ASSERT_VAL("wrong config1", 1 == evsel->core.attr.config1); TEST_ASSERT_VAL("wrong config2", 3 == evsel->core.attr.config2); /* - * The period value gets configured within perf_evlist__config, + * The period value gets configured within evlist__config, * while this test executes only parse events method. */ TEST_ASSERT_VAL("wrong period", 0 == evsel->core.attr.sample_period); @@ -520,8 +520,7 @@ static int test__checkevent_pmu_partial_time_callgraph(struct evlist *evlist) TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", 1 == evsel->core.attr.config); /* - * The period, time and callgraph value gets configured - * within perf_evlist__config, + * The period, time and callgraph value gets configured within evlist__config, * while this test executes only parse events method. */ TEST_ASSERT_VAL("wrong period", 0 == evsel->core.attr.sample_period); @@ -533,8 +532,7 @@ static int test__checkevent_pmu_partial_time_callgraph(struct evlist *evlist) TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", 2 == evsel->core.attr.config); /* - * The period, time and callgraph value gets configured - * within perf_evlist__config, + * The period, time and callgraph value gets configured within evlist__config, * while this test executes only parse events method. */ TEST_ASSERT_VAL("wrong period", 0 == evsel->core.attr.sample_period); diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index 61d1ff30253c9..0df471bf1590e 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -109,7 +109,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus evsel__set_sample_bit(evsel, CPU); evsel__set_sample_bit(evsel, TID); evsel__set_sample_bit(evsel, TIME); - perf_evlist__config(evlist, &opts, NULL); + evlist__config(evlist, &opts, NULL); err = sched__get_first_possible_cpu(evlist->workload.pid, &cpu_mask); if (err < 0) { diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c index a9560e0f63609..7cff02664d0e6 100644 --- a/tools/perf/tests/perf-time-to-tsc.c +++ b/tools/perf/tests/perf-time-to-tsc.c @@ -80,7 +80,7 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe CHECK__(parse_events(evlist, "cycles:u", NULL)); - perf_evlist__config(evlist, &opts, NULL); + evlist__config(evlist, &opts, NULL); evsel = evlist__first(evlist); diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index a4cf5ee22c28a..15a2ab765d890 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -432,7 +432,7 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_ evsel__set_sample_bit(tracking_evsel, TIME); /* Config events */ - perf_evlist__config(evlist, &opts, NULL); + evlist__config(evlist, &opts, NULL); /* Check moved event is still at the front */ if (cycles_evsel != evlist__first(evlist)) { diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 70b5b6e506c8b..7ca92a5d3206c 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1110,7 +1110,7 @@ u64 evlist__combined_branch_type(struct evlist *evlist) return branch_type; } -bool perf_evlist__valid_read_format(struct evlist *evlist) +bool evlist__valid_read_format(struct evlist *evlist) { struct evsel *first = evlist__first(evlist), *pos = first; u64 read_format = first->core.attr.read_format; @@ -1632,7 +1632,7 @@ state_err: return; } -bool perf_evlist__exclude_kernel(struct evlist *evlist) +bool evlist__exclude_kernel(struct evlist *evlist) { struct evsel *evsel; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 9e73d4ae16d9e..ebc4550870a1a 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -159,8 +159,7 @@ void evlist__close(struct evlist *evlist); struct callchain_param; void evlist__set_id_pos(struct evlist *evlist); -void perf_evlist__config(struct evlist *evlist, struct record_opts *opts, - struct callchain_param *callchain); +void evlist__config(struct evlist *evlist, struct record_opts *opts, struct callchain_param *callchain); int record_opts__config(struct record_opts *opts); int evlist__prepare_workload(struct evlist *evlist, struct target *target, @@ -211,7 +210,7 @@ int evlist__parse_sample_timestamp(struct evlist *evlist, union perf_event *even bool evlist__valid_sample_type(struct evlist *evlist); bool evlist__valid_sample_id_all(struct evlist *evlist); -bool perf_evlist__valid_read_format(struct evlist *evlist); +bool evlist__valid_read_format(struct evlist *evlist); void evlist__splice_list_tail(struct evlist *evlist, struct list_head *list); @@ -321,7 +320,7 @@ struct evsel *evlist__find_evsel_by_str(struct evlist *evlist, const char *str); struct evsel *evlist__event2evsel(struct evlist *evlist, union perf_event *event); -bool perf_evlist__exclude_kernel(struct evlist *evlist); +bool evlist__exclude_kernel(struct evlist *evlist); void evlist__force_leader(struct evlist *evlist); diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index f72c8e05e15ce..e70c9dd04567b 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -89,8 +89,7 @@ static void evsel__config_leader_sampling(struct evsel *evsel, struct evlist *ev leader->core.attr.sample_type; } -void perf_evlist__config(struct evlist *evlist, struct record_opts *opts, - struct callchain_param *callchain) +void evlist__config(struct evlist *evlist, struct record_opts *opts, struct callchain_param *callchain) { struct evsel *evsel; bool use_sample_identifier = false; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 6707a01b7ef8d..bf10576d257ae 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -125,7 +125,7 @@ static int perf_session__open(struct perf_session *session) return -1; } - if (!perf_evlist__valid_read_format(session->evlist)) { + if (!evlist__valid_read_format(session->evlist)) { pr_err("non matching read_format\n"); return -1; } -- GitLab From 25f84702f3590ce6caa3e5bb98e001692f3a2b9e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 15:09:45 -0300 Subject: [PATCH 0341/1894] perf evlist: Use the right prefix for 'struct evlist' mmap pages parsing method perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-kvm.c | 3 +-- tools/perf/builtin-record.c | 4 ++-- tools/perf/builtin-top.c | 3 +-- tools/perf/builtin-trace.c | 3 +-- tools/perf/util/evlist.c | 7 +++---- tools/perf/util/evlist.h | 6 ++---- 6 files changed, 10 insertions(+), 16 deletions(-) diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 616125160b58d..1105c9e40a80f 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -1349,8 +1349,7 @@ static int kvm_events_live(struct perf_kvm_stat *kvm, OPT_STRING('p', "pid", &kvm->opts.target.pid, "pid", "record events on existing process id"), OPT_CALLBACK('m', "mmap-pages", &kvm->opts.mmap_pages, "pages", - "number of mmap data pages", - perf_evlist__parse_mmap_pages), + "number of mmap data pages", evlist__parse_mmap_pages), OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), OPT_BOOLEAN('a', "all-cpus", &kvm->opts.target.system_wide, diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 2a732414ae998..92039fbf63375 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -2221,7 +2221,7 @@ static int record__parse_mmap_pages(const struct option *opt, *p = '\0'; if (*s) { - ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s); + ret = __evlist__parse_mmap_pages(&mmap_pages, s); if (ret) goto out_free; opts->mmap_pages = mmap_pages; @@ -2232,7 +2232,7 @@ static int record__parse_mmap_pages(const struct option *opt, goto out_free; } - ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1); + ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1); if (ret) goto out_free; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index f65aa1c22c2a8..3673c04d16b6d 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1466,8 +1466,7 @@ int cmd_top(int argc, const char **argv) OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols, "hide kernel symbols"), OPT_CALLBACK('m', "mmap-pages", &opts->mmap_pages, "pages", - "number of mmap data pages", - perf_evlist__parse_mmap_pages), + "number of mmap data pages", evlist__parse_mmap_pages), OPT_INTEGER('r', "realtime", &top.realtime_prio, "collect data with this RT SCHED_FIFO priority"), OPT_INTEGER('d', "delay", &top.delay_secs, diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index fa7d57fc53da4..85b6a46e85b68 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -4763,8 +4763,7 @@ int cmd_trace(int argc, const char **argv) OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit, "child tasks do not inherit counters"), OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages", - "number of mmap data pages", - perf_evlist__parse_mmap_pages), + "number of mmap data pages", evlist__parse_mmap_pages), OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user", "user to profile"), OPT_CALLBACK(0, "duration", &trace, "float", diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 7ca92a5d3206c..59264a7a858a2 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -815,7 +815,7 @@ static long parse_pages_arg(const char *str, unsigned long min, return pages; } -int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str) +int __evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str) { unsigned long max = UINT_MAX; long pages; @@ -833,10 +833,9 @@ int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str) return 0; } -int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str, - int unset __maybe_unused) +int evlist__parse_mmap_pages(const struct option *opt, const char *str, int unset __maybe_unused) { - return __perf_evlist__parse_mmap_pages(opt->value, str); + return __evlist__parse_mmap_pages(opt->value, str); } /** diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index ebc4550870a1a..8a5b515193b33 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -169,10 +169,8 @@ int evlist__start_workload(struct evlist *evlist); struct option; -int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str); -int perf_evlist__parse_mmap_pages(const struct option *opt, - const char *str, - int unset); +int __evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str); +int evlist__parse_mmap_pages(const struct option *opt, const char *str, int unset); unsigned long perf_event_mlock_kb_in_pages(void); -- GitLab From 44d2a5573665ab5dfb72572e43184388d15d695e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 15:11:10 -0300 Subject: [PATCH 0342/1894] perf evlist: Use the right prefix for 'struct evlist' raw samples methods perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/s390-sample-raw.c | 3 +-- tools/perf/util/sample-raw.c | 4 ++-- tools/perf/util/sample-raw.h | 7 ++----- tools/perf/util/session.c | 2 +- 4 files changed, 6 insertions(+), 10 deletions(-) diff --git a/tools/perf/util/s390-sample-raw.c b/tools/perf/util/s390-sample-raw.c index d177e61798395..cfcf8d534d767 100644 --- a/tools/perf/util/s390-sample-raw.c +++ b/tools/perf/util/s390-sample-raw.c @@ -197,8 +197,7 @@ static void s390_cpumcfdg_dump(struct perf_sample *sample) * its raw data. * The function is only invoked when the dump flag -D is set. */ -void perf_evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event, - struct perf_sample *sample) +void evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event, struct perf_sample *sample) { struct evsel *ev_bc000; diff --git a/tools/perf/util/sample-raw.c b/tools/perf/util/sample-raw.c index e84bbe0e441a0..cde5cd3ce49bf 100644 --- a/tools/perf/util/sample-raw.c +++ b/tools/perf/util/sample-raw.c @@ -9,10 +9,10 @@ * Check platform the perf data file was created on and perform platform * specific interpretation. */ -void perf_evlist__init_trace_event_sample_raw(struct evlist *evlist) +void evlist__init_trace_event_sample_raw(struct evlist *evlist) { const char *arch_pf = perf_env__arch(evlist->env); if (arch_pf && !strcmp("s390", arch_pf)) - evlist->trace_event_sample_raw = perf_evlist__s390_sample_raw; + evlist->trace_event_sample_raw = evlist__s390_sample_raw; } diff --git a/tools/perf/util/sample-raw.h b/tools/perf/util/sample-raw.h index afe1491a117e7..4be84a5510cf0 100644 --- a/tools/perf/util/sample-raw.h +++ b/tools/perf/util/sample-raw.h @@ -6,9 +6,6 @@ struct evlist; union perf_event; struct perf_sample; -void perf_evlist__s390_sample_raw(struct evlist *evlist, - union perf_event *event, - struct perf_sample *sample); - -void perf_evlist__init_trace_event_sample_raw(struct evlist *evlist); +void evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event, struct perf_sample *sample); +void evlist__init_trace_event_sample_raw(struct evlist *evlist); #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index bf10576d257ae..83de320900087 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -221,7 +221,7 @@ struct perf_session *perf_session__new(struct perf_data *data, perf_session__set_comm_exec(session); } - perf_evlist__init_trace_event_sample_raw(session->evlist); + evlist__init_trace_event_sample_raw(session->evlist); /* Open the directory data. */ if (data->is_dir) { -- GitLab From 1420ba2f6250270c4143d96af86f654f9f4d9997 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 15:13:12 -0300 Subject: [PATCH 0343/1894] perf evlist: Use the right prefix for 'struct evlist' header methods perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 2 +- tools/perf/util/evlist.h | 2 +- tools/perf/util/header.c | 15 +++++---------- tools/perf/util/session.c | 2 +- 4 files changed, 8 insertions(+), 13 deletions(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 59264a7a858a2..23eef3cedc2f9 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1131,7 +1131,7 @@ bool evlist__valid_read_format(struct evlist *evlist) return true; } -u16 perf_evlist__id_hdr_size(struct evlist *evlist) +u16 evlist__id_hdr_size(struct evlist *evlist) { struct evsel *first = evlist__first(evlist); struct perf_sample *data; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 8a5b515193b33..9b0c795736bb5 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -201,7 +201,7 @@ u64 __evlist__combined_sample_type(struct evlist *evlist); u64 evlist__combined_sample_type(struct evlist *evlist); u64 evlist__combined_branch_type(struct evlist *evlist); bool evlist__sample_id_all(struct evlist *evlist); -u16 perf_evlist__id_hdr_size(struct evlist *evlist); +u16 evlist__id_hdr_size(struct evlist *evlist); int evlist__parse_sample(struct evlist *evlist, union perf_event *event, struct perf_sample *sample); int evlist__parse_sample_timestamp(struct evlist *evlist, union perf_event *event, u64 *timestamp); diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index be219051119ce..062383e225a3e 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2275,9 +2275,7 @@ static struct evsel *evlist__find_by_index(struct evlist *evlist, int idx) return NULL; } -static void -perf_evlist__set_event_name(struct evlist *evlist, - struct evsel *event) +static void evlist__set_event_name(struct evlist *evlist, struct evsel *event) { struct evsel *evsel; @@ -2312,7 +2310,7 @@ process_event_desc(struct feat_fd *ff, void *data __maybe_unused) } for (evsel = events; evsel->core.attr.size; evsel++) - perf_evlist__set_event_name(session->evlist, evsel); + evlist__set_event_name(session->evlist, evsel); if (!session->data->is_pipe) free_event_desc(events); @@ -3765,8 +3763,7 @@ static int evsel__prepare_tracepoint_event(struct evsel *evsel, struct tep_handl return 0; } -static int perf_evlist__prepare_tracepoint_events(struct evlist *evlist, - struct tep_handle *pevent) +static int evlist__prepare_tracepoint_events(struct evlist *evlist, struct tep_handle *pevent) { struct evsel *pos; @@ -3881,8 +3878,7 @@ int perf_session__read_header(struct perf_session *session) perf_header__process_sections(header, fd, &session->tevent, perf_file_section__process); - if (perf_evlist__prepare_tracepoint_events(session->evlist, - session->tevent.pevent)) + if (evlist__prepare_tracepoint_events(session->evlist, session->tevent.pevent)) goto out_delete_evlist; return 0; @@ -4103,8 +4099,7 @@ int perf_event__process_tracing_data(struct perf_session *session, return -1; } - perf_evlist__prepare_tracepoint_events(session->evlist, - session->tevent.pevent); + evlist__prepare_tracepoint_events(session->evlist, session->tevent.pevent); return size_read + padding; } diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 83de320900087..69799e747bf03 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -135,7 +135,7 @@ static int perf_session__open(struct perf_session *session) void perf_session__set_id_hdr_size(struct perf_session *session) { - u16 id_hdr_size = perf_evlist__id_hdr_size(session->evlist); + u16 id_hdr_size = evlist__id_hdr_size(session->evlist); machines__set_id_hdr_size(&session->machines, id_hdr_size); } -- GitLab From 515ea461c26e19ebca4351266480306979a113fc Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 15:16:29 -0300 Subject: [PATCH 0344/1894] perf evlist: Use the right prefix for 'struct evlist' deliver event method perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 69799e747bf03..3b3c50b127918 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1407,13 +1407,9 @@ static int deliver_sample_group(struct evlist *evlist, return ret; } -static int - perf_evlist__deliver_sample(struct evlist *evlist, - struct perf_tool *tool, - union perf_event *event, - struct perf_sample *sample, - struct evsel *evsel, - struct machine *machine) +static int evlist__deliver_sample(struct evlist *evlist, struct perf_tool *tool, + union perf_event *event, struct perf_sample *sample, + struct evsel *evsel, struct machine *machine) { /* We know evsel != NULL. */ u64 sample_type = evsel->core.attr.sample_type; @@ -1458,7 +1454,7 @@ static int machines__deliver_event(struct machines *machines, ++evlist->stats.nr_unprocessable_samples; return 0; } - return perf_evlist__deliver_sample(evlist, tool, event, sample, evsel, machine); + return evlist__deliver_sample(evlist, tool, event, sample, evsel, machine); case PERF_RECORD_MMAP: return tool->mmap(tool, event, sample, machine); case PERF_RECORD_MMAP2: -- GitLab From f63c2f5a8b0eb4a7a8d5d19c8e0ccbbd0ee41d14 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 15:17:20 -0300 Subject: [PATCH 0345/1894] perf evlist: Use the right prefix for 'struct evlist' nr_threads method perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 23eef3cedc2f9..493819173a8e4 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -330,8 +330,7 @@ int evlist__add_newtp(struct evlist *evlist, const char *sys, const char *name, return 0; } -static int perf_evlist__nr_threads(struct evlist *evlist, - struct evsel *evsel) +static int evlist__nr_threads(struct evlist *evlist, struct evsel *evsel) { if (evsel->core.system_wide) return 1; @@ -450,7 +449,7 @@ void evlist__toggle_enable(struct evlist *evlist) static int evlist__enable_event_cpu(struct evlist *evlist, struct evsel *evsel, int cpu) { int thread; - int nr_threads = perf_evlist__nr_threads(evlist, evsel); + int nr_threads = evlist__nr_threads(evlist, evsel); if (!evsel->core.fd) return -EINVAL; -- GitLab From b979a2f13b1b98c26b8f94d9401cd5255f75f978 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 15:18:48 -0300 Subject: [PATCH 0346/1894] perf evlist: Use the right prefix for 'struct evlist' diff methods perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-diff.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index cefc71506409f..8f6c784ce629c 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -494,7 +494,7 @@ static struct evsel *evsel_match(struct evsel *evsel, return NULL; } -static void perf_evlist__collapse_resort(struct evlist *evlist) +static void evlist__collapse_resort(struct evlist *evlist) { struct evsel *evsel; @@ -1214,7 +1214,7 @@ static int __cmd_diff(void) goto out_delete; } - perf_evlist__collapse_resort(d->session->evlist); + evlist__collapse_resort(d->session->evlist); if (pdiff.ptime_range) zfree(&pdiff.ptime_range); -- GitLab From db0ea13cc741e7c93f26bf5b3d313f48d00f15a4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 15:19:40 -0300 Subject: [PATCH 0347/1894] perf evlist: Use the right prefix for 'struct evlist' record methods perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 92039fbf63375..d832c108a1ca2 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1349,8 +1349,7 @@ static void workload_exec_failed_signal(int signo __maybe_unused, static void snapshot_sig_handler(int sig); static void alarm_sig_handler(int sig); -static const struct perf_event_mmap_page * -perf_evlist__pick_pc(struct evlist *evlist) +static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist) { if (evlist) { if (evlist->mmap && evlist->mmap[0].core.base) @@ -1363,9 +1362,7 @@ perf_evlist__pick_pc(struct evlist *evlist) static const struct perf_event_mmap_page *record__pick_pc(struct record *rec) { - const struct perf_event_mmap_page *pc; - - pc = perf_evlist__pick_pc(rec->evlist); + const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist); if (pc) return pc; return NULL; -- GitLab From dfd375864ac1e2ee60ed2d61820697ac01642f80 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 30 Nov 2020 23:04:12 -0800 Subject: [PATCH 0348/1894] 9p: Uninitialized variable in v9fs_writeback_fid() If v9fs_fid_lookup_with_uid() fails then "fid" is not initialized. The v9fs_fid_lookup_with_uid() can't return NULL. If it returns an error pointer then we can still pass that to clone_fid() and it will return the error pointer back again. Fixes: 6636b6dcc3db ("9p: add refcount to p9_fid struct") Signed-off-by: Dan Carpenter Signed-off-by: Dominique Martinet --- fs/9p/fid.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/9p/fid.c b/fs/9p/fid.c index 50118ec72a926..79837f1b1c913 100644 --- a/fs/9p/fid.c +++ b/fs/9p/fid.c @@ -302,8 +302,7 @@ struct p9_fid *v9fs_writeback_fid(struct dentry *dentry) struct p9_fid *fid, *ofid; ofid = v9fs_fid_lookup_with_uid(dentry, GLOBAL_ROOT_UID, 0); - if (ofid && !IS_ERR(ofid)) - fid = clone_fid(ofid); + fid = clone_fid(ofid); if (IS_ERR(fid)) goto error_out; p9_client_clunk(ofid); -- GitLab From cfd1d0f524a87b7d6d14b41a14fa4cbe522cf8cc Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 1 Dec 2020 10:04:34 +0300 Subject: [PATCH 0349/1894] 9p: Remove unnecessary IS_ERR() check The "fid" variable can't be an error pointer so there is no need to check. The code is slightly cleaner if we move the increment before the break and remove the NULL check as well. Signed-off-by: Dan Carpenter Signed-off-by: Dominique Martinet --- fs/9p/fid.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/9p/fid.c b/fs/9p/fid.c index 79837f1b1c913..9d9de62592be2 100644 --- a/fs/9p/fid.c +++ b/fs/9p/fid.c @@ -56,12 +56,11 @@ static struct p9_fid *v9fs_fid_find_inode(struct inode *inode, kuid_t uid) h = (struct hlist_head *)&inode->i_private; hlist_for_each_entry(fid, h, ilist) { if (uid_eq(fid->uid, uid)) { + refcount_inc(&fid->count); ret = fid; break; } } - if (ret && !IS_ERR(ret)) - refcount_inc(&ret->count); spin_unlock(&inode->i_lock); return ret; } -- GitLab From 665f1388bc9713c81989dda6eed5cde52d57c255 Mon Sep 17 00:00:00 2001 From: Andrey Zhizhikin Date: Mon, 30 Nov 2020 12:42:33 +0000 Subject: [PATCH 0350/1894] ARM: omap2plus_defconfig: drop unused POWER_AVS option Commit 785b5bb41b0a ("PM: AVS: Drop the avs directory and the corresponding Kconfig") moved AVS code to SOC-specific folders, and removed corresponding Kconfig from drivers/power, leaving original POWER_AVS config option enabled in omap2plus_defconfig file. Remove the option, which has no references in the tree anymore. Fixes: 785b5bb41b0a ("PM: AVS: Drop the avs directory and the corresponding Kconfig") Signed-off-by: Andrey Zhizhikin Reviewed-by: Krzysztof Kozlowski Reviewed-by: Nishanth Menon Cc: Nishanth Menon Cc: Ulf Hansson Signed-off-by: Tony Lindgren --- arch/arm/configs/omap2plus_defconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index 77716f500813f..23b53526c4eb3 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -297,7 +297,6 @@ CONFIG_GPIO_TWL4030=y CONFIG_W1=m CONFIG_HDQ_MASTER_OMAP=m CONFIG_W1_SLAVE_DS250X=m -CONFIG_POWER_AVS=y CONFIG_POWER_RESET=y CONFIG_POWER_RESET_GPIO=y CONFIG_BATTERY_BQ27XXX=m -- GitLab From f1dc15cd7fc146107cad2a926d9c1d005f69002a Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Sun, 29 Nov 2020 16:47:10 +0200 Subject: [PATCH 0351/1894] ARM: dts: OMAP3: disable AES on N950/N9 AES needs to be disabled on Nokia N950/N9 as well (HS devices), otherwise kernel fails to boot. Fixes: c312f066314e ("ARM: dts: omap3: Migrate AES from hwmods to sysc-omap2") Signed-off-by: Aaro Koskinen Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap3-n950-n9.dtsi | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm/boot/dts/omap3-n950-n9.dtsi b/arch/arm/boot/dts/omap3-n950-n9.dtsi index 11d41e86f814d..7dde9fbb06d33 100644 --- a/arch/arm/boot/dts/omap3-n950-n9.dtsi +++ b/arch/arm/boot/dts/omap3-n950-n9.dtsi @@ -494,3 +494,11 @@ clock-names = "sysclk"; }; }; + +&aes1_target { + status = "disabled"; +}; + +&aes2_target { + status = "disabled"; +}; -- GitLab From 9836720911cfec25d3fbdead1c438bf87e0f2841 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 22 Nov 2020 04:36:52 +0900 Subject: [PATCH 0352/1894] ARC: build: remove non-existing bootpImage from KBUILD_IMAGE The deb-pkg builds for ARCH=arc fail. $ export CROSS_COMPILE= $ make -s ARCH=arc defconfig $ make ARCH=arc bindeb-pkg SORTTAB vmlinux SYSMAP System.map MODPOST Module.symvers make KERNELRELEASE=5.10.0-rc4 ARCH=arc KBUILD_BUILD_VERSION=2 -f ./Makefile intdeb-pkg sh ./scripts/package/builddeb cp: cannot stat 'arch/arc/boot/bootpImage': No such file or directory make[4]: *** [scripts/Makefile.package:87: intdeb-pkg] Error 1 make[3]: *** [Makefile:1527: intdeb-pkg] Error 2 make[2]: *** [debian/rules:13: binary-arch] Error 2 dpkg-buildpackage: error: debian/rules binary subprocess returned exit status 2 make[1]: *** [scripts/Makefile.package:83: bindeb-pkg] Error 2 make: *** [Makefile:1527: bindeb-pkg] Error 2 The reason is obvious; arch/arc/Makefile sets $(boot)/bootpImage as the default image, but there is no rule to build it. Remove the meaningless KBUILD_IMAGE assignment so it will fallback to the default vmlinux. With this change, you can build the deb package. I removed the 'bootpImage' target as well. At best, it provides 'make bootpImage' as an alias of 'make vmlinux', but I do not see much sense in doing so. Signed-off-by: Masahiro Yamada Signed-off-by: Vineet Gupta --- arch/arc/Makefile | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 0c6bf0d1df7ad..acf99420e161d 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -102,12 +102,6 @@ libs-y += arch/arc/lib/ $(LIBGCC) boot := arch/arc/boot -#default target for make without any arguments. -KBUILD_IMAGE := $(boot)/bootpImage - -all: bootpImage -bootpImage: vmlinux - boot_targets += uImage uImage.bin uImage.gz $(boot_targets): vmlinux -- GitLab From f2712ec76a5433e5ec9def2bd52a95df1f96d050 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 22 Nov 2020 04:36:53 +0900 Subject: [PATCH 0353/1894] ARC: build: add uImage.lzma to the top-level target arch/arc/boot/Makefile supports uImage.lzma, but you cannot do 'make uImage.lzma' because the corresponding target is missing in arch/arc/Makefile. Add it. I also changed the assignment operator '+=' to ':=' since this is the only place where we expect this variable to be set. Signed-off-by: Masahiro Yamada Signed-off-by: Vineet Gupta --- arch/arc/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/Makefile b/arch/arc/Makefile index acf99420e161d..61a41123ad4c4 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -102,7 +102,7 @@ libs-y += arch/arc/lib/ $(LIBGCC) boot := arch/arc/boot -boot_targets += uImage uImage.bin uImage.gz +boot_targets := uImage uImage.bin uImage.gz uImage.lzma $(boot_targets): vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ -- GitLab From 0cfccb3c04934cdef42ae26042139f16e805b5f7 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 22 Nov 2020 04:36:54 +0900 Subject: [PATCH 0354/1894] ARC: build: add boot_targets to PHONY The top-level boot_targets (uImage and uImage.*) should be phony targets. They just let Kbuild descend into arch/arc/boot/ and create files there. If a file exists in the top directory with the same name, the boot image will not be created. You can confirm it by the following steps: $ export CROSS_COMPILE= $ make -s ARCH=arc defconfig all # vmlinux will be built $ touch uImage.gz $ make ARCH=arc uImage.gz CALL scripts/atomic/check-atomics.sh CALL scripts/checksyscalls.sh CHK include/generated/compile.h # arch/arc/boot/uImage.gz is not created Specify the targets as PHONY to fix this. Signed-off-by: Masahiro Yamada Signed-off-by: Vineet Gupta --- arch/arc/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 61a41123ad4c4..cf9da9aea12ac 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -104,6 +104,7 @@ boot := arch/arc/boot boot_targets := uImage uImage.bin uImage.gz uImage.lzma +PHONY += $(boot_targets) $(boot_targets): vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ -- GitLab From c5e6ae563c802c4d828d42e134af64004db2e58c Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 22 Nov 2020 04:36:55 +0900 Subject: [PATCH 0355/1894] ARC: build: move symlink creation to arch/arc/Makefile to avoid race If you run 'make uImage uImage.gz' with the parallel option, uImage.gz will be created by two threads simultaneously. This is because arch/arc/Makefile does not specify the dependency between uImage and uImage.gz. Hence, GNU Make assumes they can be built in parallel. One thread descends into arch/arc/boot/ to create uImage, and another to create uImage.gz. Please notice the same log is displayed twice in the following steps: $ export CROSS_COMPILE= $ make -s ARCH=arc defconfig $ make -j$(nproc) ARCH=arc uImage uImage.gz [ snip ] LD vmlinux SORTTAB vmlinux SYSMAP System.map OBJCOPY arch/arc/boot/vmlinux.bin OBJCOPY arch/arc/boot/vmlinux.bin GZIP arch/arc/boot/vmlinux.bin.gz GZIP arch/arc/boot/vmlinux.bin.gz UIMAGE arch/arc/boot/uImage.gz UIMAGE arch/arc/boot/uImage.gz Image Name: Linux-5.10.0-rc4-00003-g62f23044 Created: Sun Nov 22 02:52:26 2020 Image Type: ARC Linux Kernel Image (gzip compressed) Data Size: 2109376 Bytes = 2059.94 KiB = 2.01 MiB Load Address: 80000000 Entry Point: 80004000 Image arch/arc/boot/uImage is ready Image Name: Linux-5.10.0-rc4-00003-g62f23044 Created: Sun Nov 22 02:52:26 2020 Image Type: ARC Linux Kernel Image (gzip compressed) Data Size: 2815455 Bytes = 2749.47 KiB = 2.69 MiB Load Address: 80000000 Entry Point: 80004000 This is a race between the two threads trying to write to the same file arch/arc/boot/uImage.gz. This is a potential problem that can generate a broken file. I fixed a similar problem for ARM by commit 3939f3345050 ("ARM: 8418/1: add boot image dependencies to not generate invalid images"). I highly recommend to avoid such build rules that cause a race condition. Move the uImage rule to arch/arc/Makefile. Another strangeness is that arch/arc/boot/Makefile compares the timestamps between $(obj)/uImage and $(obj)/uImage.*: $(obj)/uImage: $(obj)/uImage.$(suffix-y) @ln -sf $(notdir $<) $@ @echo ' Image $@ is ready' This does not work as expected since $(obj)/uImage is a symlink. The symlink should be created in a phony target rule. I used $(kecho) instead of echo to suppress the message 'Image arch/arc/boot/uImage is ready' when the -s option is given. Signed-off-by: Masahiro Yamada Signed-off-by: Vineet Gupta --- arch/arc/Makefile | 13 ++++++++++++- arch/arc/boot/Makefile | 11 +---------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/arch/arc/Makefile b/arch/arc/Makefile index cf9da9aea12ac..578bdbbb0fa7f 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -102,11 +102,22 @@ libs-y += arch/arc/lib/ $(LIBGCC) boot := arch/arc/boot -boot_targets := uImage uImage.bin uImage.gz uImage.lzma +boot_targets := uImage.bin uImage.gz uImage.lzma PHONY += $(boot_targets) $(boot_targets): vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ +uimage-default-y := uImage.bin +uimage-default-$(CONFIG_KERNEL_GZIP) := uImage.gz +uimage-default-$(CONFIG_KERNEL_LZMA) := uImage.lzma + +PHONY += uImage +uImage: $(uimage-default-y) + @ln -sf $< $(boot)/uImage + @$(kecho) ' Image $(boot)/uImage is ready' + +CLEAN_FILES += $(boot)/uImage + archclean: $(Q)$(MAKE) $(clean)=$(boot) diff --git a/arch/arc/boot/Makefile b/arch/arc/boot/Makefile index 538b92f4dd253..3b1f8a69a89ef 100644 --- a/arch/arc/boot/Makefile +++ b/arch/arc/boot/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -targets := vmlinux.bin vmlinux.bin.gz uImage +targets := vmlinux.bin vmlinux.bin.gz # uImage build relies on mkimage being availble on your host for ARC target # You will need to build u-boot for ARC, rename mkimage to arc-elf32-mkimage @@ -13,11 +13,6 @@ LINUX_START_TEXT = $$(readelf -h vmlinux | \ UIMAGE_LOADADDR = $(CONFIG_LINUX_LINK_BASE) UIMAGE_ENTRYADDR = $(LINUX_START_TEXT) -suffix-y := bin -suffix-$(CONFIG_KERNEL_GZIP) := gz -suffix-$(CONFIG_KERNEL_LZMA) := lzma - -targets += uImage targets += uImage.bin targets += uImage.gz targets += uImage.lzma @@ -42,7 +37,3 @@ $(obj)/uImage.gz: $(obj)/vmlinux.bin.gz FORCE $(obj)/uImage.lzma: $(obj)/vmlinux.bin.lzma FORCE $(call if_changed,uimage,lzma) - -$(obj)/uImage: $(obj)/uImage.$(suffix-y) - @ln -sf $(notdir $<) $@ - @echo ' Image $@ is ready' -- GitLab From a4e070cfeb9d4961a169a2f1a614665cf51de963 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 22 Nov 2020 04:36:56 +0900 Subject: [PATCH 0356/1894] ARC: build: remove unneeded extra-y Adding vmlinux.* to extra-y has no point because we expect they are built on demand while building uImage.* Add them to 'targets' is enough to include the corresponding .cmd file. Signed-off-by: Masahiro Yamada Signed-off-by: Vineet Gupta --- arch/arc/boot/Makefile | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/arc/boot/Makefile b/arch/arc/boot/Makefile index 3b1f8a69a89ef..b3870cc100bf8 100644 --- a/arch/arc/boot/Makefile +++ b/arch/arc/boot/Makefile @@ -1,5 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -targets := vmlinux.bin vmlinux.bin.gz # uImage build relies on mkimage being availble on your host for ARC target # You will need to build u-boot for ARC, rename mkimage to arc-elf32-mkimage @@ -13,12 +12,12 @@ LINUX_START_TEXT = $$(readelf -h vmlinux | \ UIMAGE_LOADADDR = $(CONFIG_LINUX_LINK_BASE) UIMAGE_ENTRYADDR = $(LINUX_START_TEXT) +targets += vmlinux.bin +targets += vmlinux.bin.gz +targets += vmlinux.bin.lzma targets += uImage.bin targets += uImage.gz targets += uImage.lzma -extra-y += vmlinux.bin -extra-y += vmlinux.bin.gz -extra-y += vmlinux.bin.lzma $(obj)/vmlinux.bin: vmlinux FORCE $(call if_changed,objcopy) -- GitLab From 3a71e423133a4b1166ffafcb4a7cfa87ddecb910 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 22 Nov 2020 04:36:57 +0900 Subject: [PATCH 0357/1894] ARC: build: use $(READELF) instead of hard-coded readelf The top Makefile defines READELF as the readelf in the cross-toolchains. Use it rather than the host readelf. Signed-off-by: Masahiro Yamada Signed-off-by: Vineet Gupta --- arch/arc/boot/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/boot/Makefile b/arch/arc/boot/Makefile index b3870cc100bf8..5648748c285f5 100644 --- a/arch/arc/boot/Makefile +++ b/arch/arc/boot/Makefile @@ -6,7 +6,7 @@ OBJCOPYFLAGS= -O binary -R .note -R .note.gnu.build-id -R .comment -S -LINUX_START_TEXT = $$(readelf -h vmlinux | \ +LINUX_START_TEXT = $$($(READELF) -h vmlinux | \ grep "Entry point address" | grep -o 0x.*) UIMAGE_LOADADDR = $(CONFIG_LINUX_LINK_BASE) -- GitLab From 1967939462641d8b36bcb3fcf06d48e66cd67a4f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 29 Nov 2020 04:33:35 +0900 Subject: [PATCH 0358/1894] Compiler Attributes: remove CONFIG_ENABLE_MUST_CHECK Revert commit cebc04ba9aeb ("add CONFIG_ENABLE_MUST_CHECK"). A lot of warn_unused_result warnings existed in 2006, but until now they have been fixed thanks to people doing allmodconfig tests. Our goal is to always enable __must_check where appropriate, so this CONFIG option is no longer needed. I see a lot of defconfig (arch/*/configs/*_defconfig) files having: # CONFIG_ENABLE_MUST_CHECK is not set I did not touch them for now since it would be a big churn. If arch maintainers want to clean them up, please go ahead. While I was here, I also moved __must_check to compiler_attributes.h from compiler_types.h Signed-off-by: Masahiro Yamada Acked-by: Jason A. Donenfeld Acked-by: Nathan Chancellor Reviewed-by: Nick Desaulniers [Moved addition in compiler_attributes.h to keep it sorted] Signed-off-by: Miguel Ojeda --- include/linux/compiler_attributes.h | 6 ++++++ include/linux/compiler_types.h | 6 ------ lib/Kconfig.debug | 8 -------- tools/testing/selftests/wireguard/qemu/debug.config | 1 - 4 files changed, 6 insertions(+), 15 deletions(-) diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index b2a3f4f641a70..ea5e04e75845c 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -272,6 +272,12 @@ */ #define __used __attribute__((__used__)) +/* + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-warn_005funused_005fresult-function-attribute + * clang: https://clang.llvm.org/docs/AttributeReference.html#nodiscard-warn-unused-result + */ +#define __must_check __attribute__((__warn_unused_result__)) + /* * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-weak-function-attribute * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-weak-variable-attribute diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index ac3fa37a84f94..7ef20d1a6c286 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -110,12 +110,6 @@ struct ftrace_likely_data { unsigned long constant; }; -#ifdef CONFIG_ENABLE_MUST_CHECK -#define __must_check __attribute__((__warn_unused_result__)) -#else -#define __must_check -#endif - #if defined(CC_USING_HOTPATCH) #define notrace __attribute__((hotpatch(0, 0))) #elif defined(CC_USING_PATCHABLE_FUNCTION_ENTRY) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index c789b39ed5271..cb8ef4fd0d021 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -286,14 +286,6 @@ config GDB_SCRIPTS endif # DEBUG_INFO -config ENABLE_MUST_CHECK - bool "Enable __must_check logic" - default y - help - Enable the __must_check logic in the kernel build. Disable this to - suppress the "warning: ignoring return value of 'foo', declared with - attribute warn_unused_result" messages. - config FRAME_WARN int "Warn for stack frames larger than" range 0 8192 diff --git a/tools/testing/selftests/wireguard/qemu/debug.config b/tools/testing/selftests/wireguard/qemu/debug.config index b50c2085c1ac0..fe07d97df9fa8 100644 --- a/tools/testing/selftests/wireguard/qemu/debug.config +++ b/tools/testing/selftests/wireguard/qemu/debug.config @@ -1,5 +1,4 @@ CONFIG_LOCALVERSION="-debug" -CONFIG_ENABLE_MUST_CHECK=y CONFIG_FRAME_POINTER=y CONFIG_STACK_VALIDATION=y CONFIG_DEBUG_KERNEL=y -- GitLab From 50a4952fd67b7f7f551e82ac07c51c1a7a74d474 Mon Sep 17 00:00:00 2001 From: Alexander Lochmann Date: Thu, 15 Oct 2020 15:24:52 +0200 Subject: [PATCH 0359/1894] Updated locking documentation for transaction_t We used LockDoc to derive locking rules for each member of struct transaction_t. Based on those results, we extended the existing documentation by more members of struct transaction_t, and updated the existing documentation. Link: https://lore.kernel.org/r/10cfbef1-994c-c604-f8a6-b1042fcc622f@tu-dortmund.de Signed-off-by: Alexander Lochmann Signed-off-by: Horst Schirmeier Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 578ff196b3cef..d2a4860feb72f 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -538,6 +538,7 @@ struct transaction_chp_stats_s { * The transaction keeps track of all of the buffers modified by a * running transaction, and all of the buffers committed but not yet * flushed to home for finished transactions. + * (Locking Documentation improved by LockDoc) */ /* @@ -658,12 +659,12 @@ struct transaction_s unsigned long t_start; /* - * When commit was requested + * When commit was requested [j_state_lock] */ unsigned long t_requested; /* - * Checkpointing stats [j_checkpoint_sem] + * Checkpointing stats [j_list_lock] */ struct transaction_chp_stats_s t_chp_stats; -- GitLab From 7b721e6d334c9699fcd94553a7fe073ec717d926 Mon Sep 17 00:00:00 2001 From: Kaixu Xia Date: Tue, 20 Oct 2020 19:41:09 +0800 Subject: [PATCH 0360/1894] ext4: remove redundant operation that set bh to NULL The out_fail branch path don't release the bh and the second bh is valid only in the for statement, so we don't need to set them to NULL. Signed-off-by: Kaixu Xia Reviewed-by: zhangyi (F) Link: https://lore.kernel.org/r/1603194069-17557-1-git-send-email-kaixuxia@tencent.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 94472044f4c1d..40e7aebaf3d0c 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4074,7 +4074,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (IS_ERR(bh)) { ext4_msg(sb, KERN_ERR, "unable to read superblock"); ret = PTR_ERR(bh); - bh = NULL; goto out_fail; } /* @@ -4703,7 +4702,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) "can't read group descriptor %d", i); db_count = i; ret = PTR_ERR(bh); - bh = NULL; goto failed_mount2; } rcu_read_lock(); -- GitLab From 46bac5352929f75c1ec0c2395b06dcbc0fbaee69 Mon Sep 17 00:00:00 2001 From: Xianting Tian Date: Tue, 20 Oct 2020 16:22:01 +0800 Subject: [PATCH 0361/1894] ext4: remove the null check of bio_vec page bv_page can't be NULL in a valid bio_vec, so we can remove the NULL check, as we did in other places when calling bio_for_each_segment_all() to go through all bio_vec of a bio. Reviewed-by: Jan Kara Signed-off-by: Xianting Tian Link: https://lore.kernel.org/r/20201020082201.34257-1-tian.xianting@h3c.com Signed-off-by: Theodore Ts'o --- fs/ext4/page-io.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index defd2e10dfd10..cb135a94482dd 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -111,9 +111,6 @@ static void ext4_finish_bio(struct bio *bio) unsigned under_io = 0; unsigned long flags; - if (!page) - continue; - if (fscrypt_is_bounce_page(page)) { bounce_page = page; page = fscrypt_pagecache_page(bounce_page); -- GitLab From face525ecb30b3d7e35be21911a933980ab504f9 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 21 Oct 2020 14:23:26 +0100 Subject: [PATCH 0362/1894] ext4: remove redundant assignment of variable ex Variable ex is assigned a variable that is not being read, the assignment is redundant and can be removed. Addresses-Coverity: ("Unused value") Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20201021132326.148052-1-colin.king@canonical.com Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 17d7096b3212d..90ba74c146949 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -5988,7 +5988,6 @@ int ext4_ext_replay_set_iblocks(struct inode *inode) kfree(path); break; } - ex = path2[path2->p_depth].p_ext; for (i = 0; i <= max(path->p_depth, path2->p_depth); i++) { cmp1 = cmp2 = 0; if (i <= path->p_depth) -- GitLab From f177ee0882af031b3d7a1e66e1639a58c7932dee Mon Sep 17 00:00:00 2001 From: Roman Anufriev Date: Thu, 22 Oct 2020 06:20:59 +0300 Subject: [PATCH 0363/1894] ext4: add helpers for checking whether quota can be enabled/is journalled Right now, there are several places, where we check whether fs is capable of enabling quota or if quota is journalled with quite long and non-self-descriptive condition statements. This patch wraps these statements into helpers for better readability and easier usage. Link: https://lore.kernel.org/r/1603336860-16153-1-git-send-email-dotdot@yandex-team.ru Reviewed-by: Jan Kara Signed-off-by: Roman Anufriev Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 15 +++++++++++++++ fs/ext4/ext4_jbd2.h | 9 +++------ fs/ext4/super.c | 5 +---- 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 65ecaf96d0a4d..fb4b36585ff69 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -3361,6 +3361,21 @@ static inline void ext4_unlock_group(struct super_block *sb, spin_unlock(ext4_group_lock_ptr(sb, group)); } +#ifdef CONFIG_QUOTA +static inline bool ext4_quota_capable(struct super_block *sb) +{ + return (test_opt(sb, QUOTA) || ext4_has_feature_quota(sb)); +} + +static inline bool ext4_is_quota_journalled(struct super_block *sb) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + + return (ext4_has_feature_quota(sb) || + sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]); +} +#endif + /* * Block validity checking */ diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 00dc668e052b3..a124c68b0c75e 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -86,17 +86,14 @@ #ifdef CONFIG_QUOTA /* Amount of blocks needed for quota update - we know that the structure was * allocated so we need to update only data block */ -#define EXT4_QUOTA_TRANS_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\ - ext4_has_feature_quota(sb)) ? 1 : 0) +#define EXT4_QUOTA_TRANS_BLOCKS(sb) ((ext4_quota_capable(sb)) ? 1 : 0) /* Amount of blocks needed for quota insert/delete - we do some block writes * but inode, sb and group updates are done only once */ -#define EXT4_QUOTA_INIT_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\ - ext4_has_feature_quota(sb)) ?\ +#define EXT4_QUOTA_INIT_BLOCKS(sb) ((ext4_quota_capable(sb)) ?\ (DQUOT_INIT_ALLOC*(EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)\ +3+DQUOT_INIT_REWRITE) : 0) -#define EXT4_QUOTA_DEL_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\ - ext4_has_feature_quota(sb)) ?\ +#define EXT4_QUOTA_DEL_BLOCKS(sb) ((ext4_quota_capable(sb)) ?\ (DQUOT_DEL_ALLOC*(EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)\ +3+DQUOT_DEL_REWRITE) : 0) #else diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 40e7aebaf3d0c..d9e1946a634ef 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -6210,11 +6210,8 @@ static int ext4_release_dquot(struct dquot *dquot) static int ext4_mark_dquot_dirty(struct dquot *dquot) { struct super_block *sb = dquot->dq_sb; - struct ext4_sb_info *sbi = EXT4_SB(sb); - /* Are we journaling quotas? */ - if (ext4_has_feature_quota(sb) || - sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { + if (ext4_is_quota_journalled(sb)) { dquot_mark_dquot_dirty(dquot); return ext4_write_dquot(dquot); } else { -- GitLab From ca9b404ff137d67e559c5bd77533408bb0fa41dc Mon Sep 17 00:00:00 2001 From: Roman Anufriev Date: Thu, 22 Oct 2020 06:21:00 +0300 Subject: [PATCH 0364/1894] ext4: print quota journalling mode on (re-)mount Right now, it is hard to understand which quota journalling type is enabled: you need to be quite familiar with kernel code and trace it or really understand what different combinations of fs flags/mount options lead to. This patch adds printing of current quota jounalling mode on each mount/remount, thus making it easier to check it at a glance/in autotests. The semantics is similar to ext4 data journalling modes: * journalled - quota configured, journalling will be enabled * writeback - quota configured, journalling won't be enabled * none - quota isn't configured * disabled - kernel compiled without CONFIG_QUOTA feature Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/1603336860-16153-2-git-send-email-dotdot@yandex-team.ru Signed-off-by: Roman Anufriev Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index d9e1946a634ef..132adb2a2c01d 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4005,6 +4005,21 @@ static void ext4_set_resv_clusters(struct super_block *sb) atomic64_set(&sbi->s_resv_clusters, resv_clusters); } +static const char *ext4_quota_mode(struct super_block *sb) +{ +#ifdef CONFIG_QUOTA + if (!ext4_quota_capable(sb)) + return "none"; + + if (EXT4_SB(sb)->s_journal && ext4_is_quota_journalled(sb)) + return "journalled"; + else + return "writeback"; +#else + return "disabled"; +#endif +} + static int ext4_fill_super(struct super_block *sb, void *data, int silent) { struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev); @@ -5090,10 +5105,11 @@ no_journal: if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount")) ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. " - "Opts: %.*s%s%s", descr, + "Opts: %.*s%s%s. Quota mode: %s.", descr, (int) sizeof(sbi->s_es->s_mount_opts), sbi->s_es->s_mount_opts, - *sbi->s_es->s_mount_opts ? "; " : "", orig_data); + *sbi->s_es->s_mount_opts ? "; " : "", orig_data, + ext4_quota_mode(sb)); if (es->s_error_count) mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */ @@ -6031,7 +6047,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) */ *flags = (*flags & ~vfs_flags) | (sb->s_flags & vfs_flags); - ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data); + ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s. Quota mode: %s.", + orig_data, ext4_quota_mode(sb)); kfree(orig_data); return 0; -- GitLab From 837c23fbc1b812f814c75388b6b364349c02efd8 Mon Sep 17 00:00:00 2001 From: Chunguang Xu Date: Sat, 7 Nov 2020 23:58:11 +0800 Subject: [PATCH 0365/1894] ext4: use ASSERT() to replace J_ASSERT() There are currently multiple forms of assertion, such as J_ASSERT(). J_ASEERT() is provided for the jbd module, which is a public module. Maybe we should use custom ASSERT() like other file systems, such as xfs, which would be better. Signed-off-by: Chunguang Xu Reviewed-by: Andreas Dilger Link: https://lore.kernel.org/r/1604764698-4269-1-git-send-email-brookxu@tencent.com Signed-off-by: Theodore Ts'o --- fs/ext4/balloc.c | 2 +- fs/ext4/ext4.h | 10 ++++++++++ fs/ext4/fsync.c | 2 +- fs/ext4/indirect.c | 4 ++-- fs/ext4/inode.c | 10 +++++----- fs/ext4/namei.c | 12 ++++-------- fs/ext4/super.c | 2 +- 7 files changed, 24 insertions(+), 18 deletions(-) diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 1d640b1456375..f45f9feebe593 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -185,7 +185,7 @@ static int ext4_init_block_bitmap(struct super_block *sb, struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_fsblk_t start, tmp; - J_ASSERT_BH(bh, buffer_locked(bh)); + ASSERT(buffer_locked(bh)); /* If checksum is bad mark all blocks used to prevent allocation * essentially implementing a per-group read-only flag. */ diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index fb4b36585ff69..cd95965be3d17 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -98,6 +98,16 @@ #define ext_debug(ino, fmt, ...) no_printk(fmt, ##__VA_ARGS__) #endif +#define ASSERT(assert) \ +do { \ + if (unlikely(!(assert))) { \ + printk(KERN_EMERG \ + "Assertion failure in %s() at %s:%d: '%s'\n", \ + __func__, __FILE__, __LINE__, #assert); \ + BUG(); \ + } \ +} while (0) + /* data type for block offset of block group */ typedef int ext4_grpblk_t; diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index a42ca95840f28..113bfb023a4a0 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -136,7 +136,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) if (unlikely(ext4_forced_shutdown(sbi))) return -EIO; - J_ASSERT(ext4_journal_current_handle() == NULL); + ASSERT(ext4_journal_current_handle() == NULL); trace_ext4_sync_file_enter(file, datasync); diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 05efa682bc2f9..1223a18c3ff9a 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -534,8 +534,8 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, ext4_fsblk_t first_block = 0; trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); - J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))); - J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); + ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))); + ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); depth = ext4_block_to_path(inode, map->m_lblk, offsets, &blocks_to_boundary); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 0d8385aea8981..b147c2e20469b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -830,8 +830,8 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, int create = map_flags & EXT4_GET_BLOCKS_CREATE; int err; - J_ASSERT((EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) - || handle != NULL || create == 0); + ASSERT((EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) + || handle != NULL || create == 0); map.m_lblk = block; map.m_len = 1; @@ -846,9 +846,9 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, if (unlikely(!bh)) return ERR_PTR(-ENOMEM); if (map.m_flags & EXT4_MAP_NEW) { - J_ASSERT(create != 0); - J_ASSERT((EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) - || (handle != NULL)); + ASSERT(create != 0); + ASSERT((EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) + || (handle != NULL)); /* * Now that we do not always journal data, we should diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 33509266f5a00..7a890ff214f1a 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -182,10 +182,6 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, return bh; } -#ifndef assert -#define assert(test) J_ASSERT(test) -#endif - #ifdef DX_DEBUG #define dxtrace(command) command #else @@ -849,7 +845,7 @@ dx_probe(struct ext4_filename *fname, struct inode *dir, break; } } - assert (at == p - 1); + ASSERT(at == p - 1); } at = p - 1; @@ -1265,8 +1261,8 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block) struct dx_entry *old = frame->at, *new = old + 1; int count = dx_get_count(entries); - assert(count < dx_get_limit(entries)); - assert(old < entries + count); + ASSERT(count < dx_get_limit(entries)); + ASSERT(old < entries + count); memmove(new + 1, new, (char *)(entries + count) - (char *)(new)); dx_set_hash(new, hash); dx_set_block(new, block); @@ -2961,7 +2957,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) * hold i_mutex, or the inode can not be referenced from outside, * so i_nlink should not be bumped due to race */ - J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); BUFFER_TRACE(sbi->s_sbh, "get_write_access"); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 132adb2a2c01d..f86220a8df50a 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1240,7 +1240,7 @@ static void ext4_put_super(struct super_block *sb) * in-memory list had better be clean by this point. */ if (!list_empty(&sbi->s_orphan)) dump_orphan_list(sb, sbi); - J_ASSERT(list_empty(&sbi->s_orphan)); + ASSERT(list_empty(&sbi->s_orphan)); sync_blockdev(sb->s_bdev); invalidate_bdev(sb->s_bdev); -- GitLab From 6bd97bf273bdb4944904e57480f6545bca48ad77 Mon Sep 17 00:00:00 2001 From: Chunguang Xu Date: Sat, 7 Nov 2020 23:58:12 +0800 Subject: [PATCH 0366/1894] ext4: remove redundant mb_regenerate_buddy() After this patch (163a203), if an abnormal bitmap is detected, we will mark the group as corrupt, and we will not use this group in the future. Therefore, it should be meaningless to regenerate the buddy bitmap of this group, It might be better to delete it. Signed-off-by: Chunguang Xu Reviewed-by: Andreas Dilger Link: https://lore.kernel.org/r/1604764698-4269-2-git-send-email-brookxu@tencent.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 24af9ed5c3e52..5b745551e7a96 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -822,24 +822,6 @@ void ext4_mb_generate_buddy(struct super_block *sb, spin_unlock(&sbi->s_bal_lock); } -static void mb_regenerate_buddy(struct ext4_buddy *e4b) -{ - int count; - int order = 1; - void *buddy; - - while ((buddy = mb_find_buddy(e4b, order++, &count))) { - ext4_set_bits(buddy, 0, count); - } - e4b->bd_info->bb_fragments = 0; - memset(e4b->bd_info->bb_counters, 0, - sizeof(*e4b->bd_info->bb_counters) * - (e4b->bd_sb->s_blocksize_bits + 2)); - - ext4_mb_generate_buddy(e4b->bd_sb, e4b->bd_buddy, - e4b->bd_bitmap, e4b->bd_group); -} - /* The buddy information is attached the buddy cache inode * for convenience. The information regarding each group * is loaded via ext4_mb_load_buddy. The information involve @@ -1512,7 +1494,6 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, sb, e4b->bd_group, EXT4_GROUP_INFO_BBITMAP_CORRUPT); } - mb_regenerate_buddy(e4b); goto done; } -- GitLab From ce3cca337401123c9cf96896fc4e1657bb016bd3 Mon Sep 17 00:00:00 2001 From: Chunguang Xu Date: Sat, 7 Nov 2020 23:58:13 +0800 Subject: [PATCH 0367/1894] ext4: simplify the code of mb_find_order_for_block The code of mb_find_order_for_block is a bit obscure, but we can simplify it with mb_find_buddy(), make the code more concise. Signed-off-by: Chunguang Xu Reviewed-by: Andreas Dilger Link: https://lore.kernel.org/r/1604764698-4269-3-git-send-email-brookxu@tencent.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 5b745551e7a96..29dfeb043050c 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1289,22 +1289,18 @@ static void ext4_mb_unload_buddy(struct ext4_buddy *e4b) static int mb_find_order_for_block(struct ext4_buddy *e4b, int block) { - int order = 1; - int bb_incr = 1 << (e4b->bd_blkbits - 1); + int order = 1, max; void *bb; BUG_ON(e4b->bd_bitmap == e4b->bd_buddy); BUG_ON(block >= (1 << (e4b->bd_blkbits + 3))); - bb = e4b->bd_buddy; while (order <= e4b->bd_blkbits + 1) { - block = block >> 1; - if (!mb_test_bit(block, bb)) { + bb = mb_find_buddy(e4b, order, &max); + if (!mb_test_bit(block >> order, bb)) { /* this block is part of buddy of order 'order' */ return order; } - bb += bb_incr; - bb_incr >>= 1; order++; } return 0; -- GitLab From dee734a7de9169018b8108208587d3ff1fdfff18 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 30 Nov 2020 09:39:59 -0500 Subject: [PATCH 0368/1894] KVM: x86: adjust SEV for commit 7e8e6eed75e Since the ASID is now stored in svm->asid, pre_sev_run should also place it there and not directly in the VMCB control area. Reported-by: Ashish Kalra Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index c0b14106258a7..3418bb18dae7f 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1187,7 +1187,7 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu) int asid = sev_get_asid(svm->vcpu.kvm); /* Assign the asid allocated with this SEV guest */ - svm->vmcb->control.asid = asid; + svm->asid = asid; /* * Flush guest TLB: -- GitLab From 94558543213ae8c83be5d01b83c1fe7530e8a1a0 Mon Sep 17 00:00:00 2001 From: Keqian Zhu Date: Mon, 17 Aug 2020 19:07:27 +0800 Subject: [PATCH 0369/1894] KVM: arm64: Some fixes of PV-time interface document Rename PV_FEATURES to PV_TIME_FEATURES. Signed-off-by: Keqian Zhu Signed-off-by: Marc Zyngier Reviewed-by: Andrew Jones Reviewed-by: Steven Price Link: https://lore.kernel.org/r/20200817110728.12196-2-zhukeqian1@huawei.com --- Documentation/virt/kvm/arm/pvtime.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/virt/kvm/arm/pvtime.rst b/Documentation/virt/kvm/arm/pvtime.rst index 687b60d76ca90..392521af7c905 100644 --- a/Documentation/virt/kvm/arm/pvtime.rst +++ b/Documentation/virt/kvm/arm/pvtime.rst @@ -19,8 +19,8 @@ Two new SMCCC compatible hypercalls are defined: These are only available in the SMC64/HVC64 calling convention as paravirtualized time is not available to 32 bit Arm guests. The existence of -the PV_FEATURES hypercall should be probed using the SMCCC 1.1 ARCH_FEATURES -mechanism before calling it. +the PV_TIME_FEATURES hypercall should be probed using the SMCCC 1.1 +ARCH_FEATURES mechanism before calling it. PV_TIME_FEATURES ============= ======== ========== -- GitLab From 652d0b701d136ede6bc8a977b3abbe2d420226b9 Mon Sep 17 00:00:00 2001 From: Keqian Zhu Date: Mon, 17 Aug 2020 19:07:28 +0800 Subject: [PATCH 0370/1894] KVM: arm64: Use kvm_write_guest_lock when init stolen time There is a lock version kvm_write_guest. Use it to simplify code. Signed-off-by: Keqian Zhu Signed-off-by: Marc Zyngier Reviewed-by: Andrew Jones Reviewed-by: Steven Price Link: https://lore.kernel.org/r/20200817110728.12196-3-zhukeqian1@huawei.com --- arch/arm64/kvm/pvtime.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/arm64/kvm/pvtime.c b/arch/arm64/kvm/pvtime.c index 920ac43077ad3..78a09f7a66373 100644 --- a/arch/arm64/kvm/pvtime.c +++ b/arch/arm64/kvm/pvtime.c @@ -53,7 +53,6 @@ gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu) struct pvclock_vcpu_stolen_time init_values = {}; struct kvm *kvm = vcpu->kvm; u64 base = vcpu->arch.steal.base; - int idx; if (base == GPA_INVALID) return base; @@ -63,10 +62,7 @@ gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu) * the feature enabled. */ vcpu->arch.steal.last_steal = current->sched_info.run_delay; - - idx = srcu_read_lock(&kvm->srcu); - kvm_write_guest(kvm, base, &init_values, sizeof(init_values)); - srcu_read_unlock(&kvm->srcu, idx); + kvm_write_guest_lock(kvm, base, &init_values, sizeof(init_values)); return base; } -- GitLab From 9c7957991e56291c59803cf0412127ae7177beac Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 20 Nov 2020 21:11:06 +0100 Subject: [PATCH 0371/1894] rtc: enable RTC framework on ARCH=um There's no real reason it should be disabled, and at least we can use it for development & testing with the RTC test driver. However, two devices are missing a HAS_IOMEM dependency, so add that to avoid build failures from e.g. allyesconfig. Signed-off-by: Johannes Berg Signed-off-by: Alexandre Belloni Acked-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201120211103.6895ac740d11.Ic19a9926e8e4c70c03329e55f9e5b1d45095b904@changeid --- drivers/rtc/Kconfig | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 4d2c5d1f75cce..172b4117ace5f 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -13,7 +13,7 @@ config RTC_MC146818_LIB menuconfig RTC_CLASS bool "Real Time Clock" default n - depends on !S390 && !UML + depends on !S390 select RTC_LIB help Generic RTC class support. If you say yes here, you will @@ -1019,6 +1019,7 @@ config RTC_DRV_DS1553 config RTC_DRV_DS1685_FAMILY tristate "Dallas/Maxim DS1685 Family" + depends on HAS_IOMEM help If you say yes here you get support for the Dallas/Maxim DS1685 family of real time chips. This family includes the DS1685/DS1687, @@ -1152,6 +1153,7 @@ config RTC_DRV_STK17TA8 config RTC_DRV_M48T86 tristate "ST M48T86/Dallas DS12887" + depends on HAS_IOMEM help If you say Y here you will get support for the ST M48T86 and Dallas DS12887 RTC chips. -- GitLab From 0020868f2a7037e87d6b3b196526de2fb885830d Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Sat, 21 Nov 2020 23:45:29 +0100 Subject: [PATCH 0372/1894] rtc: mxc{,_v2}: enable COMPILE_TEST Extend code coverage for the rtc-mxc and rtc-mxc-v2 drivers. Signed-off-by: Alexandre Belloni Reviewed-by: Fabio Estevam Link: https://lore.kernel.org/r/20201121224529.568237-1-alexandre.belloni@bootlin.com --- drivers/rtc/Kconfig | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 172b4117ace5f..6123f9f4fbc90 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -1756,7 +1756,9 @@ config RTC_DRV_LOONGSON1 config RTC_DRV_MXC tristate "Freescale MXC Real Time Clock" - depends on ARCH_MXC + depends on ARCH_MXC || COMPILE_TEST + depends on HAS_IOMEM + depends on OF help If you say yes here you get support for the Freescale MXC RTC module. @@ -1766,7 +1768,9 @@ config RTC_DRV_MXC config RTC_DRV_MXC_V2 tristate "Freescale MXC Real Time Clock for i.MX53" - depends on ARCH_MXC + depends on ARCH_MXC || COMPILE_TEST + depends on HAS_IOMEM + depends on OF help If you say yes here you get support for the Freescale MXC SRTC module in i.MX53 processor. -- GitLab From 1ae20eb1eca733c640f6d478bbd57c1d814b6064 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Sun, 22 Nov 2020 00:06:44 +0100 Subject: [PATCH 0373/1894] rtc: test: remove debug message Remove leftover debug message Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201121230644.572419-1-alexandre.belloni@bootlin.com --- drivers/rtc/rtc-test.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/rtc/rtc-test.c b/drivers/rtc/rtc-test.c index b092a16485139..7e0d8fb264655 100644 --- a/drivers/rtc/rtc-test.c +++ b/drivers/rtc/rtc-test.c @@ -50,7 +50,6 @@ static int test_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) if (expires > U32_MAX) expires = U32_MAX; - pr_err("ABE: %s +%d %s\n", __FILE__, __LINE__, __func__); rtd->alarm.expires = expires; if (alrm->enabled) -- GitLab From 00c33482bb6110bce8110daa351f9b3baf4df7dc Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Tue, 1 Dec 2020 12:55:07 +0300 Subject: [PATCH 0374/1894] rtc: ep93xx: Fix NULL pointer dereference in ep93xx_rtc_read_time Mismatch in probe platform_set_drvdata set's and method's that call dev_get_platdata will result in "Unable to handle kernel NULL pointer dereference", let's use according method for getting driver data after platform_set_drvdata. 8<--- cut here --- Unable to handle kernel NULL pointer dereference at virtual address 00000000 pgd = (ptrval) [00000000] *pgd=00000000 Internal error: Oops: 5 [#1] ARM Modules linked in: CPU: 0 PID: 1 Comm: swapper Not tainted 5.9.10-00003-g723e101e0037-dirty #4 Hardware name: Technologic Systems TS-72xx SBC PC is at ep93xx_rtc_read_time+0xc/0x2c LR is at __rtc_read_time+0x4c/0x8c [...] [] (ep93xx_rtc_read_time) from [] (__rtc_read_time+0x4c/0x8c) [] (__rtc_read_time) from [] (rtc_read_time+0x2c/0x4c) [] (rtc_read_time) from [] (__rtc_read_alarm+0x28/0x358) [] (__rtc_read_alarm) from [] (__rtc_register_device+0x124/0x2ec) [] (__rtc_register_device) from [] (ep93xx_rtc_probe+0xa4/0xac) [] (ep93xx_rtc_probe) from [] (platform_drv_probe+0x24/0x5c) [] (platform_drv_probe) from [] (really_probe+0x218/0x374) [] (really_probe) from [] (device_driver_attach+0x44/0x60) [] (device_driver_attach) from [] (__driver_attach+0xb4/0xc0) [] (__driver_attach) from [] (bus_for_each_dev+0x68/0xac) [] (bus_for_each_dev) from [] (driver_attach+0x18/0x24) [] (driver_attach) from [] (bus_add_driver+0x150/0x1b4) [] (bus_add_driver) from [] (driver_register+0xb0/0xf4) [] (driver_register) from [] (__platform_driver_register+0x30/0x48) [] (__platform_driver_register) from [] (ep93xx_rtc_driver_init+0x10/0x1c) [] (ep93xx_rtc_driver_init) from [] (do_one_initcall+0x7c/0x1c0) [] (do_one_initcall) from [] (kernel_init_freeable+0x168/0x1ac) [] (kernel_init_freeable) from [] (kernel_init+0x8/0xf4) [] (kernel_init) from [] (ret_from_fork+0x14/0x34) Exception stack(0xc441dfb0 to 0xc441dff8) dfa0: 00000000 00000000 00000000 00000000 dfc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 dfe0: 00000000 00000000 00000000 00000000 00000013 00000000 Code: e12fff1e e92d4010 e590303c e1a02001 (e5933000) ---[ end trace c914d6030eaa95c8 ]--- Fixes: b809d192eb98 ("rtc: ep93xx: stop setting platform_data") Signed-off-by: Nikita Shubin Signed-off-by: Alexandre Belloni Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20201201095507.10317-1-nikita.shubin@maquefel.me --- drivers/rtc/rtc-ep93xx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/rtc/rtc-ep93xx.c b/drivers/rtc/rtc-ep93xx.c index 9a5a15cbcd9b7..acae7f16808f7 100644 --- a/drivers/rtc/rtc-ep93xx.c +++ b/drivers/rtc/rtc-ep93xx.c @@ -33,7 +33,7 @@ struct ep93xx_rtc { static int ep93xx_rtc_get_swcomp(struct device *dev, unsigned short *preload, unsigned short *delete) { - struct ep93xx_rtc *ep93xx_rtc = dev_get_platdata(dev); + struct ep93xx_rtc *ep93xx_rtc = dev_get_drvdata(dev); unsigned long comp; comp = readl(ep93xx_rtc->mmio_base + EP93XX_RTC_SWCOMP); @@ -51,7 +51,7 @@ static int ep93xx_rtc_get_swcomp(struct device *dev, unsigned short *preload, static int ep93xx_rtc_read_time(struct device *dev, struct rtc_time *tm) { - struct ep93xx_rtc *ep93xx_rtc = dev_get_platdata(dev); + struct ep93xx_rtc *ep93xx_rtc = dev_get_drvdata(dev); unsigned long time; time = readl(ep93xx_rtc->mmio_base + EP93XX_RTC_DATA); @@ -62,7 +62,7 @@ static int ep93xx_rtc_read_time(struct device *dev, struct rtc_time *tm) static int ep93xx_rtc_set_time(struct device *dev, struct rtc_time *tm) { - struct ep93xx_rtc *ep93xx_rtc = dev_get_platdata(dev); + struct ep93xx_rtc *ep93xx_rtc = dev_get_drvdata(dev); unsigned long secs = rtc_tm_to_time64(tm); writel(secs + 1, ep93xx_rtc->mmio_base + EP93XX_RTC_LOAD); -- GitLab From 31b16d978f902bd9ac7fdc20738f67e39959cd5c Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 2 Dec 2020 12:13:17 +0100 Subject: [PATCH 0375/1894] rtc: s3c: Disable all enable (RTC, tick) bits in the probe Bootloader might use RTC hardware and leave it in the enabled state. Ensure that the potentially enabled periodic tick interrupts are disabled before enabling the driver, because they might cause lockup if tick interrupt happens after disabling RTC gate clock. Signed-off-by: Marek Szyprowski Signed-off-by: Alexandre Belloni Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20201202111318.5353-1-m.szyprowski@samsung.com --- drivers/rtc/rtc-s3c.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index 24a41909f049f..16a326e95d46e 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c @@ -503,6 +503,10 @@ static int s3c_rtc_probe(struct platform_device *pdev) goto err_src_clk; } + /* disable RTC enable bits potentially set by the bootloader */ + if (info->data->disable) + info->data->disable(info); + /* check to see if everything is setup correctly */ if (info->data->enable) info->data->enable(info); -- GitLab From ce9af89392024f57247187afc345991b784f9bae Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 2 Dec 2020 12:13:18 +0100 Subject: [PATCH 0376/1894] rtc: s3c: Remove dead code related to periodic tick handling Support for periodic tick interrupts has been moved from the RTC class to the HR-timers long time ago. Then it has been removed from this driver by commits 80d4bb515b78 ("RTC: Cleanup rtc_class_ops->irq_set_state") and 696160fec162 ("RTC: Cleanup rtc_class_ops->irq_set_freq()"). They however did not remove all the code related to the tick handling. Do it now then. Signed-off-by: Marek Szyprowski Signed-off-by: Alexandre Belloni Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20201202111318.5353-2-m.szyprowski@samsung.com --- drivers/rtc/rtc-s3c.c | 226 +----------------------------------------- 1 file changed, 2 insertions(+), 224 deletions(-) diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index 16a326e95d46e..fab326ba9cece 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c @@ -42,26 +42,15 @@ struct s3c_rtc { const struct s3c_rtc_data *data; int irq_alarm; - int irq_tick; - - spinlock_t pie_lock; spinlock_t alarm_lock; - int ticnt_save; - int ticnt_en_save; bool wake_en; }; struct s3c_rtc_data { - int max_user_freq; bool needs_src_clk; void (*irq_handler) (struct s3c_rtc *info, int mask); - void (*set_freq) (struct s3c_rtc *info, int freq); - void (*enable_tick) (struct s3c_rtc *info, struct seq_file *seq); - void (*select_tick_clk) (struct s3c_rtc *info); - void (*save_tick_cnt) (struct s3c_rtc *info); - void (*restore_tick_cnt) (struct s3c_rtc *info); void (*enable) (struct s3c_rtc *info); void (*disable) (struct s3c_rtc *info); }; @@ -91,17 +80,7 @@ static void s3c_rtc_disable_clk(struct s3c_rtc *info) clk_disable(info->rtc_clk); } -/* IRQ Handlers */ -static irqreturn_t s3c_rtc_tickirq(int irq, void *id) -{ - struct s3c_rtc *info = (struct s3c_rtc *)id; - - if (info->data->irq_handler) - info->data->irq_handler(info, S3C2410_INTP_TIC); - - return IRQ_HANDLED; -} - +/* IRQ Handler */ static irqreturn_t s3c_rtc_alarmirq(int irq, void *id) { struct s3c_rtc *info = (struct s3c_rtc *)id; @@ -148,28 +127,6 @@ static int s3c_rtc_setaie(struct device *dev, unsigned int enabled) return ret; } -/* Set RTC frequency */ -static int s3c_rtc_setfreq(struct s3c_rtc *info, int freq) -{ - int ret; - - if (!is_power_of_2(freq)) - return -EINVAL; - - ret = s3c_rtc_enable_clk(info); - if (ret) - return ret; - spin_lock_irq(&info->pie_lock); - - if (info->data->set_freq) - info->data->set_freq(info, freq); - - spin_unlock_irq(&info->pie_lock); - s3c_rtc_disable_clk(info); - - return 0; -} - /* Time read/write */ static int s3c_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm) { @@ -348,29 +305,11 @@ static int s3c_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm) return 0; } -static int s3c_rtc_proc(struct device *dev, struct seq_file *seq) -{ - struct s3c_rtc *info = dev_get_drvdata(dev); - int ret; - - ret = s3c_rtc_enable_clk(info); - if (ret) - return ret; - - if (info->data->enable_tick) - info->data->enable_tick(info, seq); - - s3c_rtc_disable_clk(info); - - return 0; -} - static const struct rtc_class_ops s3c_rtcops = { .read_time = s3c_rtc_gettime, .set_time = s3c_rtc_settime, .read_alarm = s3c_rtc_getalarm, .set_alarm = s3c_rtc_setalarm, - .proc = s3c_rtc_proc, .alarm_irq_enable = s3c_rtc_setaie, }; @@ -450,18 +389,12 @@ static int s3c_rtc_probe(struct platform_device *pdev) if (!info) return -ENOMEM; - /* find the IRQs */ - info->irq_tick = platform_get_irq(pdev, 1); - if (info->irq_tick < 0) - return info->irq_tick; - info->dev = &pdev->dev; info->data = of_device_get_match_data(&pdev->dev); if (!info->data) { dev_err(&pdev->dev, "failed getting s3c_rtc_data\n"); return -EINVAL; } - spin_lock_init(&info->pie_lock); spin_lock_init(&info->alarm_lock); platform_set_drvdata(pdev, info); @@ -470,8 +403,7 @@ static int s3c_rtc_probe(struct platform_device *pdev) if (info->irq_alarm < 0) return info->irq_alarm; - dev_dbg(&pdev->dev, "s3c2410_rtc: tick irq %d, alarm irq %d\n", - info->irq_tick, info->irq_alarm); + dev_dbg(&pdev->dev, "s3c2410_rtc: alarm irq %d\n", info->irq_alarm); /* get the memory region */ info->base = devm_platform_ioremap_resource(pdev, 0); @@ -546,18 +478,6 @@ static int s3c_rtc_probe(struct platform_device *pdev) goto err_nortc; } - ret = devm_request_irq(&pdev->dev, info->irq_tick, s3c_rtc_tickirq, - 0, "s3c2410-rtc tick", info); - if (ret) { - dev_err(&pdev->dev, "IRQ%d error %d\n", info->irq_tick, ret); - goto err_nortc; - } - - if (info->data->select_tick_clk) - info->data->select_tick_clk(info); - - s3c_rtc_setfreq(info, 1); - s3c_rtc_disable_clk(info); return 0; @@ -585,10 +505,6 @@ static int s3c_rtc_suspend(struct device *dev) if (ret) return ret; - /* save TICNT for anyone using periodic interrupts */ - if (info->data->save_tick_cnt) - info->data->save_tick_cnt(info); - if (info->data->disable) info->data->disable(info); @@ -609,9 +525,6 @@ static int s3c_rtc_resume(struct device *dev) if (info->data->enable) info->data->enable(info); - if (info->data->restore_tick_cnt) - info->data->restore_tick_cnt(info); - s3c_rtc_disable_clk(info); if (device_may_wakeup(dev) && info->wake_en) { @@ -635,162 +548,27 @@ static void s3c6410_rtc_irq(struct s3c_rtc *info, int mask) writeb(mask, info->base + S3C2410_INTP); } -static void s3c2410_rtc_setfreq(struct s3c_rtc *info, int freq) -{ - unsigned int tmp = 0; - int val; - - tmp = readb(info->base + S3C2410_TICNT); - tmp &= S3C2410_TICNT_ENABLE; - - val = (info->rtc->max_user_freq / freq) - 1; - tmp |= val; - - writel(tmp, info->base + S3C2410_TICNT); -} - -static void s3c2416_rtc_setfreq(struct s3c_rtc *info, int freq) -{ - unsigned int tmp = 0; - int val; - - tmp = readb(info->base + S3C2410_TICNT); - tmp &= S3C2410_TICNT_ENABLE; - - val = (info->rtc->max_user_freq / freq) - 1; - - tmp |= S3C2443_TICNT_PART(val); - writel(S3C2443_TICNT1_PART(val), info->base + S3C2443_TICNT1); - - writel(S3C2416_TICNT2_PART(val), info->base + S3C2416_TICNT2); - - writel(tmp, info->base + S3C2410_TICNT); -} - -static void s3c2443_rtc_setfreq(struct s3c_rtc *info, int freq) -{ - unsigned int tmp = 0; - int val; - - tmp = readb(info->base + S3C2410_TICNT); - tmp &= S3C2410_TICNT_ENABLE; - - val = (info->rtc->max_user_freq / freq) - 1; - - tmp |= S3C2443_TICNT_PART(val); - writel(S3C2443_TICNT1_PART(val), info->base + S3C2443_TICNT1); - - writel(tmp, info->base + S3C2410_TICNT); -} - -static void s3c6410_rtc_setfreq(struct s3c_rtc *info, int freq) -{ - int val; - - val = (info->rtc->max_user_freq / freq) - 1; - writel(val, info->base + S3C2410_TICNT); -} - -static void s3c24xx_rtc_enable_tick(struct s3c_rtc *info, struct seq_file *seq) -{ - unsigned int ticnt; - - ticnt = readb(info->base + S3C2410_TICNT); - ticnt &= S3C2410_TICNT_ENABLE; - - seq_printf(seq, "periodic_IRQ\t: %s\n", ticnt ? "yes" : "no"); -} - -static void s3c2416_rtc_select_tick_clk(struct s3c_rtc *info) -{ - unsigned int con; - - con = readw(info->base + S3C2410_RTCCON); - con |= S3C2443_RTCCON_TICSEL; - writew(con, info->base + S3C2410_RTCCON); -} - -static void s3c6410_rtc_enable_tick(struct s3c_rtc *info, struct seq_file *seq) -{ - unsigned int ticnt; - - ticnt = readw(info->base + S3C2410_RTCCON); - ticnt &= S3C64XX_RTCCON_TICEN; - - seq_printf(seq, "periodic_IRQ\t: %s\n", ticnt ? "yes" : "no"); -} - -static void s3c24xx_rtc_save_tick_cnt(struct s3c_rtc *info) -{ - info->ticnt_save = readb(info->base + S3C2410_TICNT); -} - -static void s3c24xx_rtc_restore_tick_cnt(struct s3c_rtc *info) -{ - writeb(info->ticnt_save, info->base + S3C2410_TICNT); -} - -static void s3c6410_rtc_save_tick_cnt(struct s3c_rtc *info) -{ - info->ticnt_en_save = readw(info->base + S3C2410_RTCCON); - info->ticnt_en_save &= S3C64XX_RTCCON_TICEN; - info->ticnt_save = readl(info->base + S3C2410_TICNT); -} - -static void s3c6410_rtc_restore_tick_cnt(struct s3c_rtc *info) -{ - unsigned int con; - - writel(info->ticnt_save, info->base + S3C2410_TICNT); - if (info->ticnt_en_save) { - con = readw(info->base + S3C2410_RTCCON); - writew(con | info->ticnt_en_save, info->base + S3C2410_RTCCON); - } -} - static struct s3c_rtc_data const s3c2410_rtc_data = { - .max_user_freq = 128, .irq_handler = s3c24xx_rtc_irq, - .set_freq = s3c2410_rtc_setfreq, - .enable_tick = s3c24xx_rtc_enable_tick, - .save_tick_cnt = s3c24xx_rtc_save_tick_cnt, - .restore_tick_cnt = s3c24xx_rtc_restore_tick_cnt, .enable = s3c24xx_rtc_enable, .disable = s3c24xx_rtc_disable, }; static struct s3c_rtc_data const s3c2416_rtc_data = { - .max_user_freq = 32768, .irq_handler = s3c24xx_rtc_irq, - .set_freq = s3c2416_rtc_setfreq, - .enable_tick = s3c24xx_rtc_enable_tick, - .select_tick_clk = s3c2416_rtc_select_tick_clk, - .save_tick_cnt = s3c24xx_rtc_save_tick_cnt, - .restore_tick_cnt = s3c24xx_rtc_restore_tick_cnt, .enable = s3c24xx_rtc_enable, .disable = s3c24xx_rtc_disable, }; static struct s3c_rtc_data const s3c2443_rtc_data = { - .max_user_freq = 32768, .irq_handler = s3c24xx_rtc_irq, - .set_freq = s3c2443_rtc_setfreq, - .enable_tick = s3c24xx_rtc_enable_tick, - .select_tick_clk = s3c2416_rtc_select_tick_clk, - .save_tick_cnt = s3c24xx_rtc_save_tick_cnt, - .restore_tick_cnt = s3c24xx_rtc_restore_tick_cnt, .enable = s3c24xx_rtc_enable, .disable = s3c24xx_rtc_disable, }; static struct s3c_rtc_data const s3c6410_rtc_data = { - .max_user_freq = 32768, .needs_src_clk = true, .irq_handler = s3c6410_rtc_irq, - .set_freq = s3c6410_rtc_setfreq, - .enable_tick = s3c6410_rtc_enable_tick, - .save_tick_cnt = s3c6410_rtc_save_tick_cnt, - .restore_tick_cnt = s3c6410_rtc_restore_tick_cnt, .enable = s3c24xx_rtc_enable, .disable = s3c6410_rtc_disable, }; -- GitLab From d8b369c4e31430a4746571bcae45a98933827232 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:40:57 +0000 Subject: [PATCH 0377/1894] KVM: arm64: Add kvm-arm.mode early kernel parameter Add an early parameter that allows users to select the mode of operation for KVM/arm64. For now, the only supported value is "protected". By passing this flag users opt into the hypervisor placing additional restrictions on the host kernel. These allow the hypervisor to spawn guests whose state is kept private from the host. Restrictions will include stage-2 address translation to prevent host from accessing guest memory, filtering its SMC calls, etc. Without this parameter, the default behaviour remains selecting VHE/nVHE based on hardware support and CONFIG_ARM64_VHE. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201202184122.26046-2-dbrazdil@google.com --- Documentation/admin-guide/kernel-parameters.txt | 10 ++++++++++ arch/arm64/include/asm/kvm_host.h | 9 +++++++++ arch/arm64/kvm/arm.c | 16 ++++++++++++++++ 3 files changed, 35 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 526d65d8573a4..ee9f137763888 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2259,6 +2259,16 @@ for all guests. Default is 1 (enabled) if in 64-bit or 32-bit PAE mode. + kvm-arm.mode= + [KVM,ARM] Select one of KVM/arm64's modes of operation. + + protected: nVHE-based mode with support for guests whose + state is kept private from the host. + Not valid if the kernel is running in EL2. + + Defaults to VHE/nVHE based on hardware support and + the value of CONFIG_ARM64_VHE. + kvm-arm.vgic_v3_group0_trap= [KVM,ARM] Trap guest accesses to GICv3 group-0 system registers diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 781d029b8aa85..820954ad24776 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -50,6 +50,15 @@ #define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \ KVM_DIRTY_LOG_INITIALLY_SET) +/* + * Mode of operation configurable with kvm-arm.mode early param. + * See Documentation/admin-guide/kernel-parameters.txt for more information. + */ +enum kvm_mode { + KVM_MODE_DEFAULT, + KVM_MODE_PROTECTED, +}; + DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use); extern unsigned int kvm_sve_max_vl; diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 5750ec34960e9..5db90680742c2 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -46,6 +46,8 @@ __asm__(".arch_extension virt"); #endif +static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT; + DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector); static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); @@ -1790,6 +1792,20 @@ void kvm_arch_exit(void) kvm_perf_teardown(); } +static int __init early_kvm_mode_cfg(char *arg) +{ + if (!arg) + return -EINVAL; + + if (strcmp(arg, "protected") == 0) { + kvm_mode = KVM_MODE_PROTECTED; + return 0; + } + + return -EINVAL; +} +early_param("kvm-arm.mode", early_kvm_mode_cfg); + static int arm_init(void) { int rc = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); -- GitLab From 3eb681fba2bf8b67b65ce92d0ebfd7cbfc263da9 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:40:58 +0000 Subject: [PATCH 0378/1894] KVM: arm64: Add ARM64_KVM_PROTECTED_MODE CPU capability Expose the boolean value whether the system is running with KVM in protected mode (nVHE + kernel param). CPU capability was selected over a global variable to allow use in alternatives. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201202184122.26046-3-dbrazdil@google.com --- arch/arm64/include/asm/cpucaps.h | 3 ++- arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/include/asm/virt.h | 8 ++++++++ arch/arm64/kernel/cpufeature.c | 22 ++++++++++++++++++++++ arch/arm64/kvm/arm.c | 9 ++++++++- 5 files changed, 41 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index a7242ef2a2cd6..350c98103d45e 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -65,7 +65,8 @@ #define ARM64_MTE 57 #define ARM64_WORKAROUND_1508412 58 #define ARM64_HAS_LDAPR 59 +#define ARM64_KVM_PROTECTED_MODE 60 -#define ARM64_NCAPS 60 +#define ARM64_NCAPS 61 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 820954ad24776..7119d38e3c560 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -58,6 +58,7 @@ enum kvm_mode { KVM_MODE_DEFAULT, KVM_MODE_PROTECTED, }; +enum kvm_mode kvm_get_mode(void); DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use); diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 6069be50baf9f..eb81dcc220b65 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -97,6 +97,14 @@ static __always_inline bool has_vhe(void) return cpus_have_final_cap(ARM64_HAS_VIRT_HOST_EXTN); } +static __always_inline bool is_protected_kvm_enabled(void) +{ + if (is_vhe_hyp_code()) + return false; + else + return cpus_have_final_cap(ARM64_KVM_PROTECTED_MODE); +} + #endif /* __ASSEMBLY__ */ #endif /* ! __ASM__VIRT_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index cffcb0198bae8..0c029adf72d33 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -74,6 +74,7 @@ #include #include #include +#include #include #include #include @@ -1703,6 +1704,21 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap) } #endif /* CONFIG_ARM64_MTE */ +#ifdef CONFIG_KVM +static bool is_kvm_protected_mode(const struct arm64_cpu_capabilities *entry, int __unused) +{ + if (kvm_get_mode() != KVM_MODE_PROTECTED) + return false; + + if (is_kernel_in_hyp_mode()) { + pr_warn("Protected KVM not available with VHE\n"); + return false; + } + + return true; +} +#endif /* CONFIG_KVM */ + /* Internal helper functions to match cpu capability type */ static bool cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap) @@ -1794,6 +1810,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .field_pos = ID_AA64PFR0_EL1_SHIFT, .min_field_value = ID_AA64PFR0_EL1_32BIT_64BIT, }, + { + .desc = "Protected KVM", + .capability = ARM64_KVM_PROTECTED_MODE, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = is_kvm_protected_mode, + }, #endif { .desc = "Kernel page table isolation (KPTI)", diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 5db90680742c2..745e1b4bcbe05 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1771,7 +1771,9 @@ int kvm_arch_init(void *opaque) if (err) goto out_hyp; - if (in_hyp_mode) + if (is_protected_kvm_enabled()) + kvm_info("Protected nVHE mode initialized successfully\n"); + else if (in_hyp_mode) kvm_info("VHE mode initialized successfully\n"); else kvm_info("Hyp mode initialized successfully\n"); @@ -1806,6 +1808,11 @@ static int __init early_kvm_mode_cfg(char *arg) } early_param("kvm-arm.mode", early_kvm_mode_cfg); +enum kvm_mode kvm_get_mode(void) +{ + return kvm_mode; +} + static int arm_init(void) { int rc = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); -- GitLab From e6dd9d89a64e30b25339d0dbe5c5aa589db8d530 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:40:59 +0000 Subject: [PATCH 0379/1894] psci: Support psci_ops.get_version for v0.1 KVM's host PSCI SMC filter needs to be aware of the PSCI version of the system but currently it is impossible to distinguish between v0.1 and PSCI disabled because both have get_version == NULL. Populate get_version for v0.1 with a function that returns a constant. psci_opt.get_version is currently unused so this has no effect on existing functionality. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20201202184122.26046-4-dbrazdil@google.com --- drivers/firmware/psci/psci.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c index 00af99b6f97c1..ace5b9ac676c6 100644 --- a/drivers/firmware/psci/psci.c +++ b/drivers/firmware/psci/psci.c @@ -146,7 +146,12 @@ static int psci_to_linux_errno(int errno) return -EINVAL; } -static u32 psci_get_version(void) +static u32 psci_0_1_get_version(void) +{ + return PSCI_VERSION(0, 1); +} + +static u32 psci_0_2_get_version(void) { return invoke_psci_fn(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0); } @@ -421,7 +426,7 @@ static void __init psci_init_smccc(void) static void __init psci_0_2_set_functions(void) { pr_info("Using standard PSCI v0.2 function IDs\n"); - psci_ops.get_version = psci_get_version; + psci_ops.get_version = psci_0_2_get_version; psci_function_id[PSCI_FN_CPU_SUSPEND] = PSCI_FN_NATIVE(0_2, CPU_SUSPEND); @@ -450,7 +455,7 @@ static void __init psci_0_2_set_functions(void) */ static int __init psci_probe(void) { - u32 ver = psci_get_version(); + u32 ver = psci_0_2_get_version(); pr_info("PSCIv%d.%d detected in firmware.\n", PSCI_VERSION_MAJOR(ver), @@ -514,6 +519,8 @@ static int __init psci_0_1_init(struct device_node *np) pr_info("Using PSCI v0.1 Function IDs from DT\n"); + psci_ops.get_version = psci_0_1_get_version; + if (!of_property_read_u32(np, "cpu_suspend", &id)) { psci_function_id[PSCI_FN_CPU_SUSPEND] = id; psci_ops.cpu_suspend = psci_cpu_suspend; -- GitLab From 0bc7474fb7673422b134e88feb49cde54b22bb75 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:41:00 +0000 Subject: [PATCH 0380/1894] psci: Split functions to v0.1 and v0.2+ variants Refactor implementation of v0.1+ functions (CPU_SUSPEND, CPU_OFF, CPU_ON, MIGRATE) to have two functions psci_0_1_foo / psci_0_2_foo that select the function ID and call a common helper __psci_foo. This is a small cleanup so that the function ID array is only used for v0.1 configurations. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20201202184122.26046-5-dbrazdil@google.com --- drivers/firmware/psci/psci.c | 94 +++++++++++++++++++++++------------- 1 file changed, 60 insertions(+), 34 deletions(-) diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c index ace5b9ac676c6..13b9ed71b4461 100644 --- a/drivers/firmware/psci/psci.c +++ b/drivers/firmware/psci/psci.c @@ -168,46 +168,80 @@ int psci_set_osi_mode(bool enable) return psci_to_linux_errno(err); } -static int psci_cpu_suspend(u32 state, unsigned long entry_point) +static int __psci_cpu_suspend(u32 fn, u32 state, unsigned long entry_point) { int err; - u32 fn; - fn = psci_function_id[PSCI_FN_CPU_SUSPEND]; err = invoke_psci_fn(fn, state, entry_point, 0); return psci_to_linux_errno(err); } -static int psci_cpu_off(u32 state) +static int psci_0_1_cpu_suspend(u32 state, unsigned long entry_point) +{ + return __psci_cpu_suspend(psci_function_id[PSCI_FN_CPU_SUSPEND], + state, entry_point); +} + +static int psci_0_2_cpu_suspend(u32 state, unsigned long entry_point) +{ + return __psci_cpu_suspend(PSCI_FN_NATIVE(0_2, CPU_SUSPEND), + state, entry_point); +} + +static int __psci_cpu_off(u32 fn, u32 state) { int err; - u32 fn; - fn = psci_function_id[PSCI_FN_CPU_OFF]; err = invoke_psci_fn(fn, state, 0, 0); return psci_to_linux_errno(err); } -static int psci_cpu_on(unsigned long cpuid, unsigned long entry_point) +static int psci_0_1_cpu_off(u32 state) +{ + return __psci_cpu_off(psci_function_id[PSCI_FN_CPU_OFF], state); +} + +static int psci_0_2_cpu_off(u32 state) +{ + return __psci_cpu_off(PSCI_0_2_FN_CPU_OFF, state); +} + +static int __psci_cpu_on(u32 fn, unsigned long cpuid, unsigned long entry_point) { int err; - u32 fn; - fn = psci_function_id[PSCI_FN_CPU_ON]; err = invoke_psci_fn(fn, cpuid, entry_point, 0); return psci_to_linux_errno(err); } -static int psci_migrate(unsigned long cpuid) +static int psci_0_1_cpu_on(unsigned long cpuid, unsigned long entry_point) +{ + return __psci_cpu_on(psci_function_id[PSCI_FN_CPU_ON], cpuid, entry_point); +} + +static int psci_0_2_cpu_on(unsigned long cpuid, unsigned long entry_point) +{ + return __psci_cpu_on(PSCI_FN_NATIVE(0_2, CPU_ON), cpuid, entry_point); +} + +static int __psci_migrate(u32 fn, unsigned long cpuid) { int err; - u32 fn; - fn = psci_function_id[PSCI_FN_MIGRATE]; err = invoke_psci_fn(fn, cpuid, 0, 0); return psci_to_linux_errno(err); } +static int psci_0_1_migrate(unsigned long cpuid) +{ + return __psci_migrate(psci_function_id[PSCI_FN_MIGRATE], cpuid); +} + +static int psci_0_2_migrate(unsigned long cpuid) +{ + return __psci_migrate(PSCI_FN_NATIVE(0_2, MIGRATE), cpuid); +} + static int psci_affinity_info(unsigned long target_affinity, unsigned long lowest_affinity_level) { @@ -352,7 +386,7 @@ static void __init psci_init_system_suspend(void) static void __init psci_init_cpu_suspend(void) { - int feature = psci_features(psci_function_id[PSCI_FN_CPU_SUSPEND]); + int feature = psci_features(PSCI_FN_NATIVE(0_2, CPU_SUSPEND)); if (feature != PSCI_RET_NOT_SUPPORTED) psci_cpu_suspend_feature = feature; @@ -426,24 +460,16 @@ static void __init psci_init_smccc(void) static void __init psci_0_2_set_functions(void) { pr_info("Using standard PSCI v0.2 function IDs\n"); - psci_ops.get_version = psci_0_2_get_version; - - psci_function_id[PSCI_FN_CPU_SUSPEND] = - PSCI_FN_NATIVE(0_2, CPU_SUSPEND); - psci_ops.cpu_suspend = psci_cpu_suspend; - - psci_function_id[PSCI_FN_CPU_OFF] = PSCI_0_2_FN_CPU_OFF; - psci_ops.cpu_off = psci_cpu_off; - - psci_function_id[PSCI_FN_CPU_ON] = PSCI_FN_NATIVE(0_2, CPU_ON); - psci_ops.cpu_on = psci_cpu_on; - psci_function_id[PSCI_FN_MIGRATE] = PSCI_FN_NATIVE(0_2, MIGRATE); - psci_ops.migrate = psci_migrate; - - psci_ops.affinity_info = psci_affinity_info; - - psci_ops.migrate_info_type = psci_migrate_info_type; + psci_ops = (struct psci_operations){ + .get_version = psci_0_2_get_version, + .cpu_suspend = psci_0_2_cpu_suspend, + .cpu_off = psci_0_2_cpu_off, + .cpu_on = psci_0_2_cpu_on, + .migrate = psci_0_2_migrate, + .affinity_info = psci_affinity_info, + .migrate_info_type = psci_migrate_info_type, + }; arm_pm_restart = psci_sys_reset; @@ -523,22 +549,22 @@ static int __init psci_0_1_init(struct device_node *np) if (!of_property_read_u32(np, "cpu_suspend", &id)) { psci_function_id[PSCI_FN_CPU_SUSPEND] = id; - psci_ops.cpu_suspend = psci_cpu_suspend; + psci_ops.cpu_suspend = psci_0_1_cpu_suspend; } if (!of_property_read_u32(np, "cpu_off", &id)) { psci_function_id[PSCI_FN_CPU_OFF] = id; - psci_ops.cpu_off = psci_cpu_off; + psci_ops.cpu_off = psci_0_1_cpu_off; } if (!of_property_read_u32(np, "cpu_on", &id)) { psci_function_id[PSCI_FN_CPU_ON] = id; - psci_ops.cpu_on = psci_cpu_on; + psci_ops.cpu_on = psci_0_1_cpu_on; } if (!of_property_read_u32(np, "migrate", &id)) { psci_function_id[PSCI_FN_MIGRATE] = id; - psci_ops.migrate = psci_migrate; + psci_ops.migrate = psci_0_1_migrate; } return 0; -- GitLab From 82ac62d1658b42392282550078a189ccd3f50214 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:41:01 +0000 Subject: [PATCH 0381/1894] psci: Replace psci_function_id array with a struct Small refactor that replaces array of v0.1 function IDs indexed by an enum of function-name constants with a struct of function IDs "indexed" by field names. This is done in preparation for exposing the IDs to other parts of the kernel. Exposing a struct avoids the need for bounds checking. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20201202184122.26046-6-dbrazdil@google.com --- drivers/firmware/psci/psci.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c index 13b9ed71b4461..593fdd0e09a22 100644 --- a/drivers/firmware/psci/psci.c +++ b/drivers/firmware/psci/psci.c @@ -58,15 +58,14 @@ typedef unsigned long (psci_fn)(unsigned long, unsigned long, unsigned long, unsigned long); static psci_fn *invoke_psci_fn; -enum psci_function { - PSCI_FN_CPU_SUSPEND, - PSCI_FN_CPU_ON, - PSCI_FN_CPU_OFF, - PSCI_FN_MIGRATE, - PSCI_FN_MAX, +struct psci_0_1_function_ids { + u32 cpu_suspend; + u32 cpu_on; + u32 cpu_off; + u32 migrate; }; -static u32 psci_function_id[PSCI_FN_MAX]; +static struct psci_0_1_function_ids psci_0_1_function_ids; #define PSCI_0_2_POWER_STATE_MASK \ (PSCI_0_2_POWER_STATE_ID_MASK | \ @@ -178,7 +177,7 @@ static int __psci_cpu_suspend(u32 fn, u32 state, unsigned long entry_point) static int psci_0_1_cpu_suspend(u32 state, unsigned long entry_point) { - return __psci_cpu_suspend(psci_function_id[PSCI_FN_CPU_SUSPEND], + return __psci_cpu_suspend(psci_0_1_function_ids.cpu_suspend, state, entry_point); } @@ -198,7 +197,7 @@ static int __psci_cpu_off(u32 fn, u32 state) static int psci_0_1_cpu_off(u32 state) { - return __psci_cpu_off(psci_function_id[PSCI_FN_CPU_OFF], state); + return __psci_cpu_off(psci_0_1_function_ids.cpu_off, state); } static int psci_0_2_cpu_off(u32 state) @@ -216,7 +215,7 @@ static int __psci_cpu_on(u32 fn, unsigned long cpuid, unsigned long entry_point) static int psci_0_1_cpu_on(unsigned long cpuid, unsigned long entry_point) { - return __psci_cpu_on(psci_function_id[PSCI_FN_CPU_ON], cpuid, entry_point); + return __psci_cpu_on(psci_0_1_function_ids.cpu_on, cpuid, entry_point); } static int psci_0_2_cpu_on(unsigned long cpuid, unsigned long entry_point) @@ -234,7 +233,7 @@ static int __psci_migrate(u32 fn, unsigned long cpuid) static int psci_0_1_migrate(unsigned long cpuid) { - return __psci_migrate(psci_function_id[PSCI_FN_MIGRATE], cpuid); + return __psci_migrate(psci_0_1_function_ids.migrate, cpuid); } static int psci_0_2_migrate(unsigned long cpuid) @@ -548,22 +547,22 @@ static int __init psci_0_1_init(struct device_node *np) psci_ops.get_version = psci_0_1_get_version; if (!of_property_read_u32(np, "cpu_suspend", &id)) { - psci_function_id[PSCI_FN_CPU_SUSPEND] = id; + psci_0_1_function_ids.cpu_suspend = id; psci_ops.cpu_suspend = psci_0_1_cpu_suspend; } if (!of_property_read_u32(np, "cpu_off", &id)) { - psci_function_id[PSCI_FN_CPU_OFF] = id; + psci_0_1_function_ids.cpu_off = id; psci_ops.cpu_off = psci_0_1_cpu_off; } if (!of_property_read_u32(np, "cpu_on", &id)) { - psci_function_id[PSCI_FN_CPU_ON] = id; + psci_0_1_function_ids.cpu_on = id; psci_ops.cpu_on = psci_0_1_cpu_on; } if (!of_property_read_u32(np, "migrate", &id)) { - psci_function_id[PSCI_FN_MIGRATE] = id; + psci_0_1_function_ids.migrate = id; psci_ops.migrate = psci_0_1_migrate; } -- GitLab From 6df3e14436f6ee254b1a4952d90ee8988be59c89 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:41:02 +0000 Subject: [PATCH 0382/1894] psci: Add accessor for psci_0_1_function_ids Make it possible to retrieve a copy of the psci_0_1_function_ids struct. This is useful for KVM if it is configured to intercept host's PSCI SMCs. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20201202184122.26046-7-dbrazdil@google.com --- drivers/firmware/psci/psci.c | 12 +++++------- include/linux/psci.h | 9 +++++++++ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c index 593fdd0e09a22..f5fc429cae3f7 100644 --- a/drivers/firmware/psci/psci.c +++ b/drivers/firmware/psci/psci.c @@ -58,15 +58,13 @@ typedef unsigned long (psci_fn)(unsigned long, unsigned long, unsigned long, unsigned long); static psci_fn *invoke_psci_fn; -struct psci_0_1_function_ids { - u32 cpu_suspend; - u32 cpu_on; - u32 cpu_off; - u32 migrate; -}; - static struct psci_0_1_function_ids psci_0_1_function_ids; +struct psci_0_1_function_ids get_psci_0_1_function_ids(void) +{ + return psci_0_1_function_ids; +} + #define PSCI_0_2_POWER_STATE_MASK \ (PSCI_0_2_POWER_STATE_ID_MASK | \ PSCI_0_2_POWER_STATE_TYPE_MASK | \ diff --git a/include/linux/psci.h b/include/linux/psci.h index 2a1bfb890e588..4ca0060a3fc42 100644 --- a/include/linux/psci.h +++ b/include/linux/psci.h @@ -34,6 +34,15 @@ struct psci_operations { extern struct psci_operations psci_ops; +struct psci_0_1_function_ids { + u32 cpu_suspend; + u32 cpu_on; + u32 cpu_off; + u32 migrate; +}; + +struct psci_0_1_function_ids get_psci_0_1_function_ids(void); + #if defined(CONFIG_ARM_PSCI_FW) int __init psci_dt_init(void); #else -- GitLab From c1f45f4eb6fd8704f72d5ed64184121e9fe129a0 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:41:03 +0000 Subject: [PATCH 0383/1894] arm64: Make cpu_logical_map() take unsigned int CPU index should never be negative. Change the signature of (set_)cpu_logical_map to take an unsigned int. This still works even if the users treat the CPU index as an int, and will allow the hypervisor's implementation to check that the index is valid with a single upper-bound check. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201202184122.26046-8-dbrazdil@google.com --- arch/arm64/include/asm/smp.h | 4 ++-- arch/arm64/kernel/setup.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index 2e7f529ec5a65..bcb01ca153251 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -46,9 +46,9 @@ DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number); * Logical CPU mapping. */ extern u64 __cpu_logical_map[NR_CPUS]; -extern u64 cpu_logical_map(int cpu); +extern u64 cpu_logical_map(unsigned int cpu); -static inline void set_cpu_logical_map(int cpu, u64 hwid) +static inline void set_cpu_logical_map(unsigned int cpu, u64 hwid) { __cpu_logical_map[cpu] = hwid; } diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 133257ffd8591..2f2973bc67c75 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -276,7 +276,7 @@ arch_initcall(reserve_memblock_reserved_regions); u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID }; -u64 cpu_logical_map(int cpu) +u64 cpu_logical_map(unsigned int cpu) { return __cpu_logical_map[cpu]; } -- GitLab From 78869f0f0552d032c7e32724c4abb2715e8f974a Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:41:04 +0000 Subject: [PATCH 0384/1894] arm64: Extract parts of el2_setup into a macro When a CPU is booted in EL2, the kernel checks for VHE support and initializes the CPU core accordingly. For nVHE it also installs the stub vectors and drops down to EL1. Once KVM gains the ability to boot cores without going through the kernel entry point, it will need to initialize the CPU the same way. Extract the relevant bits of el2_setup into an init_el2_state macro with an argument specifying whether to initialize for VHE or nVHE. The following ifdefs are removed: * CONFIG_ARM_GIC_V3 - always selected on arm64 * CONFIG_COMPAT - hstr_el2 can be set even without 32-bit support No functional change intended. Size of el2_setup increased by 148 bytes due to duplication. Signed-off-by: David Brazdil [maz: reworked to fit the new PSTATE initial setup code] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201202184122.26046-9-dbrazdil@google.com --- arch/arm64/include/asm/el2_setup.h | 181 +++++++++++++++++++++++++++++ arch/arm64/kernel/head.S | 138 +++------------------- 2 files changed, 199 insertions(+), 120 deletions(-) create mode 100644 arch/arm64/include/asm/el2_setup.h diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h new file mode 100644 index 0000000000000..a7f5a1bbc8aca --- /dev/null +++ b/arch/arm64/include/asm/el2_setup.h @@ -0,0 +1,181 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + */ + +#ifndef __ARM_KVM_INIT_H__ +#define __ARM_KVM_INIT_H__ + +#ifndef __ASSEMBLY__ +#error Assembly-only header +#endif + +#include +#include +#include +#include + +.macro __init_el2_sctlr + mov_q x0, INIT_SCTLR_EL2_MMU_OFF + msr sctlr_el2, x0 + isb +.endm + +/* + * Allow Non-secure EL1 and EL0 to access physical timer and counter. + * This is not necessary for VHE, since the host kernel runs in EL2, + * and EL0 accesses are configured in the later stage of boot process. + * Note that when HCR_EL2.E2H == 1, CNTHCTL_EL2 has the same bit layout + * as CNTKCTL_EL1, and CNTKCTL_EL1 accessing instructions are redefined + * to access CNTHCTL_EL2. This allows the kernel designed to run at EL1 + * to transparently mess with the EL0 bits via CNTKCTL_EL1 access in + * EL2. + */ +.macro __init_el2_timers mode +.ifeqs "\mode", "nvhe" + mrs x0, cnthctl_el2 + orr x0, x0, #3 // Enable EL1 physical timers + msr cnthctl_el2, x0 +.endif + msr cntvoff_el2, xzr // Clear virtual offset +.endm + +.macro __init_el2_debug mode + mrs x1, id_aa64dfr0_el1 + sbfx x0, x1, #ID_AA64DFR0_PMUVER_SHIFT, #4 + cmp x0, #1 + b.lt 1f // Skip if no PMU present + mrs x0, pmcr_el0 // Disable debug access traps + ubfx x0, x0, #11, #5 // to EL2 and allow access to +1: + csel x2, xzr, x0, lt // all PMU counters from EL1 + + /* Statistical profiling */ + ubfx x0, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4 + cbz x0, 3f // Skip if SPE not present + +.ifeqs "\mode", "nvhe" + mrs_s x0, SYS_PMBIDR_EL1 // If SPE available at EL2, + and x0, x0, #(1 << SYS_PMBIDR_EL1_P_SHIFT) + cbnz x0, 2f // then permit sampling of physical + mov x0, #(1 << SYS_PMSCR_EL2_PCT_SHIFT | \ + 1 << SYS_PMSCR_EL2_PA_SHIFT) + msr_s SYS_PMSCR_EL2, x0 // addresses and physical counter +2: + mov x0, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT) + orr x2, x2, x0 // If we don't have VHE, then + // use EL1&0 translation. +.else + orr x2, x2, #MDCR_EL2_TPMS // For VHE, use EL2 translation + // and disable access from EL1 +.endif + +3: + msr mdcr_el2, x2 // Configure debug traps +.endm + +/* LORegions */ +.macro __init_el2_lor + mrs x1, id_aa64mmfr1_el1 + ubfx x0, x1, #ID_AA64MMFR1_LOR_SHIFT, 4 + cbz x0, 1f + msr_s SYS_LORC_EL1, xzr +1: +.endm + +/* Stage-2 translation */ +.macro __init_el2_stage2 + msr vttbr_el2, xzr +.endm + +/* GICv3 system register access */ +.macro __init_el2_gicv3 + mrs x0, id_aa64pfr0_el1 + ubfx x0, x0, #ID_AA64PFR0_GIC_SHIFT, #4 + cbz x0, 1f + + mrs_s x0, SYS_ICC_SRE_EL2 + orr x0, x0, #ICC_SRE_EL2_SRE // Set ICC_SRE_EL2.SRE==1 + orr x0, x0, #ICC_SRE_EL2_ENABLE // Set ICC_SRE_EL2.Enable==1 + msr_s SYS_ICC_SRE_EL2, x0 + isb // Make sure SRE is now set + mrs_s x0, SYS_ICC_SRE_EL2 // Read SRE back, + tbz x0, #0, 1f // and check that it sticks + msr_s SYS_ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults +1: +.endm + +.macro __init_el2_hstr + msr hstr_el2, xzr // Disable CP15 traps to EL2 +.endm + +/* Virtual CPU ID registers */ +.macro __init_el2_nvhe_idregs + mrs x0, midr_el1 + mrs x1, mpidr_el1 + msr vpidr_el2, x0 + msr vmpidr_el2, x1 +.endm + +/* Coprocessor traps */ +.macro __init_el2_nvhe_cptr + mov x0, #0x33ff + msr cptr_el2, x0 // Disable copro. traps to EL2 +.endm + +/* SVE register access */ +.macro __init_el2_nvhe_sve + mrs x1, id_aa64pfr0_el1 + ubfx x1, x1, #ID_AA64PFR0_SVE_SHIFT, #4 + cbz x1, 1f + + bic x0, x0, #CPTR_EL2_TZ // Also disable SVE traps + msr cptr_el2, x0 // Disable copro. traps to EL2 + isb + mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector + msr_s SYS_ZCR_EL2, x1 // length for EL1. +1: +.endm + +.macro __init_el2_nvhe_prepare_eret + mov x0, #INIT_PSTATE_EL1 + msr spsr_el2, x0 +.endm + +/** + * Initialize EL2 registers to sane values. This should be called early on all + * cores that were booted in EL2. + * + * Regs: x0, x1 and x2 are clobbered. + */ +.macro init_el2_state mode +.ifnes "\mode", "vhe" +.ifnes "\mode", "nvhe" +.error "Invalid 'mode' argument" +.endif +.endif + + __init_el2_sctlr + __init_el2_timers \mode + __init_el2_debug \mode + __init_el2_lor + __init_el2_stage2 + __init_el2_gicv3 + __init_el2_hstr + + /* + * When VHE is not in use, early init of EL2 needs to be done here. + * When VHE _is_ in use, EL1 will not be used in the host and + * requires no configuration, and all non-hyp-specific EL2 setup + * will be done via the _EL1 system register aliases in __cpu_setup. + */ +.ifeqs "\mode", "nvhe" + __init_el2_nvhe_idregs + __init_el2_nvhe_cptr + __init_el2_nvhe_sve + __init_el2_nvhe_prepare_eret +.endif +.endm + +#endif /* __ARM_KVM_INIT_H__ */ diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 0b145bca1b0e8..7eba3a1e84b0e 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -11,7 +11,6 @@ #include #include -#include #include #include @@ -21,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -508,155 +508,53 @@ SYM_INNER_LABEL(init_el1, SYM_L_LOCAL) eret SYM_INNER_LABEL(init_el2, SYM_L_LOCAL) - mov_q x0, INIT_SCTLR_EL2_MMU_OFF - msr sctlr_el2, x0 - #ifdef CONFIG_ARM64_VHE /* - * Check for VHE being present. For the rest of the EL2 setup, - * x2 being non-zero indicates that we do have VHE, and that the - * kernel is intended to run at EL2. + * Check for VHE being present. x2 being non-zero indicates that we + * do have VHE, and that the kernel is intended to run at EL2. */ mrs x2, id_aa64mmfr1_el1 ubfx x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4 -#else - mov x2, xzr + cbz x2, init_el2_nvhe #endif - - /* Hyp configuration. */ - mov_q x0, HCR_HOST_NVHE_FLAGS - cbz x2, set_hcr + /* + * When VHE _is_ in use, EL1 will not be used in the host and + * requires no configuration, and all non-hyp-specific EL2 setup + * will be done via the _EL1 system register aliases in __cpu_setup. + */ mov_q x0, HCR_HOST_VHE_FLAGS -set_hcr: msr hcr_el2, x0 isb - /* - * Allow Non-secure EL1 and EL0 to access physical timer and counter. - * This is not necessary for VHE, since the host kernel runs in EL2, - * and EL0 accesses are configured in the later stage of boot process. - * Note that when HCR_EL2.E2H == 1, CNTHCTL_EL2 has the same bit layout - * as CNTKCTL_EL1, and CNTKCTL_EL1 accessing instructions are redefined - * to access CNTHCTL_EL2. This allows the kernel designed to run at EL1 - * to transparently mess with the EL0 bits via CNTKCTL_EL1 access in - * EL2. - */ - cbnz x2, 1f - mrs x0, cnthctl_el2 - orr x0, x0, #3 // Enable EL1 physical timers - msr cnthctl_el2, x0 -1: - msr cntvoff_el2, xzr // Clear virtual offset - -#ifdef CONFIG_ARM_GIC_V3 - /* GICv3 system register access */ - mrs x0, id_aa64pfr0_el1 - ubfx x0, x0, #ID_AA64PFR0_GIC_SHIFT, #4 - cbz x0, 3f - - mrs_s x0, SYS_ICC_SRE_EL2 - orr x0, x0, #ICC_SRE_EL2_SRE // Set ICC_SRE_EL2.SRE==1 - orr x0, x0, #ICC_SRE_EL2_ENABLE // Set ICC_SRE_EL2.Enable==1 - msr_s SYS_ICC_SRE_EL2, x0 - isb // Make sure SRE is now set - mrs_s x0, SYS_ICC_SRE_EL2 // Read SRE back, - tbz x0, #0, 3f // and check that it sticks - msr_s SYS_ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults - -3: -#endif - - /* Populate ID registers. */ - mrs x0, midr_el1 - mrs x1, mpidr_el1 - msr vpidr_el2, x0 - msr vmpidr_el2, x1 - -#ifdef CONFIG_COMPAT - msr hstr_el2, xzr // Disable CP15 traps to EL2 -#endif - - /* EL2 debug */ - mrs x1, id_aa64dfr0_el1 - sbfx x0, x1, #ID_AA64DFR0_PMUVER_SHIFT, #4 - cmp x0, #1 - b.lt 4f // Skip if no PMU present - mrs x0, pmcr_el0 // Disable debug access traps - ubfx x0, x0, #11, #5 // to EL2 and allow access to -4: - csel x3, xzr, x0, lt // all PMU counters from EL1 - - /* Statistical profiling */ - ubfx x0, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4 - cbz x0, 7f // Skip if SPE not present - cbnz x2, 6f // VHE? - mrs_s x4, SYS_PMBIDR_EL1 // If SPE available at EL2, - and x4, x4, #(1 << SYS_PMBIDR_EL1_P_SHIFT) - cbnz x4, 5f // then permit sampling of physical - mov x4, #(1 << SYS_PMSCR_EL2_PCT_SHIFT | \ - 1 << SYS_PMSCR_EL2_PA_SHIFT) - msr_s SYS_PMSCR_EL2, x4 // addresses and physical counter -5: - mov x1, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT) - orr x3, x3, x1 // If we don't have VHE, then - b 7f // use EL1&0 translation. -6: // For VHE, use EL2 translation - orr x3, x3, #MDCR_EL2_TPMS // and disable access from EL1 -7: - msr mdcr_el2, x3 // Configure debug traps - - /* LORegions */ - mrs x1, id_aa64mmfr1_el1 - ubfx x0, x1, #ID_AA64MMFR1_LOR_SHIFT, 4 - cbz x0, 1f - msr_s SYS_LORC_EL1, xzr -1: - - /* Stage-2 translation */ - msr vttbr_el2, xzr - - cbz x2, install_el2_stub + init_el2_state vhe isb + mov_q x0, INIT_PSTATE_EL2 msr spsr_el2, x0 msr elr_el2, lr mov w0, #BOOT_CPU_MODE_EL2 eret -SYM_INNER_LABEL(install_el2_stub, SYM_L_LOCAL) +SYM_INNER_LABEL(init_el2_nvhe, SYM_L_LOCAL) /* * When VHE is not in use, early init of EL2 and EL1 needs to be * done here. - * When VHE _is_ in use, EL1 will not be used in the host and - * requires no configuration, and all non-hyp-specific EL2 setup - * will be done via the _EL1 system register aliases in __cpu_setup. */ mov_q x0, INIT_SCTLR_EL1_MMU_OFF msr sctlr_el1, x0 - /* Coprocessor traps. */ - mov x0, #0x33ff - msr cptr_el2, x0 // Disable copro. traps to EL2 - - /* SVE register access */ - mrs x1, id_aa64pfr0_el1 - ubfx x1, x1, #ID_AA64PFR0_SVE_SHIFT, #4 - cbz x1, 7f - - bic x0, x0, #CPTR_EL2_TZ // Also disable SVE traps - msr cptr_el2, x0 // Disable copro. traps to EL2 + mov_q x0, HCR_HOST_NVHE_FLAGS + msr hcr_el2, x0 isb - mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector - msr_s SYS_ZCR_EL2, x1 // length for EL1. + + init_el2_state nvhe /* Hypervisor stub */ -7: adr_l x0, __hyp_stub_vectors + adr_l x0, __hyp_stub_vectors msr vbar_el2, x0 - isb - mov x0, #INIT_PSTATE_EL1 - msr spsr_el2, x0 + msr elr_el2, lr mov w0, #BOOT_CPU_MODE_EL2 eret -- GitLab From 5be1d6226d35800393579340f35b8b0d7b2a3177 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:41:05 +0000 Subject: [PATCH 0385/1894] KVM: arm64: Remove vector_ptr param of hyp-init KVM precomputes the hyp VA of __kvm_hyp_host_vector, essentially a constant (minus ASLR), before passing it to __kvm_hyp_init. Now that we have alternatives for converting kimg VA to hyp VA, replace this with computing the constant inside __kvm_hyp_init, thus removing the need for an argument. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201202184122.26046-10-dbrazdil@google.com --- arch/arm64/include/asm/kvm_asm.h | 2 -- arch/arm64/include/asm/kvm_mmu.h | 24 ++++++++++++++++++++++++ arch/arm64/kvm/arm.c | 4 +--- arch/arm64/kvm/hyp/nvhe/hyp-init.S | 9 ++++++--- 4 files changed, 31 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index a542c422a036f..4698ea1fbb377 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -165,10 +165,8 @@ struct kvm_vcpu; struct kvm_s2_mmu; DECLARE_KVM_NVHE_SYM(__kvm_hyp_init); -DECLARE_KVM_NVHE_SYM(__kvm_hyp_host_vector); DECLARE_KVM_HYP_SYM(__kvm_hyp_vector); #define __kvm_hyp_init CHOOSE_NVHE_SYM(__kvm_hyp_init) -#define __kvm_hyp_host_vector CHOOSE_NVHE_SYM(__kvm_hyp_host_vector) #define __kvm_hyp_vector CHOOSE_HYP_SYM(__kvm_hyp_vector) extern unsigned long kvm_arm_hyp_percpu_base[NR_CPUS]; diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 5633c5a27aad3..ab6f96df20dfb 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -94,6 +94,30 @@ alternative_cb_end sub \reg, \reg, \tmp .endm +/* + * Convert a kernel image address to a hyp VA + * reg: kernel address to be converted in place + * tmp: temporary register + * + * The actual code generation takes place in kvm_get_kimage_voffset, and + * the instructions below are only there to reserve the space and + * perform the register allocation (kvm_update_kimg_phys_offset uses the + * specific registers encoded in the instructions). + */ +.macro kimg_hyp_va reg, tmp +alternative_cb kvm_update_kimg_phys_offset + movz \tmp, #0 + movk \tmp, #0, lsl #16 + movk \tmp, #0, lsl #32 + movk \tmp, #0, lsl #48 +alternative_cb_end + + sub \reg, \reg, \tmp + mov_q \tmp, PAGE_OFFSET + orr \reg, \reg, \tmp + kern_hyp_va \reg +.endm + #else #include diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 745e1b4bcbe05..1f86d7d0ecf2e 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1335,7 +1335,6 @@ static void cpu_init_hyp_mode(void) { phys_addr_t pgd_ptr; unsigned long hyp_stack_ptr; - unsigned long vector_ptr; unsigned long tpidr_el2; struct arm_smccc_res res; @@ -1353,7 +1352,6 @@ static void cpu_init_hyp_mode(void) pgd_ptr = kvm_mmu_get_httbr(); hyp_stack_ptr = __this_cpu_read(kvm_arm_hyp_stack_page) + PAGE_SIZE; hyp_stack_ptr = kern_hyp_va(hyp_stack_ptr); - vector_ptr = (unsigned long)kern_hyp_va(kvm_ksym_ref(__kvm_hyp_host_vector)); /* * Call initialization code, and switch to the full blown HYP code. @@ -1363,7 +1361,7 @@ static void cpu_init_hyp_mode(void) */ BUG_ON(!system_capabilities_finalized()); arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__kvm_hyp_init), - pgd_ptr, tpidr_el2, hyp_stack_ptr, vector_ptr, &res); + pgd_ptr, tpidr_el2, hyp_stack_ptr, &res); WARN_ON(res.a0 != SMCCC_RET_SUCCESS); /* diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index b11a9d7db677d..931a8c38f0854 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -50,7 +50,6 @@ __invalid: * x1: HYP pgd * x2: per-CPU offset * x3: HYP stack - * x4: HYP vectors */ __do_hyp_init: /* Check for a stub HVC call */ @@ -134,9 +133,13 @@ alternative_else_nop_endif msr sctlr_el2, x0 isb - /* Set the stack and new vectors */ + /* Set the stack */ mov sp, x3 - msr vbar_el2, x4 + + /* Set the host vector */ + ldr x0, =__kvm_hyp_host_vector + kimg_hyp_va x0, x1 + msr vbar_el2, x0 /* Hello, World! */ mov x0, #SMCCC_RET_SUCCESS -- GitLab From 63fec24351e827021137a15b307bd1e64772b7fe Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:41:06 +0000 Subject: [PATCH 0386/1894] KVM: arm64: Move hyp-init params to a per-CPU struct Once we start initializing KVM on newly booted cores before the rest of the kernel, parameters to __do_hyp_init will need to be provided by EL2 rather than EL1. At that point it will not be possible to pass its three arguments directly because PSCI_CPU_ON only supports one context argument. Refactor __do_hyp_init to accept its parameters in a struct. This prepares the code for KVM booting cores as well as removes any limits on the number of __do_hyp_init arguments. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201202184122.26046-11-dbrazdil@google.com --- arch/arm64/include/asm/kvm_asm.h | 6 ++++++ arch/arm64/include/asm/kvm_hyp.h | 2 +- arch/arm64/kernel/asm-offsets.c | 3 +++ arch/arm64/kvm/arm.c | 23 +++++++++++++---------- arch/arm64/kvm/hyp/nvhe/hyp-init.S | 16 +++++++--------- arch/arm64/kvm/hyp/nvhe/hyp-main.c | 2 ++ 6 files changed, 32 insertions(+), 20 deletions(-) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 4698ea1fbb377..cb750c525aee0 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -150,6 +150,12 @@ extern void *__vhe_undefined_symbol; #endif +struct kvm_nvhe_init_params { + unsigned long tpidr_el2; + unsigned long stack_hyp_va; + phys_addr_t pgd_pa; +}; + /* Translate a kernel address @ptr into its equivalent linear mapping */ #define kvm_ksym_ref(ptr) \ ({ \ diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 6b664de5ec1f4..cb25c15e3d8d3 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -14,6 +14,7 @@ DECLARE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt); DECLARE_PER_CPU(unsigned long, kvm_hyp_vector); +DECLARE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params); #define read_sysreg_elx(r,nvh,vh) \ ({ \ @@ -98,4 +99,3 @@ void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 par); #endif #endif /* __ARM64_KVM_HYP_H__ */ - diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 679b19b8a7ff2..52771a46f6009 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -109,6 +109,9 @@ int main(void) DEFINE(CPU_APGAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APGAKEYLO_EL1])); DEFINE(HOST_CONTEXT_VCPU, offsetof(struct kvm_cpu_context, __hyp_running_vcpu)); DEFINE(HOST_DATA_CONTEXT, offsetof(struct kvm_host_data, host_ctxt)); + DEFINE(NVHE_INIT_TPIDR_EL2, offsetof(struct kvm_nvhe_init_params, tpidr_el2)); + DEFINE(NVHE_INIT_STACK_HYP_VA, offsetof(struct kvm_nvhe_init_params, stack_hyp_va)); + DEFINE(NVHE_INIT_PGD_PA, offsetof(struct kvm_nvhe_init_params, pgd_pa)); #endif #ifdef CONFIG_CPU_PM DEFINE(CPU_CTX_SP, offsetof(struct cpu_suspend_ctx, sp)); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 1f86d7d0ecf2e..0b823e4489170 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -52,6 +52,7 @@ DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector); static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); unsigned long kvm_arm_hyp_percpu_base[NR_CPUS]; +DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params); /* The VMID used in the VTTBR */ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1); @@ -1333,9 +1334,7 @@ static int kvm_map_vectors(void) static void cpu_init_hyp_mode(void) { - phys_addr_t pgd_ptr; - unsigned long hyp_stack_ptr; - unsigned long tpidr_el2; + struct kvm_nvhe_init_params *params = this_cpu_ptr_nvhe_sym(kvm_init_params); struct arm_smccc_res res; /* Switch from the HYP stub to our own HYP init vector */ @@ -1346,12 +1345,17 @@ static void cpu_init_hyp_mode(void) * kernel's mapping to the linear mapping, and store it in tpidr_el2 * so that we can use adr_l to access per-cpu variables in EL2. */ - tpidr_el2 = (unsigned long)this_cpu_ptr_nvhe_sym(__per_cpu_start) - - (unsigned long)kvm_ksym_ref(CHOOSE_NVHE_SYM(__per_cpu_start)); + params->tpidr_el2 = (unsigned long)this_cpu_ptr_nvhe_sym(__per_cpu_start) - + (unsigned long)kvm_ksym_ref(CHOOSE_NVHE_SYM(__per_cpu_start)); - pgd_ptr = kvm_mmu_get_httbr(); - hyp_stack_ptr = __this_cpu_read(kvm_arm_hyp_stack_page) + PAGE_SIZE; - hyp_stack_ptr = kern_hyp_va(hyp_stack_ptr); + params->stack_hyp_va = kern_hyp_va(__this_cpu_read(kvm_arm_hyp_stack_page) + PAGE_SIZE); + params->pgd_pa = kvm_mmu_get_httbr(); + + /* + * Flush the init params from the data cache because the struct will + * be read while the MMU is off. + */ + kvm_flush_dcache_to_poc(params, sizeof(*params)); /* * Call initialization code, and switch to the full blown HYP code. @@ -1360,8 +1364,7 @@ static void cpu_init_hyp_mode(void) * cpus_have_const_cap() wrapper. */ BUG_ON(!system_capabilities_finalized()); - arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__kvm_hyp_init), - pgd_ptr, tpidr_el2, hyp_stack_ptr, &res); + arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__kvm_hyp_init), virt_to_phys(params), &res); WARN_ON(res.a0 != SMCCC_RET_SUCCESS); /* diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index 931a8c38f0854..e712e317337cb 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -47,9 +47,7 @@ __invalid: /* * x0: SMCCC function ID - * x1: HYP pgd - * x2: per-CPU offset - * x3: HYP stack + * x1: struct kvm_nvhe_init_params PA */ __do_hyp_init: /* Check for a stub HVC call */ @@ -70,10 +68,13 @@ __do_hyp_init: mov x0, #SMCCC_RET_NOT_SUPPORTED eret -1: - /* Set tpidr_el2 for use by HYP to free a register */ - msr tpidr_el2, x2 +1: ldr x0, [x1, #NVHE_INIT_TPIDR_EL2] + msr tpidr_el2, x0 + ldr x0, [x1, #NVHE_INIT_STACK_HYP_VA] + mov sp, x0 + + ldr x1, [x1, #NVHE_INIT_PGD_PA] phys_to_ttbr x0, x1 alternative_if ARM64_HAS_CNP orr x0, x0, #TTBR_CNP_BIT @@ -133,9 +134,6 @@ alternative_else_nop_endif msr sctlr_el2, x0 isb - /* Set the stack */ - mov sp, x3 - /* Set the host vector */ ldr x0, =__kvm_hyp_host_vector kimg_hyp_va x0, x1 diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 82df7fc247606..a4f1cac714d7b 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -16,6 +16,8 @@ #define DECLARE_REG(type, name, ctxt, reg) \ type name = (type)cpu_reg(ctxt, (reg)) +DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params); + static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt) { DECLARE_REG(struct kvm_vcpu *, vcpu, host_ctxt, 1); -- GitLab From d3e1086c64528ee0b955326b4c0e947cde3b6923 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:41:07 +0000 Subject: [PATCH 0387/1894] KVM: arm64: Init MAIR/TCR_EL2 from params struct MAIR_EL2 and TCR_EL2 are currently initialized from their _EL1 values. This will not work once KVM starts intercepting PSCI ON/SUSPEND SMCs and initializing EL2 state before EL1 state. Obtain the EL1 values during KVM init and store them in the init params struct. The struct will stay in memory and can be used when booting new cores. Take the opportunity to move copying the T0SZ value from idmap_t0sz in KVM init rather than in .hyp.idmap.text. This avoids the need for the idmap_t0sz symbol alias. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201202184122.26046-12-dbrazdil@google.com --- arch/arm64/include/asm/kvm_asm.h | 2 ++ arch/arm64/kernel/asm-offsets.c | 2 ++ arch/arm64/kernel/image-vars.h | 3 --- arch/arm64/kvm/arm.c | 22 +++++++++++++++++ arch/arm64/kvm/hyp/nvhe/hyp-init.S | 38 +++++++----------------------- 5 files changed, 34 insertions(+), 33 deletions(-) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index cb750c525aee0..4e2661d1ae2b7 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -151,6 +151,8 @@ extern void *__vhe_undefined_symbol; #endif struct kvm_nvhe_init_params { + unsigned long mair_el2; + unsigned long tcr_el2; unsigned long tpidr_el2; unsigned long stack_hyp_va; phys_addr_t pgd_pa; diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 52771a46f6009..5e82488f1b828 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -109,6 +109,8 @@ int main(void) DEFINE(CPU_APGAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APGAKEYLO_EL1])); DEFINE(HOST_CONTEXT_VCPU, offsetof(struct kvm_cpu_context, __hyp_running_vcpu)); DEFINE(HOST_DATA_CONTEXT, offsetof(struct kvm_host_data, host_ctxt)); + DEFINE(NVHE_INIT_MAIR_EL2, offsetof(struct kvm_nvhe_init_params, mair_el2)); + DEFINE(NVHE_INIT_TCR_EL2, offsetof(struct kvm_nvhe_init_params, tcr_el2)); DEFINE(NVHE_INIT_TPIDR_EL2, offsetof(struct kvm_nvhe_init_params, tpidr_el2)); DEFINE(NVHE_INIT_STACK_HYP_VA, offsetof(struct kvm_nvhe_init_params, stack_hyp_va)); DEFINE(NVHE_INIT_PGD_PA, offsetof(struct kvm_nvhe_init_params, pgd_pa)); diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 4b32588918d90..08e69faedf6cb 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -77,9 +77,6 @@ KVM_NVHE_ALIAS(panic); /* Vectors installed by hyp-init on reset HVC. */ KVM_NVHE_ALIAS(__hyp_stub_vectors); -/* IDMAP TCR_EL1.T0SZ as computed by the EL1 init code */ -KVM_NVHE_ALIAS(idmap_t0sz); - /* Kernel symbol used by icache_is_vpipt(). */ KVM_NVHE_ALIAS(__icache_flags); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 0b823e4489170..d9961f0b767e8 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1336,6 +1336,7 @@ static void cpu_init_hyp_mode(void) { struct kvm_nvhe_init_params *params = this_cpu_ptr_nvhe_sym(kvm_init_params); struct arm_smccc_res res; + unsigned long tcr; /* Switch from the HYP stub to our own HYP init vector */ __hyp_set_vectors(kvm_get_idmap_vector()); @@ -1348,6 +1349,27 @@ static void cpu_init_hyp_mode(void) params->tpidr_el2 = (unsigned long)this_cpu_ptr_nvhe_sym(__per_cpu_start) - (unsigned long)kvm_ksym_ref(CHOOSE_NVHE_SYM(__per_cpu_start)); + params->mair_el2 = read_sysreg(mair_el1); + + /* + * The ID map may be configured to use an extended virtual address + * range. This is only the case if system RAM is out of range for the + * currently configured page size and VA_BITS, in which case we will + * also need the extended virtual range for the HYP ID map, or we won't + * be able to enable the EL2 MMU. + * + * However, at EL2, there is only one TTBR register, and we can't switch + * between translation tables *and* update TCR_EL2.T0SZ at the same + * time. Bottom line: we need to use the extended range with *both* our + * translation tables. + * + * So use the same T0SZ value we use for the ID map. + */ + tcr = (read_sysreg(tcr_el1) & TCR_EL2_MASK) | TCR_EL2_RES1; + tcr &= ~TCR_T0SZ_MASK; + tcr |= (idmap_t0sz & GENMASK(TCR_TxSZ_WIDTH - 1, 0)) << TCR_T0SZ_OFFSET; + params->tcr_el2 = tcr; + params->stack_hyp_va = kern_hyp_va(__this_cpu_read(kvm_arm_hyp_stack_page) + PAGE_SIZE); params->pgd_pa = kvm_mmu_get_httbr(); diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index e712e317337cb..712f57289357a 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -71,48 +71,26 @@ __do_hyp_init: 1: ldr x0, [x1, #NVHE_INIT_TPIDR_EL2] msr tpidr_el2, x0 + ldr x0, [x1, #NVHE_INIT_MAIR_EL2] + msr mair_el2, x0 + ldr x0, [x1, #NVHE_INIT_STACK_HYP_VA] mov sp, x0 - ldr x1, [x1, #NVHE_INIT_PGD_PA] - phys_to_ttbr x0, x1 + ldr x0, [x1, #NVHE_INIT_PGD_PA] + phys_to_ttbr x2, x0 alternative_if ARM64_HAS_CNP - orr x0, x0, #TTBR_CNP_BIT + orr x2, x2, #TTBR_CNP_BIT alternative_else_nop_endif - msr ttbr0_el2, x0 - - mrs x0, tcr_el1 - mov_q x1, TCR_EL2_MASK - and x0, x0, x1 - mov x1, #TCR_EL2_RES1 - orr x0, x0, x1 - - /* - * The ID map may be configured to use an extended virtual address - * range. This is only the case if system RAM is out of range for the - * currently configured page size and VA_BITS, in which case we will - * also need the extended virtual range for the HYP ID map, or we won't - * be able to enable the EL2 MMU. - * - * However, at EL2, there is only one TTBR register, and we can't switch - * between translation tables *and* update TCR_EL2.T0SZ at the same - * time. Bottom line: we need to use the extended range with *both* our - * translation tables. - * - * So use the same T0SZ value we use for the ID map. - */ - ldr_l x1, idmap_t0sz - bfi x0, x1, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH + msr ttbr0_el2, x2 /* * Set the PS bits in TCR_EL2. */ + ldr x0, [x1, #NVHE_INIT_TCR_EL2] tcr_compute_pa_size x0, #TCR_EL2_PS_SHIFT, x1, x2 - msr tcr_el2, x0 - mrs x0, mair_el1 - msr mair_el2, x0 isb /* Invalidate the stale TLBs from Bootloader */ -- GitLab From 2d7bf218ca739554bf7277ab0dbfa5399d01f7c6 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:41:08 +0000 Subject: [PATCH 0388/1894] KVM: arm64: Add .hyp.data..ro_after_init ELF section Add rules for renaming the .data..ro_after_init ELF section in KVM nVHE object files to .hyp.data..ro_after_init, linking it into the kernel and mapping it in hyp at runtime. The section is RW to the host, then mapped RO in hyp. The expectation is that the host populates the variables in the section and they are never changed by hyp afterwards. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201202184122.26046-13-dbrazdil@google.com --- arch/arm64/include/asm/sections.h | 1 + arch/arm64/kernel/vmlinux.lds.S | 10 ++++++++++ arch/arm64/kvm/arm.c | 8 ++++++++ arch/arm64/kvm/hyp/nvhe/hyp.lds.S | 1 + 4 files changed, 20 insertions(+) diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h index 3994169985efc..8ff5793617311 100644 --- a/arch/arm64/include/asm/sections.h +++ b/arch/arm64/include/asm/sections.h @@ -11,6 +11,7 @@ extern char __alt_instructions[], __alt_instructions_end[]; extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[]; extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[]; extern char __hyp_text_start[], __hyp_text_end[]; +extern char __hyp_data_ro_after_init_start[], __hyp_data_ro_after_init_end[]; extern char __idmap_text_start[], __idmap_text_end[]; extern char __initdata_begin[], __initdata_end[]; extern char __inittext_begin[], __inittext_end[]; diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index d6cdcf4aa6a54..43af13968dfd3 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -30,6 +30,13 @@ jiffies = jiffies_64; *(__kvm_ex_table) \ __stop___kvm_ex_table = .; +#define HYPERVISOR_DATA_SECTIONS \ + HYP_SECTION_NAME(.data..ro_after_init) : { \ + __hyp_data_ro_after_init_start = .; \ + *(HYP_SECTION_NAME(.data..ro_after_init)) \ + __hyp_data_ro_after_init_end = .; \ + } + #define HYPERVISOR_PERCPU_SECTION \ . = ALIGN(PAGE_SIZE); \ HYP_SECTION_NAME(.data..percpu) : { \ @@ -37,6 +44,7 @@ jiffies = jiffies_64; } #else /* CONFIG_KVM */ #define HYPERVISOR_EXTABLE +#define HYPERVISOR_DATA_SECTIONS #define HYPERVISOR_PERCPU_SECTION #endif @@ -234,6 +242,8 @@ SECTIONS _sdata = .; RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_ALIGN) + HYPERVISOR_DATA_SECTIONS + /* * Data written with the MMU off but read with the MMU on requires * cache lines to be invalidated, discarding up to a Cache Writeback diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index d9961f0b767e8..ef519e4826165 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1627,6 +1627,14 @@ static int init_hyp_mode(void) goto out_err; } + err = create_hyp_mappings(kvm_ksym_ref(__hyp_data_ro_after_init_start), + kvm_ksym_ref(__hyp_data_ro_after_init_end), + PAGE_HYP_RO); + if (err) { + kvm_err("Cannot map .hyp.data..ro_after_init section\n"); + goto out_err; + } + err = create_hyp_mappings(kvm_ksym_ref(__start_rodata), kvm_ksym_ref(__end_rodata), PAGE_HYP_RO); if (err) { diff --git a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S index bb2d986ff6962..5d76ff2ba63e7 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S @@ -16,4 +16,5 @@ SECTIONS { HYP_SECTION_NAME(.data..percpu) : { PERCPU_INPUT(L1_CACHE_BYTES) } + HYP_SECTION(.data..ro_after_init) } -- GitLab From 687413d34d4aa72103de3e545f431f480dd21d7f Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:41:09 +0000 Subject: [PATCH 0389/1894] KVM: arm64: Support per_cpu_ptr in nVHE hyp code When compiling with __KVM_NVHE_HYPERVISOR__, redefine per_cpu_offset() to __hyp_per_cpu_offset() which looks up the base of the nVHE per-CPU region of the given cpu and computes its offset from the .hyp.data..percpu section. This enables use of per_cpu_ptr() helpers in nVHE hyp code. Until now only this_cpu_ptr() was supported by setting TPIDR_EL2. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201202184122.26046-14-dbrazdil@google.com --- arch/arm64/include/asm/percpu.h | 6 ++++++ arch/arm64/kernel/image-vars.h | 3 +++ arch/arm64/kvm/hyp/nvhe/Makefile | 3 ++- arch/arm64/kvm/hyp/nvhe/hyp-smp.c | 24 ++++++++++++++++++++++++ 4 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 arch/arm64/kvm/hyp/nvhe/hyp-smp.c diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h index 1599e17379d86..8f1661603b788 100644 --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -239,6 +239,12 @@ PERCPU_RET_OP(add, add, ldadd) #define this_cpu_cmpxchg_8(pcp, o, n) \ _pcp_protect_return(cmpxchg_relaxed, pcp, o, n) +#ifdef __KVM_NVHE_HYPERVISOR__ +extern unsigned long __hyp_per_cpu_offset(unsigned int cpu); +#define __per_cpu_offset +#define per_cpu_offset(cpu) __hyp_per_cpu_offset((cpu)) +#endif + #include /* Redefine macros for nVHE hyp under DEBUG_PREEMPT to avoid its dependencies. */ diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 08e69faedf6cb..39289d75118d7 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -99,6 +99,9 @@ KVM_NVHE_ALIAS(gic_nonsecure_priorities); KVM_NVHE_ALIAS(__start___kvm_ex_table); KVM_NVHE_ALIAS(__stop___kvm_ex_table); +/* Array containing bases of nVHE per-CPU memory regions. */ +KVM_NVHE_ALIAS(kvm_arm_hyp_percpu_base); + #endif /* CONFIG_KVM */ #endif /* __ARM64_KERNEL_IMAGE_VARS_H */ diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index ddde15fe85f2f..2d842e009a403 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -6,7 +6,8 @@ asflags-y := -D__KVM_NVHE_HYPERVISOR__ ccflags-y := -D__KVM_NVHE_HYPERVISOR__ -obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o hyp-main.o +obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \ + hyp-main.o hyp-smp.o obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ ../fpsimd.o ../hyp-entry.o diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-smp.c b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c new file mode 100644 index 0000000000000..7b0363b4857f2 --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 - Google LLC + * Author: David Brazdil + */ + +#include +#include +#include + +unsigned long __hyp_per_cpu_offset(unsigned int cpu) +{ + unsigned long *cpu_base_array; + unsigned long this_cpu_base; + unsigned long elf_base; + + if (cpu >= ARRAY_SIZE(kvm_arm_hyp_percpu_base)) + hyp_panic(); + + cpu_base_array = (unsigned long *)hyp_symbol_addr(kvm_arm_hyp_percpu_base); + this_cpu_base = kern_hyp_va(cpu_base_array[cpu]); + elf_base = (unsigned long)hyp_symbol_addr(__per_cpu_start); + return this_cpu_base - elf_base; +} -- GitLab From 94f5e8a4642aedb19ca73f534372d7ed65e1c84e Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:41:10 +0000 Subject: [PATCH 0390/1894] KVM: arm64: Create nVHE copy of cpu_logical_map When KVM starts validating host's PSCI requests, it will need to map MPIDR back to the CPU ID. To this end, copy cpu_logical_map into nVHE hyp memory when KVM is initialized. Only copy the information for CPUs that are online at the point of KVM initialization so that KVM rejects CPUs whose features were not checked against the finalized capabilities. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201202184122.26046-15-dbrazdil@google.com --- arch/arm64/kvm/arm.c | 19 +++++++++++++++++++ arch/arm64/kvm/hyp/nvhe/hyp-smp.c | 16 ++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index ef519e4826165..bcd6e5d3c89a6 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -64,6 +64,8 @@ static bool vgic_present; static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled); DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use); +extern u64 kvm_nvhe_sym(__cpu_logical_map)[NR_CPUS]; + int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) { return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; @@ -1506,6 +1508,20 @@ static inline void hyp_cpu_pm_exit(void) } #endif +static void init_cpu_logical_map(void) +{ + unsigned int cpu; + + /* + * Copy the MPIDR <-> logical CPU ID mapping to hyp. + * Only copy the set of online CPUs whose features have been chacked + * against the finalized system capabilities. The hypervisor will not + * allow any other CPUs from the `possible` set to boot. + */ + for_each_online_cpu(cpu) + kvm_nvhe_sym(__cpu_logical_map)[cpu] = cpu_logical_map(cpu); +} + static int init_common_resources(void) { return kvm_set_ipa_limit(); @@ -1684,6 +1700,9 @@ static int init_hyp_mode(void) } } + if (is_protected_kvm_enabled()) + init_cpu_logical_map(); + return 0; out_err: diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-smp.c b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c index 7b0363b4857f2..cbab0c6246e20 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-smp.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c @@ -8,6 +8,22 @@ #include #include +/* + * nVHE copy of data structures tracking available CPU cores. + * Only entries for CPUs that were online at KVM init are populated. + * Other CPUs should not be allowed to boot because their features were + * not checked against the finalized system capabilities. + */ +u64 __ro_after_init __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID }; + +u64 cpu_logical_map(unsigned int cpu) +{ + if (cpu >= ARRAY_SIZE(__cpu_logical_map)) + hyp_panic(); + + return __cpu_logical_map[cpu]; +} + unsigned long __hyp_per_cpu_offset(unsigned int cpu) { unsigned long *cpu_base_array; -- GitLab From a805e1fb30990e29b3174c39bf39015065e5dc19 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:41:11 +0000 Subject: [PATCH 0391/1894] KVM: arm64: Add SMC handler in nVHE EL2 Add handler of host SMCs in KVM nVHE trap handler. Forward all SMCs to EL3 and propagate the result back to EL1. This is done in preparation for validating host SMCs in KVM protected mode. The implementation assumes that firmware uses SMCCC v1.2 or older. That means x0-x17 can be used both for arguments and results, other GPRs are preserved. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201202184122.26046-16-dbrazdil@google.com --- arch/arm64/kvm/hyp/nvhe/host.S | 38 ++++++++++++++++++++++++++++++ arch/arm64/kvm/hyp/nvhe/hyp-main.c | 35 ++++++++++++++++++++++++--- 2 files changed, 70 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S index fe2740b224cf0..2b56f0bdf8743 100644 --- a/arch/arm64/kvm/hyp/nvhe/host.S +++ b/arch/arm64/kvm/hyp/nvhe/host.S @@ -180,3 +180,41 @@ SYM_CODE_START(__kvm_hyp_host_vector) invalid_host_el1_vect // FIQ 32-bit EL1 invalid_host_el1_vect // Error 32-bit EL1 SYM_CODE_END(__kvm_hyp_host_vector) + +/* + * Forward SMC with arguments in struct kvm_cpu_context, and + * store the result into the same struct. Assumes SMCCC 1.2 or older. + * + * x0: struct kvm_cpu_context* + */ +SYM_CODE_START(__kvm_hyp_host_forward_smc) + /* + * Use x18 to keep the pointer to the host context because + * x18 is callee-saved in SMCCC but not in AAPCS64. + */ + mov x18, x0 + + ldp x0, x1, [x18, #CPU_XREG_OFFSET(0)] + ldp x2, x3, [x18, #CPU_XREG_OFFSET(2)] + ldp x4, x5, [x18, #CPU_XREG_OFFSET(4)] + ldp x6, x7, [x18, #CPU_XREG_OFFSET(6)] + ldp x8, x9, [x18, #CPU_XREG_OFFSET(8)] + ldp x10, x11, [x18, #CPU_XREG_OFFSET(10)] + ldp x12, x13, [x18, #CPU_XREG_OFFSET(12)] + ldp x14, x15, [x18, #CPU_XREG_OFFSET(14)] + ldp x16, x17, [x18, #CPU_XREG_OFFSET(16)] + + smc #0 + + stp x0, x1, [x18, #CPU_XREG_OFFSET(0)] + stp x2, x3, [x18, #CPU_XREG_OFFSET(2)] + stp x4, x5, [x18, #CPU_XREG_OFFSET(4)] + stp x6, x7, [x18, #CPU_XREG_OFFSET(6)] + stp x8, x9, [x18, #CPU_XREG_OFFSET(8)] + stp x10, x11, [x18, #CPU_XREG_OFFSET(10)] + stp x12, x13, [x18, #CPU_XREG_OFFSET(12)] + stp x14, x15, [x18, #CPU_XREG_OFFSET(14)] + stp x16, x17, [x18, #CPU_XREG_OFFSET(16)] + + ret +SYM_CODE_END(__kvm_hyp_host_forward_smc) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index a4f1cac714d7b..f25680ede080f 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -18,6 +18,8 @@ DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params); +void __kvm_hyp_host_forward_smc(struct kvm_cpu_context *host_ctxt); + static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt) { DECLARE_REG(struct kvm_vcpu *, vcpu, host_ctxt, 1); @@ -152,12 +154,39 @@ inval: cpu_reg(host_ctxt, 0) = SMCCC_RET_NOT_SUPPORTED; } +static void default_host_smc_handler(struct kvm_cpu_context *host_ctxt) +{ + __kvm_hyp_host_forward_smc(host_ctxt); +} + +static void skip_host_instruction(void) +{ + write_sysreg_el2(read_sysreg_el2(SYS_ELR) + 4, SYS_ELR); +} + +static void handle_host_smc(struct kvm_cpu_context *host_ctxt) +{ + default_host_smc_handler(host_ctxt); + + /* + * Unlike HVC, the return address of an SMC is the instruction's PC. + * Move the return address past the instruction. + */ + skip_host_instruction(); +} + void handle_trap(struct kvm_cpu_context *host_ctxt) { u64 esr = read_sysreg_el2(SYS_ESR); - if (unlikely(ESR_ELx_EC(esr) != ESR_ELx_EC_HVC64)) + switch (ESR_ELx_EC(esr)) { + case ESR_ELx_EC_HVC64: + handle_host_hcall(host_ctxt); + break; + case ESR_ELx_EC_SMC64: + handle_host_smc(host_ctxt); + break; + default: hyp_panic(); - - handle_host_hcall(host_ctxt); + } } -- GitLab From eeeee7193df015074c8302381356e8e617a5e2b0 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:41:12 +0000 Subject: [PATCH 0392/1894] KVM: arm64: Bootstrap PSCI SMC handler in nVHE EL2 Add a handler of PSCI SMCs in nVHE hyp code. The handler is initialized with the version used by the host's PSCI driver and the function IDs it was configured with. If the SMC function ID matches one of the configured PSCI calls (for v0.1) or falls into the PSCI function ID range (for v0.2+), the SMC is handled by the PSCI handler. For now, all SMCs return PSCI_RET_NOT_SUPPORTED. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201202184122.26046-17-dbrazdil@google.com --- arch/arm64/include/asm/kvm_hyp.h | 2 + arch/arm64/kvm/arm.c | 25 ++++- .../arm64/kvm/hyp/include/nvhe/trap_handler.h | 18 ++++ arch/arm64/kvm/hyp/nvhe/Makefile | 2 +- arch/arm64/kvm/hyp/nvhe/hyp-main.c | 10 +- arch/arm64/kvm/hyp/nvhe/psci-relay.c | 100 ++++++++++++++++++ 6 files changed, 151 insertions(+), 6 deletions(-) create mode 100644 arch/arm64/kvm/hyp/include/nvhe/trap_handler.h create mode 100644 arch/arm64/kvm/hyp/nvhe/psci-relay.c diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index cb25c15e3d8d3..c0450828378b5 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -93,6 +93,8 @@ void deactivate_traps_vhe_put(void); u64 __guest_enter(struct kvm_vcpu *vcpu); +bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt); + void __noreturn hyp_panic(void); #ifdef __KVM_NVHE_HYPERVISOR__ void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 par); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index bcd6e5d3c89a6..aa40bef09dfce 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #define CREATE_TRACE_POINTS @@ -65,6 +66,8 @@ static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled); DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use); extern u64 kvm_nvhe_sym(__cpu_logical_map)[NR_CPUS]; +extern u32 kvm_nvhe_sym(kvm_host_psci_version); +extern struct psci_0_1_function_ids kvm_nvhe_sym(kvm_host_psci_0_1_function_ids); int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) { @@ -1522,6 +1525,22 @@ static void init_cpu_logical_map(void) kvm_nvhe_sym(__cpu_logical_map)[cpu] = cpu_logical_map(cpu); } +static bool init_psci_relay(void) +{ + /* + * If PSCI has not been initialized, protected KVM cannot install + * itself on newly booted CPUs. + */ + if (!psci_ops.get_version) { + kvm_err("Cannot initialize protected mode without PSCI\n"); + return false; + } + + kvm_nvhe_sym(kvm_host_psci_version) = psci_ops.get_version(); + kvm_nvhe_sym(kvm_host_psci_0_1_function_ids) = get_psci_0_1_function_ids(); + return true; +} + static int init_common_resources(void) { return kvm_set_ipa_limit(); @@ -1700,9 +1719,13 @@ static int init_hyp_mode(void) } } - if (is_protected_kvm_enabled()) + if (is_protected_kvm_enabled()) { init_cpu_logical_map(); + if (!init_psci_relay()) + goto out_err; + } + return 0; out_err: diff --git a/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h b/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h new file mode 100644 index 0000000000000..1e6d995968a1f --- /dev/null +++ b/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Trap handler helpers. + * + * Copyright (C) 2020 - Google LLC + * Author: Marc Zyngier + */ + +#ifndef __ARM64_KVM_NVHE_TRAP_HANDLER_H__ +#define __ARM64_KVM_NVHE_TRAP_HANDLER_H__ + +#include + +#define cpu_reg(ctxt, r) (ctxt)->regs.regs[r] +#define DECLARE_REG(type, name, ctxt, reg) \ + type name = (type)cpu_reg(ctxt, (reg)) + +#endif /* __ARM64_KVM_NVHE_TRAP_HANDLER_H__ */ diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index 2d842e009a403..bf62c8e42ab23 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -7,7 +7,7 @@ asflags-y := -D__KVM_NVHE_HYPERVISOR__ ccflags-y := -D__KVM_NVHE_HYPERVISOR__ obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \ - hyp-main.o hyp-smp.o + hyp-main.o hyp-smp.o psci-relay.o obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ ../fpsimd.o ../hyp-entry.o diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index f25680ede080f..bde658d51404b 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -12,9 +12,7 @@ #include #include -#define cpu_reg(ctxt, r) (ctxt)->regs.regs[r] -#define DECLARE_REG(type, name, ctxt, reg) \ - type name = (type)cpu_reg(ctxt, (reg)) +#include DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params); @@ -166,7 +164,11 @@ static void skip_host_instruction(void) static void handle_host_smc(struct kvm_cpu_context *host_ctxt) { - default_host_smc_handler(host_ctxt); + bool handled; + + handled = kvm_host_psci_handler(host_ctxt); + if (!handled) + default_host_smc_handler(host_ctxt); /* * Unlike HVC, the return address of an SMC is the instruction's PC. diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c new file mode 100644 index 0000000000000..61375d4571c29 --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 - Google LLC + * Author: David Brazdil + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/* Config options set by the host. */ +__ro_after_init u32 kvm_host_psci_version; +__ro_after_init struct psci_0_1_function_ids kvm_host_psci_0_1_function_ids; + +static u64 get_psci_func_id(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(u64, func_id, host_ctxt, 0); + + return func_id; +} + +static bool is_psci_0_1_call(u64 func_id) +{ + return (func_id == kvm_host_psci_0_1_function_ids.cpu_suspend) || + (func_id == kvm_host_psci_0_1_function_ids.cpu_on) || + (func_id == kvm_host_psci_0_1_function_ids.cpu_off) || + (func_id == kvm_host_psci_0_1_function_ids.migrate); +} + +static bool is_psci_0_2_call(u64 func_id) +{ + /* SMCCC reserves IDs 0x00-1F with the given 32/64-bit base for PSCI. */ + return (PSCI_0_2_FN(0) <= func_id && func_id <= PSCI_0_2_FN(31)) || + (PSCI_0_2_FN64(0) <= func_id && func_id <= PSCI_0_2_FN64(31)); +} + +static bool is_psci_call(u64 func_id) +{ + switch (kvm_host_psci_version) { + case PSCI_VERSION(0, 1): + return is_psci_0_1_call(func_id); + default: + return is_psci_0_2_call(func_id); + } +} + +static unsigned long psci_0_1_handler(u64 func_id, struct kvm_cpu_context *host_ctxt) +{ + return PSCI_RET_NOT_SUPPORTED; +} + +static unsigned long psci_0_2_handler(u64 func_id, struct kvm_cpu_context *host_ctxt) +{ + switch (func_id) { + default: + return PSCI_RET_NOT_SUPPORTED; + } +} + +static unsigned long psci_1_0_handler(u64 func_id, struct kvm_cpu_context *host_ctxt) +{ + switch (func_id) { + default: + return psci_0_2_handler(func_id, host_ctxt); + } +} + +bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt) +{ + u64 func_id = get_psci_func_id(host_ctxt); + unsigned long ret; + + if (!is_psci_call(func_id)) + return false; + + switch (kvm_host_psci_version) { + case PSCI_VERSION(0, 1): + ret = psci_0_1_handler(func_id, host_ctxt); + break; + case PSCI_VERSION(0, 2): + ret = psci_0_2_handler(func_id, host_ctxt); + break; + default: + ret = psci_1_0_handler(func_id, host_ctxt); + break; + } + + cpu_reg(host_ctxt, 0) = ret; + cpu_reg(host_ctxt, 1) = 0; + cpu_reg(host_ctxt, 2) = 0; + cpu_reg(host_ctxt, 3) = 0; + return true; +} -- GitLab From d084ecc5c72811e7231838f7c128bfcc7f8d2889 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:41:13 +0000 Subject: [PATCH 0393/1894] KVM: arm64: Add offset for hyp VA <-> PA conversion Add a host-initialized constant to KVM nVHE hyp code for converting between EL2 linear map virtual addresses and physical addresses. Also add `__hyp_pa` macro that performs the conversion. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201202184122.26046-18-dbrazdil@google.com --- arch/arm64/kvm/hyp/nvhe/psci-relay.c | 3 +++ arch/arm64/kvm/va_layout.c | 30 +++++++++++++++++++++++++--- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c index 61375d4571c29..70b42f4334490 100644 --- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c +++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c @@ -18,6 +18,9 @@ /* Config options set by the host. */ __ro_after_init u32 kvm_host_psci_version; __ro_after_init struct psci_0_1_function_ids kvm_host_psci_0_1_function_ids; +__ro_after_init s64 hyp_physvirt_offset; + +#define __hyp_pa(x) ((phys_addr_t)((x)) + hyp_physvirt_offset) static u64 get_psci_func_id(struct kvm_cpu_context *host_ctxt) { diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c index d61117805de06..16aa8615c279f 100644 --- a/arch/arm64/kvm/va_layout.c +++ b/arch/arm64/kvm/va_layout.c @@ -23,6 +23,30 @@ static u8 tag_lsb; static u64 tag_val; static u64 va_mask; +/* + * Compute HYP VA by using the same computation as kern_hyp_va(). + */ +static u64 __early_kern_hyp_va(u64 addr) +{ + addr &= va_mask; + addr |= tag_val << tag_lsb; + return addr; +} + +/* + * Store a hyp VA <-> PA offset into a hyp-owned variable. + */ +static void init_hyp_physvirt_offset(void) +{ + extern s64 kvm_nvhe_sym(hyp_physvirt_offset); + u64 kern_va, hyp_va; + + /* Compute the offset from the hyp VA and PA of a random symbol. */ + kern_va = (u64)kvm_ksym_ref(__hyp_text_start); + hyp_va = __early_kern_hyp_va(kern_va); + CHOOSE_NVHE_SYM(hyp_physvirt_offset) = (s64)__pa(kern_va) - (s64)hyp_va; +} + /* * We want to generate a hyp VA with the following format (with V == * vabits_actual): @@ -54,6 +78,8 @@ __init void kvm_compute_layout(void) tag_val |= get_random_long() & GENMASK_ULL(vabits_actual - 2, tag_lsb); } tag_val >>= tag_lsb; + + init_hyp_physvirt_offset(); } static u32 compute_instruction(int n, u32 rd, u32 rn) @@ -151,9 +177,7 @@ void kvm_patch_vector_branch(struct alt_instr *alt, /* * Compute HYP VA by using the same computation as kern_hyp_va() */ - addr = (uintptr_t)kvm_ksym_ref(__kvm_hyp_vector); - addr &= va_mask; - addr |= tag_val << tag_lsb; + addr = __early_kern_hyp_va((u64)kvm_ksym_ref(__kvm_hyp_vector)); /* Use PC[10:7] to branch to the same vector in KVM */ addr |= ((u64)origptr & GENMASK_ULL(10, 7)); -- GitLab From 1fd12b7e4d0082a9f373e26ab11fc94bcc307d33 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:41:14 +0000 Subject: [PATCH 0394/1894] KVM: arm64: Forward safe PSCI SMCs coming from host Forward the following PSCI SMCs issued by host to EL3 as they do not require the hypervisor's intervention. This assumes that EL3 correctly implements the PSCI specification. Only function IDs implemented in Linux are included. Where both 32-bit and 64-bit variants exist, it is assumed that the host will always use the 64-bit variant. * SMCs that only return information about the system * PSCI_VERSION - PSCI version implemented by EL3 * PSCI_FEATURES - optional features supported by EL3 * AFFINITY_INFO - power state of core/cluster * MIGRATE_INFO_TYPE - whether Trusted OS can be migrated * MIGRATE_INFO_UP_CPU - resident core of Trusted OS * operations which do not affect the hypervisor * MIGRATE - migrate Trusted OS to a different core * SET_SUSPEND_MODE - toggle OS-initiated mode * system shutdown/reset * SYSTEM_OFF * SYSTEM_RESET * SYSTEM_RESET2 Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201202184122.26046-19-dbrazdil@google.com --- arch/arm64/kvm/hyp/nvhe/psci-relay.c | 42 +++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c index 70b42f4334490..5ad56a875ffa9 100644 --- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c +++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c @@ -54,14 +54,50 @@ static bool is_psci_call(u64 func_id) } } +static unsigned long psci_call(unsigned long fn, unsigned long arg0, + unsigned long arg1, unsigned long arg2) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_smc(fn, arg0, arg1, arg2, &res); + return res.a0; +} + +static unsigned long psci_forward(struct kvm_cpu_context *host_ctxt) +{ + return psci_call(cpu_reg(host_ctxt, 0), cpu_reg(host_ctxt, 1), + cpu_reg(host_ctxt, 2), cpu_reg(host_ctxt, 3)); +} + +static __noreturn unsigned long psci_forward_noreturn(struct kvm_cpu_context *host_ctxt) +{ + psci_forward(host_ctxt); + hyp_panic(); /* unreachable */ +} + static unsigned long psci_0_1_handler(u64 func_id, struct kvm_cpu_context *host_ctxt) { - return PSCI_RET_NOT_SUPPORTED; + if ((func_id == kvm_host_psci_0_1_function_ids.cpu_off) || + (func_id == kvm_host_psci_0_1_function_ids.migrate)) + return psci_forward(host_ctxt); + else + return PSCI_RET_NOT_SUPPORTED; } static unsigned long psci_0_2_handler(u64 func_id, struct kvm_cpu_context *host_ctxt) { switch (func_id) { + case PSCI_0_2_FN_PSCI_VERSION: + case PSCI_0_2_FN_CPU_OFF: + case PSCI_0_2_FN64_AFFINITY_INFO: + case PSCI_0_2_FN64_MIGRATE: + case PSCI_0_2_FN_MIGRATE_INFO_TYPE: + case PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU: + return psci_forward(host_ctxt); + case PSCI_0_2_FN_SYSTEM_OFF: + case PSCI_0_2_FN_SYSTEM_RESET: + psci_forward_noreturn(host_ctxt); + unreachable(); default: return PSCI_RET_NOT_SUPPORTED; } @@ -70,6 +106,10 @@ static unsigned long psci_0_2_handler(u64 func_id, struct kvm_cpu_context *host_ static unsigned long psci_1_0_handler(u64 func_id, struct kvm_cpu_context *host_ctxt) { switch (func_id) { + case PSCI_1_0_FN_PSCI_FEATURES: + case PSCI_1_0_FN_SET_SUSPEND_MODE: + case PSCI_1_1_FN64_SYSTEM_RESET2: + return psci_forward(host_ctxt); default: return psci_0_2_handler(func_id, host_ctxt); } -- GitLab From f74e1e2128b7681f0d9c2a66dc4480e7d7196b49 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:41:15 +0000 Subject: [PATCH 0395/1894] KVM: arm64: Extract __do_hyp_init into a helper function In preparation for adding a CPU entry point in nVHE hyp code, extract most of __do_hyp_init hypervisor initialization code into a common helper function. This will be invoked by the entry point to install KVM on the newly booted CPU. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201202184122.26046-20-dbrazdil@google.com --- arch/arm64/kvm/hyp/nvhe/hyp-init.S | 47 ++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 15 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index 712f57289357a..b0856b006bc00 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -68,17 +68,36 @@ __do_hyp_init: mov x0, #SMCCC_RET_NOT_SUPPORTED eret -1: ldr x0, [x1, #NVHE_INIT_TPIDR_EL2] - msr tpidr_el2, x0 +1: mov x0, x1 + mov x4, lr + bl ___kvm_hyp_init + mov lr, x4 - ldr x0, [x1, #NVHE_INIT_MAIR_EL2] - msr mair_el2, x0 + /* Hello, World! */ + mov x0, #SMCCC_RET_SUCCESS + eret +SYM_CODE_END(__kvm_hyp_init) + +/* + * Initialize the hypervisor in EL2. + * + * Only uses x0..x3 so as to not clobber callee-saved SMCCC registers + * and leave x4 for the caller. + * + * x0: struct kvm_nvhe_init_params PA + */ +SYM_CODE_START_LOCAL(___kvm_hyp_init) + ldr x1, [x0, #NVHE_INIT_TPIDR_EL2] + msr tpidr_el2, x1 + + ldr x1, [x0, #NVHE_INIT_STACK_HYP_VA] + mov sp, x1 - ldr x0, [x1, #NVHE_INIT_STACK_HYP_VA] - mov sp, x0 + ldr x1, [x0, #NVHE_INIT_MAIR_EL2] + msr mair_el2, x1 - ldr x0, [x1, #NVHE_INIT_PGD_PA] - phys_to_ttbr x2, x0 + ldr x1, [x0, #NVHE_INIT_PGD_PA] + phys_to_ttbr x2, x1 alternative_if ARM64_HAS_CNP orr x2, x2, #TTBR_CNP_BIT alternative_else_nop_endif @@ -87,9 +106,9 @@ alternative_else_nop_endif /* * Set the PS bits in TCR_EL2. */ - ldr x0, [x1, #NVHE_INIT_TCR_EL2] - tcr_compute_pa_size x0, #TCR_EL2_PS_SHIFT, x1, x2 - msr tcr_el2, x0 + ldr x1, [x0, #NVHE_INIT_TCR_EL2] + tcr_compute_pa_size x1, #TCR_EL2_PS_SHIFT, x2, x3 + msr tcr_el2, x1 isb @@ -117,10 +136,8 @@ alternative_else_nop_endif kimg_hyp_va x0, x1 msr vbar_el2, x0 - /* Hello, World! */ - mov x0, #SMCCC_RET_SUCCESS - eret -SYM_CODE_END(__kvm_hyp_init) + ret +SYM_CODE_END(___kvm_hyp_init) SYM_CODE_START(__kvm_handle_stub_hvc) cmp x0, #HVC_SOFT_RESTART -- GitLab From 04e05f057a04275cb68c8053b29c5642ae0bad4f Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:41:16 +0000 Subject: [PATCH 0396/1894] KVM: arm64: Add function to enter host from KVM nVHE hyp code All nVHE hyp code is currently executed as handlers of host's HVCs. This will change as nVHE starts intercepting host's PSCI CPU_ON SMCs. The newly booted CPU will need to initialize EL2 state and then enter the host. Add __host_enter function that branches into the existing host state-restoring code after the trap handler would have returned. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201202184122.26046-21-dbrazdil@google.com --- arch/arm64/kvm/hyp/nvhe/host.S | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S index 2b56f0bdf8743..a820dfdc9c25d 100644 --- a/arch/arm64/kvm/hyp/nvhe/host.S +++ b/arch/arm64/kvm/hyp/nvhe/host.S @@ -39,6 +39,7 @@ SYM_FUNC_START(__host_exit) bl handle_trap /* Restore host regs x0-x17 */ +__host_enter_restore_full: ldp x0, x1, [x29, #CPU_XREG_OFFSET(0)] ldp x2, x3, [x29, #CPU_XREG_OFFSET(2)] ldp x4, x5, [x29, #CPU_XREG_OFFSET(4)] @@ -61,6 +62,14 @@ __host_enter_without_restoring: sb SYM_FUNC_END(__host_exit) +/* + * void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt); + */ +SYM_FUNC_START(__host_enter) + mov x29, x0 + b __host_enter_restore_full +SYM_FUNC_END(__host_enter) + /* * void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 par); */ -- GitLab From cdf367192766ad11a03e8d5098556be43b8eb6b0 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:41:17 +0000 Subject: [PATCH 0397/1894] KVM: arm64: Intercept host's CPU_ON SMCs Add a handler of the CPU_ON PSCI call from host. When invoked, it looks up the logical CPU ID corresponding to the provided MPIDR and populates the state struct of the target CPU with the provided x0, pc. It then calls CPU_ON itself, with an entry point in hyp that initializes EL2 state before returning ERET to the provided PC in EL1. There is a simple atomic lock around the boot args struct. If it is already locked, CPU_ON will return PENDING_ON error code. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201202184122.26046-22-dbrazdil@google.com --- arch/arm64/kvm/hyp/nvhe/hyp-init.S | 48 +++++++++++ arch/arm64/kvm/hyp/nvhe/psci-relay.c | 115 +++++++++++++++++++++++++++ 2 files changed, 163 insertions(+) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index b0856b006bc00..d07e75f8242ea 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -139,6 +140,53 @@ alternative_else_nop_endif ret SYM_CODE_END(___kvm_hyp_init) +/* + * PSCI CPU_ON entry point + * + * x0: struct kvm_nvhe_init_params PA + */ +SYM_CODE_START(kvm_hyp_cpu_entry) + mov x1, #1 // is_cpu_on = true + b __kvm_hyp_init_cpu +SYM_CODE_END(kvm_hyp_cpu_entry) + +/* + * Common code for CPU entry points. Initializes EL2 state and + * installs the hypervisor before handing over to a C handler. + * + * x0: struct kvm_nvhe_init_params PA + * x1: bool is_cpu_on + */ +SYM_CODE_START_LOCAL(__kvm_hyp_init_cpu) + mov x28, x0 // Stash arguments + mov x29, x1 + + /* Check that the core was booted in EL2. */ + mrs x0, CurrentEL + cmp x0, #CurrentEL_EL2 + b.eq 2f + + /* The core booted in EL1. KVM cannot be initialized on it. */ +1: wfe + wfi + b 1b + +2: msr SPsel, #1 // We want to use SP_EL{1,2} + + /* Initialize EL2 CPU state to sane values. */ + init_el2_state nvhe // Clobbers x0..x2 + + /* Enable MMU, set vectors and stack. */ + mov x0, x28 + bl ___kvm_hyp_init // Clobbers x0..x3 + + /* Leave idmap. */ + mov x0, x29 + ldr x1, =kvm_host_psci_cpu_entry + kimg_hyp_va x1, x2 + br x1 +SYM_CODE_END(__kvm_hyp_init_cpu) + SYM_CODE_START(__kvm_handle_stub_hvc) cmp x0, #HVC_SOFT_RESTART b.ne 1f diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c index 5ad56a875ffa9..637e22ed71fc6 100644 --- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c +++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c @@ -9,12 +9,17 @@ #include #include #include +#include #include #include #include #include +void kvm_hyp_cpu_entry(unsigned long r0); + +void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt); + /* Config options set by the host. */ __ro_after_init u32 kvm_host_psci_version; __ro_after_init struct psci_0_1_function_ids kvm_host_psci_0_1_function_ids; @@ -22,6 +27,24 @@ __ro_after_init s64 hyp_physvirt_offset; #define __hyp_pa(x) ((phys_addr_t)((x)) + hyp_physvirt_offset) +#define INVALID_CPU_ID UINT_MAX + +struct psci_boot_args { + atomic_t lock; + unsigned long pc; + unsigned long r0; +}; + +#define PSCI_BOOT_ARGS_UNLOCKED 0 +#define PSCI_BOOT_ARGS_LOCKED 1 + +#define PSCI_BOOT_ARGS_INIT \ + ((struct psci_boot_args){ \ + .lock = ATOMIC_INIT(PSCI_BOOT_ARGS_UNLOCKED), \ + }) + +static DEFINE_PER_CPU(struct psci_boot_args, cpu_on_args) = PSCI_BOOT_ARGS_INIT; + static u64 get_psci_func_id(struct kvm_cpu_context *host_ctxt) { DECLARE_REG(u64, func_id, host_ctxt, 0); @@ -75,11 +98,101 @@ static __noreturn unsigned long psci_forward_noreturn(struct kvm_cpu_context *ho hyp_panic(); /* unreachable */ } +static unsigned int find_cpu_id(u64 mpidr) +{ + unsigned int i; + + /* Reject invalid MPIDRs */ + if (mpidr & ~MPIDR_HWID_BITMASK) + return INVALID_CPU_ID; + + for (i = 0; i < NR_CPUS; i++) { + if (cpu_logical_map(i) == mpidr) + return i; + } + + return INVALID_CPU_ID; +} + +static __always_inline bool try_acquire_boot_args(struct psci_boot_args *args) +{ + return atomic_cmpxchg_acquire(&args->lock, + PSCI_BOOT_ARGS_UNLOCKED, + PSCI_BOOT_ARGS_LOCKED) == + PSCI_BOOT_ARGS_UNLOCKED; +} + +static __always_inline void release_boot_args(struct psci_boot_args *args) +{ + atomic_set_release(&args->lock, PSCI_BOOT_ARGS_UNLOCKED); +} + +static int psci_cpu_on(u64 func_id, struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(u64, mpidr, host_ctxt, 1); + DECLARE_REG(unsigned long, pc, host_ctxt, 2); + DECLARE_REG(unsigned long, r0, host_ctxt, 3); + + unsigned int cpu_id; + struct psci_boot_args *boot_args; + struct kvm_nvhe_init_params *init_params; + int ret; + + /* + * Find the logical CPU ID for the given MPIDR. The search set is + * the set of CPUs that were online at the point of KVM initialization. + * Booting other CPUs is rejected because their cpufeatures were not + * checked against the finalized capabilities. This could be relaxed + * by doing the feature checks in hyp. + */ + cpu_id = find_cpu_id(mpidr); + if (cpu_id == INVALID_CPU_ID) + return PSCI_RET_INVALID_PARAMS; + + boot_args = per_cpu_ptr(hyp_symbol_addr(cpu_on_args), cpu_id); + init_params = per_cpu_ptr(hyp_symbol_addr(kvm_init_params), cpu_id); + + /* Check if the target CPU is already being booted. */ + if (!try_acquire_boot_args(boot_args)) + return PSCI_RET_ALREADY_ON; + + boot_args->pc = pc; + boot_args->r0 = r0; + wmb(); + + ret = psci_call(func_id, mpidr, + __hyp_pa(hyp_symbol_addr(kvm_hyp_cpu_entry)), + __hyp_pa(init_params)); + + /* If successful, the lock will be released by the target CPU. */ + if (ret != PSCI_RET_SUCCESS) + release_boot_args(boot_args); + + return ret; +} + +asmlinkage void __noreturn kvm_host_psci_cpu_entry(bool is_cpu_on) +{ + struct psci_boot_args *boot_args; + struct kvm_cpu_context *host_ctxt; + + host_ctxt = &this_cpu_ptr(hyp_symbol_addr(kvm_host_data))->host_ctxt; + boot_args = this_cpu_ptr(hyp_symbol_addr(cpu_on_args)); + + cpu_reg(host_ctxt, 0) = boot_args->r0; + write_sysreg_el2(boot_args->pc, SYS_ELR); + release_boot_args(boot_args); + + __host_enter(host_ctxt); +} + static unsigned long psci_0_1_handler(u64 func_id, struct kvm_cpu_context *host_ctxt) { if ((func_id == kvm_host_psci_0_1_function_ids.cpu_off) || (func_id == kvm_host_psci_0_1_function_ids.migrate)) return psci_forward(host_ctxt); + else if (func_id == kvm_host_psci_0_1_function_ids.cpu_on) + return psci_cpu_on(func_id, host_ctxt); else return PSCI_RET_NOT_SUPPORTED; } @@ -98,6 +211,8 @@ static unsigned long psci_0_2_handler(u64 func_id, struct kvm_cpu_context *host_ case PSCI_0_2_FN_SYSTEM_RESET: psci_forward_noreturn(host_ctxt); unreachable(); + case PSCI_0_2_FN64_CPU_ON: + return psci_cpu_on(func_id, host_ctxt); default: return PSCI_RET_NOT_SUPPORTED; } -- GitLab From abf16336dd22d018cd2577f0789b01ed705484d7 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:41:18 +0000 Subject: [PATCH 0398/1894] KVM: arm64: Intercept host's CPU_SUSPEND PSCI SMCs Add a handler of CPU_SUSPEND host PSCI SMCs. The SMC can either enter a sleep state indistinguishable from a WFI or a deeper sleep state that behaves like a CPU_OFF+CPU_ON except that the core is still considered online while asleep. The handler saves r0,pc of the host and makes the same call to EL3 with the hyp CPU entry point. It either returns back to the handler and then back to the host, or wakes up into the entry point and initializes EL2 state before dropping back to EL1. No EL2 state needs to be saved/restored for this purpose. CPU_ON and CPU_SUSPEND are both implemented using struct psci_boot_args to store the state upon powerup, with each CPU having separate structs for CPU_ON and CPU_SUSPEND so that CPU_SUSPEND can operate locklessly and so that a CPU_ON call targeting a CPU cannot interfere with a concurrent CPU_SUSPEND call on that CPU. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201202184122.26046-23-dbrazdil@google.com --- arch/arm64/kvm/hyp/nvhe/hyp-init.S | 10 +++++++ arch/arm64/kvm/hyp/nvhe/psci-relay.c | 44 ++++++++++++++++++++++++++-- 2 files changed, 52 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index d07e75f8242ea..0853f62b052b1 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -150,6 +150,16 @@ SYM_CODE_START(kvm_hyp_cpu_entry) b __kvm_hyp_init_cpu SYM_CODE_END(kvm_hyp_cpu_entry) +/* + * PSCI CPU_SUSPEND entry point + * + * x0: struct kvm_nvhe_init_params PA + */ +SYM_CODE_START(kvm_hyp_cpu_resume) + mov x1, #0 // is_cpu_on = false + b __kvm_hyp_init_cpu +SYM_CODE_END(kvm_hyp_cpu_resume) + /* * Common code for CPU entry points. Initializes EL2 state and * installs the hypervisor before handing over to a C handler. diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c index 637e22ed71fc6..688cf7f40d42f 100644 --- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c +++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c @@ -17,6 +17,7 @@ #include void kvm_hyp_cpu_entry(unsigned long r0); +void kvm_hyp_cpu_resume(unsigned long r0); void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt); @@ -44,6 +45,7 @@ struct psci_boot_args { }) static DEFINE_PER_CPU(struct psci_boot_args, cpu_on_args) = PSCI_BOOT_ARGS_INIT; +static DEFINE_PER_CPU(struct psci_boot_args, suspend_args) = PSCI_BOOT_ARGS_INIT; static u64 get_psci_func_id(struct kvm_cpu_context *host_ctxt) { @@ -171,17 +173,51 @@ static int psci_cpu_on(u64 func_id, struct kvm_cpu_context *host_ctxt) return ret; } +static int psci_cpu_suspend(u64 func_id, struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(u64, power_state, host_ctxt, 1); + DECLARE_REG(unsigned long, pc, host_ctxt, 2); + DECLARE_REG(unsigned long, r0, host_ctxt, 3); + + struct psci_boot_args *boot_args; + struct kvm_nvhe_init_params *init_params; + + boot_args = this_cpu_ptr(hyp_symbol_addr(suspend_args)); + init_params = this_cpu_ptr(hyp_symbol_addr(kvm_init_params)); + + /* + * No need to acquire a lock before writing to boot_args because a core + * can only suspend itself. Racy CPU_ON calls use a separate struct. + */ + boot_args->pc = pc; + boot_args->r0 = r0; + + /* + * Will either return if shallow sleep state, or wake up into the entry + * point if it is a deep sleep state. + */ + return psci_call(func_id, power_state, + __hyp_pa(hyp_symbol_addr(kvm_hyp_cpu_resume)), + __hyp_pa(init_params)); +} + asmlinkage void __noreturn kvm_host_psci_cpu_entry(bool is_cpu_on) { struct psci_boot_args *boot_args; struct kvm_cpu_context *host_ctxt; host_ctxt = &this_cpu_ptr(hyp_symbol_addr(kvm_host_data))->host_ctxt; - boot_args = this_cpu_ptr(hyp_symbol_addr(cpu_on_args)); + + if (is_cpu_on) + boot_args = this_cpu_ptr(hyp_symbol_addr(cpu_on_args)); + else + boot_args = this_cpu_ptr(hyp_symbol_addr(suspend_args)); cpu_reg(host_ctxt, 0) = boot_args->r0; write_sysreg_el2(boot_args->pc, SYS_ELR); - release_boot_args(boot_args); + + if (is_cpu_on) + release_boot_args(boot_args); __host_enter(host_ctxt); } @@ -193,6 +229,8 @@ static unsigned long psci_0_1_handler(u64 func_id, struct kvm_cpu_context *host_ return psci_forward(host_ctxt); else if (func_id == kvm_host_psci_0_1_function_ids.cpu_on) return psci_cpu_on(func_id, host_ctxt); + else if (func_id == kvm_host_psci_0_1_function_ids.cpu_suspend) + return psci_cpu_suspend(func_id, host_ctxt); else return PSCI_RET_NOT_SUPPORTED; } @@ -211,6 +249,8 @@ static unsigned long psci_0_2_handler(u64 func_id, struct kvm_cpu_context *host_ case PSCI_0_2_FN_SYSTEM_RESET: psci_forward_noreturn(host_ctxt); unreachable(); + case PSCI_0_2_FN64_CPU_SUSPEND: + return psci_cpu_suspend(func_id, host_ctxt); case PSCI_0_2_FN64_CPU_ON: return psci_cpu_on(func_id, host_ctxt); default: -- GitLab From d945f8d9ec4ab5b062ce9696761ca3a21de1e64d Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:41:19 +0000 Subject: [PATCH 0399/1894] KVM: arm64: Intercept host's SYSTEM_SUSPEND PSCI SMCs Add a handler of SYSTEM_SUSPEND host PSCI SMCs. The semantics are equivalent to CPU_SUSPEND, typically called on the last online CPU. Reuse the same entry point and boot args struct as CPU_SUSPEND. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201202184122.26046-24-dbrazdil@google.com --- arch/arm64/kvm/hyp/nvhe/hyp-init.S | 2 +- arch/arm64/kvm/hyp/nvhe/psci-relay.c | 26 ++++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index 0853f62b052b1..a2e2515476257 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -151,7 +151,7 @@ SYM_CODE_START(kvm_hyp_cpu_entry) SYM_CODE_END(kvm_hyp_cpu_entry) /* - * PSCI CPU_SUSPEND entry point + * PSCI CPU_SUSPEND / SYSTEM_SUSPEND entry point * * x0: struct kvm_nvhe_init_params PA */ diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c index 688cf7f40d42f..08dc9de693147 100644 --- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c +++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c @@ -201,6 +201,30 @@ static int psci_cpu_suspend(u64 func_id, struct kvm_cpu_context *host_ctxt) __hyp_pa(init_params)); } +static int psci_system_suspend(u64 func_id, struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(unsigned long, pc, host_ctxt, 1); + DECLARE_REG(unsigned long, r0, host_ctxt, 2); + + struct psci_boot_args *boot_args; + struct kvm_nvhe_init_params *init_params; + + boot_args = this_cpu_ptr(hyp_symbol_addr(suspend_args)); + init_params = this_cpu_ptr(hyp_symbol_addr(kvm_init_params)); + + /* + * No need to acquire a lock before writing to boot_args because a core + * can only suspend itself. Racy CPU_ON calls use a separate struct. + */ + boot_args->pc = pc; + boot_args->r0 = r0; + + /* Will only return on error. */ + return psci_call(func_id, + __hyp_pa(hyp_symbol_addr(kvm_hyp_cpu_resume)), + __hyp_pa(init_params), 0); +} + asmlinkage void __noreturn kvm_host_psci_cpu_entry(bool is_cpu_on) { struct psci_boot_args *boot_args; @@ -265,6 +289,8 @@ static unsigned long psci_1_0_handler(u64 func_id, struct kvm_cpu_context *host_ case PSCI_1_0_FN_SET_SUSPEND_MODE: case PSCI_1_1_FN64_SYSTEM_RESET2: return psci_forward(host_ctxt); + case PSCI_1_0_FN64_SYSTEM_SUSPEND: + return psci_system_suspend(func_id, host_ctxt); default: return psci_0_2_handler(func_id, host_ctxt); } -- GitLab From fa8c3d65538aa11bb117cbf872400d5caa7f340b Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:41:20 +0000 Subject: [PATCH 0400/1894] KVM: arm64: Keep nVHE EL2 vector installed KVM by default keeps the stub vector installed and installs the nVHE vector only briefly for init and later on demand. Change this policy to install the vector at init and then never uninstall it if the kernel was given the protected KVM command line parameter. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201202184122.26046-25-dbrazdil@google.com --- arch/arm64/kvm/arm.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index aa40bef09dfce..7d162a2961417 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1453,7 +1453,8 @@ static void _kvm_arch_hardware_disable(void *discard) void kvm_arch_hardware_disable(void) { - _kvm_arch_hardware_disable(NULL); + if (!is_protected_kvm_enabled()) + _kvm_arch_hardware_disable(NULL); } #ifdef CONFIG_CPU_PM @@ -1496,11 +1497,13 @@ static struct notifier_block hyp_init_cpu_pm_nb = { static void __init hyp_cpu_pm_init(void) { - cpu_pm_register_notifier(&hyp_init_cpu_pm_nb); + if (!is_protected_kvm_enabled()) + cpu_pm_register_notifier(&hyp_init_cpu_pm_nb); } static void __init hyp_cpu_pm_exit(void) { - cpu_pm_unregister_notifier(&hyp_init_cpu_pm_nb); + if (!is_protected_kvm_enabled()) + cpu_pm_unregister_notifier(&hyp_init_cpu_pm_nb); } #else static inline void hyp_cpu_pm_init(void) @@ -1588,7 +1591,8 @@ static int init_subsystems(void) kvm_coproc_table_init(); out: - on_each_cpu(_kvm_arch_hardware_disable, NULL, 1); + if (err || !is_protected_kvm_enabled()) + on_each_cpu(_kvm_arch_hardware_disable, NULL, 1); return err; } -- GitLab From b93c17c4185bf6b50f2f0b332afb4abe8b766a7a Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:41:21 +0000 Subject: [PATCH 0401/1894] KVM: arm64: Trap host SMCs in protected mode While protected KVM is installed, start trapping all host SMCs. For now these are simply forwarded to EL3, except PSCI CPU_ON/CPU_SUSPEND/SYSTEM_SUSPEND which are intercepted and the hypervisor installed on newly booted cores. Create new constant HCR_HOST_NVHE_PROTECTED_FLAGS with the new set of HCR flags to use while the nVHE vector is installed when the kernel was booted with the protected flag enabled. Switch back to the default HCR flags when switching back to the stub vector. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201202184122.26046-26-dbrazdil@google.com --- arch/arm64/include/asm/kvm_arm.h | 1 + arch/arm64/kvm/hyp/nvhe/hyp-init.S | 10 ++++++++++ arch/arm64/kvm/hyp/nvhe/switch.c | 5 ++++- 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 64ce29378467c..4e90c2debf70a 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -80,6 +80,7 @@ HCR_FMO | HCR_IMO | HCR_PTW ) #define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF) #define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA) +#define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC) #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H) /* TCR_EL2 Registers bits */ diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index a2e2515476257..31b060a440452 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -88,6 +88,11 @@ SYM_CODE_END(__kvm_hyp_init) * x0: struct kvm_nvhe_init_params PA */ SYM_CODE_START_LOCAL(___kvm_hyp_init) +alternative_if ARM64_KVM_PROTECTED_MODE + mov_q x1, HCR_HOST_NVHE_PROTECTED_FLAGS + msr hcr_el2, x1 +alternative_else_nop_endif + ldr x1, [x0, #NVHE_INIT_TPIDR_EL2] msr tpidr_el2, x1 @@ -230,6 +235,11 @@ reset: msr sctlr_el2, x5 isb +alternative_if ARM64_KVM_PROTECTED_MODE + mov_q x5, HCR_HOST_NVHE_FLAGS + msr hcr_el2, x5 +alternative_else_nop_endif + /* Install stub vectors */ adr_l x5, __hyp_stub_vectors msr vbar_el2, x5 diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 8ae8160bc93ab..e1f8e0797144e 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -96,7 +96,10 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT; write_sysreg(mdcr_el2, mdcr_el2); - write_sysreg(HCR_HOST_NVHE_FLAGS, hcr_el2); + if (is_protected_kvm_enabled()) + write_sysreg(HCR_HOST_NVHE_PROTECTED_FLAGS, hcr_el2); + else + write_sysreg(HCR_HOST_NVHE_FLAGS, hcr_el2); write_sysreg(CPTR_EL2_DEFAULT, cptr_el2); write_sysreg(__kvm_hyp_host_vector, vbar_el2); } -- GitLab From f19f6644a5433cfae8a068445b149bc2247c1445 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:41:22 +0000 Subject: [PATCH 0402/1894] KVM: arm64: Fix EL2 mode availability checks With protected nVHE hyp code interception host's PSCI SMCs, the host starts seeing new CPUs boot in EL1 instead of EL2. The kernel logic that keeps track of the boot mode needs to be adjusted. Add a static key enabled if KVM protected mode initialization is successful. When the key is enabled, is_hyp_mode_available continues to report `true` because its users either treat it as a check whether KVM will be / was initialized, or whether stub HVCs can be made (eg. hibernate). is_hyp_mode_mismatched is changed to report `false` when the key is enabled. That's because all cores' modes matched at the point of KVM init and KVM will not allow cores not present at init to boot. That said, the function is never used after KVM is initialized. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201202184122.26046-27-dbrazdil@google.com --- arch/arm64/include/asm/virt.h | 18 ++++++++++++++++++ arch/arm64/kvm/arm.c | 9 ++++++--- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index eb81dcc220b65..ee6a48df89d9c 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -65,9 +65,19 @@ extern u32 __boot_cpu_mode[2]; void __hyp_set_vectors(phys_addr_t phys_vector_base); void __hyp_reset_vectors(void); +DECLARE_STATIC_KEY_FALSE(kvm_protected_mode_initialized); + /* Reports the availability of HYP mode */ static inline bool is_hyp_mode_available(void) { + /* + * If KVM protected mode is initialized, all CPUs must have been booted + * in EL2. Avoid checking __boot_cpu_mode as CPUs now come up in EL1. + */ + if (IS_ENABLED(CONFIG_KVM) && + static_branch_likely(&kvm_protected_mode_initialized)) + return true; + return (__boot_cpu_mode[0] == BOOT_CPU_MODE_EL2 && __boot_cpu_mode[1] == BOOT_CPU_MODE_EL2); } @@ -75,6 +85,14 @@ static inline bool is_hyp_mode_available(void) /* Check if the bootloader has booted CPUs in different modes */ static inline bool is_hyp_mode_mismatched(void) { + /* + * If KVM protected mode is initialized, all CPUs must have been booted + * in EL2. Avoid checking __boot_cpu_mode as CPUs now come up in EL1. + */ + if (IS_ENABLED(CONFIG_KVM) && + static_branch_likely(&kvm_protected_mode_initialized)) + return false; + return __boot_cpu_mode[0] != __boot_cpu_mode[1]; } diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 7d162a2961417..fadcc94931f99 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -48,6 +48,7 @@ __asm__(".arch_extension virt"); #endif static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT; +DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized); DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector); @@ -1848,12 +1849,14 @@ int kvm_arch_init(void *opaque) if (err) goto out_hyp; - if (is_protected_kvm_enabled()) + if (is_protected_kvm_enabled()) { + static_branch_enable(&kvm_protected_mode_initialized); kvm_info("Protected nVHE mode initialized successfully\n"); - else if (in_hyp_mode) + } else if (in_hyp_mode) { kvm_info("VHE mode initialized successfully\n"); - else + } else { kvm_info("Hyp mode initialized successfully\n"); + } return 0; -- GitLab From 985eabdcfe3aad1aea6fc195dafff503f303aa3a Mon Sep 17 00:00:00 2001 From: Jeffle Xu Date: Wed, 4 Nov 2020 11:57:56 +0800 Subject: [PATCH 0403/1894] dm: remove unnecessary current->bio_list check when submitting split bio The depth-first splitting is introduced in commit 18a25da84354 ("dm: ensure bio submission follows a depth-first tree walk"), which is used to fix the potential deadlock in case of the misordering handling of bios caused by bio_list. There're two paths submitting split bios, dm_wq_work() from worker thread and submit_bio() from application. Back upon that time, dm_wq_work() thread calls __split_and_process_bio() directly and thus will not trigger this issue since bio_list doesn't exist here. So this issue will only be triggered from application calling submit_bio(), and the fix has to check if current->bio_list is non-NULL to distinguish this case. However since commit 0c2915b8c6db1 ("dm: fix missing imposition of queue_limits from dm_wq_work() thread"), dm_wq_work() thread calls submit_bio_noacct() and thus also uses bio_list. Since then all entries into __split_and_process_bio() are under protection of bio_list, and thus the checking of current->bio_list when determinning if the depth-first principle should be used, seems kind of nonsense. After all the checking always succeeds now. Fixes: 0c2915b8c6db1 ("dm: fix missing imposition of queue_limits from dm_wq_work() thread") Signed-off-by: Jeffle Xu Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 4e0cbfe3f14d4..9408df43ba757 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1590,7 +1590,7 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md, ci.sector_count = bio_sectors(bio); while (ci.sector_count && !error) { error = __split_and_process_non_flush(&ci); - if (current->bio_list && ci.sector_count && !error) { + if (ci.sector_count && !error) { /* * Remainder must be passed to submit_bio_noacct() * so that it gets handled *after* bios already submitted -- GitLab From 4da8f8c8a1e07ad18f057f4044ad96f4135dc877 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Fri, 23 Oct 2020 19:05:12 +0200 Subject: [PATCH 0404/1894] dm verity: Add support for signature verification with 2nd keyring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new configuration DM_VERITY_VERIFY_ROOTHASH_SIG_SECONDARY_KEYRING to enable dm-verity signatures to be verified against the secondary trusted keyring. Instead of relying on the builtin trusted keyring (with hard-coded certificates), the second trusted keyring can include certificate authorities from the builtin trusted keyring and child certificates loaded at run time. Using the secondary trusted keyring enables to use dm-verity disks (e.g. loop devices) signed by keys which did not exist at kernel build time, leveraging the certificate chain of trust model. In practice, this makes it possible to update certificates without kernel update and reboot, aligning with module and kernel (kexec) signature verification which already use the secondary trusted keyring. Signed-off-by: Mickaël Salaün Signed-off-by: Mike Snitzer --- Documentation/admin-guide/device-mapper/verity.rst | 7 ++++++- drivers/md/Kconfig | 13 ++++++++++++- drivers/md/dm-verity-verify-sig.c | 9 +++++++-- 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/Documentation/admin-guide/device-mapper/verity.rst b/Documentation/admin-guide/device-mapper/verity.rst index 66f71f0dab1b3..b088a647acb78 100644 --- a/Documentation/admin-guide/device-mapper/verity.rst +++ b/Documentation/admin-guide/device-mapper/verity.rst @@ -134,7 +134,12 @@ root_hash_sig_key_desc the pkcs7 signature of the roothash. The pkcs7 signature is used to validate the root hash during the creation of the device mapper block device. Verification of roothash depends on the config DM_VERITY_VERIFY_ROOTHASH_SIG - being set in the kernel. + being set in the kernel. The signatures are checked against the builtin + trusted keyring by default, or the secondary trusted keyring if + DM_VERITY_VERIFY_ROOTHASH_SIG_SECONDARY_KEYRING is set. The secondary + trusted keyring includes by default the builtin trusted keyring, and it can + also gain new certificates at run time if they are signed by a certificate + already in the secondary trusted keyring. Theory of operation =================== diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 30ba3573626c2..1d68935e45efb 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -530,11 +530,22 @@ config DM_VERITY_VERIFY_ROOTHASH_SIG bool "Verity data device root hash signature verification support" depends on DM_VERITY select SYSTEM_DATA_VERIFICATION - help + help Add ability for dm-verity device to be validated if the pre-generated tree of cryptographic checksums passed has a pkcs#7 signature file that can validate the roothash of the tree. + By default, rely on the builtin trusted keyring. + + If unsure, say N. + +config DM_VERITY_VERIFY_ROOTHASH_SIG_SECONDARY_KEYRING + bool "Verity data device root hash signature verification with secondary keyring" + depends on DM_VERITY_VERIFY_ROOTHASH_SIG + depends on SECONDARY_TRUSTED_KEYRING + help + Rely on the secondary trusted keyring to verify dm-verity signatures. + If unsure, say N. config DM_VERITY_FEC diff --git a/drivers/md/dm-verity-verify-sig.c b/drivers/md/dm-verity-verify-sig.c index 614e43db93aa8..29385dc470d5d 100644 --- a/drivers/md/dm-verity-verify-sig.c +++ b/drivers/md/dm-verity-verify-sig.c @@ -119,8 +119,13 @@ int verity_verify_root_hash(const void *root_hash, size_t root_hash_len, } ret = verify_pkcs7_signature(root_hash, root_hash_len, sig_data, - sig_len, NULL, VERIFYING_UNSPECIFIED_SIGNATURE, - NULL, NULL); + sig_len, +#ifdef CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG_SECONDARY_KEYRING + VERIFY_USE_SECONDARY_KEYRING, +#else + NULL, +#endif + VERIFYING_UNSPECIFIED_SIGNATURE, NULL, NULL); return ret; } -- GitLab From e4d2e82b2300b03f66b3ca8417590c86e661fab1 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 22 Oct 2020 19:27:50 -0500 Subject: [PATCH 0405/1894] dm mpath: add IO affinity path selector This patch adds a path selector that selects paths based on a CPU to path mapping the user passes in and what CPU we are executing on. The primary user for this PS is where the app is optimized to use specific CPUs so other PSs undo the apps handy work, and the storage and it's transport are not a bottlneck. For these io-affinity PS setups a path's transport/interconnect perf is not going to flucuate a lot and there is no major differences between paths, so QL/HST smarts do not help and RR always messes up what the app is trying to do. On a system with 16 cores, where you have a job per CPU: fio --filename=/dev/dm-0 --direct=1 --rw=randrw --bs=4k \ --ioengine=libaio --iodepth=128 --numjobs=16 and a dm-multipath device setup where each CPU is mapped to one path: // When in mq mode I had to set dm_mq_nr_hw_queues=$NUM_PATHS. // Bio mode also showed similar results. 0 16777216 multipath 0 0 1 1 io-affinity 0 16 1 8:16 1 8:32 2 8:64 4 8:48 8 8:80 10 8:96 20 8:112 40 8:128 80 8:144 100 8:160 200 8:176 400 8:192 800 8:208 1000 8:224 2000 8:240 4000 65:0 8000 we can see a IOPs increase of 25%. The percent increase depends on the device and interconnect. For a slower/medium speed path/device that can do around 180K IOPs a path if you ran that fio command to it directly we saw a 25% increase like above. Slower path'd devices that could do around 90K per path showed maybe around a 2 - 5% increase. If you use something like null_blk or scsi_debug which can multi-million IOPs and hack it up so each device they export shows up as a path then you see 50%+ increases. Signed-off-by: Mike Christie Signed-off-by: Mike Snitzer --- drivers/md/Kconfig | 9 ++ drivers/md/Makefile | 1 + drivers/md/dm-io-affinity.c | 272 ++++++++++++++++++++++++++++++++++++ 3 files changed, 282 insertions(+) create mode 100644 drivers/md/dm-io-affinity.c diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 1d68935e45efb..b7e2d96666142 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -463,6 +463,15 @@ config DM_MULTIPATH_HST If unsure, say N. +config DM_MULTIPATH_IOA + tristate "I/O Path Selector based on CPU submission" + depends on DM_MULTIPATH + help + This path selector selects the path based on the CPU the IO is + executed on and the CPU to path mapping setup at path addition time. + + If unsure, say N. + config DM_DELAY tristate "I/O delaying target" depends on BLK_DEV_DM diff --git a/drivers/md/Makefile b/drivers/md/Makefile index 6d3e234dc46a5..4f95f332d0156 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -59,6 +59,7 @@ obj-$(CONFIG_DM_MULTIPATH) += dm-multipath.o dm-round-robin.o obj-$(CONFIG_DM_MULTIPATH_QL) += dm-queue-length.o obj-$(CONFIG_DM_MULTIPATH_ST) += dm-service-time.o obj-$(CONFIG_DM_MULTIPATH_HST) += dm-historical-service-time.o +obj-$(CONFIG_DM_MULTIPATH_IOA) += dm-io-affinity.o obj-$(CONFIG_DM_SWITCH) += dm-switch.o obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o obj-$(CONFIG_DM_PERSISTENT_DATA) += persistent-data/ diff --git a/drivers/md/dm-io-affinity.c b/drivers/md/dm-io-affinity.c new file mode 100644 index 0000000000000..077655cd4fae6 --- /dev/null +++ b/drivers/md/dm-io-affinity.c @@ -0,0 +1,272 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 Oracle Corporation + * + * Module Author: Mike Christie + */ +#include "dm-path-selector.h" + +#include +#include + +#define DM_MSG_PREFIX "multipath io-affinity" + +struct path_info { + struct dm_path *path; + cpumask_var_t cpumask; + refcount_t refcount; + bool failed; +}; + +struct selector { + struct path_info **path_map; + cpumask_var_t path_mask; + atomic_t map_misses; +}; + +static void ioa_free_path(struct selector *s, unsigned int cpu) +{ + struct path_info *pi = s->path_map[cpu]; + + if (!pi) + return; + + if (refcount_dec_and_test(&pi->refcount)) { + cpumask_clear_cpu(cpu, s->path_mask); + free_cpumask_var(pi->cpumask); + kfree(pi); + + s->path_map[cpu] = NULL; + } +} + +static int ioa_add_path(struct path_selector *ps, struct dm_path *path, + int argc, char **argv, char **error) +{ + struct selector *s = ps->context; + struct path_info *pi = NULL; + unsigned int cpu; + int ret; + + if (argc != 1) { + *error = "io-affinity ps: invalid number of arguments"; + return -EINVAL; + } + + pi = kzalloc(sizeof(*pi), GFP_KERNEL); + if (!pi) { + *error = "io-affinity ps: Error allocating path context"; + return -ENOMEM; + } + + pi->path = path; + path->pscontext = pi; + refcount_set(&pi->refcount, 1); + + if (!zalloc_cpumask_var(&pi->cpumask, GFP_KERNEL)) { + *error = "io-affinity ps: Error allocating cpumask context"; + ret = -ENOMEM; + goto free_pi; + } + + ret = cpumask_parse(argv[0], pi->cpumask); + if (ret) { + *error = "io-affinity ps: invalid cpumask"; + ret = -EINVAL; + goto free_mask; + } + + for_each_cpu(cpu, pi->cpumask) { + if (cpu >= nr_cpu_ids) { + DMWARN_LIMIT("Ignoring mapping for CPU %u. Max CPU is %u", + cpu, nr_cpu_ids); + break; + } + + if (s->path_map[cpu]) { + DMWARN("CPU mapping for %u exists. Ignoring.", cpu); + continue; + } + + cpumask_set_cpu(cpu, s->path_mask); + s->path_map[cpu] = pi; + refcount_inc(&pi->refcount); + continue; + } + + if (refcount_dec_and_test(&pi->refcount)) { + *error = "io-affinity ps: No new/valid CPU mapping found"; + ret = -EINVAL; + goto free_mask; + } + + return 0; + +free_mask: + free_cpumask_var(pi->cpumask); +free_pi: + kfree(pi); + return ret; +} + +static int ioa_create(struct path_selector *ps, unsigned argc, char **argv) +{ + struct selector *s; + + s = kmalloc(sizeof(*s), GFP_KERNEL); + if (!s) + return -ENOMEM; + + s->path_map = kzalloc(nr_cpu_ids * sizeof(struct path_info *), + GFP_KERNEL); + if (!s->path_map) + goto free_selector; + + if (!zalloc_cpumask_var(&s->path_mask, GFP_KERNEL)) + goto free_map; + + atomic_set(&s->map_misses, 0); + ps->context = s; + return 0; + +free_map: + kfree(s->path_map); +free_selector: + kfree(s); + return -ENOMEM; +} + +static void ioa_destroy(struct path_selector *ps) +{ + struct selector *s = ps->context; + unsigned cpu; + + for_each_cpu(cpu, s->path_mask) + ioa_free_path(s, cpu); + + free_cpumask_var(s->path_mask); + kfree(s->path_map); + kfree(s); + + ps->context = NULL; +} + +static int ioa_status(struct path_selector *ps, struct dm_path *path, + status_type_t type, char *result, unsigned int maxlen) +{ + struct selector *s = ps->context; + struct path_info *pi; + int sz = 0; + + if (!path) { + DMEMIT("0 "); + return sz; + } + + switch(type) { + case STATUSTYPE_INFO: + DMEMIT("%d ", atomic_read(&s->map_misses)); + break; + case STATUSTYPE_TABLE: + pi = path->pscontext; + DMEMIT("%*pb ", cpumask_pr_args(pi->cpumask)); + break; + } + + return sz; +} + +static void ioa_fail_path(struct path_selector *ps, struct dm_path *p) +{ + struct path_info *pi = p->pscontext; + + pi->failed = true; +} + +static int ioa_reinstate_path(struct path_selector *ps, struct dm_path *p) +{ + struct path_info *pi = p->pscontext; + + pi->failed = false; + return 0; +} + +static struct dm_path *ioa_select_path(struct path_selector *ps, + size_t nr_bytes) +{ + unsigned int cpu, node; + struct selector *s = ps->context; + const struct cpumask *cpumask; + struct path_info *pi; + int i; + + cpu = get_cpu(); + + pi = s->path_map[cpu]; + if (pi && !pi->failed) + goto done; + + /* + * Perf is not optimal, but we at least try the local node then just + * try not to fail. + */ + if (!pi) + atomic_inc(&s->map_misses); + + node = cpu_to_node(cpu); + cpumask = cpumask_of_node(node); + for_each_cpu(i, cpumask) { + pi = s->path_map[i]; + if (pi && !pi->failed) + goto done; + } + + for_each_cpu(i, s->path_mask) { + pi = s->path_map[i]; + if (pi && !pi->failed) + goto done; + } + pi = NULL; + +done: + put_cpu(); + return pi ? pi->path : NULL; +} + +static struct path_selector_type ioa_ps = { + .name = "io-affinity", + .module = THIS_MODULE, + .table_args = 1, + .info_args = 1, + .create = ioa_create, + .destroy = ioa_destroy, + .status = ioa_status, + .add_path = ioa_add_path, + .fail_path = ioa_fail_path, + .reinstate_path = ioa_reinstate_path, + .select_path = ioa_select_path, +}; + +static int __init dm_ioa_init(void) +{ + int ret = dm_register_path_selector(&ioa_ps); + + if (ret < 0) + DMERR("register failed %d", ret); + return ret; +} + +static void __exit dm_ioa_exit(void) +{ + int ret = dm_unregister_path_selector(&ioa_ps); + + if (ret < 0) + DMERR("unregister failed %d", ret); +} + +module_init(dm_ioa_init); +module_exit(dm_ioa_exit); + +MODULE_DESCRIPTION(DM_NAME " multipath path selector that selects paths based on the CPU IO is being executed on"); +MODULE_AUTHOR("Mike Christie "); +MODULE_LICENSE("GPL"); -- GitLab From 298fb372984a808ccba9ca15a9cf0f393b3259b4 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 10 Nov 2020 13:41:53 -0500 Subject: [PATCH 0406/1894] dm: rename multipath path selector source files to have "dm-ps" prefix Additional prefix helps clarify that these source files implement path selectors. Required updating Makefile to still build modules _without_ the "dm-ps" prefix to preserve dm-multipath's ability to autoload path selector modules. While at it, cleaned up some DM whitespace in Makefile. Signed-off-by: Mike Snitzer --- drivers/md/Makefile | 19 ++++++++++++------- ...time.c => dm-ps-historical-service-time.c} | 0 .../{dm-io-affinity.c => dm-ps-io-affinity.c} | 0 ...dm-queue-length.c => dm-ps-queue-length.c} | 0 .../{dm-round-robin.c => dm-ps-round-robin.c} | 0 ...dm-service-time.c => dm-ps-service-time.c} | 0 6 files changed, 12 insertions(+), 7 deletions(-) rename drivers/md/{dm-historical-service-time.c => dm-ps-historical-service-time.c} (100%) rename drivers/md/{dm-io-affinity.c => dm-ps-io-affinity.c} (100%) rename drivers/md/{dm-queue-length.c => dm-ps-queue-length.c} (100%) rename drivers/md/{dm-round-robin.c => dm-ps-round-robin.c} (100%) rename drivers/md/{dm-service-time.c => dm-ps-service-time.c} (100%) diff --git a/drivers/md/Makefile b/drivers/md/Makefile index 4f95f332d0156..ef7ddc27685c8 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -7,23 +7,28 @@ dm-mod-y += dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \ dm-ioctl.o dm-io.o dm-kcopyd.o dm-sysfs.o dm-stats.o \ dm-rq.o dm-multipath-y += dm-path-selector.o dm-mpath.o +dm-historical-service-time-y += dm-ps-historical-service-time.o +dm-io-affinity-y += dm-ps-io-affinity.o +dm-queue-length-y += dm-ps-queue-length.o +dm-round-robin-y += dm-ps-round-robin.o +dm-service-time-y += dm-ps-service-time.o dm-snapshot-y += dm-snap.o dm-exception-store.o dm-snap-transient.o \ dm-snap-persistent.o dm-mirror-y += dm-raid1.o -dm-log-userspace-y \ - += dm-log-userspace-base.o dm-log-userspace-transfer.o +dm-log-userspace-y += dm-log-userspace-base.o dm-log-userspace-transfer.o dm-bio-prison-y += dm-bio-prison-v1.o dm-bio-prison-v2.o dm-thin-pool-y += dm-thin.o dm-thin-metadata.o dm-cache-y += dm-cache-target.o dm-cache-metadata.o dm-cache-policy.o \ dm-cache-background-tracker.o -dm-cache-smq-y += dm-cache-policy-smq.o +dm-cache-smq-y += dm-cache-policy-smq.o dm-ebs-y += dm-ebs-target.o dm-era-y += dm-era-target.o dm-clone-y += dm-clone-target.o dm-clone-metadata.o dm-verity-y += dm-verity-target.o +dm-zoned-y += dm-zoned-target.o dm-zoned-metadata.o dm-zoned-reclaim.o + md-mod-y += md.o md-bitmap.o raid456-y += raid5.o raid5-cache.o raid5-ppl.o -dm-zoned-y += dm-zoned-target.o dm-zoned-metadata.o dm-zoned-reclaim.o linear-y += md-linear.o multipath-y += md-multipath.o faulty-y += md-faulty.o @@ -62,12 +67,12 @@ obj-$(CONFIG_DM_MULTIPATH_HST) += dm-historical-service-time.o obj-$(CONFIG_DM_MULTIPATH_IOA) += dm-io-affinity.o obj-$(CONFIG_DM_SWITCH) += dm-switch.o obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o -obj-$(CONFIG_DM_PERSISTENT_DATA) += persistent-data/ +obj-$(CONFIG_DM_PERSISTENT_DATA) += persistent-data/ obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-log.o dm-region-hash.o obj-$(CONFIG_DM_LOG_USERSPACE) += dm-log-userspace.o obj-$(CONFIG_DM_ZERO) += dm-zero.o -obj-$(CONFIG_DM_RAID) += dm-raid.o -obj-$(CONFIG_DM_THIN_PROVISIONING) += dm-thin-pool.o +obj-$(CONFIG_DM_RAID) += dm-raid.o +obj-$(CONFIG_DM_THIN_PROVISIONING) += dm-thin-pool.o obj-$(CONFIG_DM_VERITY) += dm-verity.o obj-$(CONFIG_DM_CACHE) += dm-cache.o obj-$(CONFIG_DM_CACHE_SMQ) += dm-cache-smq.o diff --git a/drivers/md/dm-historical-service-time.c b/drivers/md/dm-ps-historical-service-time.c similarity index 100% rename from drivers/md/dm-historical-service-time.c rename to drivers/md/dm-ps-historical-service-time.c diff --git a/drivers/md/dm-io-affinity.c b/drivers/md/dm-ps-io-affinity.c similarity index 100% rename from drivers/md/dm-io-affinity.c rename to drivers/md/dm-ps-io-affinity.c diff --git a/drivers/md/dm-queue-length.c b/drivers/md/dm-ps-queue-length.c similarity index 100% rename from drivers/md/dm-queue-length.c rename to drivers/md/dm-ps-queue-length.c diff --git a/drivers/md/dm-round-robin.c b/drivers/md/dm-ps-round-robin.c similarity index 100% rename from drivers/md/dm-round-robin.c rename to drivers/md/dm-ps-round-robin.c diff --git a/drivers/md/dm-service-time.c b/drivers/md/dm-ps-service-time.c similarity index 100% rename from drivers/md/dm-service-time.c rename to drivers/md/dm-ps-service-time.c -- GitLab From 410fe220078c3e3ed6f4167fb4de65139cfec631 Mon Sep 17 00:00:00 2001 From: Jeffle Xu Date: Fri, 13 Nov 2020 10:05:51 +0800 Subject: [PATCH 0407/1894] dm: add support for REQ_NOWAIT to various targets commit 021a24460dc2 ("block: add QUEUE_FLAG_NOWAIT") added a new queue flag QUEUE_FLAG_NOWAIT to advertise if the bdev supports handling of REQ_NOWAIT or not. DM core supports stacking QUEUE_FLAG_NOWAIT since commit 6abc49468eea ("dm: add support for REQ_NOWAIT and enable it for linear target"), in which only dm-linear enabled it. Update others DM targets, which just do simple remapping, to enable support for REQ_NOWAIT. Signed-off-by: Jeffle Xu Signed-off-by: Mike Snitzer --- drivers/md/dm-stripe.c | 2 +- drivers/md/dm-switch.c | 1 + drivers/md/dm-unstripe.c | 1 + drivers/md/dm-zero.c | 1 + 4 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 151d022b032d4..df359d33cda89 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -496,7 +496,7 @@ static void stripe_io_hints(struct dm_target *ti, static struct target_type stripe_target = { .name = "striped", .version = {1, 6, 0}, - .features = DM_TARGET_PASSES_INTEGRITY, + .features = DM_TARGET_PASSES_INTEGRITY | DM_TARGET_NOWAIT, .module = THIS_MODULE, .ctr = stripe_ctr, .dtr = stripe_dtr, diff --git a/drivers/md/dm-switch.c b/drivers/md/dm-switch.c index bff4c7fa1cd21..262e2b0fd9750 100644 --- a/drivers/md/dm-switch.c +++ b/drivers/md/dm-switch.c @@ -550,6 +550,7 @@ static int switch_iterate_devices(struct dm_target *ti, static struct target_type switch_target = { .name = "switch", .version = {1, 1, 0}, + .features = DM_TARGET_NOWAIT, .module = THIS_MODULE, .ctr = switch_ctr, .dtr = switch_dtr, diff --git a/drivers/md/dm-unstripe.c b/drivers/md/dm-unstripe.c index e673dacf64181..7357c1bd58631 100644 --- a/drivers/md/dm-unstripe.c +++ b/drivers/md/dm-unstripe.c @@ -178,6 +178,7 @@ static void unstripe_io_hints(struct dm_target *ti, static struct target_type unstripe_target = { .name = "unstriped", .version = {1, 1, 0}, + .features = DM_TARGET_NOWAIT, .module = THIS_MODULE, .ctr = unstripe_ctr, .dtr = unstripe_dtr, diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c index b65ca8dcfbdc7..faa1dbffc8b4a 100644 --- a/drivers/md/dm-zero.c +++ b/drivers/md/dm-zero.c @@ -59,6 +59,7 @@ static int zero_map(struct dm_target *ti, struct bio *bio) static struct target_type zero_target = { .name = "zero", .version = {1, 1, 0}, + .features = DM_TARGET_NOWAIT, .module = THIS_MODULE, .ctr = zero_ctr, .map = zero_map, -- GitLab From e8dc79d1bdda0c048bfc7d39a8146c6db1c36ef6 Mon Sep 17 00:00:00 2001 From: Rikard Falkeborn Date: Sat, 21 Nov 2020 23:22:48 +0100 Subject: [PATCH 0408/1894] dm crypt: Constify static crypt_iv_operations The only usage of these structs is to assign their address to the iv_gen_ops field in the crypt config struct, which is a pointer to const. Make them const like the rest of the static crypt_iv_operations structs. This allows the compiler to put them in read-only memory. Signed-off-by: Rikard Falkeborn Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 392337f16ecfd..53791138d78bf 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1090,16 +1090,16 @@ static const struct crypt_iv_operations crypt_iv_tcw_ops = { .post = crypt_iv_tcw_post }; -static struct crypt_iv_operations crypt_iv_random_ops = { +static const struct crypt_iv_operations crypt_iv_random_ops = { .generator = crypt_iv_random_gen }; -static struct crypt_iv_operations crypt_iv_eboiv_ops = { +static const struct crypt_iv_operations crypt_iv_eboiv_ops = { .ctr = crypt_iv_eboiv_ctr, .generator = crypt_iv_eboiv_gen }; -static struct crypt_iv_operations crypt_iv_elephant_ops = { +static const struct crypt_iv_operations crypt_iv_elephant_ops = { .ctr = crypt_iv_elephant_ctr, .dtr = crypt_iv_elephant_dtr, .init = crypt_iv_elephant_init, -- GitLab From 4d7659bfbe277a43399a4a2d90fca141e70f29e1 Mon Sep 17 00:00:00 2001 From: Qinglang Miao Date: Sat, 28 Nov 2020 18:19:59 +0800 Subject: [PATCH 0409/1894] dm ioctl: fix error return code in target_message Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: 2ca4c92f58f9 ("dm ioctl: prevent empty message") Reported-by: Hulk Robot Signed-off-by: Qinglang Miao Signed-off-by: Mike Snitzer --- drivers/md/dm-ioctl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index cd0478d44058b..5e306bba43751 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1600,6 +1600,7 @@ static int target_message(struct file *filp, struct dm_ioctl *param, size_t para if (!argc) { DMWARN("Empty message received."); + r = -EINVAL; goto out_argv; } -- GitLab From a2b8b2d975673b1a50ab0bcce5d146b9335edfad Mon Sep 17 00:00:00 2001 From: Jeffle Xu Date: Mon, 30 Nov 2020 19:33:39 +0800 Subject: [PATCH 0410/1894] dm crypt: export sysfs of kcryptd workqueue It should be helpful to export sysfs of "kcryptd" workqueue in some cases, such as setting specific CPU affinity of the workqueue. Besides, also tweak the name format a little. The slash inside a directory name will be translate into exclamation mark, such as /sys/devices/virtual/workqueue/'kcryptd!253:0'. Signed-off-by: Jeffle Xu Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 53791138d78bf..5f9f9b3a226d7 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -3166,11 +3166,12 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) } if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags)) - cc->crypt_queue = alloc_workqueue("kcryptd/%s", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, + cc->crypt_queue = alloc_workqueue("kcryptd-%s", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 1, devname); else - cc->crypt_queue = alloc_workqueue("kcryptd/%s", - WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND, + cc->crypt_queue = alloc_workqueue("kcryptd-%s", + WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | + WQ_UNBOUND | WQ_SYSFS, num_online_cpus(), devname); if (!cc->crypt_queue) { ti->error = "Couldn't create kcryptd queue"; -- GitLab From 91b8246de8590bac89b03b4fd14c61a8b4053b9e Mon Sep 17 00:00:00 2001 From: Wang Qing Date: Fri, 6 Nov 2020 17:43:31 +0800 Subject: [PATCH 0411/1894] ntb: idt: fix error check in ntb_hw_idt.c idt_create_dev never return NULL and fix smatch warning. Signed-off-by: Wang Qing Acked-by: Serge Semin Signed-off-by: Jon Mason --- drivers/ntb/hw/idt/ntb_hw_idt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ntb/hw/idt/ntb_hw_idt.c b/drivers/ntb/hw/idt/ntb_hw_idt.c index d54261f508519..e7a4c2aa8baa8 100644 --- a/drivers/ntb/hw/idt/ntb_hw_idt.c +++ b/drivers/ntb/hw/idt/ntb_hw_idt.c @@ -2511,7 +2511,7 @@ static int idt_init_dbgfs(struct idt_ntb_dev *ndev) /* If the top directory is not created then do nothing */ if (IS_ERR_OR_NULL(dbgfs_topdir)) { dev_info(&ndev->ntb.pdev->dev, "Top DebugFS directory absent"); - return PTR_ERR(dbgfs_topdir); + return PTR_ERR_OR_ZERO(dbgfs_topdir); } /* Create the info file node */ @@ -2756,7 +2756,7 @@ static int idt_pci_probe(struct pci_dev *pdev, /* Allocate the memory for IDT NTB device data */ ndev = idt_create_dev(pdev, id); - if (IS_ERR_OR_NULL(ndev)) + if (IS_ERR(ndev)) return PTR_ERR(ndev); /* Initialize the basic PCI subsystem of the device */ -- GitLab From 75b6f6487cedd0e4c8e07d68b68b8f85cd352bfe Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 23 Nov 2020 08:36:12 -0700 Subject: [PATCH 0412/1894] ntb: intel: add Intel NTB LTR vendor support for gen4 NTB Intel NTB device has custom LTR management that is not compliant with the PCIe standard. Add support to set LTR status triggered by link status change. Signed-off-by: Dave Jiang Signed-off-by: Jon Mason --- drivers/ntb/hw/intel/ntb_hw_gen1.h | 1 + drivers/ntb/hw/intel/ntb_hw_gen4.c | 27 ++++++++++++++++++++++++++- drivers/ntb/hw/intel/ntb_hw_gen4.h | 15 +++++++++++++++ 3 files changed, 42 insertions(+), 1 deletion(-) diff --git a/drivers/ntb/hw/intel/ntb_hw_gen1.h b/drivers/ntb/hw/intel/ntb_hw_gen1.h index 1b759942d8aff..344249fc18d1f 100644 --- a/drivers/ntb/hw/intel/ntb_hw_gen1.h +++ b/drivers/ntb/hw/intel/ntb_hw_gen1.h @@ -141,6 +141,7 @@ #define NTB_HWERR_B2BDOORBELL_BIT14 BIT_ULL(2) #define NTB_HWERR_MSIX_VECTOR32_BAD BIT_ULL(3) #define NTB_HWERR_BAR_ALIGN BIT_ULL(4) +#define NTB_HWERR_LTR_BAD BIT_ULL(5) extern struct intel_b2b_addr xeon_b2b_usd_addr; extern struct intel_b2b_addr xeon_b2b_dsd_addr; diff --git a/drivers/ntb/hw/intel/ntb_hw_gen4.c b/drivers/ntb/hw/intel/ntb_hw_gen4.c index bc4541cbf8c6e..fede05151f698 100644 --- a/drivers/ntb/hw/intel/ntb_hw_gen4.c +++ b/drivers/ntb/hw/intel/ntb_hw_gen4.c @@ -177,8 +177,10 @@ int gen4_init_dev(struct intel_ntb_dev *ndev) ndev->reg = &gen4_reg; - if (pdev_is_ICX(pdev)) + if (pdev_is_ICX(pdev)) { ndev->hwerr_flags |= NTB_HWERR_BAR_ALIGN; + ndev->hwerr_flags |= NTB_HWERR_LTR_BAD; + } ppd1 = ioread32(ndev->self_mmio + GEN4_PPD1_OFFSET); ndev->ntb.topo = gen4_ppd_topo(ndev, ppd1); @@ -431,6 +433,25 @@ static int intel_ntb4_link_enable(struct ntb_dev *ntb, dev_dbg(&ntb->pdev->dev, "ignoring max_width %d\n", max_width); + if (!(ndev->hwerr_flags & NTB_HWERR_LTR_BAD)) { + u32 ltr; + + /* Setup active snoop LTR values */ + ltr = NTB_LTR_ACTIVE_REQMNT | NTB_LTR_ACTIVE_VAL | NTB_LTR_ACTIVE_LATSCALE; + /* Setup active non-snoop values */ + ltr = (ltr << NTB_LTR_NS_SHIFT) | ltr; + iowrite32(ltr, ndev->self_mmio + GEN4_LTR_ACTIVE_OFFSET); + + /* Setup idle snoop LTR values */ + ltr = NTB_LTR_IDLE_VAL | NTB_LTR_IDLE_LATSCALE | NTB_LTR_IDLE_REQMNT; + /* Setup idle non-snoop values */ + ltr = (ltr << NTB_LTR_NS_SHIFT) | ltr; + iowrite32(ltr, ndev->self_mmio + GEN4_LTR_IDLE_OFFSET); + + /* setup PCIe LTR to active */ + iowrite8(NTB_LTR_SWSEL_ACTIVE, ndev->self_mmio + GEN4_LTR_SWSEL_OFFSET); + } + ntb_ctl = NTB_CTL_E2I_BAR23_SNOOP | NTB_CTL_I2E_BAR23_SNOOP; ntb_ctl |= NTB_CTL_E2I_BAR45_SNOOP | NTB_CTL_I2E_BAR45_SNOOP; iowrite32(ntb_ctl, ndev->self_mmio + ndev->reg->ntb_ctl); @@ -476,6 +497,10 @@ static int intel_ntb4_link_disable(struct ntb_dev *ntb) lnkctl |= GEN4_LINK_CTRL_LINK_DISABLE; iowrite16(lnkctl, ndev->self_mmio + GEN4_LINK_CTRL_OFFSET); + /* set LTR to idle */ + if (!(ndev->hwerr_flags & NTB_HWERR_LTR_BAD)) + iowrite8(NTB_LTR_SWSEL_IDLE, ndev->self_mmio + GEN4_LTR_SWSEL_OFFSET); + ndev->dev_up = 0; return 0; diff --git a/drivers/ntb/hw/intel/ntb_hw_gen4.h b/drivers/ntb/hw/intel/ntb_hw_gen4.h index a868c788de02f..3fcd3fdce9edf 100644 --- a/drivers/ntb/hw/intel/ntb_hw_gen4.h +++ b/drivers/ntb/hw/intel/ntb_hw_gen4.h @@ -35,6 +35,9 @@ #define GEN4_IM_SPAD_SEM_OFFSET 0x00c0 /* SPAD hw semaphore */ #define GEN4_IM_SPAD_STICKY_OFFSET 0x00c4 /* sticky SPAD */ #define GEN4_IM_DOORBELL_OFFSET 0x0100 /* 0-31 doorbells */ +#define GEN4_LTR_SWSEL_OFFSET 0x30ec +#define GEN4_LTR_ACTIVE_OFFSET 0x30f0 +#define GEN4_LTR_IDLE_OFFSET 0x30f4 #define GEN4_EM_SPAD_OFFSET 0x8080 /* note, link status is now in MMIO and not config space for NTB */ #define GEN4_LINK_CTRL_OFFSET 0xb050 @@ -80,6 +83,18 @@ #define NTB_SJC_FORCEDETECT 0x000004 +#define NTB_LTR_SWSEL_ACTIVE 0x0 +#define NTB_LTR_SWSEL_IDLE 0x1 + +#define NTB_LTR_NS_SHIFT 16 +#define NTB_LTR_ACTIVE_VAL 0x0000 /* 0 us */ +#define NTB_LTR_ACTIVE_LATSCALE 0x0800 /* 1us scale */ +#define NTB_LTR_ACTIVE_REQMNT 0x8000 /* snoop req enable */ + +#define NTB_LTR_IDLE_VAL 0x0258 /* 600 us */ +#define NTB_LTR_IDLE_LATSCALE 0x0800 /* 1us scale */ +#define NTB_LTR_IDLE_REQMNT 0x8000 /* snoop req enable */ + ssize_t ndev_ntb4_debugfs_read(struct file *filp, char __user *ubuf, size_t count, loff_t *offp); int gen4_init_dev(struct intel_ntb_dev *ndev); -- GitLab From 670d39657ca355615428d176e9de4e69bfb3cf9b Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Mon, 12 Oct 2020 13:58:41 +1000 Subject: [PATCH 0413/1894] m68knommu: align BSS section to 4-byte boundaries The kernel start up code for all of the nommu m68k types expects the BSS section to be on a 4-byte boundary, and to be a whole number of 32bit words. The BSS initialization loop sets 32bit sized quantities and has no provision for odd or unaligned accesses. The alignment and size of the BSS has historically worked out to be 4-byte aligned and sized - although no explicit alignment or size was specified in the linker script. So the BSS zeroing code worked as expected. A problem was first observed after commit 7273ad2b08f8 ("kbuild: link lib-y objects to vmlinux forcibly when CONFIG_MODULES=y"). Some kernel builds, depending on exact configuration, then tended to generate even sized BSS sections - which is valid on m68k - but our BSS init code could not handle properly. The simplest and smallest solution is to align and size the BSS appropriately. Signed-off-by: Greg Ungerer --- arch/m68k/kernel/vmlinux-nommu.lds | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/m68k/kernel/vmlinux-nommu.lds b/arch/m68k/kernel/vmlinux-nommu.lds index 7b975420c3d9d..730bb653f9c15 100644 --- a/arch/m68k/kernel/vmlinux-nommu.lds +++ b/arch/m68k/kernel/vmlinux-nommu.lds @@ -82,7 +82,7 @@ SECTIONS { __init_end = .; } - BSS_SECTION(0, 0, 0) + BSS_SECTION(4, 0, 4) _end = .; -- GitLab From a734bbf694270dca8594a5c33375867dc31503f5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 30 Oct 2020 15:26:23 +0100 Subject: [PATCH 0414/1894] m68k: m68328: move platform code to separate files The dragen2 and ucsimm/ucdimm files require a bit of custom code compared to the other dragonball platforms, move them into separate files as a preparation for a build fix. Signed-off-by: Arnd Bergmann Signed-off-by: Greg Ungerer --- arch/m68k/68000/Makefile | 4 ++ arch/m68k/68000/dragen2.c | 100 +++++++++++++++++++++++++++ arch/m68k/68000/m68328.c | 3 +- arch/m68k/68000/m68328.h | 5 ++ arch/m68k/68000/m68EZ328.c | 23 +------ arch/m68k/68000/m68VZ328.c | 136 ++----------------------------------- arch/m68k/68000/ucsimm.c | 38 +++++++++++ 7 files changed, 158 insertions(+), 151 deletions(-) create mode 100644 arch/m68k/68000/dragen2.c create mode 100644 arch/m68k/68000/m68328.h create mode 100644 arch/m68k/68000/ucsimm.c diff --git a/arch/m68k/68000/Makefile b/arch/m68k/68000/Makefile index 4f7d4b45a46fc..ce0b26d6580d4 100644 --- a/arch/m68k/68000/Makefile +++ b/arch/m68k/68000/Makefile @@ -16,4 +16,8 @@ obj-$(CONFIG_M68EZ328) += m68EZ328.o obj-$(CONFIG_M68VZ328) += m68VZ328.o obj-$(CONFIG_ROM) += romvec.o +obj-$(CONFIG_DRAGEN2) += dragen2.o +obj-$(CONFIG_UCSIMM) += ucsimm.o +obj-$(CONFIG_UCDIMM) += ucsimm.o + extra-y := head.o diff --git a/arch/m68k/68000/dragen2.c b/arch/m68k/68000/dragen2.c new file mode 100644 index 0000000000000..584893c57c373 --- /dev/null +++ b/arch/m68k/68000/dragen2.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 1993 Hamish Macdonald + * Copyright (C) 1999 D. Jeff Dionne + * Copyright (C) 2001 Georges Menie, Ken Desmet + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive + * for more details. + */ +#include +#include +#include + +/***************************************************************************/ +/* Init Drangon Engine hardware */ +/***************************************************************************/ + +static void dragen2_reset(void) +{ + local_irq_disable(); + +#ifdef CONFIG_INIT_LCD + PBDATA |= 0x20; /* disable CCFL light */ + PKDATA |= 0x4; /* disable LCD controller */ + LCKCON = 0; +#endif + + __asm__ __volatile__( + "reset\n\t" + "moveal #0x04000000, %a0\n\t" + "moveal 0(%a0), %sp\n\t" + "moveal 4(%a0), %a0\n\t" + "jmp (%a0)" + ); +} + +void __init init_dragen2(char *command, int size) +{ + mach_reset = dragen2_reset; + +#ifdef CONFIG_DIRECT_IO_ACCESS + SCR = 0x10; /* allow user access to internal registers */ +#endif + + /* CSGB Init */ + CSGBB = 0x4000; + CSB = 0x1a1; + + /* CS8900 init */ + /* PK3: hardware sleep function pin, active low */ + PKSEL |= PK(3); /* select pin as I/O */ + PKDIR |= PK(3); /* select pin as output */ + PKDATA |= PK(3); /* set pin high */ + + /* PF5: hardware reset function pin, active high */ + PFSEL |= PF(5); /* select pin as I/O */ + PFDIR |= PF(5); /* select pin as output */ + PFDATA &= ~PF(5); /* set pin low */ + + /* cs8900 hardware reset */ + PFDATA |= PF(5); + { int i; for (i = 0; i < 32000; ++i); } + PFDATA &= ~PF(5); + + /* INT1 enable (cs8900 IRQ) */ + PDPOL &= ~PD(1); /* active high signal */ + PDIQEG &= ~PD(1); + PDIRQEN |= PD(1); /* IRQ enabled */ + +#ifdef CONFIG_INIT_LCD + /* initialize LCD controller */ + LSSA = (long) screen_bits; + LVPW = 0x14; + LXMAX = 0x140; + LYMAX = 0xef; + LRRA = 0; + LPXCD = 3; + LPICF = 0x08; + LPOLCF = 0; + LCKCON = 0x80; + PCPDEN = 0xff; + PCSEL = 0; + + /* Enable LCD controller */ + PKDIR |= 0x4; + PKSEL |= 0x4; + PKDATA &= ~0x4; + + /* Enable CCFL backlighting circuit */ + PBDIR |= 0x20; + PBSEL |= 0x20; + PBDATA &= ~0x20; + + /* contrast control register */ + PFDIR |= 0x1; + PFSEL &= ~0x1; + PWMR = 0x037F; +#endif +} diff --git a/arch/m68k/68000/m68328.c b/arch/m68k/68000/m68328.c index 419751b15ec8b..6a5cfc977150f 100644 --- a/arch/m68k/68000/m68328.c +++ b/arch/m68k/68000/m68328.c @@ -25,9 +25,10 @@ #include "bootlogo.h" #endif +#include "m68328.h" + /***************************************************************************/ -int m68328_hwclk(int set, struct rtc_time *t); /***************************************************************************/ diff --git a/arch/m68k/68000/m68328.h b/arch/m68k/68000/m68328.h new file mode 100644 index 0000000000000..f6047c3168d44 --- /dev/null +++ b/arch/m68k/68000/m68328.h @@ -0,0 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 +void init_dragen2(char *command, int size); +void init_ucsimm(char *command, int size); +struct rtc_time; +int m68328_hwclk(int set, struct rtc_time *t); diff --git a/arch/m68k/68000/m68EZ328.c b/arch/m68k/68000/m68EZ328.c index 05f137dc257ea..65bd112285ef9 100644 --- a/arch/m68k/68000/m68EZ328.c +++ b/arch/m68k/68000/m68EZ328.c @@ -24,9 +24,7 @@ #include #endif -/***************************************************************************/ - -int m68328_hwclk(int set, struct rtc_time *t); +#include "m68328.h" /***************************************************************************/ @@ -44,29 +42,12 @@ void m68ez328_reset(void) /***************************************************************************/ -unsigned char *cs8900a_hwaddr; -static int errno; - -#ifdef CONFIG_UCSIMM -_bsc0(char *, getserialnum) -_bsc1(unsigned char *, gethwaddr, int, a) -_bsc1(char *, getbenv, char *, a) -#endif - void __init config_BSP(char *command, int len) { - unsigned char *p; - pr_info("68EZ328 DragonBallEZ support (C) 1999 Rt-Control, Inc\n"); #ifdef CONFIG_UCSIMM - pr_info("uCsimm serial string [%s]\n", getserialnum()); - p = cs8900a_hwaddr = gethwaddr(0); - pr_info("uCsimm hwaddr %pM\n", p); - - p = getbenv("APPEND"); - if (p) strcpy(p,command); - else command[0] = 0; + init_ucsimm(command, len); #endif mach_sched_init = hw_timer_init; diff --git a/arch/m68k/68000/m68VZ328.c b/arch/m68k/68000/m68VZ328.c index ada87b23afdc0..025da5552f1c6 100644 --- a/arch/m68k/68000/m68VZ328.c +++ b/arch/m68k/68000/m68VZ328.c @@ -32,101 +32,9 @@ #include "bootlogo-vz.h" #endif -/***************************************************************************/ - -int m68328_hwclk(int set, struct rtc_time *t); - -/***************************************************************************/ -/* Init Drangon Engine hardware */ -/***************************************************************************/ -#if defined(CONFIG_DRAGEN2) - -static void m68vz328_reset(void) -{ - local_irq_disable(); - -#ifdef CONFIG_INIT_LCD - PBDATA |= 0x20; /* disable CCFL light */ - PKDATA |= 0x4; /* disable LCD controller */ - LCKCON = 0; -#endif - - __asm__ __volatile__( - "reset\n\t" - "moveal #0x04000000, %a0\n\t" - "moveal 0(%a0), %sp\n\t" - "moveal 4(%a0), %a0\n\t" - "jmp (%a0)" - ); -} - -static void __init init_hardware(char *command, int size) -{ -#ifdef CONFIG_DIRECT_IO_ACCESS - SCR = 0x10; /* allow user access to internal registers */ -#endif - - /* CSGB Init */ - CSGBB = 0x4000; - CSB = 0x1a1; - - /* CS8900 init */ - /* PK3: hardware sleep function pin, active low */ - PKSEL |= PK(3); /* select pin as I/O */ - PKDIR |= PK(3); /* select pin as output */ - PKDATA |= PK(3); /* set pin high */ - - /* PF5: hardware reset function pin, active high */ - PFSEL |= PF(5); /* select pin as I/O */ - PFDIR |= PF(5); /* select pin as output */ - PFDATA &= ~PF(5); /* set pin low */ - - /* cs8900 hardware reset */ - PFDATA |= PF(5); - { int i; for (i = 0; i < 32000; ++i); } - PFDATA &= ~PF(5); - - /* INT1 enable (cs8900 IRQ) */ - PDPOL &= ~PD(1); /* active high signal */ - PDIQEG &= ~PD(1); - PDIRQEN |= PD(1); /* IRQ enabled */ - -#ifdef CONFIG_INIT_LCD - /* initialize LCD controller */ - LSSA = (long) screen_bits; - LVPW = 0x14; - LXMAX = 0x140; - LYMAX = 0xef; - LRRA = 0; - LPXCD = 3; - LPICF = 0x08; - LPOLCF = 0; - LCKCON = 0x80; - PCPDEN = 0xff; - PCSEL = 0; - - /* Enable LCD controller */ - PKDIR |= 0x4; - PKSEL |= 0x4; - PKDATA &= ~0x4; - - /* Enable CCFL backlighting circuit */ - PBDIR |= 0x20; - PBSEL |= 0x20; - PBDATA &= ~0x20; - - /* contrast control register */ - PFDIR |= 0x1; - PFSEL &= ~0x1; - PWMR = 0x037F; -#endif -} +#include "m68328.h" /***************************************************************************/ -/* Init RT-Control uCdimm hardware */ -/***************************************************************************/ -#elif defined(CONFIG_UCDIMM) - static void m68vz328_reset(void) { local_irq_disable(); @@ -139,51 +47,21 @@ static void m68vz328_reset(void) ); } -unsigned char *cs8900a_hwaddr; -static int errno; - -_bsc0(char *, getserialnum) -_bsc1(unsigned char *, gethwaddr, int, a) -_bsc1(char *, getbenv, char *, a) - -static void __init init_hardware(char *command, int size) -{ - char *p; - - pr_info("uCdimm serial string [%s]\n", getserialnum()); - p = cs8900a_hwaddr = gethwaddr(0); - pr_info("uCdimm hwaddr %pM\n", p); - p = getbenv("APPEND"); - if (p) - strcpy(p, command); - else - command[0] = 0; -} - -/***************************************************************************/ -#else - -static void m68vz328_reset(void) -{ -} - -static void __init init_hardware(char *command, int size) -{ -} - -/***************************************************************************/ -#endif /***************************************************************************/ void __init config_BSP(char *command, int size) { pr_info("68VZ328 DragonBallVZ support (c) 2001 Lineo, Inc.\n"); - init_hardware(command, size); - mach_sched_init = hw_timer_init; mach_hwclk = m68328_hwclk; mach_reset = m68vz328_reset; + +#ifdef CONFIG_UCDIMM + init_ucsimm(command, len); +#elif defined(CONFIG_DRAGEN2) + init_dragen2(command, len); +#endif } /***************************************************************************/ diff --git a/arch/m68k/68000/ucsimm.c b/arch/m68k/68000/ucsimm.c new file mode 100644 index 0000000000000..7c6cbf6437126 --- /dev/null +++ b/arch/m68k/68000/ucsimm.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 1993 Hamish Macdonald + * Copyright (C) 1999 D. Jeff Dionne + * Copyright (C) 2001 Georges Menie, Ken Desmet + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive + * for more details. + */ +#include +#include +#include +#include + + +#include "m68328.h" + +unsigned char *cs8900a_hwaddr; +static int errno; + +_bsc0(char *, getserialnum) +_bsc1(unsigned char *, gethwaddr, int, a) +_bsc1(char *, getbenv, char *, a) + +void __init init_ucsimm(char *command, int size) +{ + char *p; + + pr_info("uCsimm/uCdimm serial string [%s]\n", getserialnum()); + p = cs8900a_hwaddr = gethwaddr(0); + pr_info("uCsimm/uCdimm hwaddr %pM\n", p); + p = getbenv("APPEND"); + if (p) + strcpy(p, command); + else + command[0] = 0; +} -- GitLab From 8b22820efb35f93d98638563b0a8f4094e8ee399 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 30 Oct 2020 15:26:24 +0100 Subject: [PATCH 0415/1894] m68k: m68328: remove duplicate code Building a kernel with multiple dragonball based boards enabled needlessly causes a link failure because of duplicate config_BSP() functions between the CPU versions. Avoid that merging the three almost identical files into one, and hiding the CPU configuration behind the board config. The pr_info() lines are consolidated in one place. It is still not possible to run a kernel configured for more than one board, but at least configurations that can be selected can also be built now. Signed-off-by: Arnd Bergmann Signed-off-by: Greg Ungerer --- arch/m68k/68000/Makefile | 5 +-- arch/m68k/68000/m68328.c | 31 ++++++++++------- arch/m68k/68000/m68EZ328.c | 58 -------------------------------- arch/m68k/68000/m68VZ328.c | 67 ------------------------------------- arch/m68k/Kconfig.cpu | 8 ++--- arch/m68k/Kconfig.machine | 16 +++++---- arch/m68k/kernel/setup_no.c | 9 +++++ 7 files changed, 43 insertions(+), 151 deletions(-) delete mode 100644 arch/m68k/68000/m68EZ328.c delete mode 100644 arch/m68k/68000/m68VZ328.c diff --git a/arch/m68k/68000/Makefile b/arch/m68k/68000/Makefile index ce0b26d6580d4..674541fdf5b89 100644 --- a/arch/m68k/68000/Makefile +++ b/arch/m68k/68000/Makefile @@ -10,10 +10,7 @@ # 68328, 68EZ328, 68VZ328 -obj-y += entry.o ints.o timers.o -obj-$(CONFIG_M68328) += m68328.o -obj-$(CONFIG_M68EZ328) += m68EZ328.o -obj-$(CONFIG_M68VZ328) += m68VZ328.o +obj-y += entry.o ints.o timers.o m68328.o obj-$(CONFIG_ROM) += romvec.o obj-$(CONFIG_DRAGEN2) += dragen2.o diff --git a/arch/m68k/68000/m68328.c b/arch/m68k/68000/m68328.c index 6a5cfc977150f..eab08da058c63 100644 --- a/arch/m68k/68000/m68328.c +++ b/arch/m68k/68000/m68328.c @@ -1,10 +1,11 @@ /***************************************************************************/ /* - * m68328.c - 68328 specific config + * m68328.c - 68328/68EZ328/68VZ328 specific config * * Copyright (C) 1993 Hamish Macdonald * Copyright (C) 1999 D. Jeff Dionne + * Copyright (C) 2001 Georges Menie, Ken Desmet * * This file is subject to the terms and conditions of the GNU General Public * License. See the file COPYING in the main directory of this archive @@ -20,8 +21,10 @@ #include #include #include -#include -#if defined(CONFIG_PILOT) || defined(CONFIG_INIT_LCD) + +#if defined(CONFIG_INIT_LCD) && defined(CONFIG_M68VZ328) +#include "bootlogo-vz.h" +#elif defined(CONFIG_PILOT) || defined(CONFIG_INIT_LCD) #include "bootlogo.h" #endif @@ -29,10 +32,7 @@ /***************************************************************************/ - -/***************************************************************************/ - -void m68328_reset (void) +static void m68328_reset(void) { local_irq_disable(); asm volatile ("moveal #0x10c00000, %a0;\n\t" @@ -46,12 +46,19 @@ void m68328_reset (void) void __init config_BSP(char *command, int len) { - pr_info("68328 support D. Jeff Dionne \n"); - pr_info("68328 support Kenneth Albanowski \n"); - pr_info("68328/Pilot support Bernhard Kuhn \n"); + mach_sched_init = hw_timer_init; + mach_hwclk = m68328_hwclk; + mach_reset = m68328_reset; - mach_hwclk = m68328_hwclk; - mach_reset = m68328_reset; +#if defined(CONFIG_PILOT) && defined(CONFIG_M68328) + mach_sched_init = NULL; +#elif defined(CONFIG_UCSIMM) + init_ucsimm(command, len); +#elif defined(CONFIG_UCDIMM) + init_ucsimm(command, len); +#elif defined(CONFIG_DRAGEN2) + init_dragen2(command, len); +#endif } /***************************************************************************/ diff --git a/arch/m68k/68000/m68EZ328.c b/arch/m68k/68000/m68EZ328.c deleted file mode 100644 index 65bd112285ef9..0000000000000 --- a/arch/m68k/68000/m68EZ328.c +++ /dev/null @@ -1,58 +0,0 @@ -/***************************************************************************/ - -/* - * m68EZ328.c - 68EZ328 specific config - * - * Copyright (C) 1993 Hamish Macdonald - * Copyright (C) 1999 D. Jeff Dionne - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of this archive - * for more details. - */ - -/***************************************************************************/ - -#include -#include -#include -#include -#include -#include -#include -#ifdef CONFIG_UCSIMM -#include -#endif - -#include "m68328.h" - -/***************************************************************************/ - -void m68ez328_reset(void) -{ - local_irq_disable(); - asm volatile ( - "moveal #0x10c00000, %a0;\n" - "moveb #0, 0xFFFFF300;\n" - "moveal 0(%a0), %sp;\n" - "moveal 4(%a0), %a0;\n" - "jmp (%a0);\n" - ); -} - -/***************************************************************************/ - -void __init config_BSP(char *command, int len) -{ - pr_info("68EZ328 DragonBallEZ support (C) 1999 Rt-Control, Inc\n"); - -#ifdef CONFIG_UCSIMM - init_ucsimm(command, len); -#endif - - mach_sched_init = hw_timer_init; - mach_hwclk = m68328_hwclk; - mach_reset = m68ez328_reset; -} - -/***************************************************************************/ diff --git a/arch/m68k/68000/m68VZ328.c b/arch/m68k/68000/m68VZ328.c deleted file mode 100644 index 025da5552f1c6..0000000000000 --- a/arch/m68k/68000/m68VZ328.c +++ /dev/null @@ -1,67 +0,0 @@ -/***************************************************************************/ - -/* - * m68VZ328.c - 68VZ328 specific config - * - * Copyright (C) 1993 Hamish Macdonald - * Copyright (C) 1999 D. Jeff Dionne - * Copyright (C) 2001 Georges Menie, Ken Desmet - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of this archive - * for more details. - */ - -/***************************************************************************/ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#ifdef CONFIG_INIT_LCD -#include "bootlogo-vz.h" -#endif - -#include "m68328.h" - -/***************************************************************************/ -static void m68vz328_reset(void) -{ - local_irq_disable(); - asm volatile ( - "moveal #0x10c00000, %a0;\n\t" - "moveb #0, 0xFFFFF300;\n\t" - "moveal 0(%a0), %sp;\n\t" - "moveal 4(%a0), %a0;\n\t" - "jmp (%a0);\n" - ); -} - -/***************************************************************************/ - -void __init config_BSP(char *command, int size) -{ - pr_info("68VZ328 DragonBallVZ support (c) 2001 Lineo, Inc.\n"); - - mach_sched_init = hw_timer_init; - mach_hwclk = m68328_hwclk; - mach_reset = m68vz328_reset; - -#ifdef CONFIG_UCDIMM - init_ucsimm(command, len); -#elif defined(CONFIG_DRAGEN2) - init_dragen2(command, len); -#endif -} - -/***************************************************************************/ diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu index 694c4fca9f5dc..a65ce76182326 100644 --- a/arch/m68k/Kconfig.cpu +++ b/arch/m68k/Kconfig.cpu @@ -35,7 +35,7 @@ endchoice if M68KCLASSIC config M68000 - bool "MC68000" + bool depends on !MMU select CPU_HAS_NO_BITFIELDS select CPU_HAS_NO_MULDIV64 @@ -102,21 +102,21 @@ config M68060 processor, say Y. Otherwise, say N. config M68328 - bool "MC68328" + bool depends on !MMU select M68000 help Motorola 68328 processor support. config M68EZ328 - bool "MC68EZ328" + bool depends on !MMU select M68000 help Motorola 68EX328 processor support. config M68VZ328 - bool "MC68VZ328" + bool depends on !MMU select M68000 help diff --git a/arch/m68k/Kconfig.machine b/arch/m68k/Kconfig.machine index 17e8c3a292d77..1851c66e86671 100644 --- a/arch/m68k/Kconfig.machine +++ b/arch/m68k/Kconfig.machine @@ -136,14 +136,13 @@ config SUN3 If you don't want to compile a kernel exclusively for a Sun 3, say N. -endif # M68KCLASSIC - config PILOT bool config PILOT3 bool "Pilot 1000/5000, PalmPilot Personal/Pro, or PalmIII support" - depends on M68328 + depends on !MMU + select M68328 select PILOT help Support for the Palm Pilot 1000/5000, Personal/Pro and PalmIII. @@ -156,19 +155,22 @@ config XCOPILOT_BUGS config UCSIMM bool "uCsimm module support" - depends on M68EZ328 + depends on !MMU + select M68EZ328 help Support for the Arcturus Networks uCsimm module. config UCDIMM bool "uDsimm module support" - depends on M68VZ328 + depends on !MMU + select M68VZ328 help Support for the Arcturus Networks uDsimm module. config DRAGEN2 bool "DragenEngine II board support" - depends on M68VZ328 + depends on !MMU + select M68VZ328 help Support for the DragenEngine II board. @@ -191,6 +193,8 @@ config MEMORY_RESERVE help Reserve certain memory regions on 68x328 based boards. +endif # M68KCLASSIC + config ARN5206 bool "Arnewsh 5206 board support" depends on M5206 diff --git a/arch/m68k/kernel/setup_no.c b/arch/m68k/kernel/setup_no.c index f66f4b1d062ed..58dbe10ccf56b 100644 --- a/arch/m68k/kernel/setup_no.c +++ b/arch/m68k/kernel/setup_no.c @@ -106,8 +106,16 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_UCDIMM pr_info("uCdimm by Lineo, Inc. \n"); #endif +#ifdef CONFIG_M68328 + pr_info("68328 support D. Jeff Dionne \n"); + pr_info("68328 support Kenneth Albanowski \n"); +#endif +#ifdef CONFIG_M68EZ328 + pr_info("68EZ328 DragonBallEZ support (C) 1999 Rt-Control, Inc\n"); +#endif #ifdef CONFIG_M68VZ328 pr_info("M68VZ328 support by Evan Stawnyczy \n"); + pr_info("68VZ328 DragonBallVZ support (c) 2001 Lineo, Inc.\n"); #endif #ifdef CONFIG_COLDFIRE pr_info("COLDFIRE port done by Greg Ungerer, gerg@snapgear.com\n"); @@ -121,6 +129,7 @@ void __init setup_arch(char **cmdline_p) pr_info("Flat model support (C) 1998,1999 Kenneth Albanowski, D. Jeff Dionne\n"); #if defined( CONFIG_PILOT ) && defined( CONFIG_M68328 ) + pr_info("68328/Pilot support Bernhard Kuhn \n"); pr_info("TRG SuperPilot FLASH card support \n"); #endif #if defined( CONFIG_PILOT ) && defined( CONFIG_M68EZ328 ) -- GitLab From ec76c2eea903947202098090bbe07a739b5246e9 Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Fri, 4 Dec 2020 10:55:39 +0100 Subject: [PATCH 0416/1894] ARM: OMAP2+: omap_device: fix idling of devices during probe On the GTA04A5 od->_driver_status was not set to BUS_NOTIFY_BIND_DRIVER during probe of the second mmc used for wifi. Therefore omap_device_late_idle idled the device during probing causing oopses when accessing the registers. It was not set because od->_state was set to OMAP_DEVICE_STATE_IDLE in the notifier callback. Therefore set od->_driver_status also in that case. This came apparent after commit 21b2cec61c04 ("mmc: Set PROBE_PREFER_ASYNCHRONOUS for drivers that existed in v4.4") causing this oops: omap_hsmmc 480b4000.mmc: omap_device_late_idle: enabled but no driver. Idling 8<--- cut here --- Unhandled fault: external abort on non-linefetch (0x1028) at 0xfa0b402c ... (omap_hsmmc_set_bus_width) from [] (omap_hsmmc_set_ios+0x11c/0x258) (omap_hsmmc_set_ios) from [] (mmc_power_up.part.8+0x3c/0xd0) (mmc_power_up.part.8) from [] (mmc_start_host+0x88/0x9c) (mmc_start_host) from [] (mmc_add_host+0x58/0x84) (mmc_add_host) from [] (omap_hsmmc_probe+0x5fc/0x8c0) (omap_hsmmc_probe) from [] (platform_drv_probe+0x48/0x98) (platform_drv_probe) from [] (really_probe+0x1dc/0x3b4) Fixes: 04abaf07f6d5 ("ARM: OMAP2+: omap_device: Sync omap_device and pm_runtime after probe defer") Fixes: 21b2cec61c04 ("mmc: Set PROBE_PREFER_ASYNCHRONOUS for drivers that existed in v4.4") Acked-by: Ulf Hansson Signed-off-by: Andreas Kemnade [tony@atomide.com: left out extra parens, trimmed description stack trace] Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/omap_device.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-omap2/omap_device.c b/arch/arm/mach-omap2/omap_device.c index fc7bb2ca16727..64b23b0cd23c7 100644 --- a/arch/arm/mach-omap2/omap_device.c +++ b/arch/arm/mach-omap2/omap_device.c @@ -230,10 +230,12 @@ static int _omap_device_notifier_call(struct notifier_block *nb, break; case BUS_NOTIFY_BIND_DRIVER: od = to_omap_device(pdev); - if (od && (od->_state == OMAP_DEVICE_STATE_ENABLED) && - pm_runtime_status_suspended(dev)) { + if (od) { od->_driver_status = BUS_NOTIFY_BIND_DRIVER; - pm_runtime_set_active(dev); + if (od->_state == OMAP_DEVICE_STATE_ENABLED && + pm_runtime_status_suspended(dev)) { + pm_runtime_set_active(dev); + } } break; case BUS_NOTIFY_ADD_DEVICE: -- GitLab From 2f6fc9e08bf79f11516edef855283c6212bbe78f Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Tue, 1 Dec 2020 20:12:37 +0100 Subject: [PATCH 0417/1894] ARM: omap2plus_defconfig: enable SPI GPIO GTA04 uses that for controlling the td028ttec1 panel. So for easier testing/bisecting it is useful to have it enabled in the defconfig. Signed-off-by: Andreas Kemnade Signed-off-by: Tony Lindgren --- arch/arm/configs/omap2plus_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index 23b53526c4eb3..28add0b64d424 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -280,6 +280,7 @@ CONFIG_SERIAL_OMAP_CONSOLE=y CONFIG_SERIAL_DEV_BUS=y CONFIG_I2C_CHARDEV=y CONFIG_SPI=y +CONFIG_SPI_GPIO=m CONFIG_SPI_OMAP24XX=y CONFIG_SPI_TI_QSPI=m CONFIG_HSI=m -- GitLab From 91c1c092f27da4164d55ca81e0a483108f8a3235 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 7 Dec 2020 17:33:33 +0100 Subject: [PATCH 0418/1894] efi: capsule: use atomic kmap for transient sglist mappings Don't use the heavy-weight kmap() API to create short-lived mappings of the scatter-gather list entries that are released as soon as the entries are written. Instead, use kmap_atomic(), which is more suited to this purpose. Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/capsule.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/capsule.c b/drivers/firmware/efi/capsule.c index 598b7800d14ec..43f6fe7bfe80f 100644 --- a/drivers/firmware/efi/capsule.c +++ b/drivers/firmware/efi/capsule.c @@ -244,7 +244,7 @@ int efi_capsule_update(efi_capsule_header_t *capsule, phys_addr_t *pages) for (i = 0; i < sg_count; i++) { efi_capsule_block_desc_t *sglist; - sglist = kmap(sg_pages[i]); + sglist = kmap_atomic(sg_pages[i]); for (j = 0; j < SGLIST_PER_PAGE && count > 0; j++) { u64 sz = min_t(u64, imagesize, @@ -265,7 +265,7 @@ int efi_capsule_update(efi_capsule_header_t *capsule, phys_addr_t *pages) else sglist[j].data = page_to_phys(sg_pages[i + 1]); - kunmap(sg_pages[i]); + kunmap_atomic(sglist); } mutex_lock(&capsule_mutex); -- GitLab From 16a30ba15cd782e95fc57e87c2c4515778d3b493 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 10 Nov 2020 16:50:29 +0100 Subject: [PATCH 0419/1894] sh: boards: Replace by The SuperH/J2 DT platform code is not a clock provider, and just needs to call of_clk_init(). Hence it can include instead of . Signed-off-by: Geert Uytterhoeven Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/20201110155029.3286090-1-geert+renesas@glider.be Signed-off-by: Stephen Boyd --- arch/sh/boards/of-generic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sh/boards/of-generic.c b/arch/sh/boards/of-generic.c index bffbe69b2236a..921d76fc33583 100644 --- a/arch/sh/boards/of-generic.c +++ b/arch/sh/boards/of-generic.c @@ -6,10 +6,10 @@ */ #include +#include #include #include #include -#include #include #include -- GitLab From 047b04201f628c038cdf4879ed51513215dd8780 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 10 Nov 2020 16:51:17 +0100 Subject: [PATCH 0420/1894] xtensa: Replace by The Xtensa time code is not a clock provider, and just needs to call of_clk_init(). Hence it can include instead of . Signed-off-by: Geert Uytterhoeven Reviewed-by: Stephen Boyd Acked-by: Max Filippov Link: https://lore.kernel.org/r/20201110155117.3286247-1-geert+renesas@glider.be Signed-off-by: Stephen Boyd --- arch/xtensa/kernel/time.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c index 77971fe4cc95b..e8ceb15286081 100644 --- a/arch/xtensa/kernel/time.c +++ b/arch/xtensa/kernel/time.c @@ -13,7 +13,7 @@ */ #include -#include +#include #include #include #include -- GitLab From b8bcece8a77fe0deedb7afc115881468d9c1617c Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 10 Nov 2020 16:47:50 +0100 Subject: [PATCH 0421/1894] clk: qoriq: Add platform dependencies The Freescale QorIQ clock controller is only present on Freescale E500MC and Layerscape SoCs. Add platform dependencies to the CLK_QORIQ config symbol, to avoid asking the user about it when configuring a kernel without E500MC or Layerscape support. Signed-off-by: Geert Uytterhoeven Acked-by: Arnd Bergmann Acked-by: Li Yang Link: https://lore.kernel.org/r/20201110154750.3285411-1-geert+renesas@glider.be Signed-off-by: Stephen Boyd --- drivers/clk/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig index c715d4681a0b8..969080401a830 100644 --- a/drivers/clk/Kconfig +++ b/drivers/clk/Kconfig @@ -246,7 +246,8 @@ config COMMON_CLK_AXI_CLKGEN config CLK_QORIQ bool "Clock driver for Freescale QorIQ platforms" - depends on (PPC_E500MC || ARM || ARM64 || COMPILE_TEST) && OF + depends on OF + depends on PPC_E500MC || SOC_LS1021A || ARCH_LAYERSCAPE || COMPILE_TEST help This adds the clock driver support for Freescale QorIQ platforms using common clock framework. -- GitLab From e81bed419f032824e7ddf8b5630153be6637e480 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Thu, 5 Nov 2020 20:27:44 +0100 Subject: [PATCH 0422/1894] clk: fsl-sai: fix memory leak If the device is removed we don't unregister the composite clock. Fix that. Fixes: 9cd10205227c ("clk: fsl-sai: new driver") Signed-off-by: Michael Walle Link: https://lore.kernel.org/r/20201105192746.19564-2-michael@walle.cc Signed-off-by: Stephen Boyd --- drivers/clk/clk-fsl-sai.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/clk/clk-fsl-sai.c b/drivers/clk/clk-fsl-sai.c index 0221180a4dd73..1e81c8d8a6fd3 100644 --- a/drivers/clk/clk-fsl-sai.c +++ b/drivers/clk/clk-fsl-sai.c @@ -68,9 +68,20 @@ static int fsl_sai_clk_probe(struct platform_device *pdev) if (IS_ERR(hw)) return PTR_ERR(hw); + platform_set_drvdata(pdev, hw); + return devm_of_clk_add_hw_provider(dev, of_clk_hw_simple_get, hw); } +static int fsl_sai_clk_remove(struct platform_device *pdev) +{ + struct clk_hw *hw = platform_get_drvdata(pdev); + + clk_hw_unregister_composite(hw); + + return 0; +} + static const struct of_device_id of_fsl_sai_clk_ids[] = { { .compatible = "fsl,vf610-sai-clock" }, { } @@ -79,6 +90,7 @@ MODULE_DEVICE_TABLE(of, of_fsl_sai_clk_ids); static struct platform_driver fsl_sai_clk_driver = { .probe = fsl_sai_clk_probe, + .remove = fsl_sai_clk_remove, .driver = { .name = "fsl-sai-clk", .of_match_table = of_fsl_sai_clk_ids, -- GitLab From 0eba770790426553f45b8643bcd77b854e045057 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Thu, 5 Nov 2020 20:27:45 +0100 Subject: [PATCH 0423/1894] clk: composite: add devm_clk_hw_register_composite_pdata() This will simplify drivers which would only unregister the clk in their remove() op. Signed-off-by: Michael Walle Link: https://lore.kernel.org/r/20201105192746.19564-3-michael@walle.cc Signed-off-by: Stephen Boyd --- drivers/clk/clk-composite.c | 50 ++++++++++++++++++++++++++++++++++++ include/linux/clk-provider.h | 7 +++++ 2 files changed, 57 insertions(+) diff --git a/drivers/clk/clk-composite.c b/drivers/clk/clk-composite.c index 2ddb54f7d3abc..0506046a5f4b9 100644 --- a/drivers/clk/clk-composite.c +++ b/drivers/clk/clk-composite.c @@ -4,6 +4,7 @@ */ #include +#include #include #include @@ -405,3 +406,52 @@ void clk_hw_unregister_composite(struct clk_hw *hw) kfree(composite); } EXPORT_SYMBOL_GPL(clk_hw_unregister_composite); + +static void devm_clk_hw_release_composite(struct device *dev, void *res) +{ + clk_hw_unregister_composite(*(struct clk_hw **)res); +} + +static struct clk_hw *__devm_clk_hw_register_composite(struct device *dev, + const char *name, const char * const *parent_names, + const struct clk_parent_data *pdata, int num_parents, + struct clk_hw *mux_hw, const struct clk_ops *mux_ops, + struct clk_hw *rate_hw, const struct clk_ops *rate_ops, + struct clk_hw *gate_hw, const struct clk_ops *gate_ops, + unsigned long flags) +{ + struct clk_hw **ptr, *hw; + + ptr = devres_alloc(devm_clk_hw_release_composite, sizeof(*ptr), + GFP_KERNEL); + if (!ptr) + return ERR_PTR(-ENOMEM); + + hw = __clk_hw_register_composite(dev, name, parent_names, pdata, + num_parents, mux_hw, mux_ops, rate_hw, + rate_ops, gate_hw, gate_ops, flags); + + if (!IS_ERR(hw)) { + *ptr = hw; + devres_add(dev, ptr); + } else { + devres_free(ptr); + } + + return hw; +} + +struct clk_hw *devm_clk_hw_register_composite_pdata(struct device *dev, + const char *name, + const struct clk_parent_data *parent_data, + int num_parents, + struct clk_hw *mux_hw, const struct clk_ops *mux_ops, + struct clk_hw *rate_hw, const struct clk_ops *rate_ops, + struct clk_hw *gate_hw, const struct clk_ops *gate_ops, + unsigned long flags) +{ + return __devm_clk_hw_register_composite(dev, name, NULL, parent_data, + num_parents, mux_hw, mux_ops, + rate_hw, rate_ops, gate_hw, + gate_ops, flags); +} diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 03a5de5f99f4a..33db52ff83a0d 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -1062,6 +1062,13 @@ struct clk_hw *clk_hw_register_composite_pdata(struct device *dev, struct clk_hw *rate_hw, const struct clk_ops *rate_ops, struct clk_hw *gate_hw, const struct clk_ops *gate_ops, unsigned long flags); +struct clk_hw *devm_clk_hw_register_composite_pdata(struct device *dev, + const char *name, const struct clk_parent_data *parent_data, + int num_parents, + struct clk_hw *mux_hw, const struct clk_ops *mux_ops, + struct clk_hw *rate_hw, const struct clk_ops *rate_ops, + struct clk_hw *gate_hw, const struct clk_ops *gate_ops, + unsigned long flags); void clk_hw_unregister_composite(struct clk_hw *hw); struct clk *clk_register(struct device *dev, struct clk_hw *hw); -- GitLab From fb8715157b6a16cf93a14109ebc8a6440a182a82 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Thu, 5 Nov 2020 20:27:46 +0100 Subject: [PATCH 0424/1894] clk: fsl-sai: use devm_clk_hw_register_composite_pdata() Simplify the driver by using that helper and drop the remove() function. Signed-off-by: Michael Walle Link: https://lore.kernel.org/r/20201105192746.19564-4-michael@walle.cc Signed-off-by: Stephen Boyd --- drivers/clk/clk-fsl-sai.c | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) diff --git a/drivers/clk/clk-fsl-sai.c b/drivers/clk/clk-fsl-sai.c index 1e81c8d8a6fd3..6238fcea04673 100644 --- a/drivers/clk/clk-fsl-sai.c +++ b/drivers/clk/clk-fsl-sai.c @@ -58,30 +58,19 @@ static int fsl_sai_clk_probe(struct platform_device *pdev) /* set clock direction, we are the BCLK master */ writel(CR2_BCD, base + I2S_CR2); - hw = clk_hw_register_composite_pdata(dev, dev->of_node->name, - &pdata, 1, NULL, NULL, - &sai_clk->div.hw, - &clk_divider_ops, - &sai_clk->gate.hw, - &clk_gate_ops, - CLK_SET_RATE_GATE); + hw = devm_clk_hw_register_composite_pdata(dev, dev->of_node->name, + &pdata, 1, NULL, NULL, + &sai_clk->div.hw, + &clk_divider_ops, + &sai_clk->gate.hw, + &clk_gate_ops, + CLK_SET_RATE_GATE); if (IS_ERR(hw)) return PTR_ERR(hw); - platform_set_drvdata(pdev, hw); - return devm_of_clk_add_hw_provider(dev, of_clk_hw_simple_get, hw); } -static int fsl_sai_clk_remove(struct platform_device *pdev) -{ - struct clk_hw *hw = platform_get_drvdata(pdev); - - clk_hw_unregister_composite(hw); - - return 0; -} - static const struct of_device_id of_fsl_sai_clk_ids[] = { { .compatible = "fsl,vf610-sai-clock" }, { } @@ -90,7 +79,6 @@ MODULE_DEVICE_TABLE(of, of_fsl_sai_clk_ids); static struct platform_driver fsl_sai_clk_driver = { .probe = fsl_sai_clk_probe, - .remove = fsl_sai_clk_remove, .driver = { .name = "fsl-sai-clk", .of_match_table = of_fsl_sai_clk_ids, -- GitLab From 4cb15934ba05b49784d9d47778af308e7ea50b69 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Sun, 8 Nov 2020 19:51:07 +0100 Subject: [PATCH 0425/1894] clk: qoriq: provide constants for the type To avoid future mistakes in the device tree for the clockgen module, add constants for the clockgen subtype as well as a macro for the PLL divider. Signed-off-by: Michael Walle Acked-by: Rob Herring Link: https://lore.kernel.org/r/20201108185113.31377-4-michael@walle.cc Signed-off-by: Stephen Boyd --- drivers/clk/clk-qoriq.c | 13 +++++++------ include/dt-bindings/clock/fsl,qoriq-clockgen.h | 15 +++++++++++++++ 2 files changed, 22 insertions(+), 6 deletions(-) create mode 100644 include/dt-bindings/clock/fsl,qoriq-clockgen.h diff --git a/drivers/clk/clk-qoriq.c b/drivers/clk/clk-qoriq.c index 46101c6a20f26..70aa521e7e7fa 100644 --- a/drivers/clk/clk-qoriq.c +++ b/drivers/clk/clk-qoriq.c @@ -7,6 +7,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include @@ -1368,33 +1369,33 @@ static struct clk *clockgen_clk_get(struct of_phandle_args *clkspec, void *data) idx = clkspec->args[1]; switch (type) { - case 0: + case QORIQ_CLK_SYSCLK: if (idx != 0) goto bad_args; clk = cg->sysclk; break; - case 1: + case QORIQ_CLK_CMUX: if (idx >= ARRAY_SIZE(cg->cmux)) goto bad_args; clk = cg->cmux[idx]; break; - case 2: + case QORIQ_CLK_HWACCEL: if (idx >= ARRAY_SIZE(cg->hwaccel)) goto bad_args; clk = cg->hwaccel[idx]; break; - case 3: + case QORIQ_CLK_FMAN: if (idx >= ARRAY_SIZE(cg->fman)) goto bad_args; clk = cg->fman[idx]; break; - case 4: + case QORIQ_CLK_PLATFORM_PLL: pll = &cg->pll[PLATFORM_PLL]; if (idx >= ARRAY_SIZE(pll->div)) goto bad_args; clk = pll->div[idx].clk; break; - case 5: + case QORIQ_CLK_CORECLK: if (idx != 0) goto bad_args; clk = cg->coreclk; diff --git a/include/dt-bindings/clock/fsl,qoriq-clockgen.h b/include/dt-bindings/clock/fsl,qoriq-clockgen.h new file mode 100644 index 0000000000000..ddec7d0bdc7fe --- /dev/null +++ b/include/dt-bindings/clock/fsl,qoriq-clockgen.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef DT_CLOCK_FSL_QORIQ_CLOCKGEN_H +#define DT_CLOCK_FSL_QORIQ_CLOCKGEN_H + +#define QORIQ_CLK_SYSCLK 0 +#define QORIQ_CLK_CMUX 1 +#define QORIQ_CLK_HWACCEL 2 +#define QORIQ_CLK_FMAN 3 +#define QORIQ_CLK_PLATFORM_PLL 4 +#define QORIQ_CLK_CORECLK 5 + +#define QORIQ_CLK_PLL_DIV(x) ((x) - 1) + +#endif /* DT_CLOCK_FSL_QORIQ_CLOCKGEN_H */ -- GitLab From 26792699fe3681102aa85f4ae6d39e80a6a7e6b6 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Sun, 8 Nov 2020 19:51:09 +0100 Subject: [PATCH 0426/1894] clk: divider: add devm_clk_hw_register_divider_table() This will simplify drivers which would only unregister the clk in their remove() op. Signed-off-by: Michael Walle Link: https://lore.kernel.org/r/20201108185113.31377-6-michael@walle.cc Signed-off-by: Stephen Boyd --- drivers/clk/clk-divider.c | 34 ++++++++++++++++++++++++++++++++++ include/linux/clk-provider.h | 27 +++++++++++++++++++++++++++ 2 files changed, 61 insertions(+) diff --git a/drivers/clk/clk-divider.c b/drivers/clk/clk-divider.c index 8de12cb0c43d8..c499799693ccc 100644 --- a/drivers/clk/clk-divider.c +++ b/drivers/clk/clk-divider.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include @@ -578,3 +579,36 @@ void clk_hw_unregister_divider(struct clk_hw *hw) kfree(div); } EXPORT_SYMBOL_GPL(clk_hw_unregister_divider); + +static void devm_clk_hw_release_divider(struct device *dev, void *res) +{ + clk_hw_unregister_divider(*(struct clk_hw **)res); +} + +struct clk_hw *__devm_clk_hw_register_divider(struct device *dev, + struct device_node *np, const char *name, + const char *parent_name, const struct clk_hw *parent_hw, + const struct clk_parent_data *parent_data, unsigned long flags, + void __iomem *reg, u8 shift, u8 width, u8 clk_divider_flags, + const struct clk_div_table *table, spinlock_t *lock) +{ + struct clk_hw **ptr, *hw; + + ptr = devres_alloc(devm_clk_hw_release_divider, sizeof(*ptr), GFP_KERNEL); + if (!ptr) + return ERR_PTR(-ENOMEM); + + hw = __clk_hw_register_divider(dev, np, name, parent_name, parent_hw, + parent_data, flags, reg, shift, width, + clk_divider_flags, table, lock); + + if (!IS_ERR(hw)) { + *ptr = hw; + devres_add(dev, ptr); + } else { + devres_free(ptr); + } + + return hw; +} +EXPORT_SYMBOL_GPL(__devm_clk_hw_register_divider); diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 33db52ff83a0d..5f896df01f83b 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -639,6 +639,12 @@ struct clk_hw *__clk_hw_register_divider(struct device *dev, const struct clk_parent_data *parent_data, unsigned long flags, void __iomem *reg, u8 shift, u8 width, u8 clk_divider_flags, const struct clk_div_table *table, spinlock_t *lock); +struct clk_hw *__devm_clk_hw_register_divider(struct device *dev, + struct device_node *np, const char *name, + const char *parent_name, const struct clk_hw *parent_hw, + const struct clk_parent_data *parent_data, unsigned long flags, + void __iomem *reg, u8 shift, u8 width, u8 clk_divider_flags, + const struct clk_div_table *table, spinlock_t *lock); struct clk *clk_register_divider_table(struct device *dev, const char *name, const char *parent_name, unsigned long flags, void __iomem *reg, u8 shift, u8 width, @@ -779,6 +785,27 @@ struct clk *clk_register_divider_table(struct device *dev, const char *name, (parent_data), (flags), (reg), (shift), \ (width), (clk_divider_flags), (table), \ (lock)) +/** + * devm_clk_hw_register_divider_table - register a table based divider clock + * with the clock framework (devres variant) + * @dev: device registering this clock + * @name: name of this clock + * @parent_name: name of clock's parent + * @flags: framework-specific flags + * @reg: register address to adjust divider + * @shift: number of bits to shift the bitfield + * @width: width of the bitfield + * @clk_divider_flags: divider-specific flags for this clock + * @table: array of divider/value pairs ending with a div set to 0 + * @lock: shared register lock for this clock + */ +#define devm_clk_hw_register_divider_table(dev, name, parent_name, flags, \ + reg, shift, width, \ + clk_divider_flags, table, lock) \ + __devm_clk_hw_register_divider((dev), NULL, (name), (parent_name), \ + NULL, NULL, (flags), (reg), (shift), \ + (width), (clk_divider_flags), (table), \ + (lock)) void clk_unregister_divider(struct clk *clk); void clk_hw_unregister_divider(struct clk_hw *hw); -- GitLab From e577af82c72fa928a4ab670fe5ac2b0b7b14fceb Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Sun, 8 Nov 2020 19:51:10 +0100 Subject: [PATCH 0427/1894] dt-bindings: clock: document the fsl-flexspi-clk device Signed-off-by: Michael Walle Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20201108185113.31377-7-michael@walle.cc [sboyd@kernel.org: DT bindings aren't drivers] Signed-off-by: Stephen Boyd --- .../bindings/clock/fsl,flexspi-clock.yaml | 55 +++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 Documentation/devicetree/bindings/clock/fsl,flexspi-clock.yaml diff --git a/Documentation/devicetree/bindings/clock/fsl,flexspi-clock.yaml b/Documentation/devicetree/bindings/clock/fsl,flexspi-clock.yaml new file mode 100644 index 0000000000000..1fa390ee7b9b6 --- /dev/null +++ b/Documentation/devicetree/bindings/clock/fsl,flexspi-clock.yaml @@ -0,0 +1,55 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/fsl,flexspi-clock.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Freescale FlexSPI clock driver for Layerscape SoCs + +maintainers: + - Michael Walle + +description: + The Freescale Layerscape SoCs have a special FlexSPI clock which is + derived from the platform PLL. + +properties: + compatible: + enum: + - fsl,ls1028a-flexspi-clk + - fsl,lx2160a-flexspi-clk + + reg: + maxItems: 1 + + clocks: + maxItems: 1 + + '#clock-cells': + const: 0 + + clock-output-names: + maxItems: 1 + +required: + - compatible + - reg + - clocks + - '#clock-cells' + +additionalProperties: false + +examples: + - | + dcfg { + #address-cells = <1>; + #size-cells = <1>; + + fspi_clk: clock-controller@900 { + compatible = "fsl,ls1028a-flexspi-clk"; + reg = <0x900 0x4>; + #clock-cells = <0>; + clocks = <&parentclk>; + clock-output-names = "fspi_clk"; + }; + }; -- GitLab From fcf77be87eacb8f305528d24d892dfcf15cf0341 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Sun, 8 Nov 2020 19:51:11 +0100 Subject: [PATCH 0428/1894] clk: fsl-flexspi: new driver Add support for the FlexSPI clock on Freescale Layerscape SoCs. The clock is a simple divider based one and is located inside the device configuration space (DCFG). This will allow switching the SCK frequencies for the FlexSPI interface on the LS1028A and the LX2160A. Signed-off-by: Michael Walle Link: https://lore.kernel.org/r/20201108185113.31377-8-michael@walle.cc [sboyd@kernel.org: Drop modalias, add module table] Signed-off-by: Stephen Boyd --- drivers/clk/Kconfig | 8 +++ drivers/clk/Makefile | 1 + drivers/clk/clk-fsl-flexspi.c | 106 ++++++++++++++++++++++++++++++++++ 3 files changed, 115 insertions(+) create mode 100644 drivers/clk/clk-fsl-flexspi.c diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig index 969080401a830..85856cff506c0 100644 --- a/drivers/clk/Kconfig +++ b/drivers/clk/Kconfig @@ -188,6 +188,14 @@ config COMMON_CLK_CS2000_CP help If you say yes here you get support for the CS2000 clock multiplier. +config COMMON_CLK_FSL_FLEXSPI + tristate "Clock driver for FlexSPI on Layerscape SoCs" + depends on ARCH_LAYERSCAPE || COMPILE_TEST + default ARCH_LAYERSCAPE && SPI_NXP_FLEXSPI + help + On Layerscape SoCs there is a special clock for the FlexSPI + interface. + config COMMON_CLK_FSL_SAI bool "Clock driver for BCLK of Freescale SAI cores" depends on ARCH_LAYERSCAPE || COMPILE_TEST diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile index da8fcf147eb13..dbdc590e7de3a 100644 --- a/drivers/clk/Makefile +++ b/drivers/clk/Makefile @@ -30,6 +30,7 @@ obj-$(CONFIG_COMMON_CLK_CS2000_CP) += clk-cs2000-cp.o obj-$(CONFIG_ARCH_EFM32) += clk-efm32gg.o obj-$(CONFIG_ARCH_SPARX5) += clk-sparx5.o obj-$(CONFIG_COMMON_CLK_FIXED_MMIO) += clk-fixed-mmio.o +obj-$(CONFIG_COMMON_CLK_FSL_FLEXSPI) += clk-fsl-flexspi.o obj-$(CONFIG_COMMON_CLK_FSL_SAI) += clk-fsl-sai.o obj-$(CONFIG_COMMON_CLK_GEMINI) += clk-gemini.o obj-$(CONFIG_COMMON_CLK_ASPEED) += clk-aspeed.o diff --git a/drivers/clk/clk-fsl-flexspi.c b/drivers/clk/clk-fsl-flexspi.c new file mode 100644 index 0000000000000..8432d681e2d96 --- /dev/null +++ b/drivers/clk/clk-fsl-flexspi.c @@ -0,0 +1,106 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Layerscape FlexSPI clock driver + * + * Copyright 2020 Michael Walle + */ + +#include +#include +#include +#include + +static const struct clk_div_table ls1028a_flexspi_divs[] = { + { .val = 0, .div = 1, }, + { .val = 1, .div = 2, }, + { .val = 2, .div = 3, }, + { .val = 3, .div = 4, }, + { .val = 4, .div = 5, }, + { .val = 5, .div = 6, }, + { .val = 6, .div = 7, }, + { .val = 7, .div = 8, }, + { .val = 11, .div = 12, }, + { .val = 15, .div = 16, }, + { .val = 16, .div = 20, }, + { .val = 17, .div = 24, }, + { .val = 18, .div = 28, }, + { .val = 19, .div = 32, }, + { .val = 20, .div = 80, }, + {} +}; + +static const struct clk_div_table lx2160a_flexspi_divs[] = { + { .val = 1, .div = 2, }, + { .val = 3, .div = 4, }, + { .val = 5, .div = 6, }, + { .val = 7, .div = 8, }, + { .val = 11, .div = 12, }, + { .val = 15, .div = 16, }, + { .val = 16, .div = 20, }, + { .val = 17, .div = 24, }, + { .val = 18, .div = 28, }, + { .val = 19, .div = 32, }, + { .val = 20, .div = 80, }, + {} +}; + +static int fsl_flexspi_clk_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct device_node *np = dev->of_node; + const char *clk_name = np->name; + const char *clk_parent; + struct resource *res; + void __iomem *reg; + struct clk_hw *hw; + const struct clk_div_table *divs; + + divs = device_get_match_data(dev); + if (!divs) + return -ENOENT; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -ENOENT; + + /* + * Can't use devm_ioremap_resource() or devm_of_iomap() because the + * resource might already be taken by the parent device. + */ + reg = devm_ioremap(dev, res->start, resource_size(res)); + if (!reg) + return -ENOMEM; + + clk_parent = of_clk_get_parent_name(np, 0); + if (!clk_parent) + return -EINVAL; + + of_property_read_string(np, "clock-output-names", &clk_name); + + hw = devm_clk_hw_register_divider_table(dev, clk_name, clk_parent, 0, + reg, 0, 5, 0, divs, NULL); + if (IS_ERR(hw)) + return PTR_ERR(hw); + + return devm_of_clk_add_hw_provider(dev, of_clk_hw_simple_get, hw); +} + +static const struct of_device_id fsl_flexspi_clk_dt_ids[] = { + { .compatible = "fsl,ls1028a-flexspi-clk", .data = &ls1028a_flexspi_divs }, + { .compatible = "fsl,lx2160a-flexspi-clk", .data = &lx2160a_flexspi_divs }, + {} +}; +MODULE_DEVICE_TABLE(of, fsl_flexspi_clk_dt_ids); + +static struct platform_driver fsl_flexspi_clk_driver = { + .driver = { + .name = "fsl-flexspi-clk", + .of_match_table = fsl_flexspi_clk_dt_ids, + }, + .probe = fsl_flexspi_clk_probe, +}; +module_platform_driver(fsl_flexspi_clk_driver); + +MODULE_DESCRIPTION("FlexSPI clock driver for Layerscape SoCs"); +MODULE_AUTHOR("Michael Walle "); +MODULE_LICENSE("GPL"); -- GitLab From a13ae5a3797aa86623c906604fb5a884b832a685 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Thu, 26 Nov 2020 12:58:39 +0530 Subject: [PATCH 0429/1894] dt-bindings: clock: Add SDX55 GCC clock bindings Add device tree bindings for global clock controller on SDX55 SoCs. Signed-off-by: Vinod Koul Signed-off-by: Manivannan Sadhasivam Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20201126072844.35370-2-manivannan.sadhasivam@linaro.org Signed-off-by: Stephen Boyd --- .../bindings/clock/qcom,gcc-sdx55.yaml | 77 ++++++++++++ include/dt-bindings/clock/qcom,gcc-sdx55.h | 112 ++++++++++++++++++ 2 files changed, 189 insertions(+) create mode 100644 Documentation/devicetree/bindings/clock/qcom,gcc-sdx55.yaml create mode 100644 include/dt-bindings/clock/qcom,gcc-sdx55.h diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-sdx55.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-sdx55.yaml new file mode 100644 index 0000000000000..1121b3934cb90 --- /dev/null +++ b/Documentation/devicetree/bindings/clock/qcom,gcc-sdx55.yaml @@ -0,0 +1,77 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/qcom,gcc-sdx55.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm Global Clock & Reset Controller Binding for SDX55 + +maintainers: + - Vinod Koul + - Manivannan Sadhasivam + +description: | + Qualcomm global clock control module which supports the clocks, resets and + power domains on SDX55 + + See also: + - dt-bindings/clock/qcom,gcc-sdx55.h + +properties: + compatible: + const: qcom,gcc-sdx55 + + clocks: + items: + - description: Board XO source + - description: Sleep clock source + - description: PLL test clock source (Optional clock) + minItems: 2 + maxItems: 3 + + clock-names: + items: + - const: bi_tcxo + - const: sleep_clk + - const: core_bi_pll_test_se # Optional clock + minItems: 2 + maxItems: 3 + + '#clock-cells': + const: 1 + + '#reset-cells': + const: 1 + + '#power-domain-cells': + const: 1 + + reg: + maxItems: 1 + +required: + - compatible + - clocks + - clock-names + - reg + - '#clock-cells' + - '#reset-cells' + - '#power-domain-cells' + +additionalProperties: false + +examples: + - | + #include + clock-controller@100000 { + compatible = "qcom,gcc-sdx55"; + reg = <0x00100000 0x1f0000>; + clocks = <&rpmhcc RPMH_CXO_CLK>, + <&sleep_clk>, <&pll_test_clk>; + clock-names = "bi_tcxo", "sleep_clk", "core_bi_pll_test_se"; + #clock-cells = <1>; + #reset-cells = <1>; + #power-domain-cells = <1>; + }; + +... diff --git a/include/dt-bindings/clock/qcom,gcc-sdx55.h b/include/dt-bindings/clock/qcom,gcc-sdx55.h new file mode 100644 index 0000000000000..c372451b34617 --- /dev/null +++ b/include/dt-bindings/clock/qcom,gcc-sdx55.h @@ -0,0 +1,112 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2018, The Linux Foundation. All rights reserved. + * Copyright (c) 2020, Linaro Ltd. + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_GCC_SDX55_H +#define _DT_BINDINGS_CLK_QCOM_GCC_SDX55_H + +#define GPLL0 3 +#define GPLL0_OUT_EVEN 4 +#define GPLL4 5 +#define GPLL4_OUT_EVEN 6 +#define GPLL5 7 +#define GCC_AHB_PCIE_LINK_CLK 8 +#define GCC_BLSP1_AHB_CLK 9 +#define GCC_BLSP1_QUP1_I2C_APPS_CLK 10 +#define GCC_BLSP1_QUP1_I2C_APPS_CLK_SRC 11 +#define GCC_BLSP1_QUP1_SPI_APPS_CLK 12 +#define GCC_BLSP1_QUP1_SPI_APPS_CLK_SRC 13 +#define GCC_BLSP1_QUP2_I2C_APPS_CLK 14 +#define GCC_BLSP1_QUP2_I2C_APPS_CLK_SRC 15 +#define GCC_BLSP1_QUP2_SPI_APPS_CLK 16 +#define GCC_BLSP1_QUP2_SPI_APPS_CLK_SRC 17 +#define GCC_BLSP1_QUP3_I2C_APPS_CLK 18 +#define GCC_BLSP1_QUP3_I2C_APPS_CLK_SRC 19 +#define GCC_BLSP1_QUP3_SPI_APPS_CLK 20 +#define GCC_BLSP1_QUP3_SPI_APPS_CLK_SRC 21 +#define GCC_BLSP1_QUP4_I2C_APPS_CLK 22 +#define GCC_BLSP1_QUP4_I2C_APPS_CLK_SRC 23 +#define GCC_BLSP1_QUP4_SPI_APPS_CLK 24 +#define GCC_BLSP1_QUP4_SPI_APPS_CLK_SRC 25 +#define GCC_BLSP1_UART1_APPS_CLK 26 +#define GCC_BLSP1_UART1_APPS_CLK_SRC 27 +#define GCC_BLSP1_UART2_APPS_CLK 28 +#define GCC_BLSP1_UART2_APPS_CLK_SRC 29 +#define GCC_BLSP1_UART3_APPS_CLK 30 +#define GCC_BLSP1_UART3_APPS_CLK_SRC 31 +#define GCC_BLSP1_UART4_APPS_CLK 32 +#define GCC_BLSP1_UART4_APPS_CLK_SRC 33 +#define GCC_BOOT_ROM_AHB_CLK 34 +#define GCC_CE1_AHB_CLK 35 +#define GCC_CE1_AXI_CLK 36 +#define GCC_CE1_CLK 37 +#define GCC_CPUSS_AHB_CLK 38 +#define GCC_CPUSS_AHB_CLK_SRC 39 +#define GCC_CPUSS_GNOC_CLK 40 +#define GCC_CPUSS_RBCPR_CLK 41 +#define GCC_CPUSS_RBCPR_CLK_SRC 42 +#define GCC_EMAC_CLK_SRC 43 +#define GCC_EMAC_PTP_CLK_SRC 44 +#define GCC_ETH_AXI_CLK 45 +#define GCC_ETH_PTP_CLK 46 +#define GCC_ETH_RGMII_CLK 47 +#define GCC_ETH_SLAVE_AHB_CLK 48 +#define GCC_GP1_CLK 49 +#define GCC_GP1_CLK_SRC 50 +#define GCC_GP2_CLK 51 +#define GCC_GP2_CLK_SRC 52 +#define GCC_GP3_CLK 53 +#define GCC_GP3_CLK_SRC 54 +#define GCC_PCIE_0_CLKREF_CLK 55 +#define GCC_PCIE_AUX_CLK 56 +#define GCC_PCIE_AUX_PHY_CLK_SRC 57 +#define GCC_PCIE_CFG_AHB_CLK 58 +#define GCC_PCIE_MSTR_AXI_CLK 59 +#define GCC_PCIE_PIPE_CLK 60 +#define GCC_PCIE_RCHNG_PHY_CLK 61 +#define GCC_PCIE_RCHNG_PHY_CLK_SRC 62 +#define GCC_PCIE_SLEEP_CLK 63 +#define GCC_PCIE_SLV_AXI_CLK 64 +#define GCC_PCIE_SLV_Q2A_AXI_CLK 65 +#define GCC_PDM2_CLK 66 +#define GCC_PDM2_CLK_SRC 67 +#define GCC_PDM_AHB_CLK 68 +#define GCC_PDM_XO4_CLK 69 +#define GCC_SDCC1_AHB_CLK 70 +#define GCC_SDCC1_APPS_CLK 71 +#define GCC_SDCC1_APPS_CLK_SRC 72 +#define GCC_SYS_NOC_CPUSS_AHB_CLK 73 +#define GCC_USB30_MASTER_CLK 74 +#define GCC_USB30_MASTER_CLK_SRC 75 +#define GCC_USB30_MOCK_UTMI_CLK 76 +#define GCC_USB30_MOCK_UTMI_CLK_SRC 77 +#define GCC_USB30_MSTR_AXI_CLK 78 +#define GCC_USB30_SLEEP_CLK 79 +#define GCC_USB30_SLV_AHB_CLK 80 +#define GCC_USB3_PHY_AUX_CLK 81 +#define GCC_USB3_PHY_AUX_CLK_SRC 82 +#define GCC_USB3_PHY_PIPE_CLK 83 +#define GCC_USB3_PRIM_CLKREF_CLK 84 +#define GCC_USB_PHY_CFG_AHB2PHY_CLK 85 +#define GCC_XO_DIV4_CLK 86 +#define GCC_XO_PCIE_LINK_CLK 87 + +#define GCC_EMAC_BCR 0 +#define GCC_PCIE_BCR 1 +#define GCC_PCIE_LINK_DOWN_BCR 2 +#define GCC_PCIE_NOCSR_COM_PHY_BCR 3 +#define GCC_PCIE_PHY_BCR 4 +#define GCC_PCIE_PHY_CFG_AHB_BCR 5 +#define GCC_PCIE_PHY_COM_BCR 6 +#define GCC_PCIE_PHY_NOCSR_COM_PHY_BCR 7 +#define GCC_PDM_BCR 8 +#define GCC_QUSB2PHY_BCR 9 +#define GCC_TCSR_PCIE_BCR 10 +#define GCC_USB30_BCR 11 +#define GCC_USB3_PHY_BCR 12 +#define GCC_USB3PHY_PHY_BCR 13 +#define GCC_USB_PHY_CFG_AHB2PHY_BCR 14 + +#endif -- GitLab From 3fade566c07abd54ad8324326a4a14f2b6c13e3d Mon Sep 17 00:00:00 2001 From: Naveen Yadav Date: Thu, 26 Nov 2020 12:58:40 +0530 Subject: [PATCH 0430/1894] clk: qcom: Add SDX55 GCC support Add Global Clock Controller (GCC) support for SDX55 SoCs from Qualcomm. Signed-off-by: Naveen Yadav [mani: converted to parent_data, commented critical clocks, cleanups] Signed-off-by: Manivannan Sadhasivam Reviewed-by: Vinod Koul Link: https://lore.kernel.org/r/20201126072844.35370-3-manivannan.sadhasivam@linaro.org Signed-off-by: Stephen Boyd --- drivers/clk/qcom/Kconfig | 7 + drivers/clk/qcom/Makefile | 1 + drivers/clk/qcom/gcc-sdx55.c | 1626 ++++++++++++++++++++++++++++++++++ 3 files changed, 1634 insertions(+) create mode 100644 drivers/clk/qcom/gcc-sdx55.c diff --git a/drivers/clk/qcom/Kconfig b/drivers/clk/qcom/Kconfig index 8c8b568609d4b..3555eac37495b 100644 --- a/drivers/clk/qcom/Kconfig +++ b/drivers/clk/qcom/Kconfig @@ -422,6 +422,13 @@ config SDM_LPASSCC_845 Say Y if you want to use the LPASS branch clocks of the LPASS clock controller to reset the LPASS subsystem. +config SDX_GCC_55 + tristate "SDX55 Global Clock Controller" + help + Support for the global clock controller on SDX55 devices. + Say Y if you want to use peripheral devices such as UART, + SPI, I2C, USB, SD/UFS, PCIe etc. + config SM_DISPCC_8250 tristate "SM8150 and SM8250 Display Clock Controller" depends on SM_GCC_8150 || SM_GCC_8250 diff --git a/drivers/clk/qcom/Makefile b/drivers/clk/qcom/Makefile index 57b6349a3ee8e..9e5e0e3cb7b48 100644 --- a/drivers/clk/qcom/Makefile +++ b/drivers/clk/qcom/Makefile @@ -66,6 +66,7 @@ obj-$(CONFIG_SDM_GCC_845) += gcc-sdm845.o obj-$(CONFIG_SDM_GPUCC_845) += gpucc-sdm845.o obj-$(CONFIG_SDM_LPASSCC_845) += lpasscc-sdm845.o obj-$(CONFIG_SDM_VIDEOCC_845) += videocc-sdm845.o +obj-$(CONFIG_SDX_GCC_55) += gcc-sdx55.o obj-$(CONFIG_SM_DISPCC_8250) += dispcc-sm8250.o obj-$(CONFIG_SM_GCC_8150) += gcc-sm8150.o obj-$(CONFIG_SM_GCC_8250) += gcc-sm8250.o diff --git a/drivers/clk/qcom/gcc-sdx55.c b/drivers/clk/qcom/gcc-sdx55.c new file mode 100644 index 0000000000000..bf114165e24b6 --- /dev/null +++ b/drivers/clk/qcom/gcc-sdx55.c @@ -0,0 +1,1626 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2018-2019, The Linux Foundation. All rights reserved. + * Copyright (c) 2020, Linaro Ltd. + */ + +#include +#include +#include +#include + +#include + +#include "common.h" +#include "clk-alpha-pll.h" +#include "clk-branch.h" +#include "clk-pll.h" +#include "clk-rcg.h" +#include "clk-regmap.h" +#include "reset.h" + +enum { + P_BI_TCXO, + P_CORE_BI_PLL_TEST_SE, + P_GPLL0_OUT_EVEN, + P_GPLL0_OUT_MAIN, + P_GPLL4_OUT_EVEN, + P_GPLL5_OUT_MAIN, + P_SLEEP_CLK, +}; + +static const struct pll_vco lucid_vco[] = { + { 249600000, 2000000000, 0 }, +}; + +static struct clk_alpha_pll gpll0 = { + .offset = 0x0, + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_LUCID], + .vco_table = lucid_vco, + .num_vco = ARRAY_SIZE(lucid_vco), + .clkr = { + .enable_reg = 0x6d000, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gpll0", + .parent_data = &(const struct clk_parent_data){ + .fw_name = "bi_tcxo", + }, + .num_parents = 1, + .ops = &clk_alpha_pll_fixed_lucid_ops, + }, + }, +}; + +static const struct clk_div_table post_div_table_lucid_even[] = { + { 0x0, 1 }, + { 0x1, 2 }, + { 0x3, 4 }, + { 0x7, 8 }, + { } +}; + +static struct clk_alpha_pll_postdiv gpll0_out_even = { + .offset = 0x0, + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_LUCID], + .post_div_shift = 8, + .post_div_table = post_div_table_lucid_even, + .num_post_div = ARRAY_SIZE(post_div_table_lucid_even), + .width = 4, + .clkr.hw.init = &(struct clk_init_data){ + .name = "gpll0_out_even", + .parent_data = &(const struct clk_parent_data){ + .hw = &gpll0.clkr.hw, + }, + .num_parents = 1, + .ops = &clk_alpha_pll_postdiv_lucid_ops, + }, +}; + +static struct clk_alpha_pll gpll4 = { + .offset = 0x76000, + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_LUCID], + .vco_table = lucid_vco, + .num_vco = ARRAY_SIZE(lucid_vco), + .clkr = { + .enable_reg = 0x6d000, + .enable_mask = BIT(4), + .hw.init = &(struct clk_init_data){ + .name = "gpll4", + .parent_data = &(const struct clk_parent_data){ + .fw_name = "bi_tcxo", + }, + .num_parents = 1, + .ops = &clk_alpha_pll_fixed_lucid_ops, + }, + }, +}; + +static struct clk_alpha_pll_postdiv gpll4_out_even = { + .offset = 0x76000, + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_LUCID], + .post_div_shift = 8, + .post_div_table = post_div_table_lucid_even, + .num_post_div = ARRAY_SIZE(post_div_table_lucid_even), + .width = 4, + .clkr.hw.init = &(struct clk_init_data){ + .name = "gpll4_out_even", + .parent_data = &(const struct clk_parent_data){ + .hw = &gpll4.clkr.hw, + }, + .num_parents = 1, + .ops = &clk_alpha_pll_postdiv_lucid_ops, + }, +}; + +static struct clk_alpha_pll gpll5 = { + .offset = 0x74000, + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_LUCID], + .vco_table = lucid_vco, + .num_vco = ARRAY_SIZE(lucid_vco), + .clkr = { + .enable_reg = 0x6d000, + .enable_mask = BIT(5), + .hw.init = &(struct clk_init_data){ + .name = "gpll5", + .parent_data = &(const struct clk_parent_data){ + .fw_name = "bi_tcxo", + }, + .num_parents = 1, + .ops = &clk_alpha_pll_fixed_lucid_ops, + }, + }, +}; + +static const struct parent_map gcc_parent_map_0[] = { + { P_BI_TCXO, 0 }, + { P_GPLL0_OUT_MAIN, 1 }, + { P_GPLL0_OUT_EVEN, 6 }, + { P_CORE_BI_PLL_TEST_SE, 7 }, +}; + +static const struct clk_parent_data gcc_parents_0[] = { + { .fw_name = "bi_tcxo" }, + { .hw = &gpll0.clkr.hw }, + { .hw = &gpll0_out_even.clkr.hw }, + { .fw_name = "core_bi_pll_test_se" }, +}; + +static const struct clk_parent_data gcc_parents_0_ao[] = { + { .fw_name = "bi_tcxo_ao" }, + { .hw = &gpll0.clkr.hw }, + { .hw = &gpll0_out_even.clkr.hw }, + { .fw_name = "core_bi_pll_test_se" }, +}; + +static const struct parent_map gcc_parent_map_2[] = { + { P_BI_TCXO, 0 }, + { P_GPLL0_OUT_MAIN, 1 }, + { P_GPLL4_OUT_EVEN, 2 }, + { P_GPLL5_OUT_MAIN, 5 }, + { P_GPLL0_OUT_EVEN, 6 }, + { P_CORE_BI_PLL_TEST_SE, 7 }, +}; + +static const struct clk_parent_data gcc_parents_2[] = { + { .fw_name = "bi_tcxo" }, + { .hw = &gpll0.clkr.hw }, + { .hw = &gpll4_out_even.clkr.hw }, + { .hw = &gpll5.clkr.hw }, + { .hw = &gpll0_out_even.clkr.hw }, + { .fw_name = "core_bi_pll_test_se" }, +}; + +static const struct parent_map gcc_parent_map_3[] = { + { P_BI_TCXO, 0 }, + { P_GPLL0_OUT_MAIN, 1 }, + { P_SLEEP_CLK, 5 }, + { P_GPLL0_OUT_EVEN, 6 }, + { P_CORE_BI_PLL_TEST_SE, 7 }, +}; + +static const struct clk_parent_data gcc_parents_3[] = { + { .fw_name = "bi_tcxo" }, + { .hw = &gpll0.clkr.hw }, + { .fw_name = "sleep_clk", .name = "sleep_clk" }, + { .hw = &gpll0_out_even.clkr.hw }, + { .fw_name = "core_bi_pll_test_se" }, +}; + +static const struct parent_map gcc_parent_map_4[] = { + { P_BI_TCXO, 0 }, + { P_SLEEP_CLK, 5 }, + { P_CORE_BI_PLL_TEST_SE, 7 }, +}; + +static const struct clk_parent_data gcc_parents_4[] = { + { .fw_name = "bi_tcxo" }, + { .fw_name = "sleep_clk", .name = "sleep_clk" }, + { .fw_name = "core_bi_pll_test_se" }, +}; + +static const struct parent_map gcc_parent_map_5[] = { + { P_BI_TCXO, 0 }, + { P_GPLL0_OUT_MAIN, 1 }, + { P_GPLL4_OUT_EVEN, 2 }, + { P_GPLL0_OUT_EVEN, 6 }, + { P_CORE_BI_PLL_TEST_SE, 7 }, +}; + +static const struct clk_parent_data gcc_parents_5[] = { + { .fw_name = "bi_tcxo" }, + { .hw = &gpll0.clkr.hw }, + { .hw = &gpll4_out_even.clkr.hw }, + { .hw = &gpll0_out_even.clkr.hw }, + { .fw_name = "core_bi_pll_test_se" }, +}; + +static const struct freq_tbl ftbl_gcc_blsp1_qup1_i2c_apps_clk_src[] = { + F(9600000, P_BI_TCXO, 2, 0, 0), + F(19200000, P_BI_TCXO, 1, 0, 0), + F(50000000, P_GPLL0_OUT_MAIN, 12, 0, 0), + { } +}; + +static struct clk_rcg2 gcc_blsp1_qup1_i2c_apps_clk_src = { + .cmd_rcgr = 0x11024, + .mnd_width = 8, + .hid_width = 5, + .parent_map = gcc_parent_map_0, + .freq_tbl = ftbl_gcc_blsp1_qup1_i2c_apps_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "gcc_blsp1_qup1_i2c_apps_clk_src", + .parent_data = gcc_parents_0, + .num_parents = 4, + .ops = &clk_rcg2_ops, + }, +}; + +static const struct freq_tbl ftbl_gcc_blsp1_qup1_spi_apps_clk_src[] = { + F(960000, P_BI_TCXO, 10, 1, 2), + F(4800000, P_BI_TCXO, 4, 0, 0), + F(9600000, P_BI_TCXO, 2, 0, 0), + F(15000000, P_GPLL0_OUT_EVEN, 5, 1, 4), + F(19200000, P_BI_TCXO, 1, 0, 0), + F(24000000, P_GPLL0_OUT_MAIN, 12.5, 1, 2), + F(25000000, P_GPLL0_OUT_MAIN, 12, 1, 2), + F(50000000, P_GPLL0_OUT_MAIN, 12, 0, 0), + { } +}; + +static struct clk_rcg2 gcc_blsp1_qup1_spi_apps_clk_src = { + .cmd_rcgr = 0x1100c, + .mnd_width = 8, + .hid_width = 5, + .parent_map = gcc_parent_map_0, + .freq_tbl = ftbl_gcc_blsp1_qup1_spi_apps_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "gcc_blsp1_qup1_spi_apps_clk_src", + .parent_data = gcc_parents_0, + .num_parents = 4, + .ops = &clk_rcg2_ops, + }, +}; + +static struct clk_rcg2 gcc_blsp1_qup2_i2c_apps_clk_src = { + .cmd_rcgr = 0x13024, + .mnd_width = 8, + .hid_width = 5, + .parent_map = gcc_parent_map_0, + .freq_tbl = ftbl_gcc_blsp1_qup1_i2c_apps_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "gcc_blsp1_qup2_i2c_apps_clk_src", + .parent_data = gcc_parents_0, + .num_parents = 4, + .ops = &clk_rcg2_ops, + }, +}; + +static struct clk_rcg2 gcc_blsp1_qup2_spi_apps_clk_src = { + .cmd_rcgr = 0x1300c, + .mnd_width = 8, + .hid_width = 5, + .parent_map = gcc_parent_map_0, + .freq_tbl = ftbl_gcc_blsp1_qup1_spi_apps_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "gcc_blsp1_qup2_spi_apps_clk_src", + .parent_data = gcc_parents_0, + .num_parents = 4, + .ops = &clk_rcg2_ops, + }, +}; + +static struct clk_rcg2 gcc_blsp1_qup3_i2c_apps_clk_src = { + .cmd_rcgr = 0x15024, + .mnd_width = 8, + .hid_width = 5, + .parent_map = gcc_parent_map_0, + .freq_tbl = ftbl_gcc_blsp1_qup1_i2c_apps_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "gcc_blsp1_qup3_i2c_apps_clk_src", + .parent_data = gcc_parents_0, + .num_parents = 4, + .ops = &clk_rcg2_ops, + }, +}; + +static struct clk_rcg2 gcc_blsp1_qup3_spi_apps_clk_src = { + .cmd_rcgr = 0x1500c, + .mnd_width = 8, + .hid_width = 5, + .parent_map = gcc_parent_map_0, + .freq_tbl = ftbl_gcc_blsp1_qup1_spi_apps_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "gcc_blsp1_qup3_spi_apps_clk_src", + .parent_data = gcc_parents_0, + .num_parents = 4, + .ops = &clk_rcg2_ops, + }, +}; + +static struct clk_rcg2 gcc_blsp1_qup4_i2c_apps_clk_src = { + .cmd_rcgr = 0x17024, + .mnd_width = 8, + .hid_width = 5, + .parent_map = gcc_parent_map_0, + .freq_tbl = ftbl_gcc_blsp1_qup1_i2c_apps_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "gcc_blsp1_qup4_i2c_apps_clk_src", + .parent_data = gcc_parents_0, + .num_parents = 4, + .ops = &clk_rcg2_ops, + }, +}; + +static struct clk_rcg2 gcc_blsp1_qup4_spi_apps_clk_src = { + .cmd_rcgr = 0x1700c, + .mnd_width = 8, + .hid_width = 5, + .parent_map = gcc_parent_map_0, + .freq_tbl = ftbl_gcc_blsp1_qup1_spi_apps_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "gcc_blsp1_qup4_spi_apps_clk_src", + .parent_data = gcc_parents_0, + .num_parents = 4, + .ops = &clk_rcg2_ops, + }, +}; + +static const struct freq_tbl ftbl_gcc_blsp1_uart1_apps_clk_src[] = { + F(3686400, P_GPLL0_OUT_EVEN, 1, 192, 15625), + F(7372800, P_GPLL0_OUT_EVEN, 1, 384, 15625), + F(9600000, P_BI_TCXO, 2, 0, 0), + F(14745600, P_GPLL0_OUT_EVEN, 1, 768, 15625), + F(16000000, P_GPLL0_OUT_EVEN, 1, 4, 75), + F(19200000, P_BI_TCXO, 1, 0, 0), + F(19354839, P_GPLL0_OUT_MAIN, 15.5, 1, 2), + F(20000000, P_GPLL0_OUT_MAIN, 15, 1, 2), + F(20689655, P_GPLL0_OUT_MAIN, 14.5, 1, 2), + F(21428571, P_GPLL0_OUT_MAIN, 14, 1, 2), + F(22222222, P_GPLL0_OUT_MAIN, 13.5, 1, 2), + F(23076923, P_GPLL0_OUT_MAIN, 13, 1, 2), + F(24000000, P_GPLL0_OUT_MAIN, 5, 1, 5), + F(25000000, P_GPLL0_OUT_MAIN, 12, 1, 2), + F(26086957, P_GPLL0_OUT_MAIN, 11.5, 1, 2), + F(27272727, P_GPLL0_OUT_MAIN, 11, 1, 2), + F(28571429, P_GPLL0_OUT_MAIN, 10.5, 1, 2), + F(32000000, P_GPLL0_OUT_MAIN, 1, 4, 75), + F(40000000, P_GPLL0_OUT_MAIN, 15, 0, 0), + F(46400000, P_GPLL0_OUT_MAIN, 1, 29, 375), + F(48000000, P_GPLL0_OUT_MAIN, 12.5, 0, 0), + F(51200000, P_GPLL0_OUT_MAIN, 1, 32, 375), + F(56000000, P_GPLL0_OUT_MAIN, 1, 7, 75), + F(58982400, P_GPLL0_OUT_MAIN, 1, 1536, 15625), + F(60000000, P_GPLL0_OUT_MAIN, 10, 0, 0), + F(63157895, P_GPLL0_OUT_MAIN, 9.5, 0, 0), + { } +}; + +static struct clk_rcg2 gcc_blsp1_uart1_apps_clk_src = { + .cmd_rcgr = 0x1200c, + .mnd_width = 16, + .hid_width = 5, + .parent_map = gcc_parent_map_0, + .freq_tbl = ftbl_gcc_blsp1_uart1_apps_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "gcc_blsp1_uart1_apps_clk_src", + .parent_data = gcc_parents_0, + .num_parents = 4, + .ops = &clk_rcg2_ops, + }, +}; + +static struct clk_rcg2 gcc_blsp1_uart2_apps_clk_src = { + .cmd_rcgr = 0x1400c, + .mnd_width = 16, + .hid_width = 5, + .parent_map = gcc_parent_map_0, + .freq_tbl = ftbl_gcc_blsp1_uart1_apps_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "gcc_blsp1_uart2_apps_clk_src", + .parent_data = gcc_parents_0, + .num_parents = 4, + .ops = &clk_rcg2_ops, + }, +}; + +static struct clk_rcg2 gcc_blsp1_uart3_apps_clk_src = { + .cmd_rcgr = 0x1600c, + .mnd_width = 16, + .hid_width = 5, + .parent_map = gcc_parent_map_0, + .freq_tbl = ftbl_gcc_blsp1_uart1_apps_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "gcc_blsp1_uart3_apps_clk_src", + .parent_data = gcc_parents_0, + .num_parents = 4, + .ops = &clk_rcg2_ops, + }, +}; + +static struct clk_rcg2 gcc_blsp1_uart4_apps_clk_src = { + .cmd_rcgr = 0x1800c, + .mnd_width = 16, + .hid_width = 5, + .parent_map = gcc_parent_map_0, + .freq_tbl = ftbl_gcc_blsp1_uart1_apps_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "gcc_blsp1_uart4_apps_clk_src", + .parent_data = gcc_parents_0, + .num_parents = 4, + .ops = &clk_rcg2_ops, + }, +}; + +static const struct freq_tbl ftbl_gcc_cpuss_ahb_clk_src[] = { + F(19200000, P_BI_TCXO, 1, 0, 0), + F(50000000, P_GPLL0_OUT_EVEN, 6, 0, 0), + F(100000000, P_GPLL0_OUT_MAIN, 6, 0, 0), + F(133333333, P_GPLL0_OUT_MAIN, 4.5, 0, 0), + { } +}; + +static struct clk_rcg2 gcc_cpuss_ahb_clk_src = { + .cmd_rcgr = 0x24010, + .mnd_width = 0, + .hid_width = 5, + .parent_map = gcc_parent_map_0, + .freq_tbl = ftbl_gcc_cpuss_ahb_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "gcc_cpuss_ahb_clk_src", + .parent_data = gcc_parents_0_ao, + .num_parents = 4, + .ops = &clk_rcg2_ops, + }, +}; + +static const struct freq_tbl ftbl_gcc_cpuss_rbcpr_clk_src[] = { + F(19200000, P_BI_TCXO, 1, 0, 0), + { } +}; + +static struct clk_rcg2 gcc_cpuss_rbcpr_clk_src = { + .cmd_rcgr = 0x2402c, + .mnd_width = 0, + .hid_width = 5, + .parent_map = gcc_parent_map_0, + .freq_tbl = ftbl_gcc_cpuss_rbcpr_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "gcc_cpuss_rbcpr_clk_src", + .parent_data = gcc_parents_0_ao, + .num_parents = 4, + .ops = &clk_rcg2_ops, + }, +}; + +static const struct freq_tbl ftbl_gcc_emac_clk_src[] = { + F(2500000, P_BI_TCXO, 1, 25, 192), + F(5000000, P_BI_TCXO, 1, 25, 96), + F(19200000, P_BI_TCXO, 1, 0, 0), + F(25000000, P_GPLL0_OUT_EVEN, 12, 0, 0), + F(50000000, P_GPLL0_OUT_EVEN, 6, 0, 0), + F(250000000, P_GPLL4_OUT_EVEN, 2, 0, 0), + { } +}; + +static struct clk_rcg2 gcc_emac_clk_src = { + .cmd_rcgr = 0x47020, + .mnd_width = 8, + .hid_width = 5, + .parent_map = gcc_parent_map_5, + .freq_tbl = ftbl_gcc_emac_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "gcc_emac_clk_src", + .parent_data = gcc_parents_5, + .num_parents = 5, + .ops = &clk_rcg2_ops, + }, +}; + +static const struct freq_tbl ftbl_gcc_emac_ptp_clk_src[] = { + F(19200000, P_BI_TCXO, 1, 0, 0), + F(50000000, P_GPLL0_OUT_EVEN, 6, 0, 0), + F(230400000, P_GPLL5_OUT_MAIN, 3.5, 0, 0), + { } +}; + +static struct clk_rcg2 gcc_emac_ptp_clk_src = { + .cmd_rcgr = 0x47038, + .mnd_width = 0, + .hid_width = 5, + .parent_map = gcc_parent_map_2, + .freq_tbl = ftbl_gcc_emac_ptp_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "gcc_emac_ptp_clk_src", + .parent_data = gcc_parents_2, + .num_parents = 6, + .ops = &clk_rcg2_ops, + }, +}; + +static const struct freq_tbl ftbl_gcc_gp1_clk_src[] = { + F(19200000, P_BI_TCXO, 1, 0, 0), + F(25000000, P_GPLL0_OUT_EVEN, 12, 0, 0), + F(50000000, P_GPLL0_OUT_EVEN, 6, 0, 0), + F(100000000, P_GPLL0_OUT_MAIN, 6, 0, 0), + F(200000000, P_GPLL0_OUT_MAIN, 3, 0, 0), + { } +}; + +static struct clk_rcg2 gcc_gp1_clk_src = { + .cmd_rcgr = 0x2b004, + .mnd_width = 8, + .hid_width = 5, + .parent_map = gcc_parent_map_3, + .freq_tbl = ftbl_gcc_gp1_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "gcc_gp1_clk_src", + .parent_data = gcc_parents_3, + .num_parents = 5, + .ops = &clk_rcg2_ops, + }, +}; + +static struct clk_rcg2 gcc_gp2_clk_src = { + .cmd_rcgr = 0x2c004, + .mnd_width = 8, + .hid_width = 5, + .parent_map = gcc_parent_map_3, + .freq_tbl = ftbl_gcc_gp1_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "gcc_gp2_clk_src", + .parent_data = gcc_parents_3, + .num_parents = 5, + .ops = &clk_rcg2_ops, + }, +}; + +static struct clk_rcg2 gcc_gp3_clk_src = { + .cmd_rcgr = 0x2d004, + .mnd_width = 8, + .hid_width = 5, + .parent_map = gcc_parent_map_3, + .freq_tbl = ftbl_gcc_gp1_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "gcc_gp3_clk_src", + .parent_data = gcc_parents_3, + .num_parents = 5, + .ops = &clk_rcg2_ops, + }, +}; + +static struct clk_rcg2 gcc_pcie_aux_phy_clk_src = { + .cmd_rcgr = 0x37034, + .mnd_width = 16, + .hid_width = 5, + .parent_map = gcc_parent_map_4, + .freq_tbl = ftbl_gcc_cpuss_rbcpr_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "gcc_pcie_aux_phy_clk_src", + .parent_data = gcc_parents_4, + .num_parents = 3, + .ops = &clk_rcg2_ops, + }, +}; + +static const struct freq_tbl ftbl_gcc_pcie_rchng_phy_clk_src[] = { + F(100000000, P_GPLL0_OUT_EVEN, 3, 0, 0), + { } +}; + +static struct clk_rcg2 gcc_pcie_rchng_phy_clk_src = { + .cmd_rcgr = 0x37050, + .mnd_width = 0, + .hid_width = 5, + .parent_map = gcc_parent_map_3, + .freq_tbl = ftbl_gcc_pcie_rchng_phy_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "gcc_pcie_rchng_phy_clk_src", + .parent_data = gcc_parents_3, + .num_parents = 5, + .ops = &clk_rcg2_ops, + }, +}; + +static const struct freq_tbl ftbl_gcc_pdm2_clk_src[] = { + F(9600000, P_BI_TCXO, 2, 0, 0), + F(19200000, P_BI_TCXO, 1, 0, 0), + F(60000000, P_GPLL0_OUT_MAIN, 10, 0, 0), + { } +}; + +static struct clk_rcg2 gcc_pdm2_clk_src = { + .cmd_rcgr = 0x19010, + .mnd_width = 0, + .hid_width = 5, + .parent_map = gcc_parent_map_0, + .freq_tbl = ftbl_gcc_pdm2_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "gcc_pdm2_clk_src", + .parent_data = gcc_parents_0, + .num_parents = 4, + .ops = &clk_rcg2_ops, + }, +}; + +static struct clk_rcg2 gcc_sdcc1_apps_clk_src = { + .cmd_rcgr = 0xf00c, + .mnd_width = 8, + .hid_width = 5, + .parent_map = gcc_parent_map_0, + .freq_tbl = ftbl_gcc_gp1_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "gcc_sdcc1_apps_clk_src", + .parent_data = gcc_parents_0, + .num_parents = 4, + .ops = &clk_rcg2_ops, + }, +}; + +static const struct freq_tbl ftbl_gcc_usb30_master_clk_src[] = { + F(200000000, P_GPLL0_OUT_EVEN, 1.5, 0, 0), + { } +}; + +static struct clk_rcg2 gcc_usb30_master_clk_src = { + .cmd_rcgr = 0xb024, + .mnd_width = 8, + .hid_width = 5, + .parent_map = gcc_parent_map_0, + .freq_tbl = ftbl_gcc_usb30_master_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "gcc_usb30_master_clk_src", + .parent_data = gcc_parents_0, + .num_parents = 4, + .ops = &clk_rcg2_ops, + }, +}; + +static const struct freq_tbl ftbl_gcc_usb30_mock_utmi_clk_src[] = { + F(19200000, P_BI_TCXO, 1, 0, 0), + { } +}; + +static struct clk_rcg2 gcc_usb30_mock_utmi_clk_src = { + .cmd_rcgr = 0xb03c, + .mnd_width = 0, + .hid_width = 5, + .parent_map = gcc_parent_map_0, + .freq_tbl = ftbl_gcc_usb30_mock_utmi_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "gcc_usb30_mock_utmi_clk_src", + .parent_data = gcc_parents_0, + .num_parents = 4, + .ops = &clk_rcg2_ops, + }, +}; + +static const struct freq_tbl ftbl_gcc_usb3_phy_aux_clk_src[] = { + F(1000000, P_BI_TCXO, 1, 5, 96), + F(19200000, P_BI_TCXO, 1, 0, 0), + { } +}; + +static struct clk_rcg2 gcc_usb3_phy_aux_clk_src = { + .cmd_rcgr = 0xb064, + .mnd_width = 16, + .hid_width = 5, + .parent_map = gcc_parent_map_4, + .freq_tbl = ftbl_gcc_usb3_phy_aux_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "gcc_usb3_phy_aux_clk_src", + .parent_data = gcc_parents_4, + .num_parents = 3, + .ops = &clk_rcg2_ops, + }, +}; + +static struct clk_branch gcc_ahb_pcie_link_clk = { + .halt_reg = 0x22004, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x22004, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_ahb_pcie_link_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_blsp1_ahb_clk = { + .halt_reg = 0x10004, + .halt_check = BRANCH_HALT_VOTED, + .clkr = { + .enable_reg = 0x6d008, + .enable_mask = BIT(14), + .hw.init = &(struct clk_init_data){ + .name = "gcc_blsp1_ahb_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_blsp1_qup1_i2c_apps_clk = { + .halt_reg = 0x11008, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x11008, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_blsp1_qup1_i2c_apps_clk", + .parent_hws = (const struct clk_hw *[]){ + &gcc_blsp1_qup1_i2c_apps_clk_src.clkr.hw }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_blsp1_qup1_spi_apps_clk = { + .halt_reg = 0x11004, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x11004, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_blsp1_qup1_spi_apps_clk", + .parent_hws = (const struct clk_hw *[]){ + &gcc_blsp1_qup1_spi_apps_clk_src.clkr.hw }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_blsp1_qup2_i2c_apps_clk = { + .halt_reg = 0x13008, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x13008, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_blsp1_qup2_i2c_apps_clk", + .parent_hws = (const struct clk_hw *[]){ + &gcc_blsp1_qup2_i2c_apps_clk_src.clkr.hw }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_blsp1_qup2_spi_apps_clk = { + .halt_reg = 0x13004, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x13004, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_blsp1_qup2_spi_apps_clk", + .parent_hws = (const struct clk_hw *[]){ + &gcc_blsp1_qup2_spi_apps_clk_src.clkr.hw }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_blsp1_qup3_i2c_apps_clk = { + .halt_reg = 0x15008, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x15008, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_blsp1_qup3_i2c_apps_clk", + .parent_hws = (const struct clk_hw *[]){ + &gcc_blsp1_qup3_i2c_apps_clk_src.clkr.hw }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_blsp1_qup3_spi_apps_clk = { + .halt_reg = 0x15004, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x15004, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_blsp1_qup3_spi_apps_clk", + .parent_hws = (const struct clk_hw *[]){ + &gcc_blsp1_qup3_spi_apps_clk_src.clkr.hw }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_blsp1_qup4_i2c_apps_clk = { + .halt_reg = 0x17008, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x17008, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_blsp1_qup4_i2c_apps_clk", + .parent_hws = (const struct clk_hw *[]){ + &gcc_blsp1_qup4_i2c_apps_clk_src.clkr.hw }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_blsp1_qup4_spi_apps_clk = { + .halt_reg = 0x17004, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x17004, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_blsp1_qup4_spi_apps_clk", + .parent_hws = (const struct clk_hw *[]){ + &gcc_blsp1_qup4_spi_apps_clk_src.clkr.hw }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_blsp1_uart1_apps_clk = { + .halt_reg = 0x12004, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x12004, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_blsp1_uart1_apps_clk", + .parent_hws = (const struct clk_hw *[]){ + &gcc_blsp1_uart1_apps_clk_src.clkr.hw }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_blsp1_uart2_apps_clk = { + .halt_reg = 0x14004, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x14004, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_blsp1_uart2_apps_clk", + .parent_hws = (const struct clk_hw *[]){ + &gcc_blsp1_uart2_apps_clk_src.clkr.hw }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_blsp1_uart3_apps_clk = { + .halt_reg = 0x16004, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x16004, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_blsp1_uart3_apps_clk", + .parent_hws = (const struct clk_hw *[]){ + &gcc_blsp1_uart3_apps_clk_src.clkr.hw }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_blsp1_uart4_apps_clk = { + .halt_reg = 0x18004, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x18004, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_blsp1_uart4_apps_clk", + .parent_hws = (const struct clk_hw *[]){ + &gcc_blsp1_uart4_apps_clk_src.clkr.hw }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_boot_rom_ahb_clk = { + .halt_reg = 0x1c004, + .halt_check = BRANCH_HALT_VOTED, + .hwcg_reg = 0x1c004, + .hwcg_bit = 1, + .clkr = { + .enable_reg = 0x6d008, + .enable_mask = BIT(10), + .hw.init = &(struct clk_init_data){ + .name = "gcc_boot_rom_ahb_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_ce1_ahb_clk = { + .halt_reg = 0x2100c, + .halt_check = BRANCH_HALT_VOTED, + .hwcg_reg = 0x2100c, + .hwcg_bit = 1, + .clkr = { + .enable_reg = 0x6d008, + .enable_mask = BIT(3), + .hw.init = &(struct clk_init_data){ + .name = "gcc_ce1_ahb_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_ce1_axi_clk = { + .halt_reg = 0x21008, + .halt_check = BRANCH_HALT_VOTED, + .clkr = { + .enable_reg = 0x6d008, + .enable_mask = BIT(4), + .hw.init = &(struct clk_init_data){ + .name = "gcc_ce1_axi_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_ce1_clk = { + .halt_reg = 0x21004, + .halt_check = BRANCH_HALT_VOTED, + .clkr = { + .enable_reg = 0x6d008, + .enable_mask = BIT(5), + .hw.init = &(struct clk_init_data){ + .name = "gcc_ce1_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_cpuss_rbcpr_clk = { + .halt_reg = 0x24008, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x24008, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_cpuss_rbcpr_clk", + .parent_hws = (const struct clk_hw *[]){ + &gcc_cpuss_rbcpr_clk_src.clkr.hw }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_eth_axi_clk = { + .halt_reg = 0x4701c, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x4701c, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_eth_axi_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_eth_ptp_clk = { + .halt_reg = 0x47018, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x47018, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_eth_ptp_clk", + .parent_hws = (const struct clk_hw *[]){ + &gcc_emac_ptp_clk_src.clkr.hw }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_eth_rgmii_clk = { + .halt_reg = 0x47010, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x47010, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_eth_rgmii_clk", + .parent_hws = (const struct clk_hw *[]){ + &gcc_emac_clk_src.clkr.hw }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_eth_slave_ahb_clk = { + .halt_reg = 0x47014, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x47014, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_eth_slave_ahb_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_gp1_clk = { + .halt_reg = 0x2b000, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x2b000, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_gp1_clk", + .parent_hws = (const struct clk_hw *[]){ + &gcc_gp1_clk_src.clkr.hw }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_gp2_clk = { + .halt_reg = 0x2c000, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x2c000, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_gp2_clk", + .parent_hws = (const struct clk_hw *[]){ + &gcc_gp2_clk_src.clkr.hw }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_gp3_clk = { + .halt_reg = 0x2d000, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x2d000, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_gp3_clk", + .parent_hws = (const struct clk_hw *[]){ + &gcc_gp3_clk_src.clkr.hw }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_pcie_0_clkref_clk = { + .halt_reg = 0x88004, + .halt_check = BRANCH_HALT_DELAY, + .clkr = { + .enable_reg = 0x88004, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_pcie_0_clkref_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_pcie_aux_clk = { + .halt_reg = 0x37024, + .halt_check = BRANCH_HALT_DELAY, + .clkr = { + .enable_reg = 0x6d010, + .enable_mask = BIT(3), + .hw.init = &(struct clk_init_data){ + .name = "gcc_pcie_aux_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_pcie_cfg_ahb_clk = { + .halt_reg = 0x3701c, + .halt_check = BRANCH_HALT_VOTED, + .clkr = { + .enable_reg = 0x6d010, + .enable_mask = BIT(2), + .hw.init = &(struct clk_init_data){ + .name = "gcc_pcie_cfg_ahb_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_pcie_mstr_axi_clk = { + .halt_reg = 0x37018, + .halt_check = BRANCH_HALT_VOTED, + .clkr = { + .enable_reg = 0x6d010, + .enable_mask = BIT(1), + .hw.init = &(struct clk_init_data){ + .name = "gcc_pcie_mstr_axi_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_pcie_pipe_clk = { + .halt_reg = 0x3702c, + .halt_check = BRANCH_HALT_DELAY, + .clkr = { + .enable_reg = 0x6d010, + .enable_mask = BIT(4), + .hw.init = &(struct clk_init_data){ + .name = "gcc_pcie_pipe_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_pcie_rchng_phy_clk = { + .halt_reg = 0x37020, + .halt_check = BRANCH_HALT_VOTED, + .clkr = { + .enable_reg = 0x6d010, + .enable_mask = BIT(7), + .hw.init = &(struct clk_init_data){ + .name = "gcc_pcie_rchng_phy_clk", + .parent_hws = (const struct clk_hw *[]){ + &gcc_pcie_rchng_phy_clk_src.clkr.hw }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_pcie_sleep_clk = { + .halt_reg = 0x37028, + .halt_check = BRANCH_HALT_VOTED, + .clkr = { + .enable_reg = 0x6d010, + .enable_mask = BIT(6), + .hw.init = &(struct clk_init_data){ + .name = "gcc_pcie_sleep_clk", + .parent_hws = (const struct clk_hw *[]){ + &gcc_pcie_aux_phy_clk_src.clkr.hw }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_pcie_slv_axi_clk = { + .halt_reg = 0x37014, + .halt_check = BRANCH_HALT_VOTED, + .hwcg_reg = 0x37014, + .hwcg_bit = 1, + .clkr = { + .enable_reg = 0x6d010, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_pcie_slv_axi_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_pcie_slv_q2a_axi_clk = { + .halt_reg = 0x37010, + .halt_check = BRANCH_HALT_VOTED, + .clkr = { + .enable_reg = 0x6d010, + .enable_mask = BIT(5), + .hw.init = &(struct clk_init_data){ + .name = "gcc_pcie_slv_q2a_axi_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_pdm2_clk = { + .halt_reg = 0x1900c, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x1900c, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_pdm2_clk", + .parent_hws = (const struct clk_hw *[]){ + &gcc_pdm2_clk_src.clkr.hw }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_pdm_ahb_clk = { + .halt_reg = 0x19004, + .halt_check = BRANCH_HALT, + .hwcg_reg = 0x19004, + .hwcg_bit = 1, + .clkr = { + .enable_reg = 0x19004, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_pdm_ahb_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_pdm_xo4_clk = { + .halt_reg = 0x19008, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x19008, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_pdm_xo4_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_sdcc1_ahb_clk = { + .halt_reg = 0xf008, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0xf008, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_sdcc1_ahb_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_sdcc1_apps_clk = { + .halt_reg = 0xf004, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0xf004, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_sdcc1_apps_clk", + .parent_hws = (const struct clk_hw *[]){ + &gcc_sdcc1_apps_clk_src.clkr.hw }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_usb30_master_clk = { + .halt_reg = 0xb010, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0xb010, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_usb30_master_clk", + .parent_hws = (const struct clk_hw *[]){ + &gcc_usb30_master_clk_src.clkr.hw }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_usb30_mock_utmi_clk = { + .halt_reg = 0xb020, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0xb020, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_usb30_mock_utmi_clk", + .parent_hws = (const struct clk_hw *[]){ + &gcc_usb30_mock_utmi_clk_src.clkr.hw }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_usb30_mstr_axi_clk = { + .halt_reg = 0xb014, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0xb014, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_usb30_mstr_axi_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_usb30_sleep_clk = { + .halt_reg = 0xb01c, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0xb01c, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_usb30_sleep_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_usb30_slv_ahb_clk = { + .halt_reg = 0xb018, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0xb018, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_usb30_slv_ahb_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_usb3_phy_aux_clk = { + .halt_reg = 0xb058, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0xb058, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_usb3_phy_aux_clk", + .parent_hws = (const struct clk_hw *[]){ + &gcc_usb3_phy_aux_clk_src.clkr.hw }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_usb3_phy_pipe_clk = { + .halt_reg = 0xb05c, + .halt_check = BRANCH_HALT_DELAY, + .clkr = { + .enable_reg = 0xb05c, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_usb3_phy_pipe_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_usb3_prim_clkref_clk = { + .halt_reg = 0x88000, + .halt_check = BRANCH_HALT_DELAY, + .clkr = { + .enable_reg = 0x88000, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_usb3_prim_clkref_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_usb_phy_cfg_ahb2phy_clk = { + .halt_reg = 0xe004, + .halt_check = BRANCH_HALT, + .hwcg_reg = 0xe004, + .hwcg_bit = 1, + .clkr = { + .enable_reg = 0xe004, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_usb_phy_cfg_ahb2phy_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_xo_pcie_link_clk = { + .halt_reg = 0x22008, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x22008, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_xo_pcie_link_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_regmap *gcc_sdx55_clocks[] = { + [GCC_AHB_PCIE_LINK_CLK] = &gcc_ahb_pcie_link_clk.clkr, + [GCC_BLSP1_AHB_CLK] = &gcc_blsp1_ahb_clk.clkr, + [GCC_BLSP1_QUP1_I2C_APPS_CLK] = &gcc_blsp1_qup1_i2c_apps_clk.clkr, + [GCC_BLSP1_QUP1_I2C_APPS_CLK_SRC] = + &gcc_blsp1_qup1_i2c_apps_clk_src.clkr, + [GCC_BLSP1_QUP1_SPI_APPS_CLK] = &gcc_blsp1_qup1_spi_apps_clk.clkr, + [GCC_BLSP1_QUP1_SPI_APPS_CLK_SRC] = + &gcc_blsp1_qup1_spi_apps_clk_src.clkr, + [GCC_BLSP1_QUP2_I2C_APPS_CLK] = &gcc_blsp1_qup2_i2c_apps_clk.clkr, + [GCC_BLSP1_QUP2_I2C_APPS_CLK_SRC] = + &gcc_blsp1_qup2_i2c_apps_clk_src.clkr, + [GCC_BLSP1_QUP2_SPI_APPS_CLK] = &gcc_blsp1_qup2_spi_apps_clk.clkr, + [GCC_BLSP1_QUP2_SPI_APPS_CLK_SRC] = + &gcc_blsp1_qup2_spi_apps_clk_src.clkr, + [GCC_BLSP1_QUP3_I2C_APPS_CLK] = &gcc_blsp1_qup3_i2c_apps_clk.clkr, + [GCC_BLSP1_QUP3_I2C_APPS_CLK_SRC] = + &gcc_blsp1_qup3_i2c_apps_clk_src.clkr, + [GCC_BLSP1_QUP3_SPI_APPS_CLK] = &gcc_blsp1_qup3_spi_apps_clk.clkr, + [GCC_BLSP1_QUP3_SPI_APPS_CLK_SRC] = + &gcc_blsp1_qup3_spi_apps_clk_src.clkr, + [GCC_BLSP1_QUP4_I2C_APPS_CLK] = &gcc_blsp1_qup4_i2c_apps_clk.clkr, + [GCC_BLSP1_QUP4_I2C_APPS_CLK_SRC] = + &gcc_blsp1_qup4_i2c_apps_clk_src.clkr, + [GCC_BLSP1_QUP4_SPI_APPS_CLK] = &gcc_blsp1_qup4_spi_apps_clk.clkr, + [GCC_BLSP1_QUP4_SPI_APPS_CLK_SRC] = + &gcc_blsp1_qup4_spi_apps_clk_src.clkr, + [GCC_BLSP1_UART1_APPS_CLK] = &gcc_blsp1_uart1_apps_clk.clkr, + [GCC_BLSP1_UART1_APPS_CLK_SRC] = &gcc_blsp1_uart1_apps_clk_src.clkr, + [GCC_BLSP1_UART2_APPS_CLK] = &gcc_blsp1_uart2_apps_clk.clkr, + [GCC_BLSP1_UART2_APPS_CLK_SRC] = &gcc_blsp1_uart2_apps_clk_src.clkr, + [GCC_BLSP1_UART3_APPS_CLK] = &gcc_blsp1_uart3_apps_clk.clkr, + [GCC_BLSP1_UART3_APPS_CLK_SRC] = &gcc_blsp1_uart3_apps_clk_src.clkr, + [GCC_BLSP1_UART4_APPS_CLK] = &gcc_blsp1_uart4_apps_clk.clkr, + [GCC_BLSP1_UART4_APPS_CLK_SRC] = &gcc_blsp1_uart4_apps_clk_src.clkr, + [GCC_BOOT_ROM_AHB_CLK] = &gcc_boot_rom_ahb_clk.clkr, + [GCC_CE1_AHB_CLK] = &gcc_ce1_ahb_clk.clkr, + [GCC_CE1_AXI_CLK] = &gcc_ce1_axi_clk.clkr, + [GCC_CE1_CLK] = &gcc_ce1_clk.clkr, + [GCC_CPUSS_AHB_CLK_SRC] = &gcc_cpuss_ahb_clk_src.clkr, + [GCC_CPUSS_RBCPR_CLK] = &gcc_cpuss_rbcpr_clk.clkr, + [GCC_CPUSS_RBCPR_CLK_SRC] = &gcc_cpuss_rbcpr_clk_src.clkr, + [GCC_EMAC_CLK_SRC] = &gcc_emac_clk_src.clkr, + [GCC_EMAC_PTP_CLK_SRC] = &gcc_emac_ptp_clk_src.clkr, + [GCC_ETH_AXI_CLK] = &gcc_eth_axi_clk.clkr, + [GCC_ETH_PTP_CLK] = &gcc_eth_ptp_clk.clkr, + [GCC_ETH_RGMII_CLK] = &gcc_eth_rgmii_clk.clkr, + [GCC_ETH_SLAVE_AHB_CLK] = &gcc_eth_slave_ahb_clk.clkr, + [GCC_GP1_CLK] = &gcc_gp1_clk.clkr, + [GCC_GP1_CLK_SRC] = &gcc_gp1_clk_src.clkr, + [GCC_GP2_CLK] = &gcc_gp2_clk.clkr, + [GCC_GP2_CLK_SRC] = &gcc_gp2_clk_src.clkr, + [GCC_GP3_CLK] = &gcc_gp3_clk.clkr, + [GCC_GP3_CLK_SRC] = &gcc_gp3_clk_src.clkr, + [GCC_PCIE_0_CLKREF_CLK] = &gcc_pcie_0_clkref_clk.clkr, + [GCC_PCIE_AUX_CLK] = &gcc_pcie_aux_clk.clkr, + [GCC_PCIE_AUX_PHY_CLK_SRC] = &gcc_pcie_aux_phy_clk_src.clkr, + [GCC_PCIE_CFG_AHB_CLK] = &gcc_pcie_cfg_ahb_clk.clkr, + [GCC_PCIE_MSTR_AXI_CLK] = &gcc_pcie_mstr_axi_clk.clkr, + [GCC_PCIE_PIPE_CLK] = &gcc_pcie_pipe_clk.clkr, + [GCC_PCIE_RCHNG_PHY_CLK] = &gcc_pcie_rchng_phy_clk.clkr, + [GCC_PCIE_RCHNG_PHY_CLK_SRC] = &gcc_pcie_rchng_phy_clk_src.clkr, + [GCC_PCIE_SLEEP_CLK] = &gcc_pcie_sleep_clk.clkr, + [GCC_PCIE_SLV_AXI_CLK] = &gcc_pcie_slv_axi_clk.clkr, + [GCC_PCIE_SLV_Q2A_AXI_CLK] = &gcc_pcie_slv_q2a_axi_clk.clkr, + [GCC_PDM2_CLK] = &gcc_pdm2_clk.clkr, + [GCC_PDM2_CLK_SRC] = &gcc_pdm2_clk_src.clkr, + [GCC_PDM_AHB_CLK] = &gcc_pdm_ahb_clk.clkr, + [GCC_PDM_XO4_CLK] = &gcc_pdm_xo4_clk.clkr, + [GCC_SDCC1_AHB_CLK] = &gcc_sdcc1_ahb_clk.clkr, + [GCC_SDCC1_APPS_CLK] = &gcc_sdcc1_apps_clk.clkr, + [GCC_SDCC1_APPS_CLK_SRC] = &gcc_sdcc1_apps_clk_src.clkr, + [GCC_USB30_MASTER_CLK] = &gcc_usb30_master_clk.clkr, + [GCC_USB30_MASTER_CLK_SRC] = &gcc_usb30_master_clk_src.clkr, + [GCC_USB30_MOCK_UTMI_CLK] = &gcc_usb30_mock_utmi_clk.clkr, + [GCC_USB30_MOCK_UTMI_CLK_SRC] = &gcc_usb30_mock_utmi_clk_src.clkr, + [GCC_USB30_MSTR_AXI_CLK] = &gcc_usb30_mstr_axi_clk.clkr, + [GCC_USB30_SLEEP_CLK] = &gcc_usb30_sleep_clk.clkr, + [GCC_USB30_SLV_AHB_CLK] = &gcc_usb30_slv_ahb_clk.clkr, + [GCC_USB3_PHY_AUX_CLK] = &gcc_usb3_phy_aux_clk.clkr, + [GCC_USB3_PHY_AUX_CLK_SRC] = &gcc_usb3_phy_aux_clk_src.clkr, + [GCC_USB3_PHY_PIPE_CLK] = &gcc_usb3_phy_pipe_clk.clkr, + [GCC_USB3_PRIM_CLKREF_CLK] = &gcc_usb3_prim_clkref_clk.clkr, + [GCC_USB_PHY_CFG_AHB2PHY_CLK] = &gcc_usb_phy_cfg_ahb2phy_clk.clkr, + [GCC_XO_PCIE_LINK_CLK] = &gcc_xo_pcie_link_clk.clkr, + [GPLL0] = &gpll0.clkr, + [GPLL0_OUT_EVEN] = &gpll0_out_even.clkr, + [GPLL4] = &gpll4.clkr, + [GPLL4_OUT_EVEN] = &gpll4_out_even.clkr, + [GPLL5] = &gpll5.clkr, +}; + +static const struct qcom_reset_map gcc_sdx55_resets[] = { + [GCC_EMAC_BCR] = { 0x47000 }, + [GCC_PCIE_BCR] = { 0x37000 }, + [GCC_PCIE_LINK_DOWN_BCR] = { 0x77000 }, + [GCC_PCIE_PHY_BCR] = { 0x39000 }, + [GCC_PCIE_PHY_COM_BCR] = { 0x78004 }, + [GCC_QUSB2PHY_BCR] = { 0xd000 }, + [GCC_USB30_BCR] = { 0xb000 }, + [GCC_USB3_PHY_BCR] = { 0xc000 }, + [GCC_USB3PHY_PHY_BCR] = { 0xc004 }, + [GCC_USB_PHY_CFG_AHB2PHY_BCR] = { 0xe000 }, +}; + +static const struct regmap_config gcc_sdx55_regmap_config = { + .reg_bits = 32, + .reg_stride = 4, + .val_bits = 32, + .max_register = 0x9b040, + .fast_io = true, +}; + +static const struct qcom_cc_desc gcc_sdx55_desc = { + .config = &gcc_sdx55_regmap_config, + .clks = gcc_sdx55_clocks, + .num_clks = ARRAY_SIZE(gcc_sdx55_clocks), + .resets = gcc_sdx55_resets, + .num_resets = ARRAY_SIZE(gcc_sdx55_resets), +}; + +static const struct of_device_id gcc_sdx55_match_table[] = { + { .compatible = "qcom,gcc-sdx55" }, + { } +}; +MODULE_DEVICE_TABLE(of, gcc_sdx55_match_table); + +static int gcc_sdx55_probe(struct platform_device *pdev) +{ + struct regmap *regmap; + + regmap = qcom_cc_map(pdev, &gcc_sdx55_desc); + if (IS_ERR(regmap)) + return PTR_ERR(regmap); + + /* + * Keep the clocks always-ON as they are critical to the functioning + * of the system: + * GCC_SYS_NOC_CPUSS_AHB_CLK, GCC_CPUSS_AHB_CLK, GCC_CPUSS_GNOC_CLK + */ + regmap_update_bits(regmap, 0x6d008, BIT(0), BIT(0)); + regmap_update_bits(regmap, 0x6d008, BIT(21), BIT(21)); + regmap_update_bits(regmap, 0x6d008, BIT(22), BIT(22)); + + return qcom_cc_really_probe(pdev, &gcc_sdx55_desc, regmap); +} + +static struct platform_driver gcc_sdx55_driver = { + .probe = gcc_sdx55_probe, + .driver = { + .name = "gcc-sdx55", + .of_match_table = gcc_sdx55_match_table, + }, +}; + +static int __init gcc_sdx55_init(void) +{ + return platform_driver_register(&gcc_sdx55_driver); +} +subsys_initcall(gcc_sdx55_init); + +static void __exit gcc_sdx55_exit(void) +{ + platform_driver_unregister(&gcc_sdx55_driver); +} +module_exit(gcc_sdx55_exit); + +MODULE_DESCRIPTION("QTI GCC SDX55 Driver"); +MODULE_LICENSE("GPL v2"); -- GitLab From 2e2639b7ef1641252e838b5181c0b8fec8b6c067 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Thu, 26 Nov 2020 12:58:41 +0530 Subject: [PATCH 0431/1894] dt-bindings: clock: Introduce RPMHCC bindings for SDX55 Add compatible for SDX55 RPMHCC and DT include. Signed-off-by: Vinod Koul Signed-off-by: Manivannan Sadhasivam Reviewed-by: Bjorn Andersson Acked-by: Rob Herring Link: https://lore.kernel.org/r/20201126072844.35370-4-manivannan.sadhasivam@linaro.org Signed-off-by: Stephen Boyd --- Documentation/devicetree/bindings/clock/qcom,rpmhcc.yaml | 1 + include/dt-bindings/clock/qcom,rpmh.h | 1 + 2 files changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/clock/qcom,rpmhcc.yaml b/Documentation/devicetree/bindings/clock/qcom,rpmhcc.yaml index a46a3a799a708..a54930f111ba5 100644 --- a/Documentation/devicetree/bindings/clock/qcom,rpmhcc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,rpmhcc.yaml @@ -19,6 +19,7 @@ properties: enum: - qcom,sc7180-rpmh-clk - qcom,sdm845-rpmh-clk + - qcom,sdx55-rpmh-clk - qcom,sm8150-rpmh-clk - qcom,sm8250-rpmh-clk diff --git a/include/dt-bindings/clock/qcom,rpmh.h b/include/dt-bindings/clock/qcom,rpmh.h index 30111c8f7fe9b..0662a583e93be 100644 --- a/include/dt-bindings/clock/qcom,rpmh.h +++ b/include/dt-bindings/clock/qcom,rpmh.h @@ -22,5 +22,6 @@ #define RPMH_LN_BB_CLK1 13 #define RPMH_LN_BB_CLK1_A 14 #define RPMH_CE_CLK 15 +#define RPMH_QPIC_CLK 16 #endif -- GitLab From afacfbbe1016a692b1fea4ed4c3f1d6f6a4ef4e5 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Thu, 26 Nov 2020 12:58:42 +0530 Subject: [PATCH 0432/1894] clk: qcom: Add support for SDX55 RPMh clocks Add support for following clocks maintained by RPMh in SDX55 SoCs. * BI TCXO * RF_CLK1 * RF_CLK1_AO * RF_CLK2 * RF_CLK2_AO * QPIC (Qualcomm Technologies, Inc. Parallel Interface Controller) Signed-off-by: Manivannan Sadhasivam Reviewed-by: Vinod Koul Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20201126072844.35370-5-manivannan.sadhasivam@linaro.org Signed-off-by: Stephen Boyd --- drivers/clk/qcom/clk-rpmh.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/clk/qcom/clk-rpmh.c b/drivers/clk/qcom/clk-rpmh.c index 7e2a4a9b9bf6b..f47595a048853 100644 --- a/drivers/clk/qcom/clk-rpmh.c +++ b/drivers/clk/qcom/clk-rpmh.c @@ -434,6 +434,25 @@ static const struct clk_rpmh_desc clk_rpmh_sm8250 = { .num_clks = ARRAY_SIZE(sm8250_rpmh_clocks), }; +DEFINE_CLK_RPMH_VRM(sdx55, rf_clk1, rf_clk1_ao, "rfclkd1", 1); +DEFINE_CLK_RPMH_VRM(sdx55, rf_clk2, rf_clk2_ao, "rfclkd2", 1); +DEFINE_CLK_RPMH_BCM(sdx55, qpic_clk, "QP0"); + +static struct clk_hw *sdx55_rpmh_clocks[] = { + [RPMH_CXO_CLK] = &sdm845_bi_tcxo.hw, + [RPMH_CXO_CLK_A] = &sdm845_bi_tcxo_ao.hw, + [RPMH_RF_CLK1] = &sdx55_rf_clk1.hw, + [RPMH_RF_CLK1_A] = &sdx55_rf_clk1_ao.hw, + [RPMH_RF_CLK2] = &sdx55_rf_clk2.hw, + [RPMH_RF_CLK2_A] = &sdx55_rf_clk2_ao.hw, + [RPMH_QPIC_CLK] = &sdx55_qpic_clk.hw, +}; + +static const struct clk_rpmh_desc clk_rpmh_sdx55 = { + .clks = sdx55_rpmh_clocks, + .num_clks = ARRAY_SIZE(sdx55_rpmh_clocks), +}; + static struct clk_hw *of_clk_rpmh_hw_get(struct of_phandle_args *clkspec, void *data) { @@ -519,6 +538,7 @@ static int clk_rpmh_probe(struct platform_device *pdev) static const struct of_device_id clk_rpmh_match_table[] = { { .compatible = "qcom,sc7180-rpmh-clk", .data = &clk_rpmh_sc7180}, { .compatible = "qcom,sdm845-rpmh-clk", .data = &clk_rpmh_sdm845}, + { .compatible = "qcom,sdx55-rpmh-clk", .data = &clk_rpmh_sdx55}, { .compatible = "qcom,sm8150-rpmh-clk", .data = &clk_rpmh_sm8150}, { .compatible = "qcom,sm8250-rpmh-clk", .data = &clk_rpmh_sm8250}, { } -- GitLab From bdf7805b8c0e45ade8d26e5bd4616ddcbb3fcc36 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Thu, 26 Nov 2020 12:58:43 +0530 Subject: [PATCH 0433/1894] dt-bindings: clock: Add GDSC in SDX55 GCC Add GDSC instances in SDX55 GCC block. Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20201126072844.35370-6-manivannan.sadhasivam@linaro.org Acked-by: Rob Herring Signed-off-by: Stephen Boyd --- include/dt-bindings/clock/qcom,gcc-sdx55.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/dt-bindings/clock/qcom,gcc-sdx55.h b/include/dt-bindings/clock/qcom,gcc-sdx55.h index c372451b34617..fb9a5942f793f 100644 --- a/include/dt-bindings/clock/qcom,gcc-sdx55.h +++ b/include/dt-bindings/clock/qcom,gcc-sdx55.h @@ -109,4 +109,9 @@ #define GCC_USB3PHY_PHY_BCR 13 #define GCC_USB_PHY_CFG_AHB2PHY_BCR 14 +/* GCC power domains */ +#define USB30_GDSC 0 +#define PCIE_GDSC 1 +#define EMAC_GDSC 2 + #endif -- GitLab From 063930ed2df5dbe07e994009a7e05e773f10b23a Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Thu, 26 Nov 2020 12:58:44 +0530 Subject: [PATCH 0434/1894] clk: qcom: Add GDSC support for SDX55 GCC Add GDSC support to control the power supply of power domains in SDX55 GCC. Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20201126072844.35370-7-manivannan.sadhasivam@linaro.org Signed-off-by: Stephen Boyd --- drivers/clk/qcom/Kconfig | 1 + drivers/clk/qcom/gcc-sdx55.c | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/drivers/clk/qcom/Kconfig b/drivers/clk/qcom/Kconfig index 3555eac37495b..d32bb12cd8d0c 100644 --- a/drivers/clk/qcom/Kconfig +++ b/drivers/clk/qcom/Kconfig @@ -424,6 +424,7 @@ config SDM_LPASSCC_845 config SDX_GCC_55 tristate "SDX55 Global Clock Controller" + select QCOM_GDSC help Support for the global clock controller on SDX55 devices. Say Y if you want to use peripheral devices such as UART, diff --git a/drivers/clk/qcom/gcc-sdx55.c b/drivers/clk/qcom/gcc-sdx55.c index bf114165e24b6..e3b9030b2bae6 100644 --- a/drivers/clk/qcom/gcc-sdx55.c +++ b/drivers/clk/qcom/gcc-sdx55.c @@ -17,6 +17,7 @@ #include "clk-pll.h" #include "clk-rcg.h" #include "clk-regmap.h" +#include "gdsc.h" #include "reset.h" enum { @@ -1455,6 +1456,30 @@ static struct clk_branch gcc_xo_pcie_link_clk = { }, }; +static struct gdsc usb30_gdsc = { + .gdscr = 0x0b004, + .pd = { + .name = "usb30_gdsc", + }, + .pwrsts = PWRSTS_OFF_ON, +}; + +static struct gdsc pcie_gdsc = { + .gdscr = 0x37004, + .pd = { + .name = "pcie_gdsc", + }, + .pwrsts = PWRSTS_OFF_ON, +}; + +static struct gdsc emac_gdsc = { + .gdscr = 0x47004, + .pd = { + .name = "emac_gdsc", + }, + .pwrsts = PWRSTS_OFF_ON, +}; + static struct clk_regmap *gcc_sdx55_clocks[] = { [GCC_AHB_PCIE_LINK_CLK] = &gcc_ahb_pcie_link_clk.clkr, [GCC_BLSP1_AHB_CLK] = &gcc_blsp1_ahb_clk.clkr, @@ -1560,6 +1585,12 @@ static const struct qcom_reset_map gcc_sdx55_resets[] = { [GCC_USB_PHY_CFG_AHB2PHY_BCR] = { 0xe000 }, }; +static struct gdsc *gcc_sdx55_gdscs[] = { + [USB30_GDSC] = &usb30_gdsc, + [PCIE_GDSC] = &pcie_gdsc, + [EMAC_GDSC] = &emac_gdsc, +}; + static const struct regmap_config gcc_sdx55_regmap_config = { .reg_bits = 32, .reg_stride = 4, @@ -1574,6 +1605,8 @@ static const struct qcom_cc_desc gcc_sdx55_desc = { .num_clks = ARRAY_SIZE(gcc_sdx55_clocks), .resets = gcc_sdx55_resets, .num_resets = ARRAY_SIZE(gcc_sdx55_resets), + .gdscs = gcc_sdx55_gdscs, + .num_gdscs = ARRAY_SIZE(gcc_sdx55_gdscs), }; static const struct of_device_id gcc_sdx55_match_table[] = { -- GitLab From 28187dc8ebd938d574edfc6d9e0f9c51c21ff3f4 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 17 Nov 2020 00:46:39 +0100 Subject: [PATCH 0435/1894] ARM: 9025/1: Kconfig: CPU_BIG_ENDIAN depends on !LD_IS_LLD LLD does not yet support any big endian architectures. Make this config non-selectable when using LLD until LLD is fixed. Link: https://github.com/ClangBuiltLinux/linux/issues/965 Signed-off-by: Nick Desaulniers Tested-by: Nathan Chancellor Reviewed-by: Nathan Chancellor Reported-by: kbuild test robot Signed-off-by: Russell King --- arch/arm/mm/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 65e4482e38498..02692fbe2db5c 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -743,6 +743,7 @@ config SWP_EMULATE config CPU_BIG_ENDIAN bool "Build big-endian kernel" depends on ARCH_SUPPORTS_BIG_ENDIAN + depends on !LD_IS_LLD help Say Y if you plan on running a kernel in big-endian mode. Note that your board must be properly built and your board -- GitLab From 331b9d02d77e0e33f273d8328d9f5453efddd926 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 17 Nov 2020 00:49:25 +0100 Subject: [PATCH 0436/1894] ARM: 9026/1: unwind: remove old check for GCC <= 4.2 Since commit 0bddd227f3dc ("Documentation: update for gcc 4.9 requirement") the minimum supported version of GCC is gcc-4.9. It's now safe to remove this code. Link: https://github.com/ClangBuiltLinux/linux/issues/427 Signed-off-by: Nick Desaulniers Reviewed-by: Nathan Chancellor Signed-off-by: Russell King --- arch/arm/kernel/unwind.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c index f35eb584a18aa..59fdf257bf8be 100644 --- a/arch/arm/kernel/unwind.c +++ b/arch/arm/kernel/unwind.c @@ -18,9 +18,6 @@ #warning Your compiler does not have EABI support. #warning ARM unwind is known to compile only with EABI compilers. #warning Change compiler or disable ARM_UNWIND option. -#elif (__GNUC__ == 4 && __GNUC_MINOR__ <= 2) && !defined(__clang__) -#warning Your compiler is too buggy; it is known to not compile ARM unwind support. -#warning Change compiler or disable ARM_UNWIND option. #endif #endif /* __CHECKER__ */ -- GitLab From 4d576cab16f57e1f87978f6997a725179398341e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 17 Nov 2020 10:23:28 +0100 Subject: [PATCH 0437/1894] ARM: 9028/1: disable KASAN in call stack capturing routines KASAN uses the routines in stacktrace.c to capture the call stack each time memory gets allocated or freed. Some of these routines are also used to log CPU and memory context when exceptions are taken, and so in some cases, memory accesses may be made that are not strictly in line with the KASAN constraints, and may therefore trigger false KASAN positives. So follow the example set by other architectures, and simply disable KASAN instrumentation for these routines. Reviewed-by: Linus Walleij Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King --- arch/arm/kernel/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 89e5d864e9234..f6431b46866e8 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -21,6 +21,9 @@ obj-y := elf.o entry-common.o irq.o opcodes.o \ setup.o signal.o sigreturn_codes.o \ stacktrace.o sys_arm.o time.o traps.o +KASAN_SANITIZE_stacktrace.o := n +KASAN_SANITIZE_traps.o := n + ifneq ($(CONFIG_ARM_UNWIND),y) obj-$(CONFIG_FRAME_POINTER) += return_address.o endif -- GitLab From 3c9f5708b7aed6a963e2aefccbd1854802de163e Mon Sep 17 00:00:00 2001 From: Jian Cai Date: Tue, 17 Nov 2020 23:11:36 +0100 Subject: [PATCH 0438/1894] ARM: 9029/1: Make iwmmxt.S support Clang's integrated assembler This patch replaces 6 IWMMXT instructions Clang's integrated assembler does not support in iwmmxt.S using macros, while making sure GNU assembler still emit the same instructions. This should be easier than providing full IWMMXT support in Clang. This is one of the last bits of kernel code that could be compiled but not assembled with clang. Once all of it works with IAS, we no longer need to special-case 32-bit Arm in Kbuild, or turn off CONFIG_IWMMXT when build-testing. "Intel Wireless MMX Technology - Developer Guide - August, 2002" should be referenced for the encoding schemes of these extensions. Link: https://github.com/ClangBuiltLinux/linux/issues/975 Suggested-by: Nick Desaulniers Suggested-by: Ard Biesheuvel Acked-by: Ard Biesheuvel Reviewed-by: Nick Desaulniers Tested-by: Nick Desaulniers Signed-off-by: Jian Cai Signed-off-by: Russell King --- arch/arm/kernel/iwmmxt.S | 89 ++++++++++++++++++++-------------------- arch/arm/kernel/iwmmxt.h | 47 +++++++++++++++++++++ 2 files changed, 92 insertions(+), 44 deletions(-) create mode 100644 arch/arm/kernel/iwmmxt.h diff --git a/arch/arm/kernel/iwmmxt.S b/arch/arm/kernel/iwmmxt.S index 0dcae787b004d..d2b4ac06e4ed8 100644 --- a/arch/arm/kernel/iwmmxt.S +++ b/arch/arm/kernel/iwmmxt.S @@ -16,6 +16,7 @@ #include #include #include +#include "iwmmxt.h" #if defined(CONFIG_CPU_PJ4) || defined(CONFIG_CPU_PJ4B) #define PJ4(code...) code @@ -113,33 +114,33 @@ concan_save: concan_dump: - wstrw wCSSF, [r1, #MMX_WCSSF] - wstrw wCASF, [r1, #MMX_WCASF] - wstrw wCGR0, [r1, #MMX_WCGR0] - wstrw wCGR1, [r1, #MMX_WCGR1] - wstrw wCGR2, [r1, #MMX_WCGR2] - wstrw wCGR3, [r1, #MMX_WCGR3] + wstrw wCSSF, r1, MMX_WCSSF + wstrw wCASF, r1, MMX_WCASF + wstrw wCGR0, r1, MMX_WCGR0 + wstrw wCGR1, r1, MMX_WCGR1 + wstrw wCGR2, r1, MMX_WCGR2 + wstrw wCGR3, r1, MMX_WCGR3 1: @ MUP? wRn tst r2, #0x2 beq 2f - wstrd wR0, [r1, #MMX_WR0] - wstrd wR1, [r1, #MMX_WR1] - wstrd wR2, [r1, #MMX_WR2] - wstrd wR3, [r1, #MMX_WR3] - wstrd wR4, [r1, #MMX_WR4] - wstrd wR5, [r1, #MMX_WR5] - wstrd wR6, [r1, #MMX_WR6] - wstrd wR7, [r1, #MMX_WR7] - wstrd wR8, [r1, #MMX_WR8] - wstrd wR9, [r1, #MMX_WR9] - wstrd wR10, [r1, #MMX_WR10] - wstrd wR11, [r1, #MMX_WR11] - wstrd wR12, [r1, #MMX_WR12] - wstrd wR13, [r1, #MMX_WR13] - wstrd wR14, [r1, #MMX_WR14] - wstrd wR15, [r1, #MMX_WR15] + wstrd wR0, r1, MMX_WR0 + wstrd wR1, r1, MMX_WR1 + wstrd wR2, r1, MMX_WR2 + wstrd wR3, r1, MMX_WR3 + wstrd wR4, r1, MMX_WR4 + wstrd wR5, r1, MMX_WR5 + wstrd wR6, r1, MMX_WR6 + wstrd wR7, r1, MMX_WR7 + wstrd wR8, r1, MMX_WR8 + wstrd wR9, r1, MMX_WR9 + wstrd wR10, r1, MMX_WR10 + wstrd wR11, r1, MMX_WR11 + wstrd wR12, r1, MMX_WR12 + wstrd wR13, r1, MMX_WR13 + wstrd wR14, r1, MMX_WR14 + wstrd wR15, r1, MMX_WR15 2: teq r0, #0 @ anything to load? reteq lr @ if not, return @@ -147,30 +148,30 @@ concan_dump: concan_load: @ Load wRn - wldrd wR0, [r0, #MMX_WR0] - wldrd wR1, [r0, #MMX_WR1] - wldrd wR2, [r0, #MMX_WR2] - wldrd wR3, [r0, #MMX_WR3] - wldrd wR4, [r0, #MMX_WR4] - wldrd wR5, [r0, #MMX_WR5] - wldrd wR6, [r0, #MMX_WR6] - wldrd wR7, [r0, #MMX_WR7] - wldrd wR8, [r0, #MMX_WR8] - wldrd wR9, [r0, #MMX_WR9] - wldrd wR10, [r0, #MMX_WR10] - wldrd wR11, [r0, #MMX_WR11] - wldrd wR12, [r0, #MMX_WR12] - wldrd wR13, [r0, #MMX_WR13] - wldrd wR14, [r0, #MMX_WR14] - wldrd wR15, [r0, #MMX_WR15] + wldrd wR0, r0, MMX_WR0 + wldrd wR1, r0, MMX_WR1 + wldrd wR2, r0, MMX_WR2 + wldrd wR3, r0, MMX_WR3 + wldrd wR4, r0, MMX_WR4 + wldrd wR5, r0, MMX_WR5 + wldrd wR6, r0, MMX_WR6 + wldrd wR7, r0, MMX_WR7 + wldrd wR8, r0, MMX_WR8 + wldrd wR9, r0, MMX_WR9 + wldrd wR10, r0, MMX_WR10 + wldrd wR11, r0, MMX_WR11 + wldrd wR12, r0, MMX_WR12 + wldrd wR13, r0, MMX_WR13 + wldrd wR14, r0, MMX_WR14 + wldrd wR15, r0, MMX_WR15 @ Load wCx - wldrw wCSSF, [r0, #MMX_WCSSF] - wldrw wCASF, [r0, #MMX_WCASF] - wldrw wCGR0, [r0, #MMX_WCGR0] - wldrw wCGR1, [r0, #MMX_WCGR1] - wldrw wCGR2, [r0, #MMX_WCGR2] - wldrw wCGR3, [r0, #MMX_WCGR3] + wldrw wCSSF, r0, MMX_WCSSF + wldrw wCASF, r0, MMX_WCASF + wldrw wCGR0, r0, MMX_WCGR0 + wldrw wCGR1, r0, MMX_WCGR1 + wldrw wCGR2, r0, MMX_WCGR2 + wldrw wCGR3, r0, MMX_WCGR3 @ clear CUP/MUP (only if r1 != 0) teq r1, #0 diff --git a/arch/arm/kernel/iwmmxt.h b/arch/arm/kernel/iwmmxt.h new file mode 100644 index 0000000000000..fb627286f5bb9 --- /dev/null +++ b/arch/arm/kernel/iwmmxt.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __IWMMXT_H__ +#define __IWMMXT_H__ + +.irp b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +.set .LwR\b, \b +.set .Lr\b, \b +.endr + +.set .LwCSSF, 0x2 +.set .LwCASF, 0x3 +.set .LwCGR0, 0x8 +.set .LwCGR1, 0x9 +.set .LwCGR2, 0xa +.set .LwCGR3, 0xb + +.macro wldrd, reg:req, base:req, offset:req +.inst 0xedd00100 | (.L\reg << 12) | (.L\base << 16) | (\offset >> 2) +.endm + +.macro wldrw, reg:req, base:req, offset:req +.inst 0xfd900100 | (.L\reg << 12) | (.L\base << 16) | (\offset >> 2) +.endm + +.macro wstrd, reg:req, base:req, offset:req +.inst 0xedc00100 | (.L\reg << 12) | (.L\base << 16) | (\offset >> 2) +.endm + +.macro wstrw, reg:req, base:req, offset:req +.inst 0xfd800100 | (.L\reg << 12) | (.L\base << 16) | (\offset >> 2) +.endm + +#ifdef __clang__ + +#define wCon c1 + +.macro tmrc, dest:req, control:req +mrc p1, 0, \dest, \control, c0, 0 +.endm + +.macro tmcr, control:req, src:req +mcr p1, 0, \src, \control, c0, 0 +.endm +#endif + +#endif -- GitLab From f77ac2e378be9dd61eb88728f0840642f045d9d1 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 19 Nov 2020 18:09:16 +0100 Subject: [PATCH 0439/1894] ARM: 9030/1: entry: omit FP emulation for UND exceptions taken in kernel mode There are a couple of problems with the exception entry code that deals with FP exceptions (which are reported as UND exceptions) when building the kernel in Thumb2 mode: - the conditional branch to vfp_kmode_exception in vfp_support_entry() may be out of range for its target, depending on how the linker decides to arrange the sections; - when the UND exception is taken in kernel mode, the emulation handling logic is entered via the 'call_fpe' label, which means we end up using the wrong value/mask pairs to match and detect the NEON opcodes. Since UND exceptions in kernel mode are unlikely to occur on a hot path (as opposed to the user mode version which is invoked for VFP support code and lazy restore), we can use the existing undef hook machinery for any kernel mode instruction emulation that is needed, including calling the existing vfp_kmode_exception() routine for unexpected cases. So drop the call to call_fpe, and instead, install an undef hook that will get called for NEON and VFP instructions that trigger an UND exception in kernel mode. While at it, make sure that the PC correction is accurate for the execution mode where the exception was taken, by checking the PSR Thumb bit. Cc: Dmitry Osipenko Cc: Kees Cook Fixes: eff8728fe698 ("vmlinux.lds.h: Add PGO and AutoFDO input sections") Signed-off-by: Ard Biesheuvel Reviewed-by: Linus Walleij Reviewed-by: Nick Desaulniers Signed-off-by: Russell King --- arch/arm/kernel/entry-armv.S | 25 ++---------------- arch/arm/vfp/vfphw.S | 5 ---- arch/arm/vfp/vfpmodule.c | 49 ++++++++++++++++++++++++++++++++++-- 3 files changed, 49 insertions(+), 30 deletions(-) diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index c4220f51fcf3a..0ea8529a48726 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -252,31 +252,10 @@ __und_svc: #else svc_entry #endif - @ - @ call emulation code, which returns using r9 if it has emulated - @ the instruction, or the more conventional lr if we are to treat - @ this as a real undefined instruction - @ - @ r0 - instruction - @ -#ifndef CONFIG_THUMB2_KERNEL - ldr r0, [r4, #-4] -#else - mov r1, #2 - ldrh r0, [r4, #-2] @ Thumb instruction at LR - 2 - cmp r0, #0xe800 @ 32-bit instruction if xx >= 0 - blo __und_svc_fault - ldrh r9, [r4] @ bottom 16 bits - add r4, r4, #2 - str r4, [sp, #S_PC] - orr r0, r9, r0, lsl #16 -#endif - badr r9, __und_svc_finish - mov r2, r4 - bl call_fpe mov r1, #4 @ PC correction to apply -__und_svc_fault: + THUMB( tst r5, #PSR_T_BIT ) @ exception taken in Thumb mode? + THUMB( movne r1, #2 ) @ if so, fix up PC correction mov r0, sp @ struct pt_regs *regs bl __und_fault diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S index 4fcff9f59947d..d5837bf05a9a5 100644 --- a/arch/arm/vfp/vfphw.S +++ b/arch/arm/vfp/vfphw.S @@ -79,11 +79,6 @@ ENTRY(vfp_support_entry) DBGSTR3 "instr %08x pc %08x state %p", r0, r2, r10 .fpu vfpv2 - ldr r3, [sp, #S_PSR] @ Neither lazy restore nor FP exceptions - and r3, r3, #MODE_MASK @ are supported in kernel mode - teq r3, #USR_MODE - bne vfp_kmode_exception @ Returns through lr - VFPFMRX r1, FPEXC @ Is the VFP enabled? DBGSTR1 "fpexc %08x", r1 tst r1, #FPEXC_EN diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index 8c9e7f9f0277d..c3b6451c18bda 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include "vfpinstr.h" @@ -642,7 +643,9 @@ static int vfp_starting_cpu(unsigned int unused) return 0; } -void vfp_kmode_exception(void) +#ifdef CONFIG_KERNEL_MODE_NEON + +static int vfp_kmode_exception(struct pt_regs *regs, unsigned int instr) { /* * If we reach this point, a floating point exception has been raised @@ -660,9 +663,51 @@ void vfp_kmode_exception(void) pr_crit("BUG: unsupported FP instruction in kernel mode\n"); else pr_crit("BUG: FP instruction issued in kernel mode with FP unit disabled\n"); + pr_crit("FPEXC == 0x%08x\n", fmrx(FPEXC)); + return 1; } -#ifdef CONFIG_KERNEL_MODE_NEON +static struct undef_hook vfp_kmode_exception_hook[] = {{ + .instr_mask = 0xfe000000, + .instr_val = 0xf2000000, + .cpsr_mask = MODE_MASK | PSR_T_BIT, + .cpsr_val = SVC_MODE, + .fn = vfp_kmode_exception, +}, { + .instr_mask = 0xff100000, + .instr_val = 0xf4000000, + .cpsr_mask = MODE_MASK | PSR_T_BIT, + .cpsr_val = SVC_MODE, + .fn = vfp_kmode_exception, +}, { + .instr_mask = 0xef000000, + .instr_val = 0xef000000, + .cpsr_mask = MODE_MASK | PSR_T_BIT, + .cpsr_val = SVC_MODE | PSR_T_BIT, + .fn = vfp_kmode_exception, +}, { + .instr_mask = 0xff100000, + .instr_val = 0xf9000000, + .cpsr_mask = MODE_MASK | PSR_T_BIT, + .cpsr_val = SVC_MODE | PSR_T_BIT, + .fn = vfp_kmode_exception, +}, { + .instr_mask = 0x0c000e00, + .instr_val = 0x0c000a00, + .cpsr_mask = MODE_MASK, + .cpsr_val = SVC_MODE, + .fn = vfp_kmode_exception, +}}; + +static int __init vfp_kmode_exception_hook_init(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(vfp_kmode_exception_hook); i++) + register_undef_hook(&vfp_kmode_exception_hook[i]); + return 0; +} +core_initcall(vfp_kmode_exception_hook_init); /* * Kernel-side NEON support functions -- GitLab From e64ab473dddaffdfc4bd0b385204f472f2cb00d6 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Mon, 30 Nov 2020 22:42:36 +0100 Subject: [PATCH 0440/1894] ARM: 9034/1: __div64_32(): straighten up inline asm constraints The ARM version of __div64_32() encapsulates a call to __do_div64 with non-standard argument passing. In particular, __n is a 64-bit input argument assigned to r0-r1 and __rem is an output argument sharing half of that r0-r1 register pair. With __n being an input argument, the compiler is in its right to presume that r0-r1 would still hold the value of __n past the inline assembly statement. Normally, the compiler would have assigned non overlapping registers to __n and __rem if the value for __n is needed again. However, here we enforce our own register assignment and gcc fails to notice the conflict. In practice this doesn't cause any problem as __n is considered dead after the asm statement and *n is overwritten. However this is not always guaranteed and clang rightfully complains. Let's fix it properly by making __n into an input-output variable. This makes it clear that those registers representing __n have been modified. Then we can extract __rem as the high part of __n with plain C code. This asm constraint "abuse" was likely relied upon back when gcc didn't handle 64-bit values optimally. Turns out that gcc is now able to optimize things and produces the same code with this patch applied. Reported-by: Antony Yu Signed-off-by: Nicolas Pitre Reviewed-by: Ard Biesheuvel Tested-by: Ard Biesheuvel Signed-off-by: Russell King --- arch/arm/include/asm/div64.h | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/arch/arm/include/asm/div64.h b/arch/arm/include/asm/div64.h index 898e9c78a7e75..595e538f5bfb5 100644 --- a/arch/arm/include/asm/div64.h +++ b/arch/arm/include/asm/div64.h @@ -21,29 +21,20 @@ * assembly implementation with completely non standard calling convention * for arguments and results (beware). */ - -#ifdef __ARMEB__ -#define __xh "r0" -#define __xl "r1" -#else -#define __xl "r0" -#define __xh "r1" -#endif - static inline uint32_t __div64_32(uint64_t *n, uint32_t base) { register unsigned int __base asm("r4") = base; register unsigned long long __n asm("r0") = *n; register unsigned long long __res asm("r2"); - register unsigned int __rem asm(__xh); - asm( __asmeq("%0", __xh) + unsigned int __rem; + asm( __asmeq("%0", "r0") __asmeq("%1", "r2") - __asmeq("%2", "r0") - __asmeq("%3", "r4") + __asmeq("%2", "r4") "bl __do_div64" - : "=r" (__rem), "=r" (__res) - : "r" (__n), "r" (__base) + : "+r" (__n), "=r" (__res) + : "r" (__base) : "ip", "lr", "cc"); + __rem = __n >> 32; *n = __res; return __rem; } -- GitLab From c0bc969c176b10598b31d5d1a5edf9a5261f0a9f Mon Sep 17 00:00:00 2001 From: Carl Philipp Klemm Date: Mon, 7 Dec 2020 20:58:01 +0100 Subject: [PATCH 0441/1894] ARM: omap2: pmic-cpcap: fix maximum voltage to be consistent with defaults on xt875 xt875 comes up with a iva voltage of 1375000 and android runs at this too. fix maximum voltage to be consistent with this. Signed-off-by: Carl Philipp Klemm Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/pmic-cpcap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-omap2/pmic-cpcap.c b/arch/arm/mach-omap2/pmic-cpcap.c index eab281a5fc9f7..09076ad0576d9 100644 --- a/arch/arm/mach-omap2/pmic-cpcap.c +++ b/arch/arm/mach-omap2/pmic-cpcap.c @@ -71,7 +71,7 @@ static struct omap_voltdm_pmic omap_cpcap_iva = { .vp_vstepmin = OMAP4_VP_VSTEPMIN_VSTEPMIN, .vp_vstepmax = OMAP4_VP_VSTEPMAX_VSTEPMAX, .vddmin = 900000, - .vddmax = 1350000, + .vddmax = 1375000, .vp_timeout_us = OMAP4_VP_VLIMITTO_TIMEOUT_US, .i2c_slave_addr = 0x44, .volt_reg_addr = 0x0, -- GitLab From c25ce589dca10d64dde139ae093abc258a32869c Mon Sep 17 00:00:00 2001 From: Finn Behrens Date: Mon, 23 Nov 2020 15:15:33 +0100 Subject: [PATCH 0442/1894] tweewide: Fix most Shebang lines Change every shebang which does not need an argument to use /usr/bin/env. This is needed as not every distro has everything under /usr/bin, sometimes not even bash. Signed-off-by: Finn Behrens Signed-off-by: Masahiro Yamada --- Documentation/sphinx/parse-headers.pl | 2 +- Documentation/target/tcm_mod_builder.py | 2 +- Documentation/trace/postprocess/decode_msr.py | 2 +- Documentation/trace/postprocess/trace-pagealloc-postprocess.pl | 2 +- Documentation/trace/postprocess/trace-vmscan-postprocess.pl | 2 +- arch/ia64/scripts/unwcheck.py | 2 +- scripts/bloat-o-meter | 2 +- scripts/config | 2 +- scripts/diffconfig | 2 +- scripts/get_abi.pl | 2 +- scripts/show_delta | 2 +- scripts/sphinx-pre-install | 2 +- scripts/split-man.pl | 2 +- scripts/tracing/draw_functrace.py | 2 +- tools/perf/python/tracepoint.py | 2 +- tools/perf/python/twatch.py | 2 +- tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py | 2 +- tools/testing/ktest/compare-ktest-sample.pl | 2 +- tools/testing/kunit/kunit.py | 2 +- tools/testing/kunit/kunit_tool_test.py | 2 +- tools/testing/selftests/bpf/test_offload.py | 2 +- .../selftests/drivers/net/mlxsw/sharedbuffer_configuration.py | 2 +- tools/testing/selftests/kselftest/prefix.pl | 2 +- tools/testing/selftests/net/devlink_port_split.py | 2 +- tools/testing/selftests/tc-testing/tdc_batch.py | 2 +- tools/testing/selftests/tc-testing/tdc_multibatch.py | 2 +- 26 files changed, 26 insertions(+), 26 deletions(-) diff --git a/Documentation/sphinx/parse-headers.pl b/Documentation/sphinx/parse-headers.pl index 1910079f984fb..b063f2f1cfb25 100755 --- a/Documentation/sphinx/parse-headers.pl +++ b/Documentation/sphinx/parse-headers.pl @@ -1,4 +1,4 @@ -#!/usr/bin/perl +#!/usr/bin/env perl use strict; use Text::Tabs; use Getopt::Long; diff --git a/Documentation/target/tcm_mod_builder.py b/Documentation/target/tcm_mod_builder.py index 1548d84204996..54492aa813b9b 100755 --- a/Documentation/target/tcm_mod_builder.py +++ b/Documentation/target/tcm_mod_builder.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python # The TCM v4 multi-protocol fabric module generation script for drivers/target/$NEW_MOD # # Copyright (c) 2010 Rising Tide Systems diff --git a/Documentation/trace/postprocess/decode_msr.py b/Documentation/trace/postprocess/decode_msr.py index 0ab40e0db5809..aa9cc7abd5c2b 100644 --- a/Documentation/trace/postprocess/decode_msr.py +++ b/Documentation/trace/postprocess/decode_msr.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python # add symbolic names to read_msr / write_msr in trace # decode_msr msr-index.h < trace import sys diff --git a/Documentation/trace/postprocess/trace-pagealloc-postprocess.pl b/Documentation/trace/postprocess/trace-pagealloc-postprocess.pl index 0a120aae33ce5..b9b7d80c2f9d2 100644 --- a/Documentation/trace/postprocess/trace-pagealloc-postprocess.pl +++ b/Documentation/trace/postprocess/trace-pagealloc-postprocess.pl @@ -1,4 +1,4 @@ -#!/usr/bin/perl +#!/usr/bin/env perl # This is a POC (proof of concept or piece of crap, take your pick) for reading the # text representation of trace output related to page allocation. It makes an attempt # to extract some high-level information on what is going on. The accuracy of the parser diff --git a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl index 995da15b16cab..2f4e39875fb39 100644 --- a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl +++ b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl @@ -1,4 +1,4 @@ -#!/usr/bin/perl +#!/usr/bin/env perl # This is a POC for reading the text representation of trace output related to # page reclaim. It makes an attempt to extract some high-level information on # what is going on. The accuracy of the parser may vary diff --git a/arch/ia64/scripts/unwcheck.py b/arch/ia64/scripts/unwcheck.py index c55276e31b6b6..bfd1b671e35fc 100644 --- a/arch/ia64/scripts/unwcheck.py +++ b/arch/ia64/scripts/unwcheck.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python # SPDX-License-Identifier: GPL-2.0 # # Usage: unwcheck.py FILE diff --git a/scripts/bloat-o-meter b/scripts/bloat-o-meter index d7ca46c612b34..652e9542043f2 100755 --- a/scripts/bloat-o-meter +++ b/scripts/bloat-o-meter @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python # # Copyright 2004 Matt Mackall # diff --git a/scripts/config b/scripts/config index eee5b7f3a092a..8c8d7c3d7accc 100755 --- a/scripts/config +++ b/scripts/config @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # SPDX-License-Identifier: GPL-2.0 # Manipulate options in a .config file from the command line diff --git a/scripts/diffconfig b/scripts/diffconfig index 89abf777f1973..627eba5849b5c 100755 --- a/scripts/diffconfig +++ b/scripts/diffconfig @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python # SPDX-License-Identifier: GPL-2.0 # # diffconfig - a tool to compare .config files. diff --git a/scripts/get_abi.pl b/scripts/get_abi.pl index 68dab828a722d..92d9aa6cc4f5d 100755 --- a/scripts/get_abi.pl +++ b/scripts/get_abi.pl @@ -1,4 +1,4 @@ -#!/usr/bin/perl +#!/usr/bin/env perl # SPDX-License-Identifier: GPL-2.0 use strict; diff --git a/scripts/show_delta b/scripts/show_delta index 264399307c4fc..28e67e1781941 100755 --- a/scripts/show_delta +++ b/scripts/show_delta @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python # SPDX-License-Identifier: GPL-2.0-only # # show_deltas: Read list of printk messages instrumented with diff --git a/scripts/sphinx-pre-install b/scripts/sphinx-pre-install index 40fa6923e80ae..828a8615a9181 100755 --- a/scripts/sphinx-pre-install +++ b/scripts/sphinx-pre-install @@ -1,4 +1,4 @@ -#!/usr/bin/perl +#!/usr/bin/env perl # SPDX-License-Identifier: GPL-2.0-or-later use strict; diff --git a/scripts/split-man.pl b/scripts/split-man.pl index c3db607ee9ec1..96bd99dc977a5 100755 --- a/scripts/split-man.pl +++ b/scripts/split-man.pl @@ -1,4 +1,4 @@ -#!/usr/bin/perl +#!/usr/bin/env perl # SPDX-License-Identifier: GPL-2.0 # # Author: Mauro Carvalho Chehab diff --git a/scripts/tracing/draw_functrace.py b/scripts/tracing/draw_functrace.py index b657357585209..74f8aadfd4cbc 100755 --- a/scripts/tracing/draw_functrace.py +++ b/scripts/tracing/draw_functrace.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python # SPDX-License-Identifier: GPL-2.0-only """ diff --git a/tools/perf/python/tracepoint.py b/tools/perf/python/tracepoint.py index eb76f6516247e..461848c7f57dc 100755 --- a/tools/perf/python/tracepoint.py +++ b/tools/perf/python/tracepoint.py @@ -1,4 +1,4 @@ -#! /usr/bin/python +#! /usr/bin/env python # SPDX-License-Identifier: GPL-2.0 # -*- python -*- # -*- coding: utf-8 -*- diff --git a/tools/perf/python/twatch.py b/tools/perf/python/twatch.py index ff87ccf5b7085..04f3db29b9bc1 100755 --- a/tools/perf/python/twatch.py +++ b/tools/perf/python/twatch.py @@ -1,4 +1,4 @@ -#! /usr/bin/python +#! /usr/bin/env python # SPDX-License-Identifier: GPL-2.0-only # -*- python -*- # -*- coding: utf-8 -*- diff --git a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py index 3c47865bb247a..e15e20696d17b 100755 --- a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py +++ b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python # SPDX-License-Identifier: GPL-2.0-only # -*- coding: utf-8 -*- # diff --git a/tools/testing/ktest/compare-ktest-sample.pl b/tools/testing/ktest/compare-ktest-sample.pl index 4118eb4a842d2..ebea21d0a1be8 100755 --- a/tools/testing/ktest/compare-ktest-sample.pl +++ b/tools/testing/ktest/compare-ktest-sample.pl @@ -1,4 +1,4 @@ -#!/usr/bin/perl +#!/usr/bin/env perl # SPDX-License-Identifier: GPL-2.0 open (IN,"ktest.pl"); diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index d4f7846d0745b..21516e293d171 100755 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 # # A thin wrapper on top of the KUnit Kernel diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index 497ab51bc1702..b593f4448e839 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 # # A collection of tests for tools/testing/kunit/kunit.py diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index 43c9cda199b82..f736d34b89e1f 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # Copyright (C) 2017 Netronome Systems, Inc. # Copyright (c) 2019 Mellanox Technologies. All rights reserved diff --git a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py index 0d4b9327c9b3f..2223337eed0c1 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py +++ b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python # SPDX-License-Identifier: GPL-2.0 import subprocess diff --git a/tools/testing/selftests/kselftest/prefix.pl b/tools/testing/selftests/kselftest/prefix.pl index 31f7c2a0a8bd4..12a7f4ca2684d 100755 --- a/tools/testing/selftests/kselftest/prefix.pl +++ b/tools/testing/selftests/kselftest/prefix.pl @@ -1,4 +1,4 @@ -#!/usr/bin/perl +#!/usr/bin/env perl # SPDX-License-Identifier: GPL-2.0 # Prefix all lines with "# ", unbuffered. Command being piped in may need # to have unbuffering forced with "stdbuf -i0 -o0 -e0 $cmd". diff --git a/tools/testing/selftests/net/devlink_port_split.py b/tools/testing/selftests/net/devlink_port_split.py index 58bb7e9b88cec..834066d465fc1 100755 --- a/tools/testing/selftests/net/devlink_port_split.py +++ b/tools/testing/selftests/net/devlink_port_split.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 from subprocess import PIPE, Popen diff --git a/tools/testing/selftests/tc-testing/tdc_batch.py b/tools/testing/selftests/tc-testing/tdc_batch.py index 995f66ce43eba..35d5d94937842 100755 --- a/tools/testing/selftests/tc-testing/tdc_batch.py +++ b/tools/testing/selftests/tc-testing/tdc_batch.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 """ tdc_batch.py - a script to generate TC batch file diff --git a/tools/testing/selftests/tc-testing/tdc_multibatch.py b/tools/testing/selftests/tc-testing/tdc_multibatch.py index 5e7237952e497..48e1f17ff2e86 100755 --- a/tools/testing/selftests/tc-testing/tdc_multibatch.py +++ b/tools/testing/selftests/tc-testing/tdc_multibatch.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 """ tdc_multibatch.py - a thin wrapper over tdc_batch.py to generate multiple batch -- GitLab From c93e4aeed1be5b99715a9127f5b38d6b4ab9e5d7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 24 Nov 2020 16:43:17 +0100 Subject: [PATCH 0443/1894] Makefile.extrawarn: remove -Wnested-externs warning The -Wnested-externs warning has become useless with gcc, since this warns every time that BUILD_BUG_ON() or similar macros are used. With clang, the warning option does nothing to start with, so just remove it entirely. Suggested-by: Nathan Chancellor Signed-off-by: Arnd Bergmann Reviewed-by: Nathan Chancellor Signed-off-by: Masahiro Yamada --- scripts/Makefile.extrawarn | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index 6baee1200615d..d538255038747 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -61,7 +61,6 @@ endif ifneq ($(findstring 2, $(KBUILD_EXTRA_WARN)),) KBUILD_CFLAGS += -Wdisabled-optimization -KBUILD_CFLAGS += -Wnested-externs KBUILD_CFLAGS += -Wshadow KBUILD_CFLAGS += $(call cc-option, -Wlogical-op) KBUILD_CFLAGS += -Wmissing-field-initializers -- GitLab From 7b675649be2217786847dee13597a0ab8502cc40 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 24 Oct 2020 21:38:39 +0900 Subject: [PATCH 0444/1894] kconfig: qconf: drop Qt4 support It is possible to keep this compatible with both Qt4 and Qt5, but it is questionable if it is worth the efforts; it would require us to test this on both of them, and prevent us from using new features in Qt5. Qt5 was released in 2012, and now widely available. Drop the Qt4 support. Signed-off-by: Masahiro Yamada --- scripts/kconfig/qconf-cfg.sh | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/scripts/kconfig/qconf-cfg.sh b/scripts/kconfig/qconf-cfg.sh index 02ccc0ae10318..d1eb2407c35d0 100755 --- a/scripts/kconfig/qconf-cfg.sh +++ b/scripts/kconfig/qconf-cfg.sh @@ -2,7 +2,6 @@ # SPDX-License-Identifier: GPL-2.0 PKG="Qt5Core Qt5Gui Qt5Widgets" -PKG2="QtCore QtGui" if [ -z "$(command -v pkg-config)" ]; then echo >&2 "*" @@ -18,15 +17,8 @@ if pkg-config --exists $PKG; then exit 0 fi -if pkg-config --exists $PKG2; then - echo cflags=\"$(pkg-config --cflags $PKG2)\" - echo libs=\"$(pkg-config --libs $PKG2)\" - echo moc=\"$(pkg-config --variable=moc_location QtCore)\" - exit 0 -fi - echo >&2 "*" -echo >&2 "* Could not find Qt via pkg-config." -echo >&2 "* Please install either Qt 4.8 or 5.x. and make sure it's in PKG_CONFIG_PATH" +echo >&2 "* Could not find Qt5 via pkg-config." +echo >&2 "* Please install Qt5 and make sure it's in PKG_CONFIG_PATH" echo >&2 "*" exit 1 -- GitLab From 7cd0158703a4828252f10a4c4519778fa069ffdf Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 24 Oct 2020 21:38:40 +0900 Subject: [PATCH 0445/1894] kconfig: qconf: use a variable to pass packages to pkg-config The variable, PKG, is defined at the beginning of this script. Signed-off-by: Masahiro Yamada --- scripts/kconfig/qconf-cfg.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/kconfig/qconf-cfg.sh b/scripts/kconfig/qconf-cfg.sh index d1eb2407c35d0..fa564cd795b7c 100755 --- a/scripts/kconfig/qconf-cfg.sh +++ b/scripts/kconfig/qconf-cfg.sh @@ -11,7 +11,7 @@ if [ -z "$(command -v pkg-config)" ]; then fi if pkg-config --exists $PKG; then - echo cflags=\"-std=c++11 -fPIC $(pkg-config --cflags Qt5Core Qt5Gui Qt5Widgets)\" + echo cflags=\"-std=c++11 -fPIC $(pkg-config --cflags $PKG)\" echo libs=\"$(pkg-config --libs $PKG)\" echo moc=\"$(pkg-config --variable=host_bins Qt5Core)/moc\" exit 0 -- GitLab From a2574c12df0d77eef293d2f388d7e05df33b6155 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 24 Oct 2020 21:38:41 +0900 Subject: [PATCH 0446/1894] kconfig: qconf: convert to Qt5 new signal/slot connection syntax Now that the Qt4 support was dropped, we can use the new connection syntax supported by Qt5. It provides compile-time checking of the validity of the connection. Previously, the connection between signals and slots were checked only run-time. Commit d85de3399f97 ("kconfig: qconf: fix signal connection to invalid slots") fixed wrong slots. This change makes it possible to catch such mistakes easily. Signed-off-by: Masahiro Yamada Tested-by: Boris Kolpackov --- scripts/kconfig/qconf.cc | 136 ++++++++++++++++++++++----------------- 1 file changed, 78 insertions(+), 58 deletions(-) diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc index f7eb093614f27..cbe749b44b1ad 100644 --- a/scripts/kconfig/qconf.cc +++ b/scripts/kconfig/qconf.cc @@ -310,15 +310,16 @@ ConfigList::ConfigList(QWidget *parent, const char *name) setHeaderLabels(QStringList() << "Option" << "Name" << "Value"); - connect(this, SIGNAL(itemSelectionChanged(void)), - SLOT(updateSelection(void))); + connect(this, &ConfigList::itemSelectionChanged, + this, &ConfigList::updateSelection); if (name) { configSettings->beginGroup(name); showName = configSettings->value("/showName", false).toBool(); optMode = (enum optionMode)configSettings->value("/optionMode", 0).toInt(); configSettings->endGroup(); - connect(configApp, SIGNAL(aboutToQuit()), SLOT(saveSettings())); + connect(configApp, &QApplication::aboutToQuit, + this, &ConfigList::saveSettings); } showColumn(promptColIdx); @@ -888,10 +889,10 @@ void ConfigList::contextMenuEvent(QContextMenuEvent *e) headerPopup = new QMenu(this); action = new QAction("Show Name", this); action->setCheckable(true); - connect(action, SIGNAL(toggled(bool)), - SLOT(setShowName(bool))); - connect(this, SIGNAL(showNameChanged(bool)), - action, SLOT(setChecked(bool))); + connect(action, &QAction::toggled, + this, &ConfigList::setShowName); + connect(this, &ConfigList::showNameChanged, + action, &QAction::setChecked); action->setChecked(showName); headerPopup->addAction(action); } @@ -936,15 +937,18 @@ ConfigInfoView::ConfigInfoView(QWidget* parent, const char *name) configSettings->beginGroup(objectName()); setShowDebug(configSettings->value("/showDebug", false).toBool()); configSettings->endGroup(); - connect(configApp, SIGNAL(aboutToQuit()), SLOT(saveSettings())); + connect(configApp, &QApplication::aboutToQuit, + this, &ConfigInfoView::saveSettings); } contextMenu = createStandardContextMenu(); QAction *action = new QAction("Show Debug Info", contextMenu); action->setCheckable(true); - connect(action, SIGNAL(toggled(bool)), SLOT(setShowDebug(bool))); - connect(this, SIGNAL(showDebugChanged(bool)), action, SLOT(setChecked(bool))); + connect(action, &QAction::toggled, + this, &ConfigInfoView::setShowDebug); + connect(this, &ConfigInfoView::showDebugChanged, + action, &QAction::setChecked); action->setChecked(showDebug()); contextMenu->addSeparator(); contextMenu->addAction(action); @@ -1231,11 +1235,13 @@ ConfigSearchWindow::ConfigSearchWindow(ConfigMainWindow *parent) layout2->setSpacing(6); layout2->addWidget(new QLabel("Find:", this)); editField = new QLineEdit(this); - connect(editField, SIGNAL(returnPressed()), SLOT(search())); + connect(editField, &QLineEdit::returnPressed, + this, &ConfigSearchWindow::search); layout2->addWidget(editField); searchButton = new QPushButton("Search", this); searchButton->setAutoDefault(false); - connect(searchButton, SIGNAL(clicked()), SLOT(search())); + connect(searchButton, &QPushButton::clicked, + this, &ConfigSearchWindow::search); layout2->addWidget(searchButton); layout1->addLayout(layout2); @@ -1244,10 +1250,10 @@ ConfigSearchWindow::ConfigSearchWindow(ConfigMainWindow *parent) list = new ConfigList(split, "search"); list->mode = listMode; info = new ConfigInfoView(split, "search"); - connect(list, SIGNAL(menuChanged(struct menu *)), - info, SLOT(setInfo(struct menu *))); - connect(list, SIGNAL(menuChanged(struct menu *)), - parent, SLOT(setMenuLink(struct menu *))); + connect(list, &ConfigList::menuChanged, + info, &ConfigInfoView::setInfo); + connect(list, &ConfigList::menuChanged, + parent, &ConfigMainWindow::setMenuLink); layout1->addWidget(split); @@ -1267,7 +1273,8 @@ ConfigSearchWindow::ConfigSearchWindow(ConfigMainWindow *parent) if (ok) split->setSizes(sizes); configSettings->endGroup(); - connect(configApp, SIGNAL(aboutToQuit()), SLOT(saveSettings())); + connect(configApp, &QApplication::aboutToQuit, + this, &ConfigSearchWindow::saveSettings); } void ConfigSearchWindow::saveSettings(void) @@ -1367,19 +1374,23 @@ ConfigMainWindow::ConfigMainWindow(void) configList->setFocus(); backAction = new QAction(QPixmap(xpm_back), "Back", this); - connect(backAction, SIGNAL(triggered(bool)), SLOT(goBack())); + connect(backAction, &QAction::triggered, + this, &ConfigMainWindow::goBack); QAction *quitAction = new QAction("&Quit", this); quitAction->setShortcut(Qt::CTRL + Qt::Key_Q); - connect(quitAction, SIGNAL(triggered(bool)), SLOT(close())); + connect(quitAction, &QAction::triggered, + this, &ConfigMainWindow::close); QAction *loadAction = new QAction(QPixmap(xpm_load), "&Load", this); loadAction->setShortcut(Qt::CTRL + Qt::Key_L); - connect(loadAction, SIGNAL(triggered(bool)), SLOT(loadConfig())); + connect(loadAction, &QAction::triggered, + this, &ConfigMainWindow::loadConfig); saveAction = new QAction(QPixmap(xpm_save), "&Save", this); saveAction->setShortcut(Qt::CTRL + Qt::Key_S); - connect(saveAction, SIGNAL(triggered(bool)), SLOT(saveConfig())); + connect(saveAction, &QAction::triggered, + this, &ConfigMainWindow::saveConfig); conf_set_changed_callback(conf_changed); @@ -1388,31 +1399,37 @@ ConfigMainWindow::ConfigMainWindow(void) configname = xstrdup(conf_get_configname()); QAction *saveAsAction = new QAction("Save &As...", this); - connect(saveAsAction, SIGNAL(triggered(bool)), SLOT(saveConfigAs())); + connect(saveAsAction, &QAction::triggered, + this, &ConfigMainWindow::saveConfigAs); QAction *searchAction = new QAction("&Find", this); searchAction->setShortcut(Qt::CTRL + Qt::Key_F); - connect(searchAction, SIGNAL(triggered(bool)), SLOT(searchConfig())); + connect(searchAction, &QAction::triggered, + this, &ConfigMainWindow::searchConfig); singleViewAction = new QAction(QPixmap(xpm_single_view), "Single View", this); singleViewAction->setCheckable(true); - connect(singleViewAction, SIGNAL(triggered(bool)), SLOT(showSingleView())); + connect(singleViewAction, &QAction::triggered, + this, &ConfigMainWindow::showSingleView); splitViewAction = new QAction(QPixmap(xpm_split_view), "Split View", this); splitViewAction->setCheckable(true); - connect(splitViewAction, SIGNAL(triggered(bool)), SLOT(showSplitView())); + connect(splitViewAction, &QAction::triggered, + this, &ConfigMainWindow::showSplitView); fullViewAction = new QAction(QPixmap(xpm_tree_view), "Full View", this); fullViewAction->setCheckable(true); - connect(fullViewAction, SIGNAL(triggered(bool)), SLOT(showFullView())); + connect(fullViewAction, &QAction::triggered, + this, &ConfigMainWindow::showFullView); QAction *showNameAction = new QAction("Show Name", this); showNameAction->setCheckable(true); - connect(showNameAction, SIGNAL(toggled(bool)), configList, SLOT(setShowName(bool))); + connect(showNameAction, &QAction::toggled, + configList, &ConfigList::setShowName); showNameAction->setChecked(configList->showName); QActionGroup *optGroup = new QActionGroup(this); optGroup->setExclusive(true); - connect(optGroup, SIGNAL(triggered(QAction*)), configList, - SLOT(setOptionMode(QAction *))); - connect(optGroup, SIGNAL(triggered(QAction *)), menuList, - SLOT(setOptionMode(QAction *))); + connect(optGroup, &QActionGroup::triggered, + configList, &ConfigList::setOptionMode); + connect(optGroup, &QActionGroup::triggered, + menuList, &ConfigList::setOptionMode); ConfigList::showNormalAction = new QAction("Show Normal Options", optGroup); ConfigList::showNormalAction->setCheckable(true); @@ -1423,13 +1440,16 @@ ConfigMainWindow::ConfigMainWindow(void) QAction *showDebugAction = new QAction("Show Debug Info", this); showDebugAction->setCheckable(true); - connect(showDebugAction, SIGNAL(toggled(bool)), helpText, SLOT(setShowDebug(bool))); + connect(showDebugAction, &QAction::toggled, + helpText, &ConfigInfoView::setShowDebug); showDebugAction->setChecked(helpText->showDebug()); QAction *showIntroAction = new QAction("Introduction", this); - connect(showIntroAction, SIGNAL(triggered(bool)), SLOT(showIntro())); + connect(showIntroAction, &QAction::triggered, + this, &ConfigMainWindow::showIntro); QAction *showAboutAction = new QAction("About", this); - connect(showAboutAction, SIGNAL(triggered(bool)), SLOT(showAbout())); + connect(showAboutAction, &QAction::triggered, + this, &ConfigMainWindow::showAbout); // init tool bar QToolBar *toolBar = addToolBar("Tools"); @@ -1467,30 +1487,30 @@ ConfigMainWindow::ConfigMainWindow(void) menu->addAction(showIntroAction); menu->addAction(showAboutAction); - connect (helpText, SIGNAL (anchorClicked (const QUrl &)), - helpText, SLOT (clicked (const QUrl &)) ); - - connect(configList, SIGNAL(menuChanged(struct menu *)), - helpText, SLOT(setInfo(struct menu *))); - connect(configList, SIGNAL(menuSelected(struct menu *)), - SLOT(changeMenu(struct menu *))); - connect(configList, SIGNAL(itemSelected(struct menu *)), - SLOT(changeItens(struct menu *))); - connect(configList, SIGNAL(parentSelected()), - SLOT(goBack())); - connect(menuList, SIGNAL(menuChanged(struct menu *)), - helpText, SLOT(setInfo(struct menu *))); - connect(menuList, SIGNAL(menuSelected(struct menu *)), - SLOT(changeMenu(struct menu *))); - - connect(configList, SIGNAL(gotFocus(struct menu *)), - helpText, SLOT(setInfo(struct menu *))); - connect(menuList, SIGNAL(gotFocus(struct menu *)), - helpText, SLOT(setInfo(struct menu *))); - connect(menuList, SIGNAL(gotFocus(struct menu *)), - SLOT(listFocusChanged(void))); - connect(helpText, SIGNAL(menuSelected(struct menu *)), - SLOT(setMenuLink(struct menu *))); + connect(helpText, &ConfigInfoView::anchorClicked, + helpText, &ConfigInfoView::clicked); + + connect(configList, &ConfigList::menuChanged, + helpText, &ConfigInfoView::setInfo); + connect(configList, &ConfigList::menuSelected, + this, &ConfigMainWindow::changeMenu); + connect(configList, &ConfigList::itemSelected, + this, &ConfigMainWindow::changeItens); + connect(configList, &ConfigList::parentSelected, + this, &ConfigMainWindow::goBack); + connect(menuList, &ConfigList::menuChanged, + helpText, &ConfigInfoView::setInfo); + connect(menuList, &ConfigList::menuSelected, + this, &ConfigMainWindow::changeMenu); + + connect(configList, &ConfigList::gotFocus, + helpText, &ConfigInfoView::setInfo); + connect(menuList, &ConfigList::gotFocus, + helpText, &ConfigInfoView::setInfo); + connect(menuList, &ConfigList::gotFocus, + this, &ConfigMainWindow::listFocusChanged); + connect(helpText, &ConfigInfoView::menuSelected, + this, &ConfigMainWindow::setMenuLink); QString listMode = configSettings->value("/listMode", "symbol").toString(); if (listMode == "single") -- GitLab From 98ebea7ba891569c3678c5cd2fd1960098e84f4e Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Thu, 29 Oct 2020 17:51:51 +0200 Subject: [PATCH 0447/1894] kconfig: make lkc.h self-sufficient #include-wise Signed-off-by: Boris Kolpackov Signed-off-by: Masahiro Yamada --- scripts/kconfig/lkc.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/kconfig/lkc.h b/scripts/kconfig/lkc.h index 8454649b17bd5..bee2413bda633 100644 --- a/scripts/kconfig/lkc.h +++ b/scripts/kconfig/lkc.h @@ -6,6 +6,10 @@ #ifndef LKC_H #define LKC_H +#include +#include +#include + #include "expr.h" #ifdef __cplusplus -- GitLab From f463269fb940d2a4259169b1e87aab8d259a9ec4 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 2 Nov 2020 11:59:57 +0900 Subject: [PATCH 0448/1894] kconfig: qconf: show Qt version in the About dialog You can get the Qt version by running "pkg-config --modversion Qt5Core" or something, but this might be useful to get the runtime Qt version more easily. Go to the menu "Help" -> "About", then you can see it. Signed-off-by: Masahiro Yamada --- scripts/kconfig/qconf.cc | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc index cbe749b44b1ad..d000869b787c8 100644 --- a/scripts/kconfig/qconf.cc +++ b/scripts/kconfig/qconf.cc @@ -1799,10 +1799,13 @@ void ConfigMainWindow::showIntro(void) void ConfigMainWindow::showAbout(void) { static const QString str = "qconf is Copyright (C) 2002 Roman Zippel .\n" - "Copyright (C) 2015 Boris Barbulovski .\n\n" - "Bug reports and feature request can also be entered at http://bugzilla.kernel.org/\n"; + "Copyright (C) 2015 Boris Barbulovski .\n" + "\n" + "Bug reports and feature request can also be entered at http://bugzilla.kernel.org/\n" + "\n" + "Qt Version: "; - QMessageBox::information(this, "qconf", str); + QMessageBox::information(this, "qconf", str + qVersion()); } void ConfigMainWindow::saveSettings(void) -- GitLab From 78cb09078352d032b12e2af7feb9b5b7f0fa794c Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Mon, 23 Nov 2020 11:38:18 +0200 Subject: [PATCH 0449/1894] kconfig: clean up header inclusion - Add missing includes. - Remove no longer necessary includes. Signed-off-by: Boris Kolpackov Signed-off-by: Masahiro Yamada --- scripts/kconfig/conf.c | 1 - scripts/kconfig/confdata.c | 1 + scripts/kconfig/lexer.l | 1 - scripts/kconfig/symbol.c | 2 +- 4 files changed, 2 insertions(+), 3 deletions(-) diff --git a/scripts/kconfig/conf.c b/scripts/kconfig/conf.c index f6e548b8f7955..db03e2f45de42 100644 --- a/scripts/kconfig/conf.c +++ b/scripts/kconfig/conf.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c index a39d93e3c6ae8..2568dbe16ed64 100644 --- a/scripts/kconfig/confdata.c +++ b/scripts/kconfig/confdata.c @@ -5,6 +5,7 @@ #include #include +#include #include #include #include diff --git a/scripts/kconfig/lexer.l b/scripts/kconfig/lexer.l index 240109f965aeb..9c22cb5546734 100644 --- a/scripts/kconfig/lexer.l +++ b/scripts/kconfig/lexer.l @@ -12,7 +12,6 @@ #include #include #include -#include #include "lkc.h" #include "parser.tab.h" diff --git a/scripts/kconfig/symbol.c b/scripts/kconfig/symbol.c index ffa3ec65cc907..fe38e6fd2c2a4 100644 --- a/scripts/kconfig/symbol.c +++ b/scripts/kconfig/symbol.c @@ -3,11 +3,11 @@ * Copyright (C) 2002 Roman Zippel */ +#include #include #include #include #include -#include #include "lkc.h" -- GitLab From 0cc519f85a527e1c5ad5a7f182105fe614e9ff80 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Dec 2020 19:51:49 +0000 Subject: [PATCH 0450/1894] KVM: arm64: Fix nVHE boot on VHE systems Conflict resolution gone astray results in the kernel not booting on VHE-capable HW when VHE support is disabled. Thankfully spotted by David. Reported-by: David Brazdil Signed-off-by: Marc Zyngier --- arch/arm64/kernel/head.S | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 7eba3a1e84b0e..9576830294387 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -515,8 +515,11 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL) */ mrs x2, id_aa64mmfr1_el1 ubfx x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4 - cbz x2, init_el2_nvhe +#else + mov x2, xzr #endif + cbz x2, init_el2_nvhe + /* * When VHE _is_ in use, EL1 will not be used in the host and * requires no configuration, and all non-hyp-specific EL2 setup -- GitLab From 4dbe44fb538c59a4adae5abfa9ded2f310250315 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 7 Dec 2020 18:40:53 +0100 Subject: [PATCH 0451/1894] efi: capsule: clean scatter-gather entries from the D-cache Scatter-gather lists passed to UpdateCapsule() should be cleaned from the D-cache to ensure that they are visible to the CPU after a warm reboot before the MMU is enabled. On ARM and arm64 systems, this implies a D-cache clean by virtual address to the point of coherency. However, due to the fact that the firmware itself is not able to map physical addresses back to virtual addresses when running under the OS, this must be done by the caller. Signed-off-by: Ard Biesheuvel --- arch/arm/include/asm/efi.h | 5 +++++ arch/arm64/include/asm/efi.h | 5 +++++ drivers/firmware/efi/capsule.c | 12 ++++++++++++ 3 files changed, 22 insertions(+) diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h index 3ee4f43819850..e9a06e164e069 100644 --- a/arch/arm/include/asm/efi.h +++ b/arch/arm/include/asm/efi.h @@ -93,4 +93,9 @@ struct efi_arm_entry_state { u32 sctlr_after_ebs; }; +static inline void efi_capsule_flush_cache_range(void *addr, int size) +{ + __cpuc_flush_dcache_area(addr, size); +} + #endif /* _ASM_ARM_EFI_H */ diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 973b144152711..00bd1e179d36f 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -141,4 +141,9 @@ static inline void efi_set_pgd(struct mm_struct *mm) void efi_virtmap_load(void); void efi_virtmap_unload(void); +static inline void efi_capsule_flush_cache_range(void *addr, int size) +{ + __flush_dcache_area(addr, size); +} + #endif /* _ASM_EFI_H */ diff --git a/drivers/firmware/efi/capsule.c b/drivers/firmware/efi/capsule.c index 43f6fe7bfe80f..768430293669f 100644 --- a/drivers/firmware/efi/capsule.c +++ b/drivers/firmware/efi/capsule.c @@ -12,6 +12,7 @@ #include #include #include +#include #include typedef struct { @@ -265,6 +266,17 @@ int efi_capsule_update(efi_capsule_header_t *capsule, phys_addr_t *pages) else sglist[j].data = page_to_phys(sg_pages[i + 1]); +#if defined(CONFIG_ARM) || defined(CONFIG_ARM64) + /* + * At runtime, the firmware has no way to find out where the + * sglist elements are mapped, if they are mapped in the first + * place. Therefore, on architectures that can only perform + * cache maintenance by virtual address, the firmware is unable + * to perform this maintenance, and so it is up to the OS to do + * it instead. + */ + efi_capsule_flush_cache_range(sglist, PAGE_SIZE); +#endif kunmap_atomic(sglist); } -- GitLab From c0249238feefbbb99d517d06ace4338393901b67 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 12 Nov 2020 15:42:27 +0100 Subject: [PATCH 0452/1894] efi: arm: reduce minimum alignment of uncompressed kernel Now that we reduced the minimum relative alignment between PHYS_OFFSET and PAGE_OFFSET to 2 MiB, we can take this into account when allocating memory for the decompressed kernel when booting via EFI. This minimizes the amount of unusable memory we may end up with due to the base of DRAM being occupied by firmware. Signed-off-by: Ard Biesheuvel --- arch/arm/include/asm/efi.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h index e9a06e164e069..153680541aeaf 100644 --- a/arch/arm/include/asm/efi.h +++ b/arch/arm/include/asm/efi.h @@ -66,13 +66,12 @@ static inline void efifb_setup_from_dmi(struct screen_info *si, const char *opt) #define MAX_UNCOMP_KERNEL_SIZE SZ_32M /* - * phys-to-virt patching requires that the physical to virtual offset fits - * into the immediate field of an add/sub instruction, which comes down to the - * 24 least significant bits being zero, and so the offset should be a multiple - * of 16 MB. Since PAGE_OFFSET itself is a multiple of 16 MB, the physical - * base should be aligned to 16 MB as well. + * phys-to-virt patching requires that the physical to virtual offset is a + * multiple of 2 MiB. However, using an alignment smaller than TEXT_OFFSET + * here throws off the memory allocation logic, so let's use the lowest power + * of two greater than 2 MiB and greater than TEXT_OFFSET. */ -#define EFI_PHYS_ALIGN SZ_16M +#define EFI_PHYS_ALIGN max(SZ_2M, roundup_pow_of_two(TEXT_OFFSET)) /* on ARM, the FDT should be located in a lowmem region */ static inline unsigned long efi_get_max_fdt_addr(unsigned long image_addr) @@ -83,7 +82,7 @@ static inline unsigned long efi_get_max_fdt_addr(unsigned long image_addr) /* on ARM, the initrd should be loaded in a lowmem region */ static inline unsigned long efi_get_max_initrd_addr(unsigned long image_addr) { - return round_down(image_addr, EFI_PHYS_ALIGN) + SZ_512M; + return round_down(image_addr, SZ_4M) + SZ_512M; } struct efi_arm_entry_state { -- GitLab From ff20661bb54cd57a18207b33cc57eb8d5c758a86 Mon Sep 17 00:00:00 2001 From: Heinrich Schuchardt Date: Fri, 27 Nov 2020 20:20:51 +0100 Subject: [PATCH 0453/1894] efi/efi_test: read RuntimeServicesSupported Since the UEFI 2.8A specification the UEFI enabled firmware provides a configuration table EFI_RT_PROPERTIES_TABLE which indicates which runtime services are enabled. The EFI stub reads this table and saves the value of the field RuntimeServicesSupported internally. The Firmware Test Suite requires the value to determine if UEFI runtime services are correctly implemented. With this patch an IOCTL call is provided to read the value of the field RuntimeServicesSupported, e.g. #define EFI_RUNTIME_GET_SUPPORTED_MASK \ _IOR('p', 0x0C, unsigned int) unsigned int mask; fd = open("/dev/efi_test", O_RDWR); ret = ioctl(fd, EFI_RUNTIME_GET_SUPPORTED_MASK, &mask); Signed-off-by: Heinrich Schuchardt Link: https://lore.kernel.org/r/20201127192051.1430-1-xypron.glpk@gmx.de Acked-by: Colin Ian King Acked-by: Ivan Hu Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/test/efi_test.c | 16 ++++++++++++++++ drivers/firmware/efi/test/efi_test.h | 3 +++ 2 files changed, 19 insertions(+) diff --git a/drivers/firmware/efi/test/efi_test.c b/drivers/firmware/efi/test/efi_test.c index ddf9eae396fe1..47d67bb0a516d 100644 --- a/drivers/firmware/efi/test/efi_test.c +++ b/drivers/firmware/efi/test/efi_test.c @@ -663,6 +663,19 @@ out: return rv; } +static long efi_runtime_get_supported_mask(unsigned long arg) +{ + unsigned int __user *supported_mask; + int rv = 0; + + supported_mask = (unsigned int *)arg; + + if (put_user(efi.runtime_supported_mask, supported_mask)) + rv = -EFAULT; + + return rv; +} + static long efi_test_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -699,6 +712,9 @@ static long efi_test_ioctl(struct file *file, unsigned int cmd, case EFI_RUNTIME_RESET_SYSTEM: return efi_runtime_reset_system(arg); + + case EFI_RUNTIME_GET_SUPPORTED_MASK: + return efi_runtime_get_supported_mask(arg); } return -ENOTTY; diff --git a/drivers/firmware/efi/test/efi_test.h b/drivers/firmware/efi/test/efi_test.h index f2446aa1c2e3d..117349e579937 100644 --- a/drivers/firmware/efi/test/efi_test.h +++ b/drivers/firmware/efi/test/efi_test.h @@ -118,4 +118,7 @@ struct efi_resetsystem { #define EFI_RUNTIME_RESET_SYSTEM \ _IOW('p', 0x0B, struct efi_resetsystem) +#define EFI_RUNTIME_GET_SUPPORTED_MASK \ + _IOR('p', 0x0C, unsigned int) + #endif /* _DRIVERS_FIRMWARE_EFI_TEST_H_ */ -- GitLab From 54649911f31b6e7c2a79a1426ca98259139e4c35 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 29 Oct 2020 14:49:01 +0100 Subject: [PATCH 0454/1894] efi: stub: get rid of efi_get_max_fdt_addr() Now that ARM started following the example of arm64 and RISC-V, and no longer imposes any restrictions on the placement of the FDT in memory at boot, we no longer need per-arch implementations of efi_get_max_fdt_addr() to factor out the differences. So get rid of it. Signed-off-by: Ard Biesheuvel Reviewed-by: Atish Patra Link: https://lore.kernel.org/r/20201029134901.9773-1-ardb@kernel.org --- arch/arm/include/asm/efi.h | 6 ------ arch/arm64/include/asm/efi.h | 6 ------ arch/riscv/include/asm/efi.h | 6 ------ drivers/firmware/efi/libstub/efi-stub.c | 1 - drivers/firmware/efi/libstub/efistub.h | 1 - drivers/firmware/efi/libstub/fdt.c | 3 +-- 6 files changed, 1 insertion(+), 22 deletions(-) diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h index 153680541aeaf..abae071a02e16 100644 --- a/arch/arm/include/asm/efi.h +++ b/arch/arm/include/asm/efi.h @@ -73,12 +73,6 @@ static inline void efifb_setup_from_dmi(struct screen_info *si, const char *opt) */ #define EFI_PHYS_ALIGN max(SZ_2M, roundup_pow_of_two(TEXT_OFFSET)) -/* on ARM, the FDT should be located in a lowmem region */ -static inline unsigned long efi_get_max_fdt_addr(unsigned long image_addr) -{ - return round_down(image_addr, EFI_PHYS_ALIGN) + SZ_512M; -} - /* on ARM, the initrd should be loaded in a lowmem region */ static inline unsigned long efi_get_max_initrd_addr(unsigned long image_addr) { diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 00bd1e179d36f..3578aba9c6080 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -64,12 +64,6 @@ efi_status_t __efi_rt_asm_wrapper(void *, const char *, ...); #define EFI_KIMG_ALIGN \ (SEGMENT_ALIGN > THREAD_ALIGN ? SEGMENT_ALIGN : THREAD_ALIGN) -/* on arm64, the FDT may be located anywhere in system RAM */ -static inline unsigned long efi_get_max_fdt_addr(unsigned long image_addr) -{ - return ULONG_MAX; -} - /* * On arm64, we have to ensure that the initrd ends up in the linear region, * which is a 1 GB aligned region of size '1UL << (VA_BITS_MIN - 1)' that is diff --git a/arch/riscv/include/asm/efi.h b/arch/riscv/include/asm/efi.h index 7542282f1141c..6d98cd999680b 100644 --- a/arch/riscv/include/asm/efi.h +++ b/arch/riscv/include/asm/efi.h @@ -27,12 +27,6 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); #define ARCH_EFI_IRQ_FLAGS_MASK (SR_IE | SR_SPIE) -/* on RISC-V, the FDT may be located anywhere in system RAM */ -static inline unsigned long efi_get_max_fdt_addr(unsigned long image_addr) -{ - return ULONG_MAX; -} - /* Load initrd at enough distance from DRAM start */ static inline unsigned long efi_get_max_initrd_addr(unsigned long image_addr) { diff --git a/drivers/firmware/efi/libstub/efi-stub.c b/drivers/firmware/efi/libstub/efi-stub.c index 914a343c7785c..ec2f3985bef35 100644 --- a/drivers/firmware/efi/libstub/efi-stub.c +++ b/drivers/firmware/efi/libstub/efi-stub.c @@ -273,7 +273,6 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, install_memreserve_table(); status = allocate_new_fdt_and_exit_boot(handle, &fdt_addr, - efi_get_max_fdt_addr(image_addr), initrd_addr, initrd_size, cmdline_ptr, fdt_addr, fdt_size); if (status != EFI_SUCCESS) diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index b8ec29d6a74ac..b50a6c67d9bd2 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -750,7 +750,6 @@ efi_status_t efi_exit_boot_services(void *handle, efi_status_t allocate_new_fdt_and_exit_boot(void *handle, unsigned long *new_fdt_addr, - unsigned long max_addr, u64 initrd_addr, u64 initrd_size, char *cmdline_ptr, unsigned long fdt_addr, diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c index 368cd60000eec..365c3a43a1982 100644 --- a/drivers/firmware/efi/libstub/fdt.c +++ b/drivers/firmware/efi/libstub/fdt.c @@ -238,7 +238,6 @@ static efi_status_t exit_boot_func(struct efi_boot_memmap *map, efi_status_t allocate_new_fdt_and_exit_boot(void *handle, unsigned long *new_fdt_addr, - unsigned long max_addr, u64 initrd_addr, u64 initrd_size, char *cmdline_ptr, unsigned long fdt_addr, @@ -275,7 +274,7 @@ efi_status_t allocate_new_fdt_and_exit_boot(void *handle, efi_info("Exiting boot services and installing virtual address map...\n"); map.map = &memory_map; - status = efi_allocate_pages(MAX_FDT_SIZE, new_fdt_addr, max_addr); + status = efi_allocate_pages(MAX_FDT_SIZE, new_fdt_addr, ULONG_MAX); if (status != EFI_SUCCESS) { efi_err("Unable to allocate memory for new device tree.\n"); goto fail; -- GitLab From 44fd9fb599d3d2be4c6838f4b11eaa459bb33989 Mon Sep 17 00:00:00 2001 From: Bean Huo Date: Mon, 7 Dec 2020 20:01:36 +0100 Subject: [PATCH 0455/1894] scsi: ufs: Remove unused macro definition POWER_DESC_MAX_SIZE POWER_DESC_MAX_SIZE is unused, remove it. Link: https://lore.kernel.org/r/20201207190137.6858-2-huobean@gmail.com Acked-by: Avri Altman Acked-by: Alim Akhtar Signed-off-by: Bean Huo Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufs.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/ufs/ufs.h b/drivers/scsi/ufs/ufs.h index d593edb487677..142c2ebd3d7d8 100644 --- a/drivers/scsi/ufs/ufs.h +++ b/drivers/scsi/ufs/ufs.h @@ -330,7 +330,6 @@ enum { UFS_DEV_WRITE_BOOSTER_SUP = BIT(8), }; -#define POWER_DESC_MAX_SIZE 0x62 #define POWER_DESC_MAX_ACTV_ICC_LVLS 16 /* Attribute bActiveICCLevel parameter bit masks definitions */ -- GitLab From 1fa0570002e3f66db9b58c32c60de4183b857a19 Mon Sep 17 00:00:00 2001 From: Bean Huo Date: Mon, 7 Dec 2020 20:01:37 +0100 Subject: [PATCH 0456/1894] scsi: ufs: Fix wrong print message in dev_err() Change dev_err() print message from "dme-reset" to "dme_enable" in function ufshcd_dme_enable(). Link: https://lore.kernel.org/r/20201207190137.6858-3-huobean@gmail.com Acked-by: Alim Akhtar Acked-by: Avri Altman Signed-off-by: Bean Huo Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index c1c401b2b69d0..6bcedebae4a8f 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -3641,7 +3641,7 @@ static int ufshcd_dme_enable(struct ufs_hba *hba) ret = ufshcd_send_uic_cmd(hba, &uic_cmd); if (ret) dev_err(hba->dev, - "dme-reset: error code %d\n", ret); + "dme-enable: error code %d\n", ret); return ret; } -- GitLab From 1918651f2d7e8d58c9b7c49755c61e41ed655009 Mon Sep 17 00:00:00 2001 From: Randall Huang Date: Mon, 30 Nov 2020 20:14:02 -0800 Subject: [PATCH 0457/1894] scsi: ufs: Clear UAC for RPMB after ufshcd resets If RPMB is not provisioned, we may see RPMB failure after UFS suspend/resume. Inject request_sense to clear uac in ufshcd reset flow. Link: https://lore.kernel.org/r/20201201041402.3860525-1-jaegeuk@kernel.org Reported-by: kernel test robot Reviewed-by: Stanley Chu Signed-off-by: Randall Huang Signed-off-by: Leo Liou Signed-off-by: Jaegeuk Kim Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 6bcedebae4a8f..a84a8759437ac 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -225,6 +225,7 @@ static int ufshcd_reset_and_restore(struct ufs_hba *hba); static int ufshcd_eh_host_reset_handler(struct scsi_cmnd *cmd); static int ufshcd_clear_tm_cmd(struct ufs_hba *hba, int tag); static void ufshcd_hba_exit(struct ufs_hba *hba); +static int ufshcd_clear_ua_wluns(struct ufs_hba *hba); static int ufshcd_probe_hba(struct ufs_hba *hba, bool async); static int ufshcd_setup_clocks(struct ufs_hba *hba, bool on); static int ufshcd_uic_hibern8_enter(struct ufs_hba *hba); @@ -6895,7 +6896,8 @@ static int ufshcd_host_reset_and_restore(struct ufs_hba *hba) /* Establish the link again and restore the device */ err = ufshcd_probe_hba(hba, false); - + if (!err) + ufshcd_clear_ua_wluns(hba); out: if (err) dev_err(hba->dev, "%s: Host init failed %d\n", __func__, err); @@ -8379,13 +8381,7 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba, * handling context. */ hba->host->eh_noresume = 1; - if (hba->wlun_dev_clr_ua) { - ret = ufshcd_send_request_sense(hba, sdp); - if (ret) - goto out; - /* Unit attention condition is cleared now */ - hba->wlun_dev_clr_ua = false; - } + ufshcd_clear_ua_wluns(hba); cmd[4] = pwr_mode << 4; @@ -8406,7 +8402,7 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba, if (!ret) hba->curr_dev_pwr_mode = pwr_mode; -out: + scsi_device_put(sdp); hba->host->eh_noresume = 0; return ret; -- GitLab From f8162ac70ecf5a3ed638f96dc10e0e19b523ec7f Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Mon, 7 Dec 2020 13:49:54 +0800 Subject: [PATCH 0458/1894] scsi: ufs: Allow regulators being always-on Introduce a flag "always_on" in struct ufs_vreg to allow vendors to keep the regulator always-on. Link: https://lore.kernel.org/r/20201207054955.24366-2-stanley.chu@mediatek.com Reviewed-by: Andy Teng Signed-off-by: Stanley Chu Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufs.h | 1 + drivers/scsi/ufs/ufshcd.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/ufs/ufs.h b/drivers/scsi/ufs/ufs.h index 142c2ebd3d7d8..14dfda735adf5 100644 --- a/drivers/scsi/ufs/ufs.h +++ b/drivers/scsi/ufs/ufs.h @@ -512,6 +512,7 @@ struct ufs_query_res { struct ufs_vreg { struct regulator *reg; const char *name; + bool always_on; bool enabled; int min_uV; int max_uV; diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index a84a8759437ac..4f3ac2566322e 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -8012,7 +8012,7 @@ static int ufshcd_disable_vreg(struct device *dev, struct ufs_vreg *vreg) { int ret = 0; - if (!vreg || !vreg->enabled) + if (!vreg || !vreg->enabled || vreg->always_on) goto out; ret = regulator_disable(vreg->reg); -- GitLab From b3f3d31a528f78d9903253a23a5e5c6bf5280f40 Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Mon, 7 Dec 2020 13:49:55 +0800 Subject: [PATCH 0459/1894] scsi: ufs-mediatek: Keep VCC always-on for specific devices For some devices which need extra delay after VCC power down, VCC shall be kept always-on in some MediaTek UFS platforms to ensure the stability of such devices because the extra delay may not be enough in those platforms. Link: https://lore.kernel.org/r/20201207054955.24366-3-stanley.chu@mediatek.com Reviewed-by: Andy Teng Signed-off-by: Stanley Chu Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufs-mediatek.c | 21 +++++++++++++++++++++ drivers/scsi/ufs/ufs-mediatek.h | 1 + 2 files changed, 22 insertions(+) diff --git a/drivers/scsi/ufs/ufs-mediatek.c b/drivers/scsi/ufs/ufs-mediatek.c index 3522458db3bbd..80618af7c8720 100644 --- a/drivers/scsi/ufs/ufs-mediatek.c +++ b/drivers/scsi/ufs/ufs-mediatek.c @@ -70,6 +70,13 @@ static bool ufs_mtk_is_va09_supported(struct ufs_hba *hba) return !!(host->caps & UFS_MTK_CAP_VA09_PWR_CTRL); } +static bool ufs_mtk_is_broken_vcc(struct ufs_hba *hba) +{ + struct ufs_mtk_host *host = ufshcd_get_variant(hba); + + return !!(host->caps & UFS_MTK_CAP_BROKEN_VCC); +} + static void ufs_mtk_cfg_unipro_cg(struct ufs_hba *hba, bool enable) { u32 tmp; @@ -514,6 +521,9 @@ static void ufs_mtk_init_host_caps(struct ufs_hba *hba) if (of_property_read_bool(np, "mediatek,ufs-disable-ah8")) host->caps |= UFS_MTK_CAP_DISABLE_AH8; + if (of_property_read_bool(np, "mediatek,ufs-broken-vcc")) + host->caps |= UFS_MTK_CAP_BROKEN_VCC; + dev_info(hba->dev, "caps: 0x%x", host->caps); } @@ -1003,6 +1013,17 @@ static int ufs_mtk_apply_dev_quirks(struct ufs_hba *hba) static void ufs_mtk_fixup_dev_quirks(struct ufs_hba *hba) { ufshcd_fixup_dev_quirks(hba, ufs_mtk_dev_fixups); + + if (ufs_mtk_is_broken_vcc(hba) && hba->vreg_info.vcc && + (hba->dev_quirks & UFS_DEVICE_QUIRK_DELAY_AFTER_LPM)) { + hba->vreg_info.vcc->always_on = true; + /* + * VCC will be kept always-on thus we don't + * need any delay during regulator operations + */ + hba->dev_quirks &= ~(UFS_DEVICE_QUIRK_DELAY_BEFORE_LPM | + UFS_DEVICE_QUIRK_DELAY_AFTER_LPM); + } } static void ufs_mtk_event_notify(struct ufs_hba *hba, diff --git a/drivers/scsi/ufs/ufs-mediatek.h b/drivers/scsi/ufs/ufs-mediatek.h index 93d35097dfb0a..3f0d3bb769e89 100644 --- a/drivers/scsi/ufs/ufs-mediatek.h +++ b/drivers/scsi/ufs/ufs-mediatek.h @@ -81,6 +81,7 @@ enum ufs_mtk_host_caps { UFS_MTK_CAP_BOOST_CRYPT_ENGINE = 1 << 0, UFS_MTK_CAP_VA09_PWR_CTRL = 1 << 1, UFS_MTK_CAP_DISABLE_AH8 = 1 << 2, + UFS_MTK_CAP_BROKEN_VCC = 1 << 3, }; struct ufs_mtk_crypt_cfg { -- GitLab From c763729a10e538d997744317cf4a1c4f25266066 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 7 Dec 2020 10:31:17 +0200 Subject: [PATCH 0460/1894] scsi: ufs-pci: Fix restore from S4 for Intel controllers Currently, ufshcd-pci is the only UFS driver with support for suspend-to-disk PM callbacks (i.e. freeze/thaw/restore/poweroff). These callbacks are set by the macro SET_SYSTEM_SLEEP_PM_OPS to the same functions as system suspend/resume. That will work with spm_lvl 5 because spm_lvl 5 will result in a full restore for the ->restore() callback. In the absence of a full restore, the host controller registers will have values set up by the restore kernel (the kernel that boots and loads the restore image) which are not necessarily the same. However it turns out, the only registers that sometimes need restore are the base address registers. This has gone un-noticed because, depending on IOMMU settings, the kernel can end up allocating the same addresses every time. For Intel controllers, an spm_lvl other than 5 can be used, so to support S4 (suspend-to-disk) with spm_lvl other than 5, restore the base address registers. Link: https://lore.kernel.org/r/20201207083120.26732-2-adrian.hunter@intel.com Signed-off-by: Adrian Hunter Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd-pci.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/scsi/ufs/ufshcd-pci.c b/drivers/scsi/ufs/ufshcd-pci.c index df3a564c3e334..360c25f1f061a 100644 --- a/drivers/scsi/ufs/ufshcd-pci.c +++ b/drivers/scsi/ufs/ufshcd-pci.c @@ -163,6 +163,24 @@ static void ufs_intel_common_exit(struct ufs_hba *hba) intel_ltr_hide(hba->dev); } +static int ufs_intel_resume(struct ufs_hba *hba, enum ufs_pm_op op) +{ + /* + * To support S4 (suspend-to-disk) with spm_lvl other than 5, the base + * address registers must be restored because the restore kernel can + * have used different addresses. + */ + ufshcd_writel(hba, lower_32_bits(hba->utrdl_dma_addr), + REG_UTP_TRANSFER_REQ_LIST_BASE_L); + ufshcd_writel(hba, upper_32_bits(hba->utrdl_dma_addr), + REG_UTP_TRANSFER_REQ_LIST_BASE_H); + ufshcd_writel(hba, lower_32_bits(hba->utmrdl_dma_addr), + REG_UTP_TASK_REQ_LIST_BASE_L); + ufshcd_writel(hba, upper_32_bits(hba->utmrdl_dma_addr), + REG_UTP_TASK_REQ_LIST_BASE_H); + return 0; +} + static int ufs_intel_ehl_init(struct ufs_hba *hba) { hba->quirks |= UFSHCD_QUIRK_BROKEN_AUTO_HIBERN8; @@ -174,6 +192,7 @@ static struct ufs_hba_variant_ops ufs_intel_cnl_hba_vops = { .init = ufs_intel_common_init, .exit = ufs_intel_common_exit, .link_startup_notify = ufs_intel_link_startup_notify, + .resume = ufs_intel_resume, }; static struct ufs_hba_variant_ops ufs_intel_ehl_hba_vops = { @@ -181,6 +200,7 @@ static struct ufs_hba_variant_ops ufs_intel_ehl_hba_vops = { .init = ufs_intel_ehl_init, .exit = ufs_intel_common_exit, .link_startup_notify = ufs_intel_link_startup_notify, + .resume = ufs_intel_resume, }; #ifdef CONFIG_PM_SLEEP -- GitLab From af423534d2de86cd0db729a5ac41f056ca8717de Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 7 Dec 2020 10:31:18 +0200 Subject: [PATCH 0461/1894] scsi: ufs-pci: Ensure UFS device is in PowerDown mode for suspend-to-disk ->poweroff() The expectation for suspend-to-disk is that devices will be powered-off, so the UFS device should be put in PowerDown mode. If spm_lvl is not 5, then that will not happen. Change the pm callbacks to force spm_lvl 5 for suspend-to-disk poweroff. Link: https://lore.kernel.org/r/20201207083120.26732-3-adrian.hunter@intel.com Signed-off-by: Adrian Hunter Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd-pci.c | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd-pci.c b/drivers/scsi/ufs/ufshcd-pci.c index 360c25f1f061a..5d33c39fa82f0 100644 --- a/drivers/scsi/ufs/ufshcd-pci.c +++ b/drivers/scsi/ufs/ufshcd-pci.c @@ -227,6 +227,30 @@ static int ufshcd_pci_resume(struct device *dev) { return ufshcd_system_resume(dev_get_drvdata(dev)); } + +/** + * ufshcd_pci_poweroff - suspend-to-disk poweroff function + * @dev: pointer to PCI device handle + * + * Returns 0 if successful + * Returns non-zero otherwise + */ +static int ufshcd_pci_poweroff(struct device *dev) +{ + struct ufs_hba *hba = dev_get_drvdata(dev); + int spm_lvl = hba->spm_lvl; + int ret; + + /* + * For poweroff we need to set the UFS device to PowerDown mode. + * Force spm_lvl to ensure that. + */ + hba->spm_lvl = 5; + ret = ufshcd_system_suspend(hba); + hba->spm_lvl = spm_lvl; + return ret; +} + #endif /* !CONFIG_PM_SLEEP */ #ifdef CONFIG_PM @@ -322,8 +346,14 @@ ufshcd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) } static const struct dev_pm_ops ufshcd_pci_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(ufshcd_pci_suspend, - ufshcd_pci_resume) +#ifdef CONFIG_PM_SLEEP + .suspend = ufshcd_pci_suspend, + .resume = ufshcd_pci_resume, + .freeze = ufshcd_pci_suspend, + .thaw = ufshcd_pci_resume, + .poweroff = ufshcd_pci_poweroff, + .restore = ufshcd_pci_resume, +#endif SET_RUNTIME_PM_OPS(ufshcd_pci_runtime_suspend, ufshcd_pci_runtime_resume, ufshcd_pci_runtime_idle) -- GitLab From 044d5bda7117891d6d0d56f2f807b7b11e120abd Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 7 Dec 2020 10:31:19 +0200 Subject: [PATCH 0462/1894] scsi: ufs-pci: Fix recovery from hibernate exit errors for Intel controllers Intel controllers can end up in an unrecoverable state after a hibernate exit error unless a full reset and restore is done before anything else. Force that to happen. Link: https://lore.kernel.org/r/20201207083120.26732-4-adrian.hunter@intel.com Signed-off-by: Adrian Hunter Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd-pci.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/scsi/ufs/ufshcd-pci.c b/drivers/scsi/ufs/ufshcd-pci.c index 5d33c39fa82f0..888d8c9ca3a55 100644 --- a/drivers/scsi/ufs/ufshcd-pci.c +++ b/drivers/scsi/ufs/ufshcd-pci.c @@ -178,6 +178,23 @@ static int ufs_intel_resume(struct ufs_hba *hba, enum ufs_pm_op op) REG_UTP_TASK_REQ_LIST_BASE_L); ufshcd_writel(hba, upper_32_bits(hba->utmrdl_dma_addr), REG_UTP_TASK_REQ_LIST_BASE_H); + + if (ufshcd_is_link_hibern8(hba)) { + int ret = ufshcd_uic_hibern8_exit(hba); + + if (!ret) { + ufshcd_set_link_active(hba); + } else { + dev_err(hba->dev, "%s: hibern8 exit failed %d\n", + __func__, ret); + /* + * Force reset and restore. Any other actions can lead + * to an unrecoverable state. + */ + ufshcd_set_link_off(hba); + } + } + return 0; } -- GitLab From dd78bdb6f810bdcb173b42379af558c676c8e0aa Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 7 Dec 2020 10:31:20 +0200 Subject: [PATCH 0463/1894] scsi: ufs-pci: Enable UFSHCD_CAP_RPM_AUTOSUSPEND for Intel controllers Enable runtime PM auto-suspend by default for Intel host controllers. Link: https://lore.kernel.org/r/20201207083120.26732-5-adrian.hunter@intel.com Signed-off-by: Adrian Hunter Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd-pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/ufs/ufshcd-pci.c b/drivers/scsi/ufs/ufshcd-pci.c index 888d8c9ca3a55..fadd566025b86 100644 --- a/drivers/scsi/ufs/ufshcd-pci.c +++ b/drivers/scsi/ufs/ufshcd-pci.c @@ -148,6 +148,8 @@ static int ufs_intel_common_init(struct ufs_hba *hba) { struct intel_host *host; + hba->caps |= UFSHCD_CAP_RPM_AUTOSUSPEND; + host = devm_kzalloc(hba->dev, sizeof(*host), GFP_KERNEL); if (!host) return -ENOMEM; -- GitLab From fa4d0f1992a96f6d7c988ef423e3127e613f6ac9 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 8 Dec 2020 21:29:44 -0800 Subject: [PATCH 0464/1894] scsi: block: Fix a race in the runtime power management code With the current implementation the following race can happen: * blk_pre_runtime_suspend() calls blk_freeze_queue_start() and blk_mq_unfreeze_queue(). * blk_queue_enter() calls blk_queue_pm_only() and that function returns true. * blk_queue_enter() calls blk_pm_request_resume() and that function does not call pm_request_resume() because the queue runtime status is RPM_ACTIVE. * blk_pre_runtime_suspend() changes the queue status into RPM_SUSPENDING. Fix this race by changing the queue runtime status into RPM_SUSPENDING before switching q_usage_counter to atomic mode. Link: https://lore.kernel.org/r/20201209052951.16136-2-bvanassche@acm.org Fixes: 986d413b7c15 ("blk-mq: Enable support for runtime power management") Cc: Ming Lei Cc: Rafael J. Wysocki Cc: stable Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Jens Axboe Acked-by: Alan Stern Acked-by: Stanley Chu Co-developed-by: Can Guo Signed-off-by: Can Guo Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- block/blk-pm.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/block/blk-pm.c b/block/blk-pm.c index b85234d758f7b..17bd020268d42 100644 --- a/block/blk-pm.c +++ b/block/blk-pm.c @@ -67,6 +67,10 @@ int blk_pre_runtime_suspend(struct request_queue *q) WARN_ON_ONCE(q->rpm_status != RPM_ACTIVE); + spin_lock_irq(&q->queue_lock); + q->rpm_status = RPM_SUSPENDING; + spin_unlock_irq(&q->queue_lock); + /* * Increase the pm_only counter before checking whether any * non-PM blk_queue_enter() calls are in progress to avoid that any @@ -89,15 +93,14 @@ int blk_pre_runtime_suspend(struct request_queue *q) /* Switch q_usage_counter back to per-cpu mode. */ blk_mq_unfreeze_queue(q); - spin_lock_irq(&q->queue_lock); - if (ret < 0) + if (ret < 0) { + spin_lock_irq(&q->queue_lock); + q->rpm_status = RPM_ACTIVE; pm_runtime_mark_last_busy(q->dev); - else - q->rpm_status = RPM_SUSPENDING; - spin_unlock_irq(&q->queue_lock); + spin_unlock_irq(&q->queue_lock); - if (ret) blk_clear_pm_only(q); + } return ret; } -- GitLab From 0854bcdcdec26aecdc92c303816f349ee1fba2bc Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 8 Dec 2020 21:29:45 -0800 Subject: [PATCH 0465/1894] scsi: block: Introduce BLK_MQ_REQ_PM Introduce the BLK_MQ_REQ_PM flag. This flag makes the request allocation functions set RQF_PM. This is the first step towards removing BLK_MQ_REQ_PREEMPT. Link: https://lore.kernel.org/r/20201209052951.16136-3-bvanassche@acm.org Cc: Alan Stern Cc: Stanley Chu Cc: Ming Lei Cc: Rafael J. Wysocki Cc: Can Guo Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Jens Axboe Reviewed-by: Can Guo Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- block/blk-core.c | 7 ++++--- block/blk-mq.c | 2 ++ include/linux/blk-mq.h | 2 ++ 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 2db8bda43b6e6..10696f9fb6ac6 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -424,11 +424,11 @@ EXPORT_SYMBOL(blk_cleanup_queue); /** * blk_queue_enter() - try to increase q->q_usage_counter * @q: request queue pointer - * @flags: BLK_MQ_REQ_NOWAIT and/or BLK_MQ_REQ_PREEMPT + * @flags: BLK_MQ_REQ_NOWAIT, BLK_MQ_REQ_PM and/or BLK_MQ_REQ_PREEMPT */ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) { - const bool pm = flags & BLK_MQ_REQ_PREEMPT; + const bool pm = flags & (BLK_MQ_REQ_PM | BLK_MQ_REQ_PREEMPT); while (true) { bool success = false; @@ -630,7 +630,8 @@ struct request *blk_get_request(struct request_queue *q, unsigned int op, struct request *req; WARN_ON_ONCE(op & REQ_NOWAIT); - WARN_ON_ONCE(flags & ~(BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_PREEMPT)); + WARN_ON_ONCE(flags & ~(BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_PM | + BLK_MQ_REQ_PREEMPT)); req = blk_mq_alloc_request(q, op, flags); if (!IS_ERR(req) && q->mq_ops->initialize_rq_fn) diff --git a/block/blk-mq.c b/block/blk-mq.c index 1b25ec2fe9bef..b5880a1fb38d9 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -292,6 +292,8 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, rq->mq_hctx = data->hctx; rq->rq_flags = 0; rq->cmd_flags = data->cmd_flags; + if (data->flags & BLK_MQ_REQ_PM) + rq->rq_flags |= RQF_PM; if (data->flags & BLK_MQ_REQ_PREEMPT) rq->rq_flags |= RQF_PREEMPT; if (blk_queue_io_stat(data->q)) diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index b23eeca4d6772..c00e856c6fb1e 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -444,6 +444,8 @@ enum { BLK_MQ_REQ_NOWAIT = (__force blk_mq_req_flags_t)(1 << 0), /* allocate from reserved pool */ BLK_MQ_REQ_RESERVED = (__force blk_mq_req_flags_t)(1 << 1), + /* set RQF_PM */ + BLK_MQ_REQ_PM = (__force blk_mq_req_flags_t)(1 << 2), /* set RQF_PREEMPT */ BLK_MQ_REQ_PREEMPT = (__force blk_mq_req_flags_t)(1 << 3), }; -- GitLab From 96d86e6a80a3ab9aff81d12f9f1f2a0da2917d38 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 8 Dec 2020 21:29:46 -0800 Subject: [PATCH 0466/1894] scsi: ide: Do not set the RQF_PREEMPT flag for sense requests RQF_PREEMPT is used for two different purposes in the legacy IDE code: 1. To mark power management requests. 2. To mark requests that should preempt another request. An (old) explanation of that feature is as follows: "The IDE driver in the Linux kernel normally uses a series of busywait delays during its initialization. When the driver executes these busywaits, the kernel does nothing for the duration of the wait. The time spent in these waits could be used for other initialization activities, if they could be run concurrently with these waits. More specifically, busywait-style delays such as udelay() in module init functions inhibit kernel preemption because the Big Kernel Lock is held, while yielding APIs such as schedule_timeout() allow preemption. This is true because the kernel handles the BKL specially and releases and reacquires it across reschedules allowed by the current thread. This IDE-preempt specification requires that the driver eliminate these busywaits and replace them with a mechanism that allows other work to proceed while the IDE driver is initializing." Since I haven't found an implementation of (2), do not set the PREEMPT flag for sense requests. This patch causes sense requests to be postponed while a drive is suspended instead of being submitted to ide_queue_rq(). If it would ever be necessary to restore the IDE PREEMPT functionality, that can be done by introducing a new flag in struct ide_request. Link: https://lore.kernel.org/r/20201209052951.16136-4-bvanassche@acm.org Cc: David S. Miller Cc: Alan Stern Cc: Can Guo Cc: Stanley Chu Cc: Ming Lei Cc: Rafael J. Wysocki Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Jens Axboe Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ide/ide-atapi.c | 1 - drivers/ide/ide-io.c | 5 ----- 2 files changed, 6 deletions(-) diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index 2162bc80f09e0..013ad33fbbc81 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -223,7 +223,6 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq) sense_rq->rq_disk = rq->rq_disk; sense_rq->cmd_flags = REQ_OP_DRV_IN; ide_req(sense_rq)->type = ATA_PRIV_SENSE; - sense_rq->rq_flags |= RQF_PREEMPT; req->cmd[0] = GPCMD_REQUEST_SENSE; req->cmd[4] = cmd_len; diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 1a53c7a752244..c210ea3bd02fa 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -515,11 +515,6 @@ repeat: * above to return us whatever is in the queue. Since we call * ide_do_request() ourselves, we end up taking requests while * the queue is blocked... - * - * We let requests forced at head of queue with ide-preempt - * though. I hope that doesn't happen too much, hopefully not - * unless the subdriver triggers such a thing in its own PM - * state machine. */ if ((drive->dev_flags & IDE_DFLAG_BLOCKED) && ata_pm_request(rq) == 0 && -- GitLab From 5ae65383fc7633e0247c31b0c8bf0e6ea63b95a3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 8 Dec 2020 21:29:47 -0800 Subject: [PATCH 0467/1894] scsi: ide: Mark power management requests with RQF_PM instead of RQF_PREEMPT This is another step that prepares for the removal of RQF_PREEMPT. Link: https://lore.kernel.org/r/20201209052951.16136-5-bvanassche@acm.org Cc: David S. Miller Cc: Alan Stern Cc: Can Guo Cc: Stanley Chu Cc: Ming Lei Cc: Rafael J. Wysocki Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Jens Axboe Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ide/ide-io.c | 2 +- drivers/ide/ide-pm.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index c210ea3bd02fa..4867b67b60d69 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -518,7 +518,7 @@ repeat: */ if ((drive->dev_flags & IDE_DFLAG_BLOCKED) && ata_pm_request(rq) == 0 && - (rq->rq_flags & RQF_PREEMPT) == 0) { + (rq->rq_flags & RQF_PM) == 0) { /* there should be no pending command at this point */ ide_unlock_port(hwif); goto plug_device; diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c index 192e6c65d34e7..82ab308f1aafe 100644 --- a/drivers/ide/ide-pm.c +++ b/drivers/ide/ide-pm.c @@ -77,7 +77,7 @@ int generic_ide_resume(struct device *dev) } memset(&rqpm, 0, sizeof(rqpm)); - rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, BLK_MQ_REQ_PREEMPT); + rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, BLK_MQ_REQ_PM); ide_req(rq)->type = ATA_PRIV_PM_RESUME; ide_req(rq)->special = &rqpm; rqpm.pm_step = IDE_PM_START_RESUME; -- GitLab From cfefd9f8240a7b9fdd96fcd54cb029870b6d8d88 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 8 Dec 2020 21:29:48 -0800 Subject: [PATCH 0468/1894] scsi: scsi_transport_spi: Set RQF_PM for domain validation commands Disable runtime power management during domain validation. Since a later patch removes RQF_PREEMPT, set RQF_PM for domain validation commands such that these are executed in the quiesced SCSI device state. Link: https://lore.kernel.org/r/20201209052951.16136-6-bvanassche@acm.org Cc: Alan Stern Cc: James Bottomley Cc: Woody Suwalski Cc: Can Guo Cc: Stanley Chu Cc: Ming Lei Cc: Rafael J. Wysocki Cc: Stan Johnson Reviewed-by: Christoph Hellwig Reviewed-by: Jens Axboe Reviewed-by: Hannes Reinecke Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_transport_spi.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/scsi_transport_spi.c b/drivers/scsi/scsi_transport_spi.c index f3d5b1bbd5aa7..c37dd15d16d24 100644 --- a/drivers/scsi/scsi_transport_spi.c +++ b/drivers/scsi/scsi_transport_spi.c @@ -117,12 +117,16 @@ static int spi_execute(struct scsi_device *sdev, const void *cmd, sshdr = &sshdr_tmp; for(i = 0; i < DV_RETRIES; i++) { + /* + * The purpose of the RQF_PM flag below is to bypass the + * SDEV_QUIESCE state. + */ result = scsi_execute(sdev, cmd, dir, buffer, bufflen, sense, sshdr, DV_TIMEOUT, /* retries */ 1, REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER, - 0, NULL); + RQF_PM, NULL); if (driver_byte(result) != DRIVER_SENSE || sshdr->sense_key != UNIT_ATTENTION) break; @@ -1005,23 +1009,26 @@ spi_dv_device(struct scsi_device *sdev) */ lock_system_sleep(); + if (scsi_autopm_get_device(sdev)) + goto unlock_system_sleep; + if (unlikely(spi_dv_in_progress(starget))) - goto unlock; + goto put_autopm; if (unlikely(scsi_device_get(sdev))) - goto unlock; + goto put_autopm; spi_dv_in_progress(starget) = 1; buffer = kzalloc(len, GFP_KERNEL); if (unlikely(!buffer)) - goto out_put; + goto put_sdev; /* We need to verify that the actual device will quiesce; the * later target quiesce is just a nice to have */ if (unlikely(scsi_device_quiesce(sdev))) - goto out_free; + goto free_buffer; scsi_target_quiesce(starget); @@ -1041,12 +1048,16 @@ spi_dv_device(struct scsi_device *sdev) spi_initial_dv(starget) = 1; - out_free: +free_buffer: kfree(buffer); - out_put: + +put_sdev: spi_dv_in_progress(starget) = 0; scsi_device_put(sdev); -unlock: +put_autopm: + scsi_autopm_put_device(sdev); + +unlock_system_sleep: unlock_system_sleep(); } EXPORT_SYMBOL(spi_dv_device); -- GitLab From e6044f714b256259df9611ff49af433e5411c5c8 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 8 Dec 2020 21:29:49 -0800 Subject: [PATCH 0469/1894] scsi: core: Only process PM requests if rpm_status != RPM_ACTIVE Instead of submitting all SCSI commands submitted with scsi_execute() to a SCSI device if rpm_status != RPM_ACTIVE, only submit RQF_PM (power management requests) if rpm_status != RPM_ACTIVE. This patch makes the SCSI core handle the runtime power management status (rpm_status) as it should be handled. Link: https://lore.kernel.org/r/20201209052951.16136-7-bvanassche@acm.org Cc: Can Guo Cc: Stanley Chu Cc: Alan Stern Cc: Ming Lei Cc: Rafael J. Wysocki Cc: Martin Kepplinger Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Jens Axboe Reviewed-by: Can Guo Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_lib.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index b7ac145714157..91bc39a4c3c3e 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -249,7 +249,8 @@ int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, req = blk_get_request(sdev->request_queue, data_direction == DMA_TO_DEVICE ? - REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, BLK_MQ_REQ_PREEMPT); + REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, + rq_flags & RQF_PM ? BLK_MQ_REQ_PM : 0); if (IS_ERR(req)) return ret; rq = scsi_req(req); @@ -1206,6 +1207,8 @@ static blk_status_t scsi_device_state_check(struct scsi_device *sdev, struct request *req) { switch (sdev->sdev_state) { + case SDEV_CREATED: + return BLK_STS_OK; case SDEV_OFFLINE: case SDEV_TRANSPORT_OFFLINE: /* @@ -1232,18 +1235,18 @@ scsi_device_state_check(struct scsi_device *sdev, struct request *req) return BLK_STS_RESOURCE; case SDEV_QUIESCE: /* - * If the devices is blocked we defer normal commands. + * If the device is blocked we only accept power management + * commands. */ - if (req && !(req->rq_flags & RQF_PREEMPT)) + if (req && WARN_ON_ONCE(!(req->rq_flags & RQF_PM))) return BLK_STS_RESOURCE; return BLK_STS_OK; default: /* * For any other not fully online state we only allow - * special commands. In particular any user initiated - * command is not allowed. + * power management commands. */ - if (req && !(req->rq_flags & RQF_PREEMPT)) + if (req && !(req->rq_flags & RQF_PM)) return BLK_STS_IOERR; return BLK_STS_OK; } @@ -2517,15 +2520,13 @@ void sdev_evt_send_simple(struct scsi_device *sdev, EXPORT_SYMBOL_GPL(sdev_evt_send_simple); /** - * scsi_device_quiesce - Block user issued commands. + * scsi_device_quiesce - Block all commands except power management. * @sdev: scsi device to quiesce. * * This works by trying to transition to the SDEV_QUIESCE state * (which must be a legal transition). When the device is in this - * state, only special requests will be accepted, all others will - * be deferred. Since special requests may also be requeued requests, - * a successful return doesn't guarantee the device will be - * totally quiescent. + * state, only power management requests will be accepted, all others will + * be deferred. * * Must be called with user context, may sleep. * @@ -2587,12 +2588,12 @@ void scsi_device_resume(struct scsi_device *sdev) * device deleted during suspend) */ mutex_lock(&sdev->state_mutex); + if (sdev->sdev_state == SDEV_QUIESCE) + scsi_device_set_state(sdev, SDEV_RUNNING); if (sdev->quiesced_by) { sdev->quiesced_by = NULL; blk_clear_pm_only(sdev->request_queue); } - if (sdev->sdev_state == SDEV_QUIESCE) - scsi_device_set_state(sdev, SDEV_RUNNING); mutex_unlock(&sdev->state_mutex); } EXPORT_SYMBOL(scsi_device_resume); -- GitLab From a4d34da715e3cb7e0741fe603dcd511bed067e00 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 8 Dec 2020 21:29:50 -0800 Subject: [PATCH 0470/1894] scsi: block: Remove RQF_PREEMPT and BLK_MQ_REQ_PREEMPT Remove flag RQF_PREEMPT and BLK_MQ_REQ_PREEMPT since these are no longer used by any kernel code. Link: https://lore.kernel.org/r/20201209052951.16136-8-bvanassche@acm.org Cc: Can Guo Cc: Stanley Chu Cc: Alan Stern Cc: Ming Lei Cc: Rafael J. Wysocki Cc: Martin Kepplinger Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Jens Axboe Reviewed-by: Can Guo Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- block/blk-core.c | 7 +++---- block/blk-mq-debugfs.c | 1 - block/blk-mq.c | 2 -- include/linux/blk-mq.h | 2 -- include/linux/blkdev.h | 6 +----- 5 files changed, 4 insertions(+), 14 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 10696f9fb6ac6..a00bce9f46d88 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -424,11 +424,11 @@ EXPORT_SYMBOL(blk_cleanup_queue); /** * blk_queue_enter() - try to increase q->q_usage_counter * @q: request queue pointer - * @flags: BLK_MQ_REQ_NOWAIT, BLK_MQ_REQ_PM and/or BLK_MQ_REQ_PREEMPT + * @flags: BLK_MQ_REQ_NOWAIT and/or BLK_MQ_REQ_PM */ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) { - const bool pm = flags & (BLK_MQ_REQ_PM | BLK_MQ_REQ_PREEMPT); + const bool pm = flags & BLK_MQ_REQ_PM; while (true) { bool success = false; @@ -630,8 +630,7 @@ struct request *blk_get_request(struct request_queue *q, unsigned int op, struct request *req; WARN_ON_ONCE(op & REQ_NOWAIT); - WARN_ON_ONCE(flags & ~(BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_PM | - BLK_MQ_REQ_PREEMPT)); + WARN_ON_ONCE(flags & ~(BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_PM)); req = blk_mq_alloc_request(q, op, flags); if (!IS_ERR(req) && q->mq_ops->initialize_rq_fn) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 3094542e12ae0..9336a6f8d6efd 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -297,7 +297,6 @@ static const char *const rqf_name[] = { RQF_NAME(MIXED_MERGE), RQF_NAME(MQ_INFLIGHT), RQF_NAME(DONTPREP), - RQF_NAME(PREEMPT), RQF_NAME(FAILED), RQF_NAME(QUIET), RQF_NAME(ELVPRIV), diff --git a/block/blk-mq.c b/block/blk-mq.c index b5880a1fb38d9..d50504888b682 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -294,8 +294,6 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, rq->cmd_flags = data->cmd_flags; if (data->flags & BLK_MQ_REQ_PM) rq->rq_flags |= RQF_PM; - if (data->flags & BLK_MQ_REQ_PREEMPT) - rq->rq_flags |= RQF_PREEMPT; if (blk_queue_io_stat(data->q)) rq->rq_flags |= RQF_IO_STAT; INIT_LIST_HEAD(&rq->queuelist); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index c00e856c6fb1e..88af1df943081 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -446,8 +446,6 @@ enum { BLK_MQ_REQ_RESERVED = (__force blk_mq_req_flags_t)(1 << 1), /* set RQF_PM */ BLK_MQ_REQ_PM = (__force blk_mq_req_flags_t)(1 << 2), - /* set RQF_PREEMPT */ - BLK_MQ_REQ_PREEMPT = (__force blk_mq_req_flags_t)(1 << 3), }; struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 639cae2c158b5..7d4b746f7e6af 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -79,9 +79,6 @@ typedef __u32 __bitwise req_flags_t; #define RQF_MQ_INFLIGHT ((__force req_flags_t)(1 << 6)) /* don't call prep for this one */ #define RQF_DONTPREP ((__force req_flags_t)(1 << 7)) -/* set for "ide_preempt" requests and also for requests for which the SCSI - "quiesce" state must be ignored. */ -#define RQF_PREEMPT ((__force req_flags_t)(1 << 8)) /* vaguely specified driver internal error. Ignored by the block layer */ #define RQF_FAILED ((__force req_flags_t)(1 << 10)) /* don't warn about errors */ @@ -430,8 +427,7 @@ struct request_queue { unsigned long queue_flags; /* * Number of contexts that have called blk_set_pm_only(). If this - * counter is above zero then only RQF_PM and RQF_PREEMPT requests are - * processed. + * counter is above zero then only RQF_PM requests are processed. */ atomic_t pm_only; -- GitLab From 52abca64fd9410ea6c9a3a74eab25663b403d7da Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Tue, 8 Dec 2020 21:29:51 -0800 Subject: [PATCH 0471/1894] scsi: block: Do not accept any requests while suspended blk_queue_enter() accepts BLK_MQ_REQ_PM requests independent of the runtime power management state. Now that SCSI domain validation no longer depends on this behavior, modify the behavior of blk_queue_enter() as follows: - Do not accept any requests while suspended. - Only process power management requests while suspending or resuming. Submitting BLK_MQ_REQ_PM requests to a device that is runtime suspended causes runtime-suspended devices not to resume as they should. The request which should cause a runtime resume instead gets issued directly, without resuming the device first. Of course the device can't handle it properly, the I/O fails, and the device remains suspended. The problem is fixed by checking that the queue's runtime-PM status isn't RPM_SUSPENDED before allowing a request to be issued, and queuing a runtime-resume request if it is. In particular, the inline blk_pm_request_resume() routine is renamed blk_pm_resume_queue() and the code is unified by merging the surrounding checks into the routine. If the queue isn't set up for runtime PM, or there currently is no restriction on allowed requests, the request is allowed. Likewise if the BLK_MQ_REQ_PM flag is set and the status isn't RPM_SUSPENDED. Otherwise a runtime resume is queued and the request is blocked until conditions are more suitable. [ bvanassche: modified commit message and removed Cc: stable because without the previous patches from this series this patch would break parallel SCSI domain validation + introduced queue_rpm_status() ] Link: https://lore.kernel.org/r/20201209052951.16136-9-bvanassche@acm.org Cc: Jens Axboe Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Can Guo Cc: Stanley Chu Cc: Ming Lei Cc: Rafael J. Wysocki Reported-and-tested-by: Martin Kepplinger Reviewed-by: Hannes Reinecke Reviewed-by: Can Guo Signed-off-by: Alan Stern Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- block/blk-core.c | 7 ++++--- block/blk-pm.h | 14 +++++++++----- include/linux/blkdev.h | 12 ++++++++++++ 3 files changed, 25 insertions(+), 8 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index a00bce9f46d88..2d53e2ff48ff8 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -440,7 +441,8 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) * responsible for ensuring that that counter is * globally visible before the queue is unfrozen. */ - if (pm || !blk_queue_pm_only(q)) { + if ((pm && queue_rpm_status(q) != RPM_SUSPENDED) || + !blk_queue_pm_only(q)) { success = true; } else { percpu_ref_put(&q->q_usage_counter); @@ -465,8 +467,7 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) wait_event(q->mq_freeze_wq, (!q->mq_freeze_depth && - (pm || (blk_pm_request_resume(q), - !blk_queue_pm_only(q)))) || + blk_pm_resume_queue(pm, q)) || blk_queue_dying(q)); if (blk_queue_dying(q)) return -ENODEV; diff --git a/block/blk-pm.h b/block/blk-pm.h index ea5507d23e759..a2283cc9f716d 100644 --- a/block/blk-pm.h +++ b/block/blk-pm.h @@ -6,11 +6,14 @@ #include #ifdef CONFIG_PM -static inline void blk_pm_request_resume(struct request_queue *q) +static inline int blk_pm_resume_queue(const bool pm, struct request_queue *q) { - if (q->dev && (q->rpm_status == RPM_SUSPENDED || - q->rpm_status == RPM_SUSPENDING)) - pm_request_resume(q->dev); + if (!q->dev || !blk_queue_pm_only(q)) + return 1; /* Nothing to do */ + if (pm && q->rpm_status != RPM_SUSPENDED) + return 1; /* Request allowed */ + pm_request_resume(q->dev); + return 0; } static inline void blk_pm_mark_last_busy(struct request *rq) @@ -44,8 +47,9 @@ static inline void blk_pm_put_request(struct request *rq) --rq->q->nr_pending; } #else -static inline void blk_pm_request_resume(struct request_queue *q) +static inline int blk_pm_resume_queue(const bool pm, struct request_queue *q) { + return 1; } static inline void blk_pm_mark_last_busy(struct request *rq) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7d4b746f7e6af..2b6fc3fb3a99d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -692,6 +692,18 @@ static inline bool queue_is_mq(struct request_queue *q) return q->mq_ops; } +#ifdef CONFIG_PM +static inline enum rpm_status queue_rpm_status(struct request_queue *q) +{ + return q->rpm_status; +} +#else +static inline enum rpm_status queue_rpm_status(struct request_queue *q) +{ + return RPM_ACTIVE; +} +#endif + static inline enum blk_zoned_model blk_queue_zoned_model(struct request_queue *q) { -- GitLab From 8b3c8035297e71abb9e6d0f50ceab50d33c0d64b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 9 Dec 2020 17:03:11 +0300 Subject: [PATCH 0472/1894] scsi: mpt3sas: Signedness bug in _base_get_diag_triggers() The "trigger_flags" variable needs to be signed for the error checking to work. Link: https://lore.kernel.org/r/X9DZH37bYPHwSQRP@mwanda Fixes: aec93e8e2385 ("scsi: mpt3sas: Add persistent trigger pages support") Signed-off-by: Dan Carpenter Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index b129f3734ed0a..26537d503a8b6 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -5027,7 +5027,7 @@ _base_check_for_trigger_pages_support(struct MPT3SAS_ADAPTER *ioc) static void _base_get_diag_triggers(struct MPT3SAS_ADAPTER *ioc) { - u16 trigger_flags; + int trigger_flags; /* * Default setting of master trigger. -- GitLab From 5213dc7940e0aa4c094413e015790c4a310ef36c Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Wed, 9 Dec 2020 14:31:44 +0800 Subject: [PATCH 0473/1894] scsi: ufs-mediatek: Use correct path to fix compile error When the kernel is compiled with allmodconfig, the following error is reported: In file included from drivers/scsi/ufs/ufs-mediatek-trace.h:36:0, from drivers/scsi/ufs/ufs-mediatek.c:28: ./include/trace/define_trace.h:95:42: fatal error: ./ufs-mediatek-trace.h: No such file or directory #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) The comment in include/trace/define_trace.h specifies that: TRACE_INCLUDE_PATH: Note, the path is relative to define_trace.h, not the file including it. Full path names for out of tree modules must be used. So without "CFLAGS_ufs-mediatek.o := -I$(src)", the current directory "." is "include/trace/", the relative path of ufs-mediatek-trace.h is "../../drivers/scsi/ufs/". Link: https://lore.kernel.org/r/20201209063144.1840-2-thunder.leizhen@huawei.com Fixes: ca1bb061d644 ("scsi: ufs-mediatek: Introduce event_notify implementation") Reviewed-by: Stanley Chu Signed-off-by: Zhen Lei Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufs-mediatek-trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ufs/ufs-mediatek-trace.h b/drivers/scsi/ufs/ufs-mediatek-trace.h index fd6f84c1b4e22..895e82ea6ece5 100644 --- a/drivers/scsi/ufs/ufs-mediatek-trace.h +++ b/drivers/scsi/ufs/ufs-mediatek-trace.h @@ -31,6 +31,6 @@ TRACE_EVENT(ufs_mtk_event, #undef TRACE_INCLUDE_PATH #undef TRACE_INCLUDE_FILE -#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_PATH ../../drivers/scsi/ufs/ #define TRACE_INCLUDE_FILE ufs-mediatek-trace #include -- GitLab From bd14bf0e4a084514aa62d24d2109e0f09a93822f Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Tue, 8 Dec 2020 21:56:34 +0800 Subject: [PATCH 0474/1894] scsi: ufs: Re-enable WriteBooster after device reset UFS 3.1 specification mentions that the WriteBooster flags listed below will be set to their default values, i.e. disabled, after power cycle or any type of reset event. Thus we need to reset the flag variables kept in struct hba to align with the device status and ensure that WriteBooster-related functions are configured properly after device reset. Without this fix, WriteBooster will not be enabled successfully after by ufshcd_wb_ctrl() after device reset because hba->wb_enabled remains true. Flags required to be reset to default values: - fWriteBoosterEn: hba->wb_enabled - fWriteBoosterBufferFlushEn: hba->wb_buf_flush_enabled - fWriteBoosterBufferFlushDuringHibernate: No variable mapped Link: https://lore.kernel.org/r/20201208135635.15326-2-stanley.chu@mediatek.com Fixes: 3d17b9b5ab11 ("scsi: ufs: Add write booster feature support") Reviewed-by: Bean Huo Signed-off-by: Stanley Chu Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index 08c8a591e6b09..36d367eb81392 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -1221,8 +1221,13 @@ static inline void ufshcd_vops_device_reset(struct ufs_hba *hba) if (hba->vops && hba->vops->device_reset) { int err = hba->vops->device_reset(hba); - if (!err) + if (!err) { ufshcd_set_ufs_dev_active(hba); + if (ufshcd_is_wb_allowed(hba)) { + hba->wb_enabled = false; + hba->wb_buf_flush_enabled = false; + } + } if (err != -EOPNOTSUPP) ufshcd_update_evt_hist(hba, UFS_EVT_DEV_RESET, err); } -- GitLab From 31a5d9cafff163473abf9496318b6a53022d48f7 Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Tue, 8 Dec 2020 21:56:35 +0800 Subject: [PATCH 0475/1894] scsi: ufs: Un-inline ufshcd_vops_device_reset function More and more statements are being added to ufshcd_vops_device_reset() and this function is being called from multiple locations in the driver. Un-inline the function to allow the compiler to make better decisions. Link: https://lore.kernel.org/r/20201208135635.15326-3-stanley.chu@mediatek.com Reviewed-by: Bean Huo Signed-off-by: Stanley Chu Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 27 ++++++++++++++++++++++----- drivers/scsi/ufs/ufshcd.h | 19 +++++-------------- 2 files changed, 27 insertions(+), 19 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 4f3ac2566322e..e221add25a7e1 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -581,6 +581,23 @@ static void ufshcd_print_pwr_info(struct ufs_hba *hba) hba->pwr_info.hs_rate); } +static void ufshcd_device_reset(struct ufs_hba *hba) +{ + int err; + + err = ufshcd_vops_device_reset(hba); + + if (!err) { + ufshcd_set_ufs_dev_active(hba); + if (ufshcd_is_wb_allowed(hba)) { + hba->wb_enabled = false; + hba->wb_buf_flush_enabled = false; + } + } + if (err != -EOPNOTSUPP) + ufshcd_update_evt_hist(hba, UFS_EVT_DEV_RESET, err); +} + void ufshcd_delay_us(unsigned long us, unsigned long tolerance) { if (!us) @@ -3933,7 +3950,7 @@ int ufshcd_link_recovery(struct ufs_hba *hba) spin_unlock_irqrestore(hba->host->host_lock, flags); /* Reset the attached device */ - ufshcd_vops_device_reset(hba); + ufshcd_device_reset(hba); ret = ufshcd_host_reset_and_restore(hba); @@ -6935,7 +6952,7 @@ static int ufshcd_reset_and_restore(struct ufs_hba *hba) do { /* Reset the attached device */ - ufshcd_vops_device_reset(hba); + ufshcd_device_reset(hba); err = ufshcd_host_reset_and_restore(hba); } while (err && --retries); @@ -8708,7 +8725,7 @@ set_link_active: * further below. */ if (ufshcd_is_ufs_dev_deepsleep(hba)) { - ufshcd_vops_device_reset(hba); + ufshcd_device_reset(hba); WARN_ON(!ufshcd_is_link_off(hba)); } if (ufshcd_is_link_hibern8(hba) && !ufshcd_uic_hibern8_exit(hba)) @@ -8718,7 +8735,7 @@ set_link_active: set_dev_active: /* Can also get here needing to exit DeepSleep */ if (ufshcd_is_ufs_dev_deepsleep(hba)) { - ufshcd_vops_device_reset(hba); + ufshcd_device_reset(hba); ufshcd_host_reset_and_restore(hba); } if (!ufshcd_set_dev_pwr_mode(hba, UFS_ACTIVE_PWR_MODE)) @@ -9317,7 +9334,7 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) } /* Reset the attached device */ - ufshcd_vops_device_reset(hba); + ufshcd_device_reset(hba); ufshcd_init_crypto(hba); diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index 36d367eb81392..9bb5f0ed4124c 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -1216,21 +1216,12 @@ static inline void ufshcd_vops_dbg_register_dump(struct ufs_hba *hba) hba->vops->dbg_register_dump(hba); } -static inline void ufshcd_vops_device_reset(struct ufs_hba *hba) +static inline int ufshcd_vops_device_reset(struct ufs_hba *hba) { - if (hba->vops && hba->vops->device_reset) { - int err = hba->vops->device_reset(hba); - - if (!err) { - ufshcd_set_ufs_dev_active(hba); - if (ufshcd_is_wb_allowed(hba)) { - hba->wb_enabled = false; - hba->wb_buf_flush_enabled = false; - } - } - if (err != -EOPNOTSUPP) - ufshcd_update_evt_hist(hba, UFS_EVT_DEV_RESET, err); - } + if (hba->vops && hba->vops->device_reset) + return hba->vops->device_reset(hba); + + return -EOPNOTSUPP; } static inline void ufshcd_vops_config_scaling_param(struct ufs_hba *hba, -- GitLab From cb5253198f10a4cd79b7523c581e6173c7d49ddb Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 8 Dec 2020 14:05:05 -0800 Subject: [PATCH 0476/1894] scsi: cxgb4i: Fix TLS dependency SCSI_CXGB4_ISCSI selects CHELSIO_T4. The latter depends on TLS || TLS=n, so since 'select' does not check dependencies of the selected symbol, SCSI_CXGB4_ISCSI should also depend on TLS || TLS=n. This prevents the following kconfig warning and restricts SCSI_CXGB4_ISCSI to 'm' whenever TLS=m. WARNING: unmet direct dependencies detected for CHELSIO_T4 Depends on [m]: NETDEVICES [=y] && ETHERNET [=y] && NET_VENDOR_CHELSIO [=y] && PCI [=y] && (IPV6 [=y] || IPV6 [=y]=n) && (TLS [=m] || TLS [=m]=n) Selected by [y]: - SCSI_CXGB4_ISCSI [=y] && SCSI_LOWLEVEL [=y] && SCSI [=y] && PCI [=y] && INET [=y] && (IPV6 [=y] || IPV6 [=y]=n) && ETHERNET [=y] Link: https://lore.kernel.org/r/20201208220505.24488-1-rdunlap@infradead.org Fixes: 7b36b6e03b0d ("[SCSI] cxgb4i v5: iscsi driver") Cc: Karen Xie Cc: linux-scsi@vger.kernel.org Cc: "James E.J. Bottomley" Cc: "Martin K. Petersen" Signed-off-by: Randy Dunlap Signed-off-by: Martin K. Petersen --- drivers/scsi/cxgbi/cxgb4i/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/cxgbi/cxgb4i/Kconfig b/drivers/scsi/cxgbi/cxgb4i/Kconfig index b206e266b4e72..8b0deece9758b 100644 --- a/drivers/scsi/cxgbi/cxgb4i/Kconfig +++ b/drivers/scsi/cxgbi/cxgb4i/Kconfig @@ -4,6 +4,7 @@ config SCSI_CXGB4_ISCSI depends on PCI && INET && (IPV6 || IPV6=n) depends on THERMAL || !THERMAL depends on ETHERNET + depends on TLS || TLS=n select NET_VENDOR_CHELSIO select CHELSIO_T4 select CHELSIO_LIB -- GitLab From 8041ac642a1b31a2479488bc93fb16045726de23 Mon Sep 17 00:00:00 2001 From: Chunguang Xu Date: Sat, 7 Nov 2020 23:58:15 +0800 Subject: [PATCH 0477/1894] ext4: update ext4_data_block_valid related comments Since ext4_data_block_valid() has been renamed to ext4_inode_block_valid(), the related comments need to be updated. Signed-off-by: Chunguang Xu Reviewed-by: Andreas Dilger Link: https://lore.kernel.org/r/1604764698-4269-5-git-send-email-brookxu@tencent.com Signed-off-by: Theodore Ts'o --- fs/ext4/block_validity.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index 8e6ca23ed172c..c13422a770e4f 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -206,7 +206,7 @@ static void ext4_destroy_system_zone(struct rcu_head *rcu) * * The update of system_blks pointer in this function is protected by * sb->s_umount semaphore. However we have to be careful as we can be - * racing with ext4_data_block_valid() calls reading system_blks rbtree + * racing with ext4_inode_block_valid() calls reading system_blks rbtree * protected only by RCU. That's why we first build the rbtree and then * swap it in place. */ @@ -258,7 +258,7 @@ int ext4_setup_system_zone(struct super_block *sb) /* * System blks rbtree complete, announce it once to prevent racing - * with ext4_data_block_valid() accessing the rbtree at the same + * with ext4_inode_block_valid() accessing the rbtree at the same * time. */ rcu_assign_pointer(sbi->s_system_blks, system_blks); @@ -278,7 +278,7 @@ err: * * The update of system_blks pointer in this function is protected by * sb->s_umount semaphore. However we have to be careful as we can be - * racing with ext4_data_block_valid() calls reading system_blks rbtree + * racing with ext4_inode_block_valid() calls reading system_blks rbtree * protected only by RCU. So we first clear the system_blks pointer and * then free the rbtree only after RCU grace period expires. */ -- GitLab From 41fca96e635be523c28b8d57f2d1b1e51d1221d8 Mon Sep 17 00:00:00 2001 From: Chunguang Xu Date: Sat, 7 Nov 2020 23:58:17 +0800 Subject: [PATCH 0478/1894] ext4: delete nonsensical (commented-out) code inside ext4_xattr_block_set() Signed-off-by: Chunguang Xu Reviewed-by: Andreas Dilger Link: https://lore.kernel.org/r/1604764698-4269-7-git-send-email-brookxu@tencent.com Signed-off-by: Theodore Ts'o --- fs/ext4/xattr.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 6127e94ea4f5d..4e3b1f8c2e81e 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1927,7 +1927,6 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, } else { /* Allocate a buffer where we construct the new block. */ s->base = kzalloc(sb->s_blocksize, GFP_NOFS); - /* assert(header == s->base) */ error = -ENOMEM; if (s->base == NULL) goto cleanup; -- GitLab From f57ad63a835c6f1fe646ea985e78a79eb206a5b3 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 3 Dec 2020 16:33:19 +0200 Subject: [PATCH 0479/1894] KVM: x86: ignore SIPIs that are received while not in wait-for-sipi state In the commit 1c96dcceaeb3 ("KVM: x86: fix apic_accept_events vs check_nested_events"), we accidently started latching SIPIs that are received while the cpu is not waiting for them. This causes vCPUs to never enter a halted state. Fixes: 1c96dcceaeb3 ("KVM: x86: fix apic_accept_events vs check_nested_events") Signed-off-by: Maxim Levitsky Message-Id: <20201203143319.159394-2-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index e3ee597ff5404..6a87623aa578e 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2892,14 +2892,15 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu) else vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; } - if (test_bit(KVM_APIC_SIPI, &pe) && - vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { + if (test_bit(KVM_APIC_SIPI, &pe)) { clear_bit(KVM_APIC_SIPI, &apic->pending_events); - /* evaluate pending_events before reading the vector */ - smp_rmb(); - sipi_vector = apic->sipi_vector; - kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector); - vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; + if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { + /* evaluate pending_events before reading the vector */ + smp_rmb(); + sipi_vector = apic->sipi_vector; + kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector); + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; + } } } -- GitLab From 0ca995f5c7110c13795cf82c18639175e2ee20f2 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 24 Sep 2020 13:18:08 +0200 Subject: [PATCH 0480/1894] clk: renesas: r8a779a0: Make rcar_r8a779a0_cpg_clk_register() static When compiling with clang: drivers/clk/renesas/r8a779a0-cpg-mssr.c:156:21: warning: no previous prototype for function 'rcar_r8a779a0_cpg_clk_register' [-Wmissing-prototypes] struct clk * __init rcar_r8a779a0_cpg_clk_register(struct device *dev, ^ drivers/clk/renesas/r8a779a0-cpg-mssr.c:156:1: note: declare 'static' if the function is not intended to be used outside of this translation unit struct clk * __init rcar_r8a779a0_cpg_clk_register(struct device *dev, ^ static Similarly, with sparse: drivers/clk/renesas/r8a779a0-cpg-mssr.c:156:12: warning: symbol 'rcar_r8a779a0_cpg_clk_register' was not declared. Should it be static? There are no users of rcar_r8a779a0_cpg_clk_register() outside this file, so it should be static. Fixes: 17bcc8035d2d19fc ("clk: renesas: cpg-mssr: Add support for R-Car V3U") Reported-by: kernel test robot Signed-off-by: Geert Uytterhoeven Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/20200924111808.15358-1-geert+renesas@glider.be --- drivers/clk/renesas/r8a779a0-cpg-mssr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/renesas/r8a779a0-cpg-mssr.c b/drivers/clk/renesas/r8a779a0-cpg-mssr.c index 17ebbac7ddfb4..7e25b3b8945bd 100644 --- a/drivers/clk/renesas/r8a779a0-cpg-mssr.c +++ b/drivers/clk/renesas/r8a779a0-cpg-mssr.c @@ -153,7 +153,7 @@ static const struct rcar_r8a779a0_cpg_pll_config *cpg_pll_config __initdata; static unsigned int cpg_clk_extalr __initdata; static u32 cpg_mode __initdata; -struct clk * __init rcar_r8a779a0_cpg_clk_register(struct device *dev, +static struct clk * __init rcar_r8a779a0_cpg_clk_register(struct device *dev, const struct cpg_core_clk *core, const struct cpg_mssr_info *info, struct clk **clks, void __iomem *base, struct raw_notifier_head *notifiers) -- GitLab From 043585760f5191926467862c01a0893c2fefb359 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 25 Sep 2020 13:07:13 +0200 Subject: [PATCH 0481/1894] MAINTAINERS: Update git repo for Renesas clock drivers Align the clock branch name with other renesas-* branches pulled by subsystem maintainers. Signed-off-by: Geert Uytterhoeven Acked-by: Stephen Boyd Link: https://lore.kernel.org/r/20200925110713.2652-1-geert+renesas@glider.be --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index e73636b75f29d..595e1023b3987 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14888,7 +14888,7 @@ RENESAS CLOCK DRIVERS M: Geert Uytterhoeven L: linux-renesas-soc@vger.kernel.org S: Supported -T: git git://git.kernel.org/pub/scm/linux/kernel/git/geert/renesas-drivers.git clk-renesas +T: git git://git.kernel.org/pub/scm/linux/kernel/git/geert/renesas-drivers.git renesas-clk F: Documentation/devicetree/bindings/clock/renesas,* F: drivers/clk/renesas/ -- GitLab From 23378e70ca286de32cedcf6505af734b034148dc Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Fri, 16 Oct 2020 13:11:53 +0200 Subject: [PATCH 0482/1894] clk: renesas: r8a779a0: Add CSI4[0-3] clocks Add definitions of the CSI-2 receiver clocks for R-Car V3U. Signed-off-by: Jacopo Mondi Link: https://lore.kernel.org/r/20201016111158.17521-2-jacopo+renesas@jmondi.org Signed-off-by: Geert Uytterhoeven --- drivers/clk/renesas/r8a779a0-cpg-mssr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/clk/renesas/r8a779a0-cpg-mssr.c b/drivers/clk/renesas/r8a779a0-cpg-mssr.c index 7e25b3b8945bd..bd54a28c50eeb 100644 --- a/drivers/clk/renesas/r8a779a0-cpg-mssr.c +++ b/drivers/clk/renesas/r8a779a0-cpg-mssr.c @@ -141,6 +141,10 @@ static const struct cpg_core_clk r8a779a0_core_clks[] __initconst = { }; static const struct mssr_mod_clk r8a779a0_mod_clks[] __initconst = { + DEF_MOD("csi40", 331, R8A779A0_CLK_CSI0), + DEF_MOD("csi41", 400, R8A779A0_CLK_CSI0), + DEF_MOD("csi42", 401, R8A779A0_CLK_CSI0), + DEF_MOD("csi43", 402, R8A779A0_CLK_CSI0), DEF_MOD("scif0", 702, R8A779A0_CLK_S1D8), DEF_MOD("scif1", 703, R8A779A0_CLK_S1D8), DEF_MOD("scif3", 704, R8A779A0_CLK_S1D8), -- GitLab From 874d4eee5421d08fd220adffc32da307f8b7c964 Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Fri, 16 Oct 2020 13:11:56 +0200 Subject: [PATCH 0483/1894] clk: renesas: r8a779a0: Add VIN clocks Add definitions of the VIN instance clocks for R-Car V3U. Signed-off-by: Jacopo Mondi Link: https://lore.kernel.org/r/20201016111158.17521-5-jacopo+renesas@jmondi.org Signed-off-by: Geert Uytterhoeven --- drivers/clk/renesas/r8a779a0-cpg-mssr.c | 32 +++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/drivers/clk/renesas/r8a779a0-cpg-mssr.c b/drivers/clk/renesas/r8a779a0-cpg-mssr.c index bd54a28c50eeb..2a00eb82013f3 100644 --- a/drivers/clk/renesas/r8a779a0-cpg-mssr.c +++ b/drivers/clk/renesas/r8a779a0-cpg-mssr.c @@ -149,6 +149,38 @@ static const struct mssr_mod_clk r8a779a0_mod_clks[] __initconst = { DEF_MOD("scif1", 703, R8A779A0_CLK_S1D8), DEF_MOD("scif3", 704, R8A779A0_CLK_S1D8), DEF_MOD("scif4", 705, R8A779A0_CLK_S1D8), + DEF_MOD("vin00", 730, R8A779A0_CLK_S1D1), + DEF_MOD("vin01", 731, R8A779A0_CLK_S1D1), + DEF_MOD("vin02", 800, R8A779A0_CLK_S1D1), + DEF_MOD("vin03", 801, R8A779A0_CLK_S1D1), + DEF_MOD("vin04", 802, R8A779A0_CLK_S1D1), + DEF_MOD("vin05", 803, R8A779A0_CLK_S1D1), + DEF_MOD("vin06", 804, R8A779A0_CLK_S1D1), + DEF_MOD("vin07", 805, R8A779A0_CLK_S1D1), + DEF_MOD("vin10", 806, R8A779A0_CLK_S1D1), + DEF_MOD("vin11", 807, R8A779A0_CLK_S1D1), + DEF_MOD("vin12", 808, R8A779A0_CLK_S1D1), + DEF_MOD("vin13", 809, R8A779A0_CLK_S1D1), + DEF_MOD("vin14", 810, R8A779A0_CLK_S1D1), + DEF_MOD("vin15", 811, R8A779A0_CLK_S1D1), + DEF_MOD("vin16", 812, R8A779A0_CLK_S1D1), + DEF_MOD("vin17", 813, R8A779A0_CLK_S1D1), + DEF_MOD("vin20", 814, R8A779A0_CLK_S1D1), + DEF_MOD("vin21", 815, R8A779A0_CLK_S1D1), + DEF_MOD("vin22", 816, R8A779A0_CLK_S1D1), + DEF_MOD("vin23", 817, R8A779A0_CLK_S1D1), + DEF_MOD("vin24", 818, R8A779A0_CLK_S1D1), + DEF_MOD("vin25", 819, R8A779A0_CLK_S1D1), + DEF_MOD("vin26", 820, R8A779A0_CLK_S1D1), + DEF_MOD("vin27", 821, R8A779A0_CLK_S1D1), + DEF_MOD("vin30", 822, R8A779A0_CLK_S1D1), + DEF_MOD("vin31", 823, R8A779A0_CLK_S1D1), + DEF_MOD("vin32", 824, R8A779A0_CLK_S1D1), + DEF_MOD("vin33", 825, R8A779A0_CLK_S1D1), + DEF_MOD("vin34", 826, R8A779A0_CLK_S1D1), + DEF_MOD("vin35", 827, R8A779A0_CLK_S1D1), + DEF_MOD("vin36", 828, R8A779A0_CLK_S1D1), + DEF_MOD("vin37", 829, R8A779A0_CLK_S1D1), }; static spinlock_t cpg_lock; -- GitLab From 13d2617bf224351e78141183ca51971df83a9dd5 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Fri, 16 Oct 2020 13:17:07 +0100 Subject: [PATCH 0484/1894] clk: renesas: r8a774a1: Add RPC clocks Describe the RPCSRC internal clock and the RPC[D2] clocks derived from it, as well as the RPC-IF module clock, in the RZ/G2M (R8A774A1) CPG/MSSR driver. Inspired by commit 94e3935b5756 ("clk: renesas: r8a77980: Add RPC clocks"). Signed-off-by: Biju Das Signed-off-by: Lad Prabhakar Link: https://lore.kernel.org/r/20201016121709.8447-3-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Geert Uytterhoeven --- drivers/clk/renesas/r8a774a1-cpg-mssr.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/clk/renesas/r8a774a1-cpg-mssr.c b/drivers/clk/renesas/r8a774a1-cpg-mssr.c index fd54b9f625dad..4a43ebec7d5e2 100644 --- a/drivers/clk/renesas/r8a774a1-cpg-mssr.c +++ b/drivers/clk/renesas/r8a774a1-cpg-mssr.c @@ -41,6 +41,7 @@ enum clk_ids { CLK_S2, CLK_S3, CLK_SDSRC, + CLK_RPCSRC, CLK_RINT, /* Module Clocks */ @@ -67,6 +68,12 @@ static const struct cpg_core_clk r8a774a1_core_clks[] __initconst = { DEF_FIXED(".s2", CLK_S2, CLK_PLL1_DIV2, 4, 1), DEF_FIXED(".s3", CLK_S3, CLK_PLL1_DIV2, 6, 1), DEF_FIXED(".sdsrc", CLK_SDSRC, CLK_PLL1_DIV2, 2, 1), + DEF_BASE(".rpcsrc", CLK_RPCSRC, CLK_TYPE_GEN3_RPCSRC, CLK_PLL1), + + DEF_BASE("rpc", R8A774A1_CLK_RPC, CLK_TYPE_GEN3_RPC, + CLK_RPCSRC), + DEF_BASE("rpcd2", R8A774A1_CLK_RPCD2, CLK_TYPE_GEN3_RPCD2, + R8A774A1_CLK_RPC), DEF_GEN3_OSC(".r", CLK_RINT, CLK_EXTAL, 32), @@ -200,6 +207,7 @@ static const struct mssr_mod_clk r8a774a1_mod_clks[] __initconst = { DEF_MOD("can-fd", 914, R8A774A1_CLK_S3D2), DEF_MOD("can-if1", 915, R8A774A1_CLK_S3D4), DEF_MOD("can-if0", 916, R8A774A1_CLK_S3D4), + DEF_MOD("rpc-if", 917, R8A774A1_CLK_RPCD2), DEF_MOD("i2c6", 918, R8A774A1_CLK_S0D6), DEF_MOD("i2c5", 919, R8A774A1_CLK_S0D6), DEF_MOD("i2c-dvfs", 926, R8A774A1_CLK_CP), -- GitLab From fb9805c51793339e0affbc8e3ce2b3210b41c9fa Mon Sep 17 00:00:00 2001 From: Biju Das Date: Fri, 16 Oct 2020 13:17:08 +0100 Subject: [PATCH 0485/1894] clk: renesas: r8a774b1: Add RPC clocks Describe the RPCSRC internal clock and the RPC[D2] clocks derived from it, as well as the RPC-IF module clock, in the RZ/G2N (R8A774B1) CPG/MSSR driver. Inspired by commit 94e3935b5756 ("clk: renesas: r8a77980: Add RPC clocks"). Signed-off-by: Biju Das Signed-off-by: Lad Prabhakar Link: https://lore.kernel.org/r/20201016121709.8447-4-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Geert Uytterhoeven --- drivers/clk/renesas/r8a774b1-cpg-mssr.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/clk/renesas/r8a774b1-cpg-mssr.c b/drivers/clk/renesas/r8a774b1-cpg-mssr.c index f436691271ece..6f04c40fe237a 100644 --- a/drivers/clk/renesas/r8a774b1-cpg-mssr.c +++ b/drivers/clk/renesas/r8a774b1-cpg-mssr.c @@ -40,6 +40,7 @@ enum clk_ids { CLK_S2, CLK_S3, CLK_SDSRC, + CLK_RPCSRC, CLK_RINT, /* Module Clocks */ @@ -65,6 +66,12 @@ static const struct cpg_core_clk r8a774b1_core_clks[] __initconst = { DEF_FIXED(".s2", CLK_S2, CLK_PLL1_DIV2, 4, 1), DEF_FIXED(".s3", CLK_S3, CLK_PLL1_DIV2, 6, 1), DEF_FIXED(".sdsrc", CLK_SDSRC, CLK_PLL1_DIV2, 2, 1), + DEF_BASE(".rpcsrc", CLK_RPCSRC, CLK_TYPE_GEN3_RPCSRC, CLK_PLL1), + + DEF_BASE("rpc", R8A774B1_CLK_RPC, CLK_TYPE_GEN3_RPC, + CLK_RPCSRC), + DEF_BASE("rpcd2", R8A774B1_CLK_RPCD2, CLK_TYPE_GEN3_RPCD2, + R8A774B1_CLK_RPC), DEF_GEN3_OSC(".r", CLK_RINT, CLK_EXTAL, 32), @@ -196,6 +203,7 @@ static const struct mssr_mod_clk r8a774b1_mod_clks[] __initconst = { DEF_MOD("can-fd", 914, R8A774B1_CLK_S3D2), DEF_MOD("can-if1", 915, R8A774B1_CLK_S3D4), DEF_MOD("can-if0", 916, R8A774B1_CLK_S3D4), + DEF_MOD("rpc-if", 917, R8A774B1_CLK_RPCD2), DEF_MOD("i2c6", 918, R8A774B1_CLK_S0D6), DEF_MOD("i2c5", 919, R8A774B1_CLK_S0D6), DEF_MOD("i2c-dvfs", 926, R8A774B1_CLK_CP), -- GitLab From 4ef39a80da8b15ed933d387bc02ec14df4f4f1a7 Mon Sep 17 00:00:00 2001 From: Yejune Deng Date: Tue, 3 Nov 2020 11:44:53 +0800 Subject: [PATCH 0486/1894] clk: renesas: rcar-usb2-clock-sel: Replace devm_reset_control_array_get() devm_reset_control_array_get_shared() looks more readable Signed-off-by: Yejune Deng Link: https://lore.kernel.org/r/1604375093-6451-1-git-send-email-yejune.deng@gmail.com Signed-off-by: Geert Uytterhoeven --- drivers/clk/renesas/rcar-usb2-clock-sel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/renesas/rcar-usb2-clock-sel.c b/drivers/clk/renesas/rcar-usb2-clock-sel.c index d4c02986c34e9..3abafd78f7c8a 100644 --- a/drivers/clk/renesas/rcar-usb2-clock-sel.c +++ b/drivers/clk/renesas/rcar-usb2-clock-sel.c @@ -160,7 +160,7 @@ static int rcar_usb2_clock_sel_probe(struct platform_device *pdev) if (ret < 0) return ret; - priv->rsts = devm_reset_control_array_get(dev, true, false); + priv->rsts = devm_reset_control_array_get_shared(dev); if (IS_ERR(priv->rsts)) return PTR_ERR(priv->rsts); -- GitLab From b5fb3b8859a491ff31e933927809f17a4e39459f Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 3 Nov 2020 17:24:34 +0100 Subject: [PATCH 0487/1894] clk: renesas: cpg-mssr: fix kerneldoc of cpg_mssr_priv The struct cpg_mssr_priv missed proper formatting: drivers/clk/renesas/renesas-cpg-mssr.c:142: warning: cannot understand function prototype: 'struct cpg_mssr_priv ' Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20201103162435.13689-7-krzk@kernel.org Signed-off-by: Geert Uytterhoeven --- drivers/clk/renesas/renesas-cpg-mssr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/clk/renesas/renesas-cpg-mssr.c b/drivers/clk/renesas/renesas-cpg-mssr.c index 94db883703377..1c3215dc4877c 100644 --- a/drivers/clk/renesas/renesas-cpg-mssr.c +++ b/drivers/clk/renesas/renesas-cpg-mssr.c @@ -119,7 +119,8 @@ static const u16 srstclr_for_v3u[] = { }; /** - * Clock Pulse Generator / Module Standby and Software Reset Private Data + * struct cpg_mssr_priv - Clock Pulse Generator / Module Standby + * and Software Reset Private Data * * @rcdev: Optional reset controller entity * @dev: CPG/MSSR device -- GitLab From 14653942de7f63e21ece32e3901f09a248598a43 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 9 Nov 2020 16:26:14 +0100 Subject: [PATCH 0488/1894] clk: renesas: r8a779a0: Fix R and OSC clocks The R-Car V3U clock driver defines the R and OSC clocks using R-Car Gen3 clock types. However, The R-Car V3U clock driver does not use the R-Car Gen3 clock driver core, hence registering the R and OSC clocks fails: renesas-cpg-mssr e6150000.clock-controller: Failed to register core clock osc: -22 renesas-cpg-mssr e6150000.clock-controller: Failed to register core clock r: -22 Fix this by introducing clock definition macros specific to R-Car V3U. Note that rcar_r8a779a0_cpg_clk_register() already handled the related clock types. Drop the now unneeded include of rcar-gen3-cpg.h. Fixes: 17bcc8035d2d19fc ("clk: renesas: cpg-mssr: Add support for R-Car V3U") Signed-off-by: Geert Uytterhoeven Tested-by: Yoshihiro Shimoda Reviewed-by: Yoshihiro Shimoda Link: https://lore.kernel.org/r/20201109152614.2465483-1-geert+renesas@glider.be --- drivers/clk/renesas/r8a779a0-cpg-mssr.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/clk/renesas/r8a779a0-cpg-mssr.c b/drivers/clk/renesas/r8a779a0-cpg-mssr.c index 2a00eb82013f3..aa5389b04d742 100644 --- a/drivers/clk/renesas/r8a779a0-cpg-mssr.c +++ b/drivers/clk/renesas/r8a779a0-cpg-mssr.c @@ -26,7 +26,6 @@ #include #include "renesas-cpg-mssr.h" -#include "rcar-gen3-cpg.h" enum rcar_r8a779a0_clk_types { CLK_TYPE_R8A779A0_MAIN = CLK_TYPE_CUSTOM, @@ -84,6 +83,14 @@ enum clk_ids { DEF_BASE(_name, _id, CLK_TYPE_R8A779A0_PLL2X_3X, CLK_MAIN, \ .offset = _offset) +#define DEF_MDSEL(_name, _id, _md, _parent0, _div0, _parent1, _div1) \ + DEF_BASE(_name, _id, CLK_TYPE_R8A779A0_MDSEL, \ + (_parent0) << 16 | (_parent1), \ + .div = (_div0) << 16 | (_div1), .offset = _md) + +#define DEF_OSC(_name, _id, _parent, _div) \ + DEF_BASE(_name, _id, CLK_TYPE_R8A779A0_OSC, _parent, .div = _div) + static const struct cpg_core_clk r8a779a0_core_clks[] __initconst = { /* External Clock Inputs */ DEF_INPUT("extal", CLK_EXTAL), @@ -136,8 +143,8 @@ static const struct cpg_core_clk r8a779a0_core_clks[] __initconst = { DEF_DIV6P1("canfd", R8A779A0_CLK_CANFD, CLK_PLL5_DIV4, 0x878), DEF_DIV6P1("csi0", R8A779A0_CLK_CSI0, CLK_PLL5_DIV4, 0x880), - DEF_GEN3_OSC("osc", R8A779A0_CLK_OSC, CLK_EXTAL, 8), - DEF_GEN3_MDSEL("r", R8A779A0_CLK_R, 29, CLK_EXTALR, 1, CLK_OCO, 1), + DEF_OSC("osc", R8A779A0_CLK_OSC, CLK_EXTAL, 8), + DEF_MDSEL("r", R8A779A0_CLK_R, 29, CLK_EXTALR, 1, CLK_OCO, 1), }; static const struct mssr_mod_clk r8a779a0_mod_clks[] __initconst = { -- GitLab From 40745482eec81bea686cd1b38693191dc7e9ac66 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Mon, 16 Nov 2020 10:10:02 +0000 Subject: [PATCH 0489/1894] clk: renesas: r8a774c0: Add RPC clocks Describe the RPCSRC internal clock and the RPC[D2] clocks derived from it, as well as the RPC-IF module clock, in the RZ/G2E (R8A774C0) CPG/MSSR driver. Add new clk type CLK_TYPE_GEN3_E3_RPCSRC to register rpcsrc as a fixed clock on R-Car Gen3 E3 (and also RZ/G2E which is identical to E3 SoC), parent and the divider is set based on the register value CPG_RPCCKCR[4:3] which has been set prior to booting the kernel. Signed-off-by: Lad Prabhakar Reviewed-by: Biju Das Link: https://lore.kernel.org/r/20201116101002.5986-1-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Geert Uytterhoeven --- drivers/clk/renesas/r8a774c0-cpg-mssr.c | 9 ++++++++ drivers/clk/renesas/rcar-gen3-cpg.c | 28 +++++++++++++++++++++++++ drivers/clk/renesas/rcar-gen3-cpg.h | 5 +++++ 3 files changed, 42 insertions(+) diff --git a/drivers/clk/renesas/r8a774c0-cpg-mssr.c b/drivers/clk/renesas/r8a774c0-cpg-mssr.c index 9fc9fa9e531ab..ed3a2cf0e0bb2 100644 --- a/drivers/clk/renesas/r8a774c0-cpg-mssr.c +++ b/drivers/clk/renesas/r8a774c0-cpg-mssr.c @@ -44,6 +44,7 @@ enum clk_ids { CLK_S2, CLK_S3, CLK_SDSRC, + CLK_RPCSRC, CLK_RINT, CLK_OCO, @@ -74,6 +75,13 @@ static const struct cpg_core_clk r8a774c0_core_clks[] __initconst = { DEF_FIXED(".s3", CLK_S3, CLK_PLL1, 6, 1), DEF_FIXED(".sdsrc", CLK_SDSRC, CLK_PLL1, 2, 1), + DEF_FIXED_RPCSRC_E3(".rpcsrc", CLK_RPCSRC, CLK_PLL0, CLK_PLL1), + + DEF_BASE("rpc", R8A774C0_CLK_RPC, CLK_TYPE_GEN3_RPC, + CLK_RPCSRC), + DEF_BASE("rpcd2", R8A774C0_CLK_RPCD2, CLK_TYPE_GEN3_RPCD2, + R8A774C0_CLK_RPC), + DEF_DIV6_RO(".r", CLK_RINT, CLK_EXTAL, CPG_RCKCR, 32), DEF_RATE(".oco", CLK_OCO, 8 * 1000 * 1000), @@ -199,6 +207,7 @@ static const struct mssr_mod_clk r8a774c0_mod_clks[] __initconst = { DEF_MOD("can-fd", 914, R8A774C0_CLK_S3D2), DEF_MOD("can-if1", 915, R8A774C0_CLK_S3D4), DEF_MOD("can-if0", 916, R8A774C0_CLK_S3D4), + DEF_MOD("rpc-if", 917, R8A774C0_CLK_RPCD2), DEF_MOD("i2c6", 918, R8A774C0_CLK_S3D2), DEF_MOD("i2c5", 919, R8A774C0_CLK_S3D2), DEF_MOD("i2c-dvfs", 926, R8A774C0_CLK_CP), diff --git a/drivers/clk/renesas/rcar-gen3-cpg.c b/drivers/clk/renesas/rcar-gen3-cpg.c index a7debddf7d099..063b611514886 100644 --- a/drivers/clk/renesas/rcar-gen3-cpg.c +++ b/drivers/clk/renesas/rcar-gen3-cpg.c @@ -695,6 +695,34 @@ struct clk * __init rcar_gen3_cpg_clk_register(struct device *dev, cpg_rpcsrc_div_table, &cpg_lock); + case CLK_TYPE_GEN3_E3_RPCSRC: + /* + * Register RPCSRC as fixed factor clock based on the + * MD[4:1] pins and CPG_RPCCKCR[4:3] register value for + * which has been set prior to booting the kernel. + */ + value = (readl(base + CPG_RPCCKCR) & GENMASK(4, 3)) >> 3; + + switch (value) { + case 0: + div = 5; + break; + case 1: + div = 3; + break; + case 2: + parent = clks[core->parent >> 16]; + if (IS_ERR(parent)) + return ERR_CAST(parent); + div = core->div; + break; + case 3: + default: + div = 2; + break; + } + break; + case CLK_TYPE_GEN3_RPC: return cpg_rpc_clk_register(core->name, base, __clk_get_name(parent), notifiers); diff --git a/drivers/clk/renesas/rcar-gen3-cpg.h b/drivers/clk/renesas/rcar-gen3-cpg.h index c4ac80cac6a0a..3d949c4a3244c 100644 --- a/drivers/clk/renesas/rcar-gen3-cpg.h +++ b/drivers/clk/renesas/rcar-gen3-cpg.h @@ -24,6 +24,7 @@ enum rcar_gen3_clk_types { CLK_TYPE_GEN3_OSC, /* OSC EXTAL predivider and fixed divider */ CLK_TYPE_GEN3_RCKSEL, /* Select parent/divider using RCKCR.CKSEL */ CLK_TYPE_GEN3_RPCSRC, + CLK_TYPE_GEN3_E3_RPCSRC, CLK_TYPE_GEN3_RPC, CLK_TYPE_GEN3_RPCD2, @@ -54,6 +55,10 @@ enum rcar_gen3_clk_types { #define DEF_GEN3_Z(_name, _id, _type, _parent, _div, _offset) \ DEF_BASE(_name, _id, _type, _parent, .div = _div, .offset = _offset) +#define DEF_FIXED_RPCSRC_E3(_name, _id, _parent0, _parent1) \ + DEF_BASE(_name, _id, CLK_TYPE_GEN3_E3_RPCSRC, \ + (_parent0) << 16 | (_parent1), .div = 8) + struct rcar_gen3_cpg_pll_config { u8 extal_div; u8 pll1_mult; -- GitLab From fd0d8ed7c1b430f7e4a0a823a924ae3c849d74a3 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 19 Nov 2020 13:50:53 +0100 Subject: [PATCH 0490/1894] clk: renesas: sh73a0: Stop using __raw_*() I/O accessors There is no reason to keep on using the __raw_{read,write}l() I/O accessors in Renesas ARM driver code. Switch to using the plain {read,write}l() I/O accessors, to have a chance that this works on big-endian. Suggested-by: Arnd Bergmann Signed-off-by: Geert Uytterhoeven Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/20201119125053.4065746-1-geert+renesas@glider.be --- drivers/clk/renesas/clk-sh73a0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/renesas/clk-sh73a0.c b/drivers/clk/renesas/clk-sh73a0.c index 5f25a70bc61c4..4146c1d717b96 100644 --- a/drivers/clk/renesas/clk-sh73a0.c +++ b/drivers/clk/renesas/clk-sh73a0.c @@ -121,7 +121,7 @@ sh73a0_cpg_register_clock(struct device_node *np, struct sh73a0_cpg *cpg, (phy_no ? CPG_DSI1PHYCR : CPG_DSI0PHYCR); parent_name = phy_no ? "dsi1pck" : "dsi0pck"; - mult = __raw_readl(dsi_reg); + mult = readl(dsi_reg); if (!(mult & 0x8000)) mult = 1; else -- GitLab From f5c50b1fed55332beb88e81e9e17c49673b77344 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Thu, 5 Nov 2020 11:32:04 +0900 Subject: [PATCH 0491/1894] dt-bindings: clock: renesas: rcar-usb2-clock-sel: Convert bindings to json-schema Convert Renesas R-Car USB 2.0 clock selector bindings documentation to json-schema. Signed-off-by: Yoshihiro Shimoda Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/1604543524-31482-1-git-send-email-yoshihiro.shimoda.uh@renesas.com Signed-off-by: Geert Uytterhoeven --- .../clock/renesas,rcar-usb2-clock-sel.txt | 68 ------------ .../clock/renesas,rcar-usb2-clock-sel.yaml | 100 ++++++++++++++++++ 2 files changed, 100 insertions(+), 68 deletions(-) delete mode 100644 Documentation/devicetree/bindings/clock/renesas,rcar-usb2-clock-sel.txt create mode 100644 Documentation/devicetree/bindings/clock/renesas,rcar-usb2-clock-sel.yaml diff --git a/Documentation/devicetree/bindings/clock/renesas,rcar-usb2-clock-sel.txt b/Documentation/devicetree/bindings/clock/renesas,rcar-usb2-clock-sel.txt deleted file mode 100644 index da92f5748deee..0000000000000 --- a/Documentation/devicetree/bindings/clock/renesas,rcar-usb2-clock-sel.txt +++ /dev/null @@ -1,68 +0,0 @@ -* Renesas R-Car USB 2.0 clock selector - -This file provides information on what the device node for the R-Car USB 2.0 -clock selector. - -If you connect an external clock to the USB_EXTAL pin only, you should set -the clock rate to "usb_extal" node only. -If you connect an oscillator to both the USB_XTAL and USB_EXTAL, this module -is not needed because this is default setting. (Of course, you can set the -clock rates to both "usb_extal" and "usb_xtal" nodes. - -Case 1: An external clock connects to R-Car SoC - +----------+ +--- R-Car ---------------------+ - |External |---|USB_EXTAL ---> all usb channels| - |clock | |USB_XTAL | - +----------+ +-------------------------------+ -In this case, we need this driver with "usb_extal" clock. - -Case 2: An oscillator connects to R-Car SoC - +----------+ +--- R-Car ---------------------+ - |Oscillator|---|USB_EXTAL -+-> all usb channels| - | |---|USB_XTAL --+ | - +----------+ +-------------------------------+ -In this case, we don't need this selector. - -Required properties: -- compatible: "renesas,r8a7795-rcar-usb2-clock-sel" if the device is a part of - an R8A7795 SoC. - "renesas,r8a7796-rcar-usb2-clock-sel" if the device if a part of - an R8A77960 SoC. - "renesas,r8a77961-rcar-usb2-clock-sel" if the device if a part of - an R8A77961 SoC. - "renesas,rcar-gen3-usb2-clock-sel" for a generic R-Car Gen3 - compatible device. - - When compatible with the generic version, nodes must list the - SoC-specific version corresponding to the platform first - followed by the generic version. - -- reg: offset and length of the USB 2.0 clock selector register block. -- clocks: A list of phandles and specifier pairs. -- clock-names: Name of the clocks. - - The functional clock of USB 2.0 host side must be "ehci_ohci" - - The functional clock of HS-USB side must be "hs-usb-if" - - The USB_EXTAL clock pin must be "usb_extal" - - The USB_XTAL clock pin must be "usb_xtal" -- #clock-cells: Must be 0 -- power-domains: A phandle and symbolic PM domain specifier. - See power/renesas,rcar-sysc.yaml. -- resets: A list of phandles and specifier pairs. -- reset-names: Name of the resets. - - The reset of USB 2.0 host side must be "ehci_ohci" - - The reset of HS-USB side must be "hs-usb-if" - -Example (R-Car H3): - - usb2_clksel: clock-controller@e6590630 { - compatible = "renesas,r8a7795-rcar-usb2-clock-sel", - "renesas,rcar-gen3-usb2-clock-sel"; - reg = <0 0xe6590630 0 0x02>; - clocks = <&cpg CPG_MOD 703>, <&cpg CPG_MOD 704>, - <&usb_extal>, <&usb_xtal>; - clock-names = "ehci_ohci", "hs-usb-if", "usb_extal", "usb_xtal"; - #clock-cells = <0>; - power-domains = <&sysc R8A7795_PD_ALWAYS_ON>; - resets = <&cpg 703>, <&cpg 704>; - reset-names = "ehci_ohci", "hs-usb-if"; - }; diff --git a/Documentation/devicetree/bindings/clock/renesas,rcar-usb2-clock-sel.yaml b/Documentation/devicetree/bindings/clock/renesas,rcar-usb2-clock-sel.yaml new file mode 100644 index 0000000000000..5be1229b3d6ec --- /dev/null +++ b/Documentation/devicetree/bindings/clock/renesas,rcar-usb2-clock-sel.yaml @@ -0,0 +1,100 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: "http://devicetree.org/schemas/clock/renesas,rcar-usb2-clock-sel.yaml#" +$schema: "http://devicetree.org/meta-schemas/core.yaml#" + +title: Renesas R-Car USB 2.0 clock selector + +maintainers: + - Yoshihiro Shimoda + +description: | + If you connect an external clock to the USB_EXTAL pin only, you should set + the clock rate to "usb_extal" node only. + If you connect an oscillator to both the USB_XTAL and USB_EXTAL, this module + is not needed because this is default setting. (Of course, you can set the + clock rates to both "usb_extal" and "usb_xtal" nodes. + + Case 1: An external clock connects to R-Car SoC + +----------+ +--- R-Car ---------------------+ + |External |---|USB_EXTAL ---> all usb channels| + |clock | |USB_XTAL | + +----------+ +-------------------------------+ + + In this case, we need this driver with "usb_extal" clock. + + Case 2: An oscillator connects to R-Car SoC + +----------+ +--- R-Car ---------------------+ + |Oscillator|---|USB_EXTAL -+-> all usb channels| + | |---|USB_XTAL --+ | + +----------+ +-------------------------------+ + In this case, we don't need this selector. + +properties: + compatible: + items: + - enum: + - renesas,r8a7795-rcar-usb2-clock-sel # R-Car H3 + - renesas,r8a7796-rcar-usb2-clock-sel # R-Car M3-W + - renesas,r8a77961-rcar-usb2-clock-sel # R-Car M3-W+ + - const: renesas,rcar-gen3-usb2-clock-sel + + reg: + maxItems: 1 + + clocks: + minItems: 4 + maxItems: 4 + + clock-names: + items: + - const: ehci_ohci + - const: hs-usb-if + - const: usb_extal + - const: usb_xtal + + '#clock-cells': + const: 0 + + power-domains: + maxItems: 1 + + resets: + minItems: 2 + maxItems: 2 + + reset-names: + items: + - const: ehci_ohci + - const: hs-usb-if + +required: + - compatible + - reg + - clocks + - clock-names + - '#clock-cells' + - power-domains + - resets + - reset-names + +additionalProperties: false + +examples: + - | + #include + #include + + usb2_clksel: clock-controller@e6590630 { + compatible = "renesas,r8a7795-rcar-usb2-clock-sel", + "renesas,rcar-gen3-usb2-clock-sel"; + reg = <0xe6590630 0x02>; + clocks = <&cpg CPG_MOD 703>, <&cpg CPG_MOD 704>, + <&usb_extal>, <&usb_xtal>; + clock-names = "ehci_ohci", "hs-usb-if", "usb_extal", "usb_xtal"; + #clock-cells = <0>; + power-domains = <&sysc R8A7795_PD_ALWAYS_ON>; + resets = <&cpg 703>, <&cpg 704>; + reset-names = "ehci_ohci", "hs-usb-if"; + }; -- GitLab From c4196218737137b8e8cf7d0c375765f4f36cb591 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 6 Nov 2020 08:34:23 +0100 Subject: [PATCH 0492/1894] KVM: s390: Add memcg accounting to KVM allocations Almost all kvm allocations in the s390x KVM code can be attributed to the process that triggers the allocation (in other words, no global allocation for other guests). This will help the memcg controller to make the right decisions. Signed-off-by: Christian Borntraeger Acked-by: Janosch Frank Acked-by: Cornelia Huck --- arch/s390/kvm/guestdbg.c | 8 ++++---- arch/s390/kvm/intercept.c | 2 +- arch/s390/kvm/interrupt.c | 10 +++++----- arch/s390/kvm/kvm-s390.c | 20 ++++++++++---------- arch/s390/kvm/priv.c | 4 ++-- arch/s390/kvm/pv.c | 6 +++--- arch/s390/kvm/vsie.c | 4 ++-- 7 files changed, 27 insertions(+), 27 deletions(-) diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c index 394a5f53805ba..3765c4223bf94 100644 --- a/arch/s390/kvm/guestdbg.c +++ b/arch/s390/kvm/guestdbg.c @@ -184,7 +184,7 @@ static int __import_wp_info(struct kvm_vcpu *vcpu, if (wp_info->len < 0 || wp_info->len > MAX_WP_SIZE) return -EINVAL; - wp_info->old_data = kmalloc(bp_data->len, GFP_KERNEL); + wp_info->old_data = kmalloc(bp_data->len, GFP_KERNEL_ACCOUNT); if (!wp_info->old_data) return -ENOMEM; /* try to backup the original value */ @@ -234,7 +234,7 @@ int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu, if (nr_wp > 0) { wp_info = kmalloc_array(nr_wp, sizeof(*wp_info), - GFP_KERNEL); + GFP_KERNEL_ACCOUNT); if (!wp_info) { ret = -ENOMEM; goto error; @@ -243,7 +243,7 @@ int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu, if (nr_bp > 0) { bp_info = kmalloc_array(nr_bp, sizeof(*bp_info), - GFP_KERNEL); + GFP_KERNEL_ACCOUNT); if (!bp_info) { ret = -ENOMEM; goto error; @@ -349,7 +349,7 @@ static struct kvm_hw_wp_info_arch *any_wp_changed(struct kvm_vcpu *vcpu) if (!wp_info || !wp_info->old_data || wp_info->len <= 0) continue; - temp = kmalloc(wp_info->len, GFP_KERNEL); + temp = kmalloc(wp_info->len, GFP_KERNEL_ACCOUNT); if (!temp) continue; diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index e7a7c499a73f4..72b25b7cc6aec 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -398,7 +398,7 @@ int handle_sthyi(struct kvm_vcpu *vcpu) if (!kvm_s390_pv_cpu_is_protected(vcpu) && (addr & ~PAGE_MASK)) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - sctns = (void *)get_zeroed_page(GFP_KERNEL); + sctns = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); if (!sctns) return -ENOMEM; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 2f177298c663b..e3183bd059107 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1792,7 +1792,7 @@ struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, goto out; } gisa_out: - tmp_inti = kzalloc(sizeof(*inti), GFP_KERNEL); + tmp_inti = kzalloc(sizeof(*inti), GFP_KERNEL_ACCOUNT); if (tmp_inti) { tmp_inti->type = KVM_S390_INT_IO(1, 0, 0, 0); tmp_inti->io.io_int_word = isc_to_int_word(isc); @@ -2015,7 +2015,7 @@ int kvm_s390_inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti; int rc; - inti = kzalloc(sizeof(*inti), GFP_KERNEL); + inti = kzalloc(sizeof(*inti), GFP_KERNEL_ACCOUNT); if (!inti) return -ENOMEM; @@ -2414,7 +2414,7 @@ static int enqueue_floating_irq(struct kvm_device *dev, return -EINVAL; while (len >= sizeof(struct kvm_s390_irq)) { - inti = kzalloc(sizeof(*inti), GFP_KERNEL); + inti = kzalloc(sizeof(*inti), GFP_KERNEL_ACCOUNT); if (!inti) return -ENOMEM; @@ -2462,7 +2462,7 @@ static int register_io_adapter(struct kvm_device *dev, if (dev->kvm->arch.adapters[adapter_info.id] != NULL) return -EINVAL; - adapter = kzalloc(sizeof(*adapter), GFP_KERNEL); + adapter = kzalloc(sizeof(*adapter), GFP_KERNEL_ACCOUNT); if (!adapter) return -ENOMEM; @@ -3290,7 +3290,7 @@ int kvm_s390_gib_init(u8 nisc) goto out; } - gib = (struct kvm_s390_gib *)get_zeroed_page(GFP_KERNEL | GFP_DMA); + gib = (struct kvm_s390_gib *)get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA); if (!gib) { rc = -ENOMEM; goto out; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 6b74b92c1a586..19804c388d618 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1254,7 +1254,7 @@ static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr) ret = -EBUSY; goto out; } - proc = kzalloc(sizeof(*proc), GFP_KERNEL); + proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT); if (!proc) { ret = -ENOMEM; goto out; @@ -1416,7 +1416,7 @@ static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr) struct kvm_s390_vm_cpu_processor *proc; int ret = 0; - proc = kzalloc(sizeof(*proc), GFP_KERNEL); + proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT); if (!proc) { ret = -ENOMEM; goto out; @@ -1444,7 +1444,7 @@ static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr) struct kvm_s390_vm_cpu_machine *mach; int ret = 0; - mach = kzalloc(sizeof(*mach), GFP_KERNEL); + mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT); if (!mach) { ret = -ENOMEM; goto out; @@ -1812,7 +1812,7 @@ static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX) return -EINVAL; - keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL); + keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT); if (!keys) return -ENOMEM; @@ -1857,7 +1857,7 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX) return -EINVAL; - keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL); + keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT); if (!keys) return -ENOMEM; @@ -2625,7 +2625,7 @@ static void sca_dispose(struct kvm *kvm) int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { - gfp_t alloc_flags = GFP_KERNEL; + gfp_t alloc_flags = GFP_KERNEL_ACCOUNT; int i, rc; char debug_name[16]; static unsigned long sca_offset; @@ -2670,7 +2670,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) BUILD_BUG_ON(sizeof(struct sie_page2) != 4096); kvm->arch.sie_page2 = - (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA); + (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA); if (!kvm->arch.sie_page2) goto out_err; @@ -2900,7 +2900,7 @@ static int sca_switch_to_extended(struct kvm *kvm) if (kvm->arch.use_esca) return 0; - new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO); + new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO); if (!new_sca) return -ENOMEM; @@ -3133,7 +3133,7 @@ void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu) int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu) { - vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL); + vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT); if (!vcpu->arch.sie_block->cbrlo) return -ENOMEM; return 0; @@ -3243,7 +3243,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) int rc; BUILD_BUG_ON(sizeof(struct sie_page) != 4096); - sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL); + sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT); if (!sie_page) return -ENOMEM; diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index cd74989ce0b02..9928f785c6773 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -879,7 +879,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu) switch (fc) { case 1: /* same handling for 1 and 2 */ case 2: - mem = get_zeroed_page(GFP_KERNEL); + mem = get_zeroed_page(GFP_KERNEL_ACCOUNT); if (!mem) goto out_no_data; if (stsi((void *) mem, fc, sel1, sel2)) @@ -888,7 +888,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu) case 3: if (sel1 != 2 || sel2 != 2) goto out_no_data; - mem = get_zeroed_page(GFP_KERNEL); + mem = get_zeroed_page(GFP_KERNEL_ACCOUNT); if (!mem) goto out_no_data; handle_stsi_3_2_2(vcpu, (void *) mem); diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c index eb99e2f95ebed..373b654c84bd1 100644 --- a/arch/s390/kvm/pv.c +++ b/arch/s390/kvm/pv.c @@ -60,7 +60,7 @@ int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc) if (kvm_s390_pv_cpu_get_handle(vcpu)) return -EINVAL; - vcpu->arch.pv.stor_base = __get_free_pages(GFP_KERNEL, + vcpu->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT, get_order(uv_info.guest_cpu_stor_len)); if (!vcpu->arch.pv.stor_base) return -ENOMEM; @@ -72,7 +72,7 @@ int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc) uvcb.stor_origin = (u64)vcpu->arch.pv.stor_base; /* Alloc Secure Instruction Data Area Designation */ - vcpu->arch.sie_block->sidad = __get_free_page(GFP_KERNEL | __GFP_ZERO); + vcpu->arch.sie_block->sidad = __get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); if (!vcpu->arch.sie_block->sidad) { free_pages(vcpu->arch.pv.stor_base, get_order(uv_info.guest_cpu_stor_len)); @@ -120,7 +120,7 @@ static int kvm_s390_pv_alloc_vm(struct kvm *kvm) struct kvm_memory_slot *memslot; kvm->arch.pv.stor_var = NULL; - kvm->arch.pv.stor_base = __get_free_pages(GFP_KERNEL, get_order(base)); + kvm->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT, get_order(base)); if (!kvm->arch.pv.stor_base) return -ENOMEM; diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 4f3cbf6003a9d..c5d0a58b2c29c 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -1234,7 +1234,7 @@ static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr) mutex_lock(&kvm->arch.vsie.mutex); if (kvm->arch.vsie.page_count < nr_vcpus) { - page = alloc_page(GFP_KERNEL | __GFP_ZERO | GFP_DMA); + page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO | GFP_DMA); if (!page) { mutex_unlock(&kvm->arch.vsie.mutex); return ERR_PTR(-ENOMEM); @@ -1336,7 +1336,7 @@ out_put: void kvm_s390_vsie_init(struct kvm *kvm) { mutex_init(&kvm->arch.vsie.mutex); - INIT_RADIX_TREE(&kvm->arch.vsie.addr_to_page, GFP_KERNEL); + INIT_RADIX_TREE(&kvm->arch.vsie.addr_to_page, GFP_KERNEL_ACCOUNT); } /* Destroy the vsie data structures. To be called when a vm is destroyed. */ -- GitLab From 0cd2a787cffb5750ba2e7b5de39a6f3d1dfc17e9 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Mon, 9 Nov 2020 13:14:35 +0100 Subject: [PATCH 0493/1894] s390/gmap: make gmap memcg aware gmap allocations can be attributed to a process. Signed-off-by: Christian Borntraeger Acked-by: Heiko Carstens Acked-by: Janosch Frank Acked-by: Cornelia Huck --- arch/s390/mm/gmap.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index cfb0017f33a70..0160ac97a27df 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -2,7 +2,7 @@ /* * KVM guest address space mapping code * - * Copyright IBM Corp. 2007, 2016, 2018 + * Copyright IBM Corp. 2007, 2020 * Author(s): Martin Schwidefsky * David Hildenbrand * Janosch Frank @@ -56,19 +56,19 @@ static struct gmap *gmap_alloc(unsigned long limit) atype = _ASCE_TYPE_REGION1; etype = _REGION1_ENTRY_EMPTY; } - gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL); + gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL_ACCOUNT); if (!gmap) goto out; INIT_LIST_HEAD(&gmap->crst_list); INIT_LIST_HEAD(&gmap->children); INIT_LIST_HEAD(&gmap->pt_list); - INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL); - INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC); - INIT_RADIX_TREE(&gmap->host_to_rmap, GFP_ATOMIC); + INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL_ACCOUNT); + INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC | __GFP_ACCOUNT); + INIT_RADIX_TREE(&gmap->host_to_rmap, GFP_ATOMIC | __GFP_ACCOUNT); spin_lock_init(&gmap->guest_table_lock); spin_lock_init(&gmap->shadow_lock); refcount_set(&gmap->ref_count, 1); - page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER); + page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER); if (!page) goto out_free; page->index = 0; @@ -309,7 +309,7 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, unsigned long *new; /* since we dont free the gmap table until gmap_free we can unlock */ - page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER); + page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER); if (!page) return -ENOMEM; new = (unsigned long *) page_to_phys(page); @@ -594,7 +594,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) if (pmd_large(*pmd) && !gmap->mm->context.allow_gmap_hpage_1m) return -EFAULT; /* Link gmap segment table entry location to page table. */ - rc = radix_tree_preload(GFP_KERNEL); + rc = radix_tree_preload(GFP_KERNEL_ACCOUNT); if (rc) return rc; ptl = pmd_lock(mm, pmd); @@ -1218,11 +1218,11 @@ static int gmap_protect_rmap(struct gmap *sg, unsigned long raddr, vmaddr = __gmap_translate(parent, paddr); if (IS_ERR_VALUE(vmaddr)) return vmaddr; - rmap = kzalloc(sizeof(*rmap), GFP_KERNEL); + rmap = kzalloc(sizeof(*rmap), GFP_KERNEL_ACCOUNT); if (!rmap) return -ENOMEM; rmap->raddr = raddr; - rc = radix_tree_preload(GFP_KERNEL); + rc = radix_tree_preload(GFP_KERNEL_ACCOUNT); if (rc) { kfree(rmap); return rc; @@ -1741,7 +1741,7 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t, BUG_ON(!gmap_is_shadow(sg)); /* Allocate a shadow region second table */ - page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER); + page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER); if (!page) return -ENOMEM; page->index = r2t & _REGION_ENTRY_ORIGIN; @@ -1825,7 +1825,7 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, BUG_ON(!gmap_is_shadow(sg)); /* Allocate a shadow region second table */ - page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER); + page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER); if (!page) return -ENOMEM; page->index = r3t & _REGION_ENTRY_ORIGIN; @@ -1909,7 +1909,7 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, BUG_ON(!gmap_is_shadow(sg) || (sgt & _REGION3_ENTRY_LARGE)); /* Allocate a shadow segment table */ - page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER); + page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER); if (!page) return -ENOMEM; page->index = sgt & _REGION_ENTRY_ORIGIN; @@ -2116,7 +2116,7 @@ int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte) parent = sg->parent; prot = (pte_val(pte) & _PAGE_PROTECT) ? PROT_READ : PROT_WRITE; - rmap = kzalloc(sizeof(*rmap), GFP_KERNEL); + rmap = kzalloc(sizeof(*rmap), GFP_KERNEL_ACCOUNT); if (!rmap) return -ENOMEM; rmap->raddr = (saddr & PAGE_MASK) | _SHADOW_RMAP_PGTABLE; @@ -2128,7 +2128,7 @@ int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte) rc = vmaddr; break; } - rc = radix_tree_preload(GFP_KERNEL); + rc = radix_tree_preload(GFP_KERNEL_ACCOUNT); if (rc) break; rc = -EAGAIN; -- GitLab From efaa83a3736d392c61499ee3aad8690a142675cd Mon Sep 17 00:00:00 2001 From: Collin Walling Date: Mon, 7 Dec 2020 10:41:25 -0500 Subject: [PATCH 0494/1894] KVM: selftests: sync_regs test for diag318 The DIAGNOSE 0x0318 instruction, unique to s390x, is a privileged call that must be intercepted via SIE, handled in userspace, and the information set by the instruction is communicated back to KVM. To test the instruction interception, an ad-hoc handler is defined which simply has a VM execute the instruction and then userspace will extract the necessary info. The handler is defined such that the instruction invocation occurs only once. It is up to the caller to determine how the info returned by this handler should be used. The diag318 info is communicated from userspace to KVM via a sync_regs call. This is tested during a sync_regs test, where the diag318 info is requested via the handler, then the info is stored in the appropriate register in KVM via a sync registers call. If KVM does not support diag318, then the tests will print a message stating that diag318 was skipped, and the asserts will simply test against a value of 0. Signed-off-by: Collin Walling Link: https://lore.kernel.org/r/20201207154125.10322-1-walling@linux.ibm.com Acked-by: Janosch Frank Acked-by: Cornelia Huck Reviewed-by: Christian Borntraeger Signed-off-by: Christian Borntraeger --- tools/testing/selftests/kvm/Makefile | 2 +- .../kvm/include/s390x/diag318_test_handler.h | 13 +++ .../kvm/lib/s390x/diag318_test_handler.c | 82 +++++++++++++++++++ .../selftests/kvm/s390x/sync_regs_test.c | 16 +++- 4 files changed, 111 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h create mode 100644 tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 30afbad36cd55..1b9c257fca5e0 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -36,7 +36,7 @@ endif LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c -LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c +LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test diff --git a/tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h b/tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h new file mode 100644 index 0000000000000..b0ed71302722a --- /dev/null +++ b/tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * + * Test handler for the s390x DIAGNOSE 0x0318 instruction. + * + * Copyright (C) 2020, IBM + */ + +#ifndef SELFTEST_KVM_DIAG318_TEST_HANDLER +#define SELFTEST_KVM_DIAG318_TEST_HANDLER + +uint64_t get_diag318_info(void); + +#endif diff --git a/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c b/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c new file mode 100644 index 0000000000000..86b9e611ad871 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Test handler for the s390x DIAGNOSE 0x0318 instruction. + * + * Copyright (C) 2020, IBM + */ + +#include "test_util.h" +#include "kvm_util.h" + +#define VCPU_ID 6 + +#define ICPT_INSTRUCTION 0x04 +#define IPA0_DIAG 0x8300 + +static void guest_code(void) +{ + uint64_t diag318_info = 0x12345678; + + asm volatile ("diag %0,0,0x318\n" : : "d" (diag318_info)); +} + +/* + * The DIAGNOSE 0x0318 instruction call must be handled via userspace. As such, + * we create an ad-hoc VM here to handle the instruction then extract the + * necessary data. It is up to the caller to decide what to do with that data. + */ +static uint64_t diag318_handler(void) +{ + struct kvm_vm *vm; + struct kvm_run *run; + uint64_t reg; + uint64_t diag318_info; + + vm = vm_create_default(VCPU_ID, 0, guest_code); + vcpu_run(vm, VCPU_ID); + run = vcpu_state(vm, VCPU_ID); + + TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC, + "DIAGNOSE 0x0318 instruction was not intercepted"); + TEST_ASSERT(run->s390_sieic.icptcode == ICPT_INSTRUCTION, + "Unexpected intercept code: 0x%x", run->s390_sieic.icptcode); + TEST_ASSERT((run->s390_sieic.ipa & 0xff00) == IPA0_DIAG, + "Unexpected IPA0 code: 0x%x", (run->s390_sieic.ipa & 0xff00)); + + reg = (run->s390_sieic.ipa & 0x00f0) >> 4; + diag318_info = run->s.regs.gprs[reg]; + + TEST_ASSERT(diag318_info != 0, "DIAGNOSE 0x0318 info not set"); + + kvm_vm_free(vm); + + return diag318_info; +} + +uint64_t get_diag318_info(void) +{ + static uint64_t diag318_info; + static bool printed_skip; + + /* + * If KVM does not support diag318, then return 0 to + * ensure tests do not break. + */ + if (!kvm_check_cap(KVM_CAP_S390_DIAG318)) { + if (!printed_skip) { + fprintf(stdout, "KVM_CAP_S390_DIAG318 not supported. " + "Skipping diag318 test.\n"); + printed_skip = true; + } + return 0; + } + + /* + * If a test has previously requested the diag318 info, + * then don't bother spinning up a temporary VM again. + */ + if (!diag318_info) + diag318_info = diag318_handler(); + + return diag318_info; +} diff --git a/tools/testing/selftests/kvm/s390x/sync_regs_test.c b/tools/testing/selftests/kvm/s390x/sync_regs_test.c index 5731ccf34917f..caf7b8859a94a 100644 --- a/tools/testing/selftests/kvm/s390x/sync_regs_test.c +++ b/tools/testing/selftests/kvm/s390x/sync_regs_test.c @@ -20,6 +20,7 @@ #include "test_util.h" #include "kvm_util.h" +#include "diag318_test_handler.h" #define VCPU_ID 5 @@ -70,7 +71,7 @@ static void compare_sregs(struct kvm_sregs *left, struct kvm_sync_regs *right) #undef REG_COMPARE -#define TEST_SYNC_FIELDS (KVM_SYNC_GPRS|KVM_SYNC_ACRS|KVM_SYNC_CRS) +#define TEST_SYNC_FIELDS (KVM_SYNC_GPRS|KVM_SYNC_ACRS|KVM_SYNC_CRS|KVM_SYNC_DIAG318) #define INVALID_SYNC_FIELD 0x80000000 int main(int argc, char *argv[]) @@ -152,6 +153,12 @@ int main(int argc, char *argv[]) run->kvm_valid_regs = TEST_SYNC_FIELDS; run->kvm_dirty_regs = KVM_SYNC_GPRS | KVM_SYNC_ACRS; + + if (get_diag318_info() > 0) { + run->s.regs.diag318 = get_diag318_info(); + run->kvm_dirty_regs |= KVM_SYNC_DIAG318; + } + rv = _vcpu_run(vm, VCPU_ID); TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv); TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC, @@ -164,6 +171,9 @@ int main(int argc, char *argv[]) TEST_ASSERT(run->s.regs.acrs[0] == 1 << 11, "acr0 sync regs value incorrect 0x%x.", run->s.regs.acrs[0]); + TEST_ASSERT(run->s.regs.diag318 == get_diag318_info(), + "diag318 sync regs value incorrect 0x%llx.", + run->s.regs.diag318); vcpu_regs_get(vm, VCPU_ID, ®s); compare_regs(®s, &run->s.regs); @@ -177,6 +187,7 @@ int main(int argc, char *argv[]) run->kvm_valid_regs = TEST_SYNC_FIELDS; run->kvm_dirty_regs = 0; run->s.regs.gprs[11] = 0xDEADBEEF; + run->s.regs.diag318 = 0x4B1D; rv = _vcpu_run(vm, VCPU_ID); TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv); TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC, @@ -186,6 +197,9 @@ int main(int argc, char *argv[]) TEST_ASSERT(run->s.regs.gprs[11] != 0xDEADBEEF, "r11 sync regs value incorrect 0x%llx.", run->s.regs.gprs[11]); + TEST_ASSERT(run->s.regs.diag318 != 0x4B1D, + "diag318 sync regs value incorrect 0x%llx.", + run->s.regs.diag318); kvm_vm_free(vm); -- GitLab From 50a05be484cb70d9dfb55fa5a6ed57eab193901f Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 25 Nov 2020 10:06:58 +0100 Subject: [PATCH 0495/1894] KVM: s390: track synchronous pfault events in kvm_stat Right now we do count pfault (pseudo page faults aka async page faults start and completion events). What we do not count is, if an async page fault would have been possible by the host, but it was disabled by the guest (e.g. interrupts off, pfault disabled, secure execution....). Let us count those as well in the pfault_sync counter. Signed-off-by: Christian Borntraeger Reviewed-by: David Hildenbrand Reviewed-by: Cornelia Huck Link: https://lore.kernel.org/r/20201125090658.38463-1-borntraeger@de.ibm.com --- arch/s390/include/asm/kvm_host.h | 1 + arch/s390/kvm/kvm-s390.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 463c24e26000f..74f9a036bab2c 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -459,6 +459,7 @@ struct kvm_vcpu_stat { u64 diagnose_308; u64 diagnose_500; u64 diagnose_other; + u64 pfault_sync; }; #define PGM_OPERATION 0x01 diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 19804c388d618..065f94f22fd38 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -60,6 +60,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { VCPU_STAT("userspace_handled", exit_userspace), VCPU_STAT("exit_null", exit_null), + VCPU_STAT("pfault_sync", pfault_sync), VCPU_STAT("exit_validity", exit_validity), VCPU_STAT("exit_stop_request", exit_stop_request), VCPU_STAT("exit_external_request", exit_external_request), @@ -4111,6 +4112,7 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) current->thread.gmap_pfault = 0; if (kvm_arch_setup_async_pf(vcpu)) return 0; + vcpu->stat.pfault_sync++; return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1); } return vcpu_post_run_fault_in_sie(vcpu); -- GitLab From 43ffe817bfe3871ffbaa1e98952a2a01b140e71e Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Thu, 10 Dec 2020 15:10:56 +0530 Subject: [PATCH 0496/1894] arm64: dts: bitmain: Use generic "ngpios" rather than "snps,nr-gpios" This is to remove similar errors as below: OF: /.../gpio-port@0: could not find phandle Commit 7569486d79ae ("gpio: dwapb: Add ngpios DT-property support") explained the reason of above errors well and added the generic "ngpios" property, let's use it. Signed-off-by: Jisheng Zhang Signed-off-by: Manivannan Sadhasivam Reviewed-by: Linus Walleij Acked-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20201210094056.54553-1-manivannan.sadhasivam@linaro.org' Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/bitmain/bm1880.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/bitmain/bm1880.dtsi b/arch/arm64/boot/dts/bitmain/bm1880.dtsi index fa6e6905f5888..53a9b76057aa1 100644 --- a/arch/arm64/boot/dts/bitmain/bm1880.dtsi +++ b/arch/arm64/boot/dts/bitmain/bm1880.dtsi @@ -127,7 +127,7 @@ compatible = "snps,dw-apb-gpio-port"; gpio-controller; #gpio-cells = <2>; - snps,nr-gpios = <32>; + ngpios = <32>; reg = <0>; interrupt-controller; #interrupt-cells = <2>; @@ -145,7 +145,7 @@ compatible = "snps,dw-apb-gpio-port"; gpio-controller; #gpio-cells = <2>; - snps,nr-gpios = <32>; + ngpios = <32>; reg = <0>; interrupt-controller; #interrupt-cells = <2>; @@ -163,7 +163,7 @@ compatible = "snps,dw-apb-gpio-port"; gpio-controller; #gpio-cells = <2>; - snps,nr-gpios = <8>; + ngpios = <8>; reg = <0>; interrupt-controller; #interrupt-cells = <2>; -- GitLab From 967069aa4de65fc525c67b4b7b72b6ded6bd92a0 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 3 Nov 2020 17:24:28 +0100 Subject: [PATCH 0497/1894] clk: pwm: drop of_match_ptr from of_device_id table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver can match only via the DT table so the table should be always used and the of_match_ptr does not have any sense (this also allows ACPI matching via PRP0001, even though it might be not relevant here). This fixes compile warning (!CONFIG_OF && !CONFIG_MODULES): drivers/clk/clk-pwm.c:139:34: warning: ‘clk_pwm_dt_ids’ defined but not used [-Wunused-const-variable=] Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20201103162435.13689-1-krzk@kernel.org Signed-off-by: Stephen Boyd --- drivers/clk/clk-pwm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/clk-pwm.c b/drivers/clk/clk-pwm.c index 86f2e2d3fc022..da2c8eddfd9ff 100644 --- a/drivers/clk/clk-pwm.c +++ b/drivers/clk/clk-pwm.c @@ -147,7 +147,7 @@ static struct platform_driver clk_pwm_driver = { .remove = clk_pwm_remove, .driver = { .name = "pwm-clock", - .of_match_table = of_match_ptr(clk_pwm_dt_ids), + .of_match_table = clk_pwm_dt_ids, }, }; -- GitLab From 975d25cbb505e92644de2e603965bb5e17215012 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 3 Nov 2020 17:24:35 +0100 Subject: [PATCH 0498/1894] clk: scpi: mark scpi_clk_match as maybe unused MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The scpi_clk_match (struct of_device_id) is referenced only with CONFIG_OF builds thus mark it as __maybe_unused: drivers/clk/clk-scpi.c:132:34: warning: ‘scpi_clk_match’ defined but not used [-Wunused-const-variable=] Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20201103162435.13689-8-krzk@kernel.org Acked-by: Sudeep Holla Signed-off-by: Stephen Boyd --- drivers/clk/clk-scpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/clk-scpi.c b/drivers/clk/clk-scpi.c index 5a9b140dd8c80..a39af7616b13c 100644 --- a/drivers/clk/clk-scpi.c +++ b/drivers/clk/clk-scpi.c @@ -129,7 +129,7 @@ static const struct clk_ops scpi_dvfs_ops = { .set_rate = scpi_dvfs_set_rate, }; -static const struct of_device_id scpi_clk_match[] = { +static const struct of_device_id scpi_clk_match[] __maybe_unused = { { .compatible = "arm,scpi-dvfs-clocks", .data = &scpi_dvfs_ops, }, { .compatible = "arm,scpi-variable-clocks", .data = &scpi_clk_ops, }, {} -- GitLab From 6d37a8d192830267e6b10a6d57ae28d2e89097e7 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 10 Dec 2020 10:22:38 -0800 Subject: [PATCH 0499/1894] clk: qcom: gcc-sc7180: Use floor ops for sdcc clks I would repeat the same commit message that was in commit 5e4b7e82d497 ("clk: qcom: gcc-sdm845: Use floor ops for sdcc clks") but it seems silly to do so when you could just go read that commit. NOTE: this is actually extra terrible because we're missing the 50 MHz rate in the table (see the next patch AKA ("clk: qcom: gcc-sc7180: Add 50 MHz clock rate for SDC2")). That means then when you run an older SD card it'll try to clock it at 100 MHz when it's only specced to run at 50 MHz max. As you can probably guess that doesn't work super well. Signed-off-by: Douglas Anderson Fixes: 17269568f726 ("clk: qcom: Add Global Clock controller (GCC) driver for SC7180") Link: https://lore.kernel.org/r/20201210102234.1.I096779f219625148900fc984dd0084ed1ba87c7f@changeid Signed-off-by: Stephen Boyd --- drivers/clk/qcom/gcc-sc7180.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/qcom/gcc-sc7180.c b/drivers/clk/qcom/gcc-sc7180.c index 68d8f7aaf64e1..b080739ab0c33 100644 --- a/drivers/clk/qcom/gcc-sc7180.c +++ b/drivers/clk/qcom/gcc-sc7180.c @@ -642,7 +642,7 @@ static struct clk_rcg2 gcc_sdcc1_ice_core_clk_src = { .name = "gcc_sdcc1_ice_core_clk_src", .parent_data = gcc_parent_data_0, .num_parents = 4, - .ops = &clk_rcg2_ops, + .ops = &clk_rcg2_floor_ops, }, }; @@ -666,7 +666,7 @@ static struct clk_rcg2 gcc_sdcc2_apps_clk_src = { .name = "gcc_sdcc2_apps_clk_src", .parent_data = gcc_parent_data_5, .num_parents = 5, - .ops = &clk_rcg2_ops, + .ops = &clk_rcg2_floor_ops, }, }; -- GitLab From 043577518f027544e8f9e9568140a1fe87ee01a0 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 10 Dec 2020 10:22:39 -0800 Subject: [PATCH 0500/1894] clk: qcom: gcc-sc7180: Add 50 MHz clock rate for SDC2 50 MHz is an incredibly common clock rate for SD cards to run at. It's "high speed" mode in SD (not very fast these days, but it used to be) or: #define HIGH_SPEED_MAX_DTR 50000000 If we don't support this then older "high speed" cards can only run at 25 MHz or at half their normal speed. There doesn't seem to be any reason to skip this clock rate, so add it. Fixes: 17269568f726 ("clk: qcom: Add Global Clock controller (GCC) driver for SC7180") Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20201210102234.2.I26dcc0cee374f5571d9929c9985f463773167e68@changeid Reviewed-by: Bjorn Andersson Signed-off-by: Stephen Boyd --- drivers/clk/qcom/gcc-sc7180.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/qcom/gcc-sc7180.c b/drivers/clk/qcom/gcc-sc7180.c index b080739ab0c33..d82d725ac2319 100644 --- a/drivers/clk/qcom/gcc-sc7180.c +++ b/drivers/clk/qcom/gcc-sc7180.c @@ -651,6 +651,7 @@ static const struct freq_tbl ftbl_gcc_sdcc2_apps_clk_src[] = { F(9600000, P_BI_TCXO, 2, 0, 0), F(19200000, P_BI_TCXO, 1, 0, 0), F(25000000, P_GPLL0_OUT_EVEN, 12, 0, 0), + F(50000000, P_GPLL0_OUT_EVEN, 6, 0, 0), F(100000000, P_GPLL0_OUT_EVEN, 3, 0, 0), F(202000000, P_GPLL7_OUT_MAIN, 4, 0, 0), { } -- GitLab From ce8c195e652fa69f669789de37712a519b09155f Mon Sep 17 00:00:00 2001 From: Taniya Das Date: Fri, 20 Nov 2020 22:10:17 +0530 Subject: [PATCH 0501/1894] clk: qcom: lpasscc: Introduce pm autosuspend for SC7180 The LPASSCC driver's suspend/resume is invoked multiple number of times and thus allow the device to autosuspend for 500ms. Signed-off-by: Taniya Das Link: https://lore.kernel.org/r/1605890417-721-1-git-send-email-tdas@codeaurora.org Signed-off-by: Stephen Boyd --- drivers/clk/qcom/lpasscorecc-sc7180.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/clk/qcom/lpasscorecc-sc7180.c b/drivers/clk/qcom/lpasscorecc-sc7180.c index 9081649f476f5..2e0ecc38efdd1 100644 --- a/drivers/clk/qcom/lpasscorecc-sc7180.c +++ b/drivers/clk/qcom/lpasscorecc-sc7180.c @@ -370,7 +370,10 @@ static int lpass_create_pm_clks(struct platform_device *pdev) { int ret; + pm_runtime_use_autosuspend(&pdev->dev); + pm_runtime_set_autosuspend_delay(&pdev->dev, 500); pm_runtime_enable(&pdev->dev); + ret = devm_add_action_or_reset(&pdev->dev, lpass_pm_runtime_disable, &pdev->dev); if (ret) return ret; @@ -423,7 +426,12 @@ static int lpass_core_cc_sc7180_probe(struct platform_device *pdev) clk_fabia_pll_configure(&lpass_lpaaudio_dig_pll, regmap, &lpass_lpaaudio_dig_pll_config); - return qcom_cc_really_probe(pdev, &lpass_core_cc_sc7180_desc, regmap); + ret = qcom_cc_really_probe(pdev, &lpass_core_cc_sc7180_desc, regmap); + + pm_runtime_mark_last_busy(&pdev->dev); + pm_runtime_put_autosuspend(&pdev->dev); + + return ret; } static int lpass_hm_core_probe(struct platform_device *pdev) -- GitLab From eb50f3f42cb6de3820736206ac5019583f6856b1 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Tue, 8 Dec 2020 12:16:58 +0530 Subject: [PATCH 0502/1894] dt-bindings: clock: Add RPMHCC bindings for SM8350 Add bindings and update documentation for clock rpmh driver on SM8350. Reviewed-by: Bjorn Andersson Signed-off-by: Vinod Koul Link: https://lore.kernel.org/r/20201208064702.3654324-2-vkoul@kernel.org Acked-by: Rob Herring Signed-off-by: Stephen Boyd --- Documentation/devicetree/bindings/clock/qcom,rpmhcc.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/clock/qcom,rpmhcc.yaml b/Documentation/devicetree/bindings/clock/qcom,rpmhcc.yaml index a54930f111ba5..12c9cbc0ebf92 100644 --- a/Documentation/devicetree/bindings/clock/qcom,rpmhcc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,rpmhcc.yaml @@ -22,6 +22,7 @@ properties: - qcom,sdx55-rpmh-clk - qcom,sm8150-rpmh-clk - qcom,sm8250-rpmh-clk + - qcom,sm8350-rpmh-clk clocks: maxItems: 1 -- GitLab From f7b36cc19efb4765467af7cce3a91269fbb529b1 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Tue, 8 Dec 2020 12:16:59 +0530 Subject: [PATCH 0503/1894] clk: qcom: rpmh: add support for SM8350 rpmh clocks This adds the RPMH clocks present in SM8350 SoC Reviewed-by: Bjorn Andersson Signed-off-by: Vinod Koul Link: https://lore.kernel.org/r/20201208064702.3654324-3-vkoul@kernel.org [sboyd@kernel.org: Move sdx55 to the right place] Signed-off-by: Stephen Boyd --- drivers/clk/qcom/clk-rpmh.c | 58 +++++++++++++++++++++------ include/dt-bindings/clock/qcom,rpmh.h | 8 ++++ 2 files changed, 54 insertions(+), 12 deletions(-) diff --git a/drivers/clk/qcom/clk-rpmh.c b/drivers/clk/qcom/clk-rpmh.c index f47595a048853..6a2a13c5058e3 100644 --- a/drivers/clk/qcom/clk-rpmh.c +++ b/drivers/clk/qcom/clk-rpmh.c @@ -373,6 +373,25 @@ static const struct clk_rpmh_desc clk_rpmh_sdm845 = { .num_clks = ARRAY_SIZE(sdm845_rpmh_clocks), }; +DEFINE_CLK_RPMH_VRM(sdx55, rf_clk1, rf_clk1_ao, "rfclkd1", 1); +DEFINE_CLK_RPMH_VRM(sdx55, rf_clk2, rf_clk2_ao, "rfclkd2", 1); +DEFINE_CLK_RPMH_BCM(sdx55, qpic_clk, "QP0"); + +static struct clk_hw *sdx55_rpmh_clocks[] = { + [RPMH_CXO_CLK] = &sdm845_bi_tcxo.hw, + [RPMH_CXO_CLK_A] = &sdm845_bi_tcxo_ao.hw, + [RPMH_RF_CLK1] = &sdx55_rf_clk1.hw, + [RPMH_RF_CLK1_A] = &sdx55_rf_clk1_ao.hw, + [RPMH_RF_CLK2] = &sdx55_rf_clk2.hw, + [RPMH_RF_CLK2_A] = &sdx55_rf_clk2_ao.hw, + [RPMH_QPIC_CLK] = &sdx55_qpic_clk.hw, +}; + +static const struct clk_rpmh_desc clk_rpmh_sdx55 = { + .clks = sdx55_rpmh_clocks, + .num_clks = ARRAY_SIZE(sdx55_rpmh_clocks), +}; + static struct clk_hw *sm8150_rpmh_clocks[] = { [RPMH_CXO_CLK] = &sdm845_bi_tcxo.hw, [RPMH_CXO_CLK_A] = &sdm845_bi_tcxo_ao.hw, @@ -434,23 +453,37 @@ static const struct clk_rpmh_desc clk_rpmh_sm8250 = { .num_clks = ARRAY_SIZE(sm8250_rpmh_clocks), }; -DEFINE_CLK_RPMH_VRM(sdx55, rf_clk1, rf_clk1_ao, "rfclkd1", 1); -DEFINE_CLK_RPMH_VRM(sdx55, rf_clk2, rf_clk2_ao, "rfclkd2", 1); -DEFINE_CLK_RPMH_BCM(sdx55, qpic_clk, "QP0"); +DEFINE_CLK_RPMH_VRM(sm8350, div_clk1, div_clk1_ao, "divclka1", 2); +DEFINE_CLK_RPMH_VRM(sm8350, rf_clk4, rf_clk4_ao, "rfclka4", 1); +DEFINE_CLK_RPMH_VRM(sm8350, rf_clk5, rf_clk5_ao, "rfclka5", 1); +DEFINE_CLK_RPMH_BCM(sm8350, pka, "PKA0"); +DEFINE_CLK_RPMH_BCM(sm8350, hwkm, "HK0"); -static struct clk_hw *sdx55_rpmh_clocks[] = { +static struct clk_hw *sm8350_rpmh_clocks[] = { [RPMH_CXO_CLK] = &sdm845_bi_tcxo.hw, [RPMH_CXO_CLK_A] = &sdm845_bi_tcxo_ao.hw, - [RPMH_RF_CLK1] = &sdx55_rf_clk1.hw, - [RPMH_RF_CLK1_A] = &sdx55_rf_clk1_ao.hw, - [RPMH_RF_CLK2] = &sdx55_rf_clk2.hw, - [RPMH_RF_CLK2_A] = &sdx55_rf_clk2_ao.hw, - [RPMH_QPIC_CLK] = &sdx55_qpic_clk.hw, + [RPMH_DIV_CLK1] = &sm8350_div_clk1.hw, + [RPMH_DIV_CLK1_A] = &sm8350_div_clk1_ao.hw, + [RPMH_LN_BB_CLK1] = &sm8250_ln_bb_clk1.hw, + [RPMH_LN_BB_CLK1_A] = &sm8250_ln_bb_clk1_ao.hw, + [RPMH_LN_BB_CLK2] = &sdm845_ln_bb_clk2.hw, + [RPMH_LN_BB_CLK2_A] = &sdm845_ln_bb_clk2_ao.hw, + [RPMH_RF_CLK1] = &sdm845_rf_clk1.hw, + [RPMH_RF_CLK1_A] = &sdm845_rf_clk1_ao.hw, + [RPMH_RF_CLK3] = &sdm845_rf_clk3.hw, + [RPMH_RF_CLK3_A] = &sdm845_rf_clk3_ao.hw, + [RPMH_RF_CLK4] = &sm8350_rf_clk4.hw, + [RPMH_RF_CLK4_A] = &sm8350_rf_clk4_ao.hw, + [RPMH_RF_CLK5] = &sm8350_rf_clk5.hw, + [RPMH_RF_CLK5_A] = &sm8350_rf_clk5_ao.hw, + [RPMH_IPA_CLK] = &sdm845_ipa.hw, + [RPMH_PKA_CLK] = &sm8350_pka.hw, + [RPMH_HWKM_CLK] = &sm8350_hwkm.hw, }; -static const struct clk_rpmh_desc clk_rpmh_sdx55 = { - .clks = sdx55_rpmh_clocks, - .num_clks = ARRAY_SIZE(sdx55_rpmh_clocks), +static const struct clk_rpmh_desc clk_rpmh_sm8350 = { + .clks = sm8350_rpmh_clocks, + .num_clks = ARRAY_SIZE(sm8350_rpmh_clocks), }; static struct clk_hw *of_clk_rpmh_hw_get(struct of_phandle_args *clkspec, @@ -541,6 +574,7 @@ static const struct of_device_id clk_rpmh_match_table[] = { { .compatible = "qcom,sdx55-rpmh-clk", .data = &clk_rpmh_sdx55}, { .compatible = "qcom,sm8150-rpmh-clk", .data = &clk_rpmh_sm8150}, { .compatible = "qcom,sm8250-rpmh-clk", .data = &clk_rpmh_sm8250}, + { .compatible = "qcom,sm8350-rpmh-clk", .data = &clk_rpmh_sm8350}, { } }; MODULE_DEVICE_TABLE(of, clk_rpmh_match_table); diff --git a/include/dt-bindings/clock/qcom,rpmh.h b/include/dt-bindings/clock/qcom,rpmh.h index 0662a583e93be..583a99161aaab 100644 --- a/include/dt-bindings/clock/qcom,rpmh.h +++ b/include/dt-bindings/clock/qcom,rpmh.h @@ -23,5 +23,13 @@ #define RPMH_LN_BB_CLK1_A 14 #define RPMH_CE_CLK 15 #define RPMH_QPIC_CLK 16 +#define RPMH_DIV_CLK1 17 +#define RPMH_DIV_CLK1_A 18 +#define RPMH_RF_CLK4 19 +#define RPMH_RF_CLK4_A 20 +#define RPMH_RF_CLK5 21 +#define RPMH_RF_CLK5_A 22 +#define RPMH_PKA_CLK 23 +#define RPMH_HWKM_CLK 24 #endif -- GitLab From 5bf5861d6ea6c3f4b38fc8fda2062b2dc44ac63d Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 26 Oct 2020 01:42:12 +0300 Subject: [PATCH 0504/1894] clk: tegra: Fix duplicated SE clock entry The periph_clks[] array contains duplicated entry for Security Engine clock which was meant to be defined for T210, but it wasn't added properly. This patch corrects the T210 SE entry and fixes the following error message on T114/T124: "Tegra clk 127: register failed with -17". Fixes: dc37fec48314 ("clk: tegra: periph: Add new periph clks and muxes for Tegra210") Tested-by Nicolas Chauvet Reported-by Nicolas Chauvet Signed-off-by: Dmitry Osipenko Link: https://lore.kernel.org/r/20201025224212.7790-1-digetx@gmail.com Acked-by: Thierry Reding Signed-off-by: Stephen Boyd --- drivers/clk/tegra/clk-id.h | 1 + drivers/clk/tegra/clk-tegra-periph.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/clk/tegra/clk-id.h b/drivers/clk/tegra/clk-id.h index ff7da2d3e94d8..24413812ec5b6 100644 --- a/drivers/clk/tegra/clk-id.h +++ b/drivers/clk/tegra/clk-id.h @@ -227,6 +227,7 @@ enum clk_id { tegra_clk_sdmmc4, tegra_clk_sdmmc4_8, tegra_clk_se, + tegra_clk_se_10, tegra_clk_soc_therm, tegra_clk_soc_therm_8, tegra_clk_sor0, diff --git a/drivers/clk/tegra/clk-tegra-periph.c b/drivers/clk/tegra/clk-tegra-periph.c index 2b2a3b81c16ba..60cc34f90cb9b 100644 --- a/drivers/clk/tegra/clk-tegra-periph.c +++ b/drivers/clk/tegra/clk-tegra-periph.c @@ -630,7 +630,7 @@ static struct tegra_periph_init_data periph_clks[] = { INT8("host1x", mux_pllm_pllc2_c_c3_pllp_plla, CLK_SOURCE_HOST1X, 28, 0, tegra_clk_host1x_8), INT8("host1x", mux_pllc4_out1_pllc_pllc4_out2_pllp_clkm_plla_pllc4_out0, CLK_SOURCE_HOST1X, 28, 0, tegra_clk_host1x_9), INT8("se", mux_pllp_pllc2_c_c3_pllm_clkm, CLK_SOURCE_SE, 127, TEGRA_PERIPH_ON_APB, tegra_clk_se), - INT8("se", mux_pllp_pllc2_c_c3_clkm, CLK_SOURCE_SE, 127, TEGRA_PERIPH_ON_APB, tegra_clk_se), + INT8("se", mux_pllp_pllc2_c_c3_clkm, CLK_SOURCE_SE, 127, TEGRA_PERIPH_ON_APB, tegra_clk_se_10), INT8("2d", mux_pllm_pllc2_c_c3_pllp_plla, CLK_SOURCE_2D, 21, 0, tegra_clk_gr2d_8), INT8("3d", mux_pllm_pllc2_c_c3_pllp_plla, CLK_SOURCE_3D, 24, 0, tegra_clk_gr3d_8), INT8("vic03", mux_pllm_pllc_pllp_plla_pllc2_c3_clkm, CLK_SOURCE_VIC03, 178, 0, tegra_clk_vic03), -- GitLab From 6ae9b5ffcaeba64c290dfb8bd7b0194b1fdf0c92 Mon Sep 17 00:00:00 2001 From: Prashant Malani Date: Thu, 5 Nov 2020 18:03:05 -0800 Subject: [PATCH 0505/1894] platform/chrome: cros_ec_typec: Tolerate unrecognized mux flags On occasion, the Chrome Embedded Controller (EC) can send a mux configuration which doesn't map to a particular data mode. For instance, dedicated Type C chargers, when connected, may cause only USB_PD_MUX_POLARITY_INVERTED to be set. This is a valid flag combination and should not lead to a driver abort. Modify the mux configuration handling to not return an error when an unrecognized mux flag combination is encountered. Concordantly, make the ensuing print a debug level print so as to not pollute the kernel logs. Cc: Keith Short Signed-off-by: Prashant Malani Acked-by: Heikki Krogerus Signed-off-by: Benson Leung Link: https://lore.kernel.org/r/20201106020305.767202-1-pmalani@chromium.org --- drivers/platform/chrome/cros_ec_typec.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/platform/chrome/cros_ec_typec.c b/drivers/platform/chrome/cros_ec_typec.c index ce031a10eb1b3..5b8db02ab84a9 100644 --- a/drivers/platform/chrome/cros_ec_typec.c +++ b/drivers/platform/chrome/cros_ec_typec.c @@ -537,10 +537,9 @@ static int cros_typec_configure_mux(struct cros_typec_data *typec, int port_num, port->state.mode = TYPEC_STATE_USB; ret = typec_mux_set(port->mux, &port->state); } else { - dev_info(typec->dev, - "Unsupported mode requested, mux flags: %x\n", - mux_flags); - ret = -ENOTSUPP; + dev_dbg(typec->dev, + "Unrecognized mode requested, mux flags: %x\n", + mux_flags); } return ret; -- GitLab From edf7ddbf1c5eb98b720b063b73e20e8a4a1ce673 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 31 Oct 2020 21:40:21 -0700 Subject: [PATCH 0506/1894] fs/namespace.c: WARN if mnt_count has become negative Missing calls to mntget() (or equivalently, too many calls to mntput()) are hard to detect because mntput() delays freeing mounts using task_work_add(), then again using call_rcu(). As a result, mnt_count can often be decremented to -1 without getting a KASAN use-after-free report. Such cases are still bugs though, and they point to real use-after-frees being possible. For an example of this, see the bug fixed by commit 1b0b9cc8d379 ("vfs: fsmount: add missing mntget()"), discussed at https://lkml.kernel.org/linux-fsdevel/20190605135401.GB30925@xxxxxxxxxxxxxxxxxxxxxxxxx/T/#u. This bug *should* have been trivial to find. But actually, it wasn't found until syzkaller happened to use fchdir() to manipulate the reference count just right for the bug to be noticeable. Address this by making mntput_no_expire() issue a WARN if mnt_count has become negative. Suggested-by: Miklos Szeredi Signed-off-by: Eric Biggers Signed-off-by: Al Viro --- fs/namespace.c | 9 ++++++--- fs/pnode.h | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index cebaa3e817940..93006abe7946a 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -156,10 +156,10 @@ static inline void mnt_add_count(struct mount *mnt, int n) /* * vfsmount lock must be held for write */ -unsigned int mnt_get_count(struct mount *mnt) +int mnt_get_count(struct mount *mnt) { #ifdef CONFIG_SMP - unsigned int count = 0; + int count = 0; int cpu; for_each_possible_cpu(cpu) { @@ -1139,6 +1139,7 @@ static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput); static void mntput_no_expire(struct mount *mnt) { LIST_HEAD(list); + int count; rcu_read_lock(); if (likely(READ_ONCE(mnt->mnt_ns))) { @@ -1162,7 +1163,9 @@ static void mntput_no_expire(struct mount *mnt) */ smp_mb(); mnt_add_count(mnt, -1); - if (mnt_get_count(mnt)) { + count = mnt_get_count(mnt); + if (count != 0) { + WARN_ON(count < 0); rcu_read_unlock(); unlock_mount_hash(); return; diff --git a/fs/pnode.h b/fs/pnode.h index 49a058c73e4c7..26f74e092bd98 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -44,7 +44,7 @@ int propagate_mount_busy(struct mount *, int); void propagate_mount_unlock(struct mount *); void mnt_release_group_id(struct mount *); int get_dominating_id(struct mount *mnt, const struct path *root); -unsigned int mnt_get_count(struct mount *mnt); +int mnt_get_count(struct mount *mnt); void mnt_set_mountpoint(struct mount *, struct mountpoint *, struct mount *); void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, -- GitLab From 88149082bb8ef31b289673669e080ec6a00c2e59 Mon Sep 17 00:00:00 2001 From: Hao Li Date: Tue, 8 Dec 2020 10:08:43 +0800 Subject: [PATCH 0507/1894] fs: Handle I_DONTCACHE in iput_final() instead of generic_drop_inode() If generic_drop_inode() returns true, it means iput_final() can evict this inode regardless of whether it is dirty or not. If we check I_DONTCACHE in generic_drop_inode(), any inode with this bit set will be evicted unconditionally. This is not the desired behavior because I_DONTCACHE only means the inode shouldn't be cached on the LRU list. As for whether we need to evict this inode, this is what generic_drop_inode() should do. This patch corrects the usage of I_DONTCACHE. This patch was proposed in [1]. [1]: https://lore.kernel.org/linux-fsdevel/20200831003407.GE12096@dread.disaster.area/ Fixes: dae2f8ed7992 ("fs: Lift XFS_IDONTCACHE to the VFS layer") Signed-off-by: Hao Li Reviewed-by: Dave Chinner Reviewed-by: Ira Weiny Signed-off-by: Al Viro --- fs/inode.c | 4 +++- include/linux/fs.h | 3 +-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index 9d78c37b00b81..5eea9912a0b9d 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1627,7 +1627,9 @@ static void iput_final(struct inode *inode) else drop = generic_drop_inode(inode); - if (!drop && (sb->s_flags & SB_ACTIVE)) { + if (!drop && + !(inode->i_state & I_DONTCACHE) && + (sb->s_flags & SB_ACTIVE)) { inode_add_lru(inode); spin_unlock(&inode->i_lock); return; diff --git a/include/linux/fs.h b/include/linux/fs.h index 8667d0cdc71e7..8bde32cf97115 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2878,8 +2878,7 @@ extern int inode_needs_sync(struct inode *inode); extern int generic_delete_inode(struct inode *inode); static inline int generic_drop_inode(struct inode *inode) { - return !inode->i_nlink || inode_unhashed(inode) || - (inode->i_state & I_DONTCACHE); + return !inode->i_nlink || inode_unhashed(inode); } extern void d_mark_dontcache(struct inode *inode); -- GitLab From 77573fa310d95e4293efdec98dace74cd9e52f43 Mon Sep 17 00:00:00 2001 From: Hao Li Date: Tue, 8 Dec 2020 10:10:50 +0800 Subject: [PATCH 0508/1894] fs: Kill DCACHE_DONTCACHE dentry even if DCACHE_REFERENCED is set If DCACHE_REFERENCED is set, fast_dput() will return true, and then retain_dentry() have no chance to check DCACHE_DONTCACHE. As a result, the dentry won't be killed and the corresponding inode can't be evicted. In the following example, the DAX policy can't take effects unless we do a drop_caches manually. # DCACHE_LRU_LIST will be set echo abcdefg > test.txt # DCACHE_REFERENCED will be set and DCACHE_DONTCACHE can't do anything xfs_io -c 'chattr +x' test.txt # Drop caches to make DAX changing take effects echo 2 > /proc/sys/vm/drop_caches What this patch does is preventing fast_dput() from returning true if DCACHE_DONTCACHE is set. Then retain_dentry() will detect the DCACHE_DONTCACHE and will return false. As a result, the dentry will be killed and the inode will be evicted. In this way, if we change per-file DAX policy, it will take effects automatically after this file is closed by all processes. I also add some comments to make the code more clear. Signed-off-by: Hao Li Reviewed-by: Jan Kara Reviewed-by: Ira Weiny Signed-off-by: Al Viro --- fs/dcache.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/dcache.c b/fs/dcache.c index ea0485861d937..97e81a844a966 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -793,10 +793,17 @@ static inline bool fast_dput(struct dentry *dentry) * a reference to the dentry and change that, but * our work is done - we can leave the dentry * around with a zero refcount. + * + * Nevertheless, there are two cases that we should kill + * the dentry anyway. + * 1. free disconnected dentries as soon as their refcount + * reached zero. + * 2. free dentries if they should not be cached. */ smp_rmb(); d_flags = READ_ONCE(dentry->d_flags); - d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST | DCACHE_DISCONNECTED; + d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST | + DCACHE_DISCONNECTED | DCACHE_DONTCACHE; /* Nothing to do? Dropping the reference was all we needed? */ if (d_flags == (DCACHE_REFERENCED | DCACHE_LRU_LIST) && !d_unhashed(dentry)) -- GitLab From 1a97d899ecbc4b60c8e8f9b41cde443510b5b1bf Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 19 Sep 2020 17:55:58 +0100 Subject: [PATCH 0509/1894] Make sure that make_create_in_sticky() never sees uninitialized value of dir_mode make sure nd->dir_mode is always initialized after success exit from link_path_walk(); in case of empty path it did not happen. Reported-by: Anant Thazhemadam Tested-by: Anant Thazhemadam Signed-off-by: Al Viro --- fs/namei.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/namei.c b/fs/namei.c index d4a6dd7723038..fc193c684a573 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2114,8 +2114,10 @@ static int link_path_walk(const char *name, struct nameidata *nd) return PTR_ERR(name); while (*name=='/') name++; - if (!*name) + if (!*name) { + nd->dir_mode = 0; // short-circuit the 'hardening' idiocy return 0; + } /* At this point we know we have a real path component. */ for(;;) { -- GitLab From 6c44221b05236cc65d76cb5dc2463f738edff39d Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Thu, 29 Oct 2020 15:04:57 +0100 Subject: [PATCH 0510/1894] KVM/VMX: Use TEST %REG,%REG instead of CMP $0,%REG in vmenter.S Saves one byte in __vmx_vcpu_run for the same functionality. Cc: Paolo Bonzini Cc: Sean Christopherson Signed-off-by: Uros Bizjak Message-Id: <20201029140457.126965-1-ubizjak@gmail.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmenter.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 90ad7a6246e36..e85aa5faa22d3 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -132,7 +132,7 @@ SYM_FUNC_START(__vmx_vcpu_run) mov (%_ASM_SP), %_ASM_AX /* Check if vmlaunch or vmresume is needed */ - cmpb $0, %bl + testb %bl, %bl /* Load guest registers. Don't clobber flags. */ mov VCPU_RCX(%_ASM_AX), %_ASM_CX -- GitLab From 3cea1891748e0ed8e79fa5d9afe40750319751d1 Mon Sep 17 00:00:00 2001 From: Aaron Lewis Date: Mon, 12 Oct 2020 12:47:16 -0700 Subject: [PATCH 0511/1894] selftests: kvm: Test MSR exiting to userspace Add a selftest to test that when the ioctl KVM_X86_SET_MSR_FILTER is called with an MSR list, those MSRs exit to userspace. This test uses 3 MSRs to test this: 1. MSR_IA32_XSS, an MSR the kernel knows about. 2. MSR_IA32_FLUSH_CMD, an MSR the kernel does not know about. 3. MSR_NON_EXISTENT, an MSR invented in this test for the purposes of passing a fake MSR from the guest to userspace. KVM just acts as a pass through. Userspace is also able to inject a #GP. This is demonstrated when MSR_IA32_XSS and MSR_IA32_FLUSH_CMD are misused in the test. When this happens a #GP is initiated in userspace to be thrown in the guest which is handled gracefully by the exception handling framework introduced earlier in this series. Tests for the generic instruction emulator were also added. For this to work the module parameter kvm.force_emulation_prefix=1 has to be enabled. If it isn't enabled the tests will be skipped. A test was also added to ensure the MSR permission bitmap is being set correctly by executing reads and writes of MSR_FS_BASE and MSR_GS_BASE in the guest while alternating which MSR userspace should intercept. If the permission bitmap is being set correctly only one of the MSRs should be coming through at a time, and the guest should be able to read and write the other one directly. Signed-off-by: Aaron Lewis Reviewed-by: Alexander Graf Message-Id: <20201012194716.3950330-5-aaronlewis@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/kvm/Makefile | 3 +- tools/testing/selftests/kvm/lib/kvm_util.c | 2 + .../kvm/x86_64/userspace_msr_exit_test.c | 560 ++++++++++++++++++ 4 files changed, 565 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 5468db7dd6742..5008bf90772be 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -18,6 +18,7 @@ /x86_64/sync_regs_test /x86_64/tsc_msrs_test /x86_64/user_msr_test +/x86_64/userspace_msr_exit_test /x86_64/vmx_apic_access_test /x86_64/vmx_close_while_nested_test /x86_64/vmx_dirty_log_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 4febf4d5ead9a..d54870db21c41 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -50,6 +50,8 @@ TEST_GEN_PROGS_x86_64 += x86_64/state_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test +TEST_GEN_PROGS_x86_64 += x86_64/user_msr_test +TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test @@ -58,7 +60,6 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test TEST_GEN_PROGS_x86_64 += x86_64/debug_regs TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test -TEST_GEN_PROGS_x86_64 += x86_64/user_msr_test TEST_GEN_PROGS_x86_64 += demand_paging_test TEST_GEN_PROGS_x86_64 += dirty_log_test TEST_GEN_PROGS_x86_64 += dirty_log_perf_test diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index b2c426adb87f8..88ef7067f1e66 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -1806,6 +1806,8 @@ static struct exit_reason { {KVM_EXIT_OSI, "OSI"}, {KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"}, {KVM_EXIT_DIRTY_RING_FULL, "DIRTY_RING_FULL"}, + {KVM_EXIT_X86_RDMSR, "RDMSR"}, + {KVM_EXIT_X86_WRMSR, "WRMSR"}, #ifdef KVM_EXIT_MEMORY_NOT_PRESENT {KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"}, #endif diff --git a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c new file mode 100644 index 0000000000000..e8b6918cdea0f --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c @@ -0,0 +1,560 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020, Google LLC. + * + * Tests for exiting into userspace on registered MSRs + */ + +#define _GNU_SOURCE /* for program_invocation_short_name */ +#include + +#include "test_util.h" +#include "kvm_util.h" +#include "vmx.h" + +/* Forced emulation prefix, used to invoke the emulator unconditionally. */ +#define KVM_FEP "ud2; .byte 'k', 'v', 'm';" +#define KVM_FEP_LENGTH 5 +static int fep_available = 1; + +#define VCPU_ID 1 +#define MSR_NON_EXISTENT 0x474f4f00 + +u64 deny_bits = 0; +struct kvm_msr_filter filter = { + .flags = KVM_MSR_FILTER_DEFAULT_ALLOW, + .ranges = { + { + .flags = KVM_MSR_FILTER_READ | + KVM_MSR_FILTER_WRITE, + .nmsrs = 1, + /* Test an MSR the kernel knows about. */ + .base = MSR_IA32_XSS, + .bitmap = (uint8_t*)&deny_bits, + }, { + .flags = KVM_MSR_FILTER_READ | + KVM_MSR_FILTER_WRITE, + .nmsrs = 1, + /* Test an MSR the kernel doesn't know about. */ + .base = MSR_IA32_FLUSH_CMD, + .bitmap = (uint8_t*)&deny_bits, + }, { + .flags = KVM_MSR_FILTER_READ | + KVM_MSR_FILTER_WRITE, + .nmsrs = 1, + /* Test a fabricated MSR that no one knows about. */ + .base = MSR_NON_EXISTENT, + .bitmap = (uint8_t*)&deny_bits, + }, + }, +}; + +struct kvm_msr_filter filter_fs = { + .flags = KVM_MSR_FILTER_DEFAULT_ALLOW, + .ranges = { + { + .flags = KVM_MSR_FILTER_READ | + KVM_MSR_FILTER_WRITE, + .nmsrs = 1, + .base = MSR_FS_BASE, + .bitmap = (uint8_t*)&deny_bits, + }, + }, +}; + +struct kvm_msr_filter filter_gs = { + .flags = KVM_MSR_FILTER_DEFAULT_ALLOW, + .ranges = { + { + .flags = KVM_MSR_FILTER_READ | + KVM_MSR_FILTER_WRITE, + .nmsrs = 1, + .base = MSR_GS_BASE, + .bitmap = (uint8_t*)&deny_bits, + }, + }, +}; + +uint64_t msr_non_existent_data; +int guest_exception_count; + +/* + * Note: Force test_rdmsr() to not be inlined to prevent the labels, + * rdmsr_start and rdmsr_end, from being defined multiple times. + */ +static noinline uint64_t test_rdmsr(uint32_t msr) +{ + uint32_t a, d; + + guest_exception_count = 0; + + __asm__ __volatile__("rdmsr_start: rdmsr; rdmsr_end:" : + "=a"(a), "=d"(d) : "c"(msr) : "memory"); + + return a | ((uint64_t) d << 32); +} + +/* + * Note: Force test_wrmsr() to not be inlined to prevent the labels, + * wrmsr_start and wrmsr_end, from being defined multiple times. + */ +static noinline void test_wrmsr(uint32_t msr, uint64_t value) +{ + uint32_t a = value; + uint32_t d = value >> 32; + + guest_exception_count = 0; + + __asm__ __volatile__("wrmsr_start: wrmsr; wrmsr_end:" :: + "a"(a), "d"(d), "c"(msr) : "memory"); +} + +extern char rdmsr_start, rdmsr_end; +extern char wrmsr_start, wrmsr_end; + +/* + * Note: Force test_em_rdmsr() to not be inlined to prevent the labels, + * rdmsr_start and rdmsr_end, from being defined multiple times. + */ +static noinline uint64_t test_em_rdmsr(uint32_t msr) +{ + uint32_t a, d; + + guest_exception_count = 0; + + __asm__ __volatile__(KVM_FEP "em_rdmsr_start: rdmsr; em_rdmsr_end:" : + "=a"(a), "=d"(d) : "c"(msr) : "memory"); + + return a | ((uint64_t) d << 32); +} + +/* + * Note: Force test_em_wrmsr() to not be inlined to prevent the labels, + * wrmsr_start and wrmsr_end, from being defined multiple times. + */ +static noinline void test_em_wrmsr(uint32_t msr, uint64_t value) +{ + uint32_t a = value; + uint32_t d = value >> 32; + + guest_exception_count = 0; + + __asm__ __volatile__(KVM_FEP "em_wrmsr_start: wrmsr; em_wrmsr_end:" :: + "a"(a), "d"(d), "c"(msr) : "memory"); +} + +extern char em_rdmsr_start, em_rdmsr_end; +extern char em_wrmsr_start, em_wrmsr_end; + +static void guest_code(void) +{ + uint64_t data; + + /* + * Test userspace intercepting rdmsr / wrmsr for MSR_IA32_XSS. + * + * A GP is thrown if anything other than 0 is written to + * MSR_IA32_XSS. + */ + data = test_rdmsr(MSR_IA32_XSS); + GUEST_ASSERT(data == 0); + GUEST_ASSERT(guest_exception_count == 0); + + test_wrmsr(MSR_IA32_XSS, 0); + GUEST_ASSERT(guest_exception_count == 0); + + test_wrmsr(MSR_IA32_XSS, 1); + GUEST_ASSERT(guest_exception_count == 1); + + /* + * Test userspace intercepting rdmsr / wrmsr for MSR_IA32_FLUSH_CMD. + * + * A GP is thrown if MSR_IA32_FLUSH_CMD is read + * from or if a value other than 1 is written to it. + */ + test_rdmsr(MSR_IA32_FLUSH_CMD); + GUEST_ASSERT(guest_exception_count == 1); + + test_wrmsr(MSR_IA32_FLUSH_CMD, 0); + GUEST_ASSERT(guest_exception_count == 1); + + test_wrmsr(MSR_IA32_FLUSH_CMD, 1); + GUEST_ASSERT(guest_exception_count == 0); + + /* + * Test userspace intercepting rdmsr / wrmsr for MSR_NON_EXISTENT. + * + * Test that a fabricated MSR can pass through the kernel + * and be handled in userspace. + */ + test_wrmsr(MSR_NON_EXISTENT, 2); + GUEST_ASSERT(guest_exception_count == 0); + + data = test_rdmsr(MSR_NON_EXISTENT); + GUEST_ASSERT(data == 2); + GUEST_ASSERT(guest_exception_count == 0); + + /* + * Test to see if the instruction emulator is available (ie: the module + * parameter 'kvm.force_emulation_prefix=1' is set). This instruction + * will #UD if it isn't available. + */ + __asm__ __volatile__(KVM_FEP "nop"); + + if (fep_available) { + /* Let userspace know we aren't done. */ + GUEST_SYNC(0); + + /* + * Now run the same tests with the instruction emulator. + */ + data = test_em_rdmsr(MSR_IA32_XSS); + GUEST_ASSERT(data == 0); + GUEST_ASSERT(guest_exception_count == 0); + test_em_wrmsr(MSR_IA32_XSS, 0); + GUEST_ASSERT(guest_exception_count == 0); + test_em_wrmsr(MSR_IA32_XSS, 1); + GUEST_ASSERT(guest_exception_count == 1); + + test_em_rdmsr(MSR_IA32_FLUSH_CMD); + GUEST_ASSERT(guest_exception_count == 1); + test_em_wrmsr(MSR_IA32_FLUSH_CMD, 0); + GUEST_ASSERT(guest_exception_count == 1); + test_em_wrmsr(MSR_IA32_FLUSH_CMD, 1); + GUEST_ASSERT(guest_exception_count == 0); + + test_em_wrmsr(MSR_NON_EXISTENT, 2); + GUEST_ASSERT(guest_exception_count == 0); + data = test_em_rdmsr(MSR_NON_EXISTENT); + GUEST_ASSERT(data == 2); + GUEST_ASSERT(guest_exception_count == 0); + } + + GUEST_DONE(); +} + + +static void guest_code_permission_bitmap(void) +{ + uint64_t data; + + test_wrmsr(MSR_FS_BASE, 0); + data = test_rdmsr(MSR_FS_BASE); + GUEST_ASSERT(data == MSR_FS_BASE); + + test_wrmsr(MSR_GS_BASE, 0); + data = test_rdmsr(MSR_GS_BASE); + GUEST_ASSERT(data == 0); + + /* Let userspace know to switch the filter */ + GUEST_SYNC(0); + + test_wrmsr(MSR_FS_BASE, 0); + data = test_rdmsr(MSR_FS_BASE); + GUEST_ASSERT(data == 0); + + test_wrmsr(MSR_GS_BASE, 0); + data = test_rdmsr(MSR_GS_BASE); + GUEST_ASSERT(data == MSR_GS_BASE); + + GUEST_DONE(); +} + +static void __guest_gp_handler(struct ex_regs *regs, + char *r_start, char *r_end, + char *w_start, char *w_end) +{ + if (regs->rip == (uintptr_t)r_start) { + regs->rip = (uintptr_t)r_end; + regs->rax = 0; + regs->rdx = 0; + } else if (regs->rip == (uintptr_t)w_start) { + regs->rip = (uintptr_t)w_end; + } else { + GUEST_ASSERT(!"RIP is at an unknown location!"); + } + + ++guest_exception_count; +} + +static void guest_gp_handler(struct ex_regs *regs) +{ + __guest_gp_handler(regs, &rdmsr_start, &rdmsr_end, + &wrmsr_start, &wrmsr_end); +} + +static void guest_fep_gp_handler(struct ex_regs *regs) +{ + __guest_gp_handler(regs, &em_rdmsr_start, &em_rdmsr_end, + &em_wrmsr_start, &em_wrmsr_end); +} + +static void guest_ud_handler(struct ex_regs *regs) +{ + fep_available = 0; + regs->rip += KVM_FEP_LENGTH; +} + +static void run_guest(struct kvm_vm *vm) +{ + int rc; + + rc = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc); +} + +static void check_for_guest_assert(struct kvm_vm *vm) +{ + struct kvm_run *run = vcpu_state(vm, VCPU_ID); + struct ucall uc; + + if (run->exit_reason == KVM_EXIT_IO && + get_ucall(vm, VCPU_ID, &uc) == UCALL_ABORT) { + TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], + __FILE__, uc.args[1]); + } +} + +static void process_rdmsr(struct kvm_vm *vm, uint32_t msr_index) +{ + struct kvm_run *run = vcpu_state(vm, VCPU_ID); + + check_for_guest_assert(vm); + + TEST_ASSERT(run->exit_reason == KVM_EXIT_X86_RDMSR, + "Unexpected exit reason: %u (%s),\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + TEST_ASSERT(run->msr.index == msr_index, + "Unexpected msr (0x%04x), expected 0x%04x", + run->msr.index, msr_index); + + switch (run->msr.index) { + case MSR_IA32_XSS: + run->msr.data = 0; + break; + case MSR_IA32_FLUSH_CMD: + run->msr.error = 1; + break; + case MSR_NON_EXISTENT: + run->msr.data = msr_non_existent_data; + break; + case MSR_FS_BASE: + run->msr.data = MSR_FS_BASE; + break; + case MSR_GS_BASE: + run->msr.data = MSR_GS_BASE; + break; + default: + TEST_ASSERT(false, "Unexpected MSR: 0x%04x", run->msr.index); + } +} + +static void process_wrmsr(struct kvm_vm *vm, uint32_t msr_index) +{ + struct kvm_run *run = vcpu_state(vm, VCPU_ID); + + check_for_guest_assert(vm); + + TEST_ASSERT(run->exit_reason == KVM_EXIT_X86_WRMSR, + "Unexpected exit reason: %u (%s),\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + TEST_ASSERT(run->msr.index == msr_index, + "Unexpected msr (0x%04x), expected 0x%04x", + run->msr.index, msr_index); + + switch (run->msr.index) { + case MSR_IA32_XSS: + if (run->msr.data != 0) + run->msr.error = 1; + break; + case MSR_IA32_FLUSH_CMD: + if (run->msr.data != 1) + run->msr.error = 1; + break; + case MSR_NON_EXISTENT: + msr_non_existent_data = run->msr.data; + break; + case MSR_FS_BASE: + case MSR_GS_BASE: + break; + default: + TEST_ASSERT(false, "Unexpected MSR: 0x%04x", run->msr.index); + } +} + +static void process_ucall_done(struct kvm_vm *vm) +{ + struct kvm_run *run = vcpu_state(vm, VCPU_ID); + struct ucall uc; + + check_for_guest_assert(vm); + + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Unexpected exit reason: %u (%s)", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + TEST_ASSERT(get_ucall(vm, VCPU_ID, &uc) == UCALL_DONE, + "Unexpected ucall command: %lu, expected UCALL_DONE (%d)", + uc.cmd, UCALL_DONE); +} + +static uint64_t process_ucall(struct kvm_vm *vm) +{ + struct kvm_run *run = vcpu_state(vm, VCPU_ID); + struct ucall uc = {}; + + check_for_guest_assert(vm); + + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Unexpected exit reason: %u (%s)", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_SYNC: + break; + case UCALL_ABORT: + check_for_guest_assert(vm); + break; + case UCALL_DONE: + process_ucall_done(vm); + break; + default: + TEST_ASSERT(false, "Unexpected ucall"); + } + + return uc.cmd; +} + +static void run_guest_then_process_rdmsr(struct kvm_vm *vm, uint32_t msr_index) +{ + run_guest(vm); + process_rdmsr(vm, msr_index); +} + +static void run_guest_then_process_wrmsr(struct kvm_vm *vm, uint32_t msr_index) +{ + run_guest(vm); + process_wrmsr(vm, msr_index); +} + +static uint64_t run_guest_then_process_ucall(struct kvm_vm *vm) +{ + run_guest(vm); + return process_ucall(vm); +} + +static void run_guest_then_process_ucall_done(struct kvm_vm *vm) +{ + run_guest(vm); + process_ucall_done(vm); +} + +static void test_msr_filter(void) { + struct kvm_enable_cap cap = { + .cap = KVM_CAP_X86_USER_SPACE_MSR, + .args[0] = KVM_MSR_EXIT_REASON_FILTER, + }; + struct kvm_vm *vm; + int rc; + + /* Create VM */ + vm = vm_create_default(VCPU_ID, 0, guest_code); + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + + rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR); + TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available"); + vm_enable_cap(vm, &cap); + + rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER); + TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available"); + + vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vm, VCPU_ID); + + vm_handle_exception(vm, GP_VECTOR, guest_gp_handler); + + /* Process guest code userspace exits. */ + run_guest_then_process_rdmsr(vm, MSR_IA32_XSS); + run_guest_then_process_wrmsr(vm, MSR_IA32_XSS); + run_guest_then_process_wrmsr(vm, MSR_IA32_XSS); + + run_guest_then_process_rdmsr(vm, MSR_IA32_FLUSH_CMD); + run_guest_then_process_wrmsr(vm, MSR_IA32_FLUSH_CMD); + run_guest_then_process_wrmsr(vm, MSR_IA32_FLUSH_CMD); + + run_guest_then_process_wrmsr(vm, MSR_NON_EXISTENT); + run_guest_then_process_rdmsr(vm, MSR_NON_EXISTENT); + + vm_handle_exception(vm, UD_VECTOR, guest_ud_handler); + run_guest(vm); + vm_handle_exception(vm, UD_VECTOR, NULL); + + if (process_ucall(vm) != UCALL_DONE) { + vm_handle_exception(vm, GP_VECTOR, guest_fep_gp_handler); + + /* Process emulated rdmsr and wrmsr instructions. */ + run_guest_then_process_rdmsr(vm, MSR_IA32_XSS); + run_guest_then_process_wrmsr(vm, MSR_IA32_XSS); + run_guest_then_process_wrmsr(vm, MSR_IA32_XSS); + + run_guest_then_process_rdmsr(vm, MSR_IA32_FLUSH_CMD); + run_guest_then_process_wrmsr(vm, MSR_IA32_FLUSH_CMD); + run_guest_then_process_wrmsr(vm, MSR_IA32_FLUSH_CMD); + + run_guest_then_process_wrmsr(vm, MSR_NON_EXISTENT); + run_guest_then_process_rdmsr(vm, MSR_NON_EXISTENT); + + /* Confirm the guest completed without issues. */ + run_guest_then_process_ucall_done(vm); + } else { + printf("To run the instruction emulated tests set the module parameter 'kvm.force_emulation_prefix=1'\n"); + } + + kvm_vm_free(vm); +} + +static void test_msr_permission_bitmap(void) { + struct kvm_enable_cap cap = { + .cap = KVM_CAP_X86_USER_SPACE_MSR, + .args[0] = KVM_MSR_EXIT_REASON_FILTER, + }; + struct kvm_vm *vm; + int rc; + + /* Create VM */ + vm = vm_create_default(VCPU_ID, 0, guest_code_permission_bitmap); + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + + rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR); + TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available"); + vm_enable_cap(vm, &cap); + + rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER); + TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available"); + + vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_fs); + run_guest_then_process_wrmsr(vm, MSR_FS_BASE); + run_guest_then_process_rdmsr(vm, MSR_FS_BASE); + TEST_ASSERT(run_guest_then_process_ucall(vm) == UCALL_SYNC, "Expected ucall state to be UCALL_SYNC."); + vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_gs); + run_guest_then_process_wrmsr(vm, MSR_GS_BASE); + run_guest_then_process_rdmsr(vm, MSR_GS_BASE); + run_guest_then_process_ucall_done(vm); + + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + test_msr_filter(); + + test_msr_permission_bitmap(); + + return 0; +} -- GitLab From fb6360534ecc0a2703f7b6076cf1397385d23df8 Mon Sep 17 00:00:00 2001 From: Aaron Lewis Date: Fri, 4 Dec 2020 09:25:31 -0800 Subject: [PATCH 0512/1894] selftests: kvm: Merge user_msr_test into userspace_msr_exit_test Both user_msr_test and userspace_msr_exit_test tests the functionality of kvm_msr_filter. Instead of testing this feature in two tests, merge them together, so there is only one test for this feature. Signed-off-by: Aaron Lewis Message-Id: <20201204172530.2958493-1-aaronlewis@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/.gitignore | 1 - tools/testing/selftests/kvm/Makefile | 1 - .../selftests/kvm/x86_64/user_msr_test.c | 251 ----------------- .../kvm/x86_64/userspace_msr_exit_test.c | 262 ++++++++++++++++-- 4 files changed, 236 insertions(+), 279 deletions(-) delete mode 100644 tools/testing/selftests/kvm/x86_64/user_msr_test.c diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 5008bf90772be..ce8f4ad39684c 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -17,7 +17,6 @@ /x86_64/svm_vmcall_test /x86_64/sync_regs_test /x86_64/tsc_msrs_test -/x86_64/user_msr_test /x86_64/userspace_msr_exit_test /x86_64/vmx_apic_access_test /x86_64/vmx_close_while_nested_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index d54870db21c41..86ccca8ca89dd 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -50,7 +50,6 @@ TEST_GEN_PROGS_x86_64 += x86_64/state_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test -TEST_GEN_PROGS_x86_64 += x86_64/user_msr_test TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test diff --git a/tools/testing/selftests/kvm/x86_64/user_msr_test.c b/tools/testing/selftests/kvm/x86_64/user_msr_test.c deleted file mode 100644 index fe88b98908ee1..0000000000000 --- a/tools/testing/selftests/kvm/x86_64/user_msr_test.c +++ /dev/null @@ -1,251 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * tests for KVM_CAP_X86_USER_SPACE_MSR and KVM_X86_SET_MSR_FILTER - * - * Copyright (C) 2020, Amazon Inc. - * - * This is a functional test to verify that we can deflect MSR events - * into user space. - */ -#define _GNU_SOURCE /* for program_invocation_short_name */ -#include -#include -#include -#include -#include - -#include "test_util.h" - -#include "kvm_util.h" -#include "processor.h" - -#define VCPU_ID 5 - -static u32 msr_reads, msr_writes; - -static u8 bitmap_00000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE]; -static u8 bitmap_00000000_write[KVM_MSR_FILTER_MAX_BITMAP_SIZE]; -static u8 bitmap_40000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE]; -static u8 bitmap_c0000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE]; -static u8 bitmap_c0000000_read[KVM_MSR_FILTER_MAX_BITMAP_SIZE]; -static u8 bitmap_deadbeef[1] = { 0x1 }; - -static void deny_msr(uint8_t *bitmap, u32 msr) -{ - u32 idx = msr & (KVM_MSR_FILTER_MAX_BITMAP_SIZE - 1); - - bitmap[idx / 8] &= ~(1 << (idx % 8)); -} - -static void prepare_bitmaps(void) -{ - memset(bitmap_00000000, 0xff, sizeof(bitmap_00000000)); - memset(bitmap_00000000_write, 0xff, sizeof(bitmap_00000000_write)); - memset(bitmap_40000000, 0xff, sizeof(bitmap_40000000)); - memset(bitmap_c0000000, 0xff, sizeof(bitmap_c0000000)); - memset(bitmap_c0000000_read, 0xff, sizeof(bitmap_c0000000_read)); - - deny_msr(bitmap_00000000_write, MSR_IA32_POWER_CTL); - deny_msr(bitmap_c0000000_read, MSR_SYSCALL_MASK); - deny_msr(bitmap_c0000000_read, MSR_GS_BASE); -} - -struct kvm_msr_filter filter = { - .flags = KVM_MSR_FILTER_DEFAULT_DENY, - .ranges = { - { - .flags = KVM_MSR_FILTER_READ, - .base = 0x00000000, - .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE, - .bitmap = bitmap_00000000, - }, { - .flags = KVM_MSR_FILTER_WRITE, - .base = 0x00000000, - .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE, - .bitmap = bitmap_00000000_write, - }, { - .flags = KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE, - .base = 0x40000000, - .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE, - .bitmap = bitmap_40000000, - }, { - .flags = KVM_MSR_FILTER_READ, - .base = 0xc0000000, - .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE, - .bitmap = bitmap_c0000000_read, - }, { - .flags = KVM_MSR_FILTER_WRITE, - .base = 0xc0000000, - .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE, - .bitmap = bitmap_c0000000, - }, { - .flags = KVM_MSR_FILTER_WRITE | KVM_MSR_FILTER_READ, - .base = 0xdeadbeef, - .nmsrs = 1, - .bitmap = bitmap_deadbeef, - }, - }, -}; - -struct kvm_msr_filter no_filter = { - .flags = KVM_MSR_FILTER_DEFAULT_ALLOW, -}; - -static void guest_msr_calls(bool trapped) -{ - /* This goes into the in-kernel emulation */ - wrmsr(MSR_SYSCALL_MASK, 0); - - if (trapped) { - /* This goes into user space emulation */ - GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) == MSR_SYSCALL_MASK); - GUEST_ASSERT(rdmsr(MSR_GS_BASE) == MSR_GS_BASE); - } else { - GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) != MSR_SYSCALL_MASK); - GUEST_ASSERT(rdmsr(MSR_GS_BASE) != MSR_GS_BASE); - } - - /* If trapped == true, this goes into user space emulation */ - wrmsr(MSR_IA32_POWER_CTL, 0x1234); - - /* This goes into the in-kernel emulation */ - rdmsr(MSR_IA32_POWER_CTL); - - /* Invalid MSR, should always be handled by user space exit */ - GUEST_ASSERT(rdmsr(0xdeadbeef) == 0xdeadbeef); - wrmsr(0xdeadbeef, 0x1234); -} - -static void guest_code(void) -{ - guest_msr_calls(true); - - /* - * Disable msr filtering, so that the kernel - * handles everything in the next round - */ - GUEST_SYNC(0); - - guest_msr_calls(false); - - GUEST_DONE(); -} - -static int handle_ucall(struct kvm_vm *vm) -{ - struct ucall uc; - - switch (get_ucall(vm, VCPU_ID, &uc)) { - case UCALL_ABORT: - TEST_FAIL("Guest assertion not met"); - break; - case UCALL_SYNC: - vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &no_filter); - break; - case UCALL_DONE: - return 1; - default: - TEST_FAIL("Unknown ucall %lu", uc.cmd); - } - - return 0; -} - -static void handle_rdmsr(struct kvm_run *run) -{ - run->msr.data = run->msr.index; - msr_reads++; - - if (run->msr.index == MSR_SYSCALL_MASK || - run->msr.index == MSR_GS_BASE) { - TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER, - "MSR read trap w/o access fault"); - } - - if (run->msr.index == 0xdeadbeef) { - TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN, - "MSR deadbeef read trap w/o inval fault"); - } -} - -static void handle_wrmsr(struct kvm_run *run) -{ - /* ignore */ - msr_writes++; - - if (run->msr.index == MSR_IA32_POWER_CTL) { - TEST_ASSERT(run->msr.data == 0x1234, - "MSR data for MSR_IA32_POWER_CTL incorrect"); - TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER, - "MSR_IA32_POWER_CTL trap w/o access fault"); - } - - if (run->msr.index == 0xdeadbeef) { - TEST_ASSERT(run->msr.data == 0x1234, - "MSR data for deadbeef incorrect"); - TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN, - "deadbeef trap w/o inval fault"); - } -} - -int main(int argc, char *argv[]) -{ - struct kvm_enable_cap cap = { - .cap = KVM_CAP_X86_USER_SPACE_MSR, - .args[0] = KVM_MSR_EXIT_REASON_INVAL | - KVM_MSR_EXIT_REASON_UNKNOWN | - KVM_MSR_EXIT_REASON_FILTER, - }; - struct kvm_vm *vm; - struct kvm_run *run; - int rc; - - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - - /* Create VM */ - vm = vm_create_default(VCPU_ID, 0, guest_code); - run = vcpu_state(vm, VCPU_ID); - - rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR); - if (!rc) { - print_skip("KVM_CAP_X86_USER_SPACE_MSR not supported"); - exit(KSFT_SKIP); - } - - vm_enable_cap(vm, &cap); - - rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER); - TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available"); - - prepare_bitmaps(); - vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter); - - while (1) { - rc = _vcpu_run(vm, VCPU_ID); - - TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc); - - switch (run->exit_reason) { - case KVM_EXIT_X86_RDMSR: - handle_rdmsr(run); - break; - case KVM_EXIT_X86_WRMSR: - handle_wrmsr(run); - break; - case KVM_EXIT_IO: - if (handle_ucall(vm)) - goto done; - break; - } - - } - -done: - TEST_ASSERT(msr_reads == 4, "Handled 4 rdmsr in user space"); - TEST_ASSERT(msr_writes == 3, "Handled 3 wrmsr in user space"); - - kvm_vm_free(vm); - - return 0; -} diff --git a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c index e8b6918cdea0f..72c0d07975221 100644 --- a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c +++ b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c @@ -20,8 +20,8 @@ static int fep_available = 1; #define VCPU_ID 1 #define MSR_NON_EXISTENT 0x474f4f00 -u64 deny_bits = 0; -struct kvm_msr_filter filter = { +static u64 deny_bits = 0; +struct kvm_msr_filter filter_allow = { .flags = KVM_MSR_FILTER_DEFAULT_ALLOW, .ranges = { { @@ -53,8 +53,7 @@ struct kvm_msr_filter filter_fs = { .flags = KVM_MSR_FILTER_DEFAULT_ALLOW, .ranges = { { - .flags = KVM_MSR_FILTER_READ | - KVM_MSR_FILTER_WRITE, + .flags = KVM_MSR_FILTER_READ, .nmsrs = 1, .base = MSR_FS_BASE, .bitmap = (uint8_t*)&deny_bits, @@ -66,8 +65,7 @@ struct kvm_msr_filter filter_gs = { .flags = KVM_MSR_FILTER_DEFAULT_ALLOW, .ranges = { { - .flags = KVM_MSR_FILTER_READ | - KVM_MSR_FILTER_WRITE, + .flags = KVM_MSR_FILTER_READ, .nmsrs = 1, .base = MSR_GS_BASE, .bitmap = (uint8_t*)&deny_bits, @@ -75,8 +73,77 @@ struct kvm_msr_filter filter_gs = { }, }; -uint64_t msr_non_existent_data; -int guest_exception_count; +static uint64_t msr_non_existent_data; +static int guest_exception_count; +static u32 msr_reads, msr_writes; + +static u8 bitmap_00000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE]; +static u8 bitmap_00000000_write[KVM_MSR_FILTER_MAX_BITMAP_SIZE]; +static u8 bitmap_40000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE]; +static u8 bitmap_c0000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE]; +static u8 bitmap_c0000000_read[KVM_MSR_FILTER_MAX_BITMAP_SIZE]; +static u8 bitmap_deadbeef[1] = { 0x1 }; + +static void deny_msr(uint8_t *bitmap, u32 msr) +{ + u32 idx = msr & (KVM_MSR_FILTER_MAX_BITMAP_SIZE - 1); + + bitmap[idx / 8] &= ~(1 << (idx % 8)); +} + +static void prepare_bitmaps(void) +{ + memset(bitmap_00000000, 0xff, sizeof(bitmap_00000000)); + memset(bitmap_00000000_write, 0xff, sizeof(bitmap_00000000_write)); + memset(bitmap_40000000, 0xff, sizeof(bitmap_40000000)); + memset(bitmap_c0000000, 0xff, sizeof(bitmap_c0000000)); + memset(bitmap_c0000000_read, 0xff, sizeof(bitmap_c0000000_read)); + + deny_msr(bitmap_00000000_write, MSR_IA32_POWER_CTL); + deny_msr(bitmap_c0000000_read, MSR_SYSCALL_MASK); + deny_msr(bitmap_c0000000_read, MSR_GS_BASE); +} + +struct kvm_msr_filter filter_deny = { + .flags = KVM_MSR_FILTER_DEFAULT_DENY, + .ranges = { + { + .flags = KVM_MSR_FILTER_READ, + .base = 0x00000000, + .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE, + .bitmap = bitmap_00000000, + }, { + .flags = KVM_MSR_FILTER_WRITE, + .base = 0x00000000, + .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE, + .bitmap = bitmap_00000000_write, + }, { + .flags = KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE, + .base = 0x40000000, + .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE, + .bitmap = bitmap_40000000, + }, { + .flags = KVM_MSR_FILTER_READ, + .base = 0xc0000000, + .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE, + .bitmap = bitmap_c0000000_read, + }, { + .flags = KVM_MSR_FILTER_WRITE, + .base = 0xc0000000, + .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE, + .bitmap = bitmap_c0000000, + }, { + .flags = KVM_MSR_FILTER_WRITE | KVM_MSR_FILTER_READ, + .base = 0xdeadbeef, + .nmsrs = 1, + .bitmap = bitmap_deadbeef, + }, + }, +}; + +struct kvm_msr_filter no_filter_deny = { + .flags = KVM_MSR_FILTER_DEFAULT_ALLOW, +}; /* * Note: Force test_rdmsr() to not be inlined to prevent the labels, @@ -146,7 +213,7 @@ static noinline void test_em_wrmsr(uint32_t msr, uint64_t value) extern char em_rdmsr_start, em_rdmsr_end; extern char em_wrmsr_start, em_wrmsr_end; -static void guest_code(void) +static void guest_code_filter_allow(void) { uint64_t data; @@ -233,27 +300,60 @@ static void guest_code(void) GUEST_DONE(); } +static void guest_msr_calls(bool trapped) +{ + /* This goes into the in-kernel emulation */ + wrmsr(MSR_SYSCALL_MASK, 0); + + if (trapped) { + /* This goes into user space emulation */ + GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) == MSR_SYSCALL_MASK); + GUEST_ASSERT(rdmsr(MSR_GS_BASE) == MSR_GS_BASE); + } else { + GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) != MSR_SYSCALL_MASK); + GUEST_ASSERT(rdmsr(MSR_GS_BASE) != MSR_GS_BASE); + } + + /* If trapped == true, this goes into user space emulation */ + wrmsr(MSR_IA32_POWER_CTL, 0x1234); + + /* This goes into the in-kernel emulation */ + rdmsr(MSR_IA32_POWER_CTL); + + /* Invalid MSR, should always be handled by user space exit */ + GUEST_ASSERT(rdmsr(0xdeadbeef) == 0xdeadbeef); + wrmsr(0xdeadbeef, 0x1234); +} + +static void guest_code_filter_deny(void) +{ + guest_msr_calls(true); + + /* + * Disable msr filtering, so that the kernel + * handles everything in the next round + */ + GUEST_SYNC(0); + + guest_msr_calls(false); + + GUEST_DONE(); +} static void guest_code_permission_bitmap(void) { uint64_t data; - test_wrmsr(MSR_FS_BASE, 0); data = test_rdmsr(MSR_FS_BASE); GUEST_ASSERT(data == MSR_FS_BASE); - - test_wrmsr(MSR_GS_BASE, 0); data = test_rdmsr(MSR_GS_BASE); - GUEST_ASSERT(data == 0); + GUEST_ASSERT(data != MSR_GS_BASE); /* Let userspace know to switch the filter */ GUEST_SYNC(0); - test_wrmsr(MSR_FS_BASE, 0); data = test_rdmsr(MSR_FS_BASE); - GUEST_ASSERT(data == 0); - - test_wrmsr(MSR_GS_BASE, 0); + GUEST_ASSERT(data != MSR_FS_BASE); data = test_rdmsr(MSR_GS_BASE); GUEST_ASSERT(data == MSR_GS_BASE); @@ -376,9 +476,6 @@ static void process_wrmsr(struct kvm_vm *vm, uint32_t msr_index) case MSR_NON_EXISTENT: msr_non_existent_data = run->msr.data; break; - case MSR_FS_BASE: - case MSR_GS_BASE: - break; default: TEST_ASSERT(false, "Unexpected MSR: 0x%04x", run->msr.index); } @@ -453,7 +550,7 @@ static void run_guest_then_process_ucall_done(struct kvm_vm *vm) process_ucall_done(vm); } -static void test_msr_filter(void) { +static void test_msr_filter_allow(void) { struct kvm_enable_cap cap = { .cap = KVM_CAP_X86_USER_SPACE_MSR, .args[0] = KVM_MSR_EXIT_REASON_FILTER, @@ -462,7 +559,7 @@ static void test_msr_filter(void) { int rc; /* Create VM */ - vm = vm_create_default(VCPU_ID, 0, guest_code); + vm = vm_create_default(VCPU_ID, 0, guest_code_filter_allow); vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR); @@ -472,7 +569,7 @@ static void test_msr_filter(void) { rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER); TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available"); - vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter); + vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_allow); vm_init_descriptor_tables(vm); vcpu_init_descriptor_tables(vm, VCPU_ID); @@ -519,6 +616,116 @@ static void test_msr_filter(void) { kvm_vm_free(vm); } +static int handle_ucall(struct kvm_vm *vm) +{ + struct ucall uc; + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_ABORT: + TEST_FAIL("Guest assertion not met"); + break; + case UCALL_SYNC: + vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &no_filter_deny); + break; + case UCALL_DONE: + return 1; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + + return 0; +} + +static void handle_rdmsr(struct kvm_run *run) +{ + run->msr.data = run->msr.index; + msr_reads++; + + if (run->msr.index == MSR_SYSCALL_MASK || + run->msr.index == MSR_GS_BASE) { + TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER, + "MSR read trap w/o access fault"); + } + + if (run->msr.index == 0xdeadbeef) { + TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN, + "MSR deadbeef read trap w/o inval fault"); + } +} + +static void handle_wrmsr(struct kvm_run *run) +{ + /* ignore */ + msr_writes++; + + if (run->msr.index == MSR_IA32_POWER_CTL) { + TEST_ASSERT(run->msr.data == 0x1234, + "MSR data for MSR_IA32_POWER_CTL incorrect"); + TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER, + "MSR_IA32_POWER_CTL trap w/o access fault"); + } + + if (run->msr.index == 0xdeadbeef) { + TEST_ASSERT(run->msr.data == 0x1234, + "MSR data for deadbeef incorrect"); + TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN, + "deadbeef trap w/o inval fault"); + } +} + +static void test_msr_filter_deny(void) { + struct kvm_enable_cap cap = { + .cap = KVM_CAP_X86_USER_SPACE_MSR, + .args[0] = KVM_MSR_EXIT_REASON_INVAL | + KVM_MSR_EXIT_REASON_UNKNOWN | + KVM_MSR_EXIT_REASON_FILTER, + }; + struct kvm_vm *vm; + struct kvm_run *run; + int rc; + + /* Create VM */ + vm = vm_create_default(VCPU_ID, 0, guest_code_filter_deny); + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + run = vcpu_state(vm, VCPU_ID); + + rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR); + TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available"); + vm_enable_cap(vm, &cap); + + rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER); + TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available"); + + prepare_bitmaps(); + vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_deny); + + while (1) { + rc = _vcpu_run(vm, VCPU_ID); + + TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc); + + switch (run->exit_reason) { + case KVM_EXIT_X86_RDMSR: + handle_rdmsr(run); + break; + case KVM_EXIT_X86_WRMSR: + handle_wrmsr(run); + break; + case KVM_EXIT_IO: + if (handle_ucall(vm)) + goto done; + break; + } + + } + +done: + TEST_ASSERT(msr_reads == 4, "Handled 4 rdmsr in user space"); + TEST_ASSERT(msr_writes == 3, "Handled 3 wrmsr in user space"); + + kvm_vm_free(vm); +} + static void test_msr_permission_bitmap(void) { struct kvm_enable_cap cap = { .cap = KVM_CAP_X86_USER_SPACE_MSR, @@ -539,11 +746,9 @@ static void test_msr_permission_bitmap(void) { TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available"); vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_fs); - run_guest_then_process_wrmsr(vm, MSR_FS_BASE); run_guest_then_process_rdmsr(vm, MSR_FS_BASE); TEST_ASSERT(run_guest_then_process_ucall(vm) == UCALL_SYNC, "Expected ucall state to be UCALL_SYNC."); vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_gs); - run_guest_then_process_wrmsr(vm, MSR_GS_BASE); run_guest_then_process_rdmsr(vm, MSR_GS_BASE); run_guest_then_process_ucall_done(vm); @@ -552,7 +757,12 @@ static void test_msr_permission_bitmap(void) { int main(int argc, char *argv[]) { - test_msr_filter(); + /* Tell stdout not to buffer its content */ + setbuf(stdout, NULL); + + test_msr_filter_allow(); + + test_msr_filter_deny(); test_msr_permission_bitmap(); -- GitLab From e1b35da5e624f8b09d2e98845c2e4c84b179d9a4 Mon Sep 17 00:00:00 2001 From: Kyung Min Park Date: Mon, 7 Dec 2020 19:34:40 -0800 Subject: [PATCH 0513/1894] x86: Enumerate AVX512 FP16 CPUID feature flag Enumerate AVX512 Half-precision floating point (FP16) CPUID feature flag. Compared with using FP32, using FP16 cut the number of bits required for storage in half, reducing the exponent from 8 bits to 5, and the mantissa from 23 bits to 10. Using FP16 also enables developers to train and run inference on deep learning models fast when all precision or magnitude (FP32) is not needed. A processor supports AVX512 FP16 if CPUID.(EAX=7,ECX=0):EDX[bit 23] is present. The AVX512 FP16 requires AVX512BW feature be implemented since the instructions for manipulating 32bit masks are associated with AVX512BW. The only in-kernel usage of this is kvm passthrough. The CPU feature flag is shown as "avx512_fp16" in /proc/cpuinfo. Signed-off-by: Kyung Min Park Acked-by: Dave Hansen Reviewed-by: Tony Luck Message-Id: <20201208033441.28207-2-kyung.min.park@intel.com> Acked-by: Borislav Petkov Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kernel/cpu/cpuid-deps.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index dad350d42ecfb..b9dc6a56d360a 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -374,6 +374,7 @@ #define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */ +#define X86_FEATURE_AVX512_FP16 (18*32+23) /* AVX512 FP16 */ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */ diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c index d502241995a39..42af31b64c2ce 100644 --- a/arch/x86/kernel/cpu/cpuid-deps.c +++ b/arch/x86/kernel/cpu/cpuid-deps.c @@ -69,6 +69,7 @@ static const struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_CQM_MBM_TOTAL, X86_FEATURE_CQM_LLC }, { X86_FEATURE_CQM_MBM_LOCAL, X86_FEATURE_CQM_LLC }, { X86_FEATURE_AVX512_BF16, X86_FEATURE_AVX512VL }, + { X86_FEATURE_AVX512_FP16, X86_FEATURE_AVX512BW }, { X86_FEATURE_ENQCMD, X86_FEATURE_XSAVES }, { X86_FEATURE_PER_THREAD_MBA, X86_FEATURE_MBA }, {} -- GitLab From 2224fc9efb2d6593fbfb57287e39ba4958b188ba Mon Sep 17 00:00:00 2001 From: Cathy Zhang Date: Mon, 7 Dec 2020 19:34:41 -0800 Subject: [PATCH 0514/1894] KVM: x86: Expose AVX512_FP16 for supported CPUID AVX512_FP16 is supported by Intel processors, like Sapphire Rapids. It could gain better performance for it's faster compared to FP32 if the precision or magnitude requirements are met. It's availability is indicated by CPUID.(EAX=7,ECX=0):EDX[bit 23]. Expose it in KVM supported CPUID, then guest could make use of it; no new registers are used, only new instructions. Signed-off-by: Cathy Zhang Signed-off-by: Kyung Min Park Acked-by: Dave Hansen Reviewed-by: Tony Luck Message-Id: <20201208033441.28207-3-kyung.min.park@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 5d352cc204cec..a22a3108b5f0f 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -419,7 +419,7 @@ void kvm_set_cpu_caps(void) F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) | F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM) | - F(SERIALIZE) | F(TSXLDTRK) + F(SERIALIZE) | F(TSXLDTRK) | F(AVX512_FP16) ); /* TSC_ADJUST and ARCH_CAPABILITIES are emulated in software. */ -- GitLab From 39485ed95d6b83b62fa75c06c2c4d33992e0d971 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 3 Dec 2020 09:40:15 -0500 Subject: [PATCH 0515/1894] KVM: x86: reinstate vendor-agnostic check on SPEC_CTRL cpuid bits Until commit e7c587da1252 ("x86/speculation: Use synthetic bits for IBRS/IBPB/STIBP"), KVM was testing both Intel and AMD CPUID bits before allowing the guest to write MSR_IA32_SPEC_CTRL and MSR_IA32_PRED_CMD. Testing only Intel bits on VMX processors, or only AMD bits on SVM processors, fails if the guests are created with the "opposite" vendor as the host. While at it, also tweak the host CPU check to use the vendor-agnostic feature bit X86_FEATURE_IBPB, since we only care about the availability of the MSR on the host here and not about specific CPUID bits. Fixes: e7c587da1252 ("x86/speculation: Use synthetic bits for IBRS/IBPB/STIBP") Cc: stable@vger.kernel.org Reported-by: Denis V. Lunev Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.h | 14 ++++++++++++++ arch/x86/kvm/svm/svm.c | 14 ++++---------- arch/x86/kvm/vmx/vmx.c | 8 ++++---- 3 files changed, 22 insertions(+), 14 deletions(-) diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index f7a6e8f83783c..dc921d76e42e8 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -264,6 +264,20 @@ static inline int guest_cpuid_stepping(struct kvm_vcpu *vcpu) return x86_stepping(best->eax); } +static inline bool guest_has_spec_ctrl_msr(struct kvm_vcpu *vcpu) +{ + return (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) || + guest_cpuid_has(vcpu, X86_FEATURE_AMD_STIBP) || + guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) || + guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD)); +} + +static inline bool guest_has_pred_cmd_msr(struct kvm_vcpu *vcpu) +{ + return (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) || + guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBPB)); +} + static inline bool supports_cpuid_fault(struct kvm_vcpu *vcpu) { return vcpu->arch.msr_platform_info & MSR_PLATFORM_INFO_CPUID_FAULT; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 6dc337b9c2319..0e52fac4f5ae9 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2545,10 +2545,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) && - !guest_cpuid_has(vcpu, X86_FEATURE_AMD_STIBP) && - !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) && - !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD)) + !guest_has_spec_ctrl_msr(vcpu)) return 1; msr_info->data = svm->spec_ctrl; @@ -2632,10 +2629,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) break; case MSR_IA32_SPEC_CTRL: if (!msr->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) && - !guest_cpuid_has(vcpu, X86_FEATURE_AMD_STIBP) && - !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) && - !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD)) + !guest_has_spec_ctrl_msr(vcpu)) return 1; if (kvm_spec_ctrl_test_value(data)) @@ -2660,12 +2654,12 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) break; case MSR_IA32_PRED_CMD: if (!msr->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBPB)) + !guest_has_pred_cmd_msr(vcpu)) return 1; if (data & ~PRED_CMD_IBPB) return 1; - if (!boot_cpu_has(X86_FEATURE_AMD_IBPB)) + if (!boot_cpu_has(X86_FEATURE_IBPB)) return 1; if (!data) break; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index c3441e7e5a87b..4b854a197e44e 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1826,7 +1826,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) + !guest_has_spec_ctrl_msr(vcpu)) return 1; msr_info->data = to_vmx(vcpu)->spec_ctrl; @@ -2028,7 +2028,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) + !guest_has_spec_ctrl_msr(vcpu)) return 1; if (kvm_spec_ctrl_test_value(data)) @@ -2063,12 +2063,12 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) goto find_uret_msr; case MSR_IA32_PRED_CMD: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) + !guest_has_pred_cmd_msr(vcpu)) return 1; if (data & ~PRED_CMD_IBPB) return 1; - if (!boot_cpu_has(X86_FEATURE_SPEC_CTRL)) + if (!boot_cpu_has(X86_FEATURE_IBPB)) return 1; if (!data) break; -- GitLab From 33114c4359592d5c8a3d840eee9ff40039caf26f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 13 Dec 2020 01:54:29 +0900 Subject: [PATCH 0516/1894] kbuild: do not use scripts/ld-version.sh for checking spatch version scripts/ld-version.sh was, as its file name implies, originally intended for the GNU ld version, but is (ab)used for the spatch version too. Use 'sort -CV' for the version comparison, then coccicheck does not need to use scripts/ld-version.sh. Fix nsdeps as well. Signed-off-by: Masahiro Yamada Signed-off-by: Julia Lawall --- scripts/coccicheck | 14 +++++--------- scripts/nsdeps | 4 +--- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/scripts/coccicheck b/scripts/coccicheck index 209bb0427b43c..d7f6b7ff130ab 100755 --- a/scripts/coccicheck +++ b/scripts/coccicheck @@ -16,7 +16,6 @@ if [ ! -x "$SPATCH" ]; then fi SPATCH_VERSION=$($SPATCH --version | head -1 | awk '{print $3}') -SPATCH_VERSION_NUM=$(echo $SPATCH_VERSION | ${DIR}/scripts/ld-version.sh) USE_JOBS="no" $SPATCH --help | grep "\-\-jobs" > /dev/null && USE_JOBS="yes" @@ -186,14 +185,11 @@ coccinelle () { OPT=`grep "Options:" $COCCI | cut -d':' -f2` REQ=`grep "Requires:" $COCCI | cut -d':' -f2 | sed "s| ||"` - REQ_NUM=$(echo $REQ | ${DIR}/scripts/ld-version.sh) - if [ "$REQ_NUM" != "0" ] ; then - if [ "$SPATCH_VERSION_NUM" -lt "$REQ_NUM" ] ; then - echo "Skipping coccinelle SmPL patch: $COCCI" - echo "You have coccinelle: $SPATCH_VERSION" - echo "This SmPL patch requires: $REQ" - return - fi + if [ -n "$REQ" ] && ! { echo "$REQ"; echo "$SPATCH_VERSION"; } | sort -CV ; then + echo "Skipping coccinelle SmPL patch: $COCCI" + echo "You have coccinelle: $SPATCH_VERSION" + echo "This SmPL patch requires: $REQ" + return fi # The option '--parse-cocci' can be used to syntactically check the SmPL files. diff --git a/scripts/nsdeps b/scripts/nsdeps index dab4c1a0e27dd..e8ce2a4d704ab 100644 --- a/scripts/nsdeps +++ b/scripts/nsdeps @@ -12,11 +12,9 @@ if [ ! -x "$SPATCH" ]; then exit 1 fi -SPATCH_REQ_VERSION_NUM=$(echo $SPATCH_REQ_VERSION | ${DIR}/scripts/ld-version.sh) SPATCH_VERSION=$($SPATCH --version | head -1 | awk '{print $3}') -SPATCH_VERSION_NUM=$(echo $SPATCH_VERSION | ${DIR}/scripts/ld-version.sh) -if [ "$SPATCH_VERSION_NUM" -lt "$SPATCH_REQ_VERSION_NUM" ] ; then +if ! { echo "$SPATCH_REQ_VERSION"; echo "$SPATCH_VERSION"; } | sort -CV ; then echo "spatch needs to be version $SPATCH_REQ_VERSION or higher" exit 1 fi -- GitLab From 7f6f1dfb2dcbe5d2bfa213f2df5d74c147cd5954 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 8 Nov 2020 08:25:49 -0800 Subject: [PATCH 0517/1894] watchdog: armada_37xx: Add missing dependency on HAS_IOMEM The following kbuild warning is seen on a system without HAS_IOMEM. WARNING: unmet direct dependencies detected for MFD_SYSCON Depends on [n]: HAS_IOMEM [=n] Selected by [y]: - ARMADA_37XX_WATCHDOG [=y] && WATCHDOG [=y] && (ARCH_MVEBU || COMPILE_TEST This results in a subsequent compile error. drivers/watchdog/armada_37xx_wdt.o: in function `armada_37xx_wdt_probe': armada_37xx_wdt.c:(.text+0xdc): undefined reference to `devm_ioremap' Add the missing dependency. Reported-by: Necip Fazil Yildiran Fixes: 54e3d9b518c8 ("watchdog: Add support for Armada 37xx CPU watchdog") Link: https://lore.kernel.org/r/20201108162550.27660-1-linux@roeck-us.net Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index fd7968635e6df..b3e8bdaa2a112 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -386,6 +386,7 @@ config ARM_SBSA_WATCHDOG config ARMADA_37XX_WATCHDOG tristate "Armada 37xx watchdog" depends on ARCH_MVEBU || COMPILE_TEST + depends on HAS_IOMEM select MFD_SYSCON select WATCHDOG_CORE help -- GitLab From 8ae2511112d2e18bc7d324b77f965d34083a25a2 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 8 Nov 2020 08:25:50 -0800 Subject: [PATCH 0518/1894] watchdog: sirfsoc: Add missing dependency on HAS_IOMEM If HAS_IOMEM is not defined and SIRFSOC_WATCHDOG is enabled, the build fails with the following error. drivers/watchdog/sirfsoc_wdt.o: in function `sirfsoc_wdt_probe': sirfsoc_wdt.c:(.text+0x112): undefined reference to `devm_platform_ioremap_resource' Reported-by: Necip Fazil Yildiran Fixes: da2a68b3eb47 ("watchdog: Enable COMPILE_TEST where possible") Link: https://lore.kernel.org/r/20201108162550.27660-2-linux@roeck-us.net Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index b3e8bdaa2a112..f8e9be65036ae 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -790,6 +790,7 @@ config MOXART_WDT config SIRFSOC_WATCHDOG tristate "SiRFSOC watchdog" + depends on HAS_IOMEM depends on ARCH_SIRF || COMPILE_TEST select WATCHDOG_CORE default y -- GitLab From f61a59acb462840bebcc192f754fe71b6a16ff99 Mon Sep 17 00:00:00 2001 From: Lingling Xu Date: Thu, 29 Oct 2020 10:39:31 +0800 Subject: [PATCH 0519/1894] watchdog: sprd: remove watchdog disable from resume fail path sprd_wdt_start() would return fail if the loading operation is not completed in a certain time, disabling watchdog for that case would probably cause the kernel crash when kick watchdog later, that's too bad, so remove the watchdog disable operation for the fail case to make sure other parts in the kernel can run normally. [ chunyan: Massaged changelog ] Fixes: 477603467009 ("watchdog: Add Spreadtrum watchdog driver") Signed-off-by: Lingling Xu Signed-off-by: Chunyan Zhang Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20201029023933.24548-2-zhang.lyra@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/sprd_wdt.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/watchdog/sprd_wdt.c b/drivers/watchdog/sprd_wdt.c index 65cb55f3916fc..f3c90b4afead1 100644 --- a/drivers/watchdog/sprd_wdt.c +++ b/drivers/watchdog/sprd_wdt.c @@ -345,15 +345,10 @@ static int __maybe_unused sprd_wdt_pm_resume(struct device *dev) if (ret) return ret; - if (watchdog_active(&wdt->wdd)) { + if (watchdog_active(&wdt->wdd)) ret = sprd_wdt_start(&wdt->wdd); - if (ret) { - sprd_wdt_disable(wdt); - return ret; - } - } - return 0; + return ret; } static const struct dev_pm_ops sprd_wdt_pm_ops = { -- GitLab From 3e07d240939803bed9feb2a353d94686a411a7ca Mon Sep 17 00:00:00 2001 From: Lingling Xu Date: Mon, 9 Nov 2020 11:00:54 +0800 Subject: [PATCH 0520/1894] watchdog: sprd: check busy bit before new loading rather than after that As the specification described, users must check busy bit before start a new loading operation to make sure that the previous loading is done and the device is ready to accept a new one. [ chunyan: Massaged changelog ] Fixes: 477603467009 ("watchdog: Add Spreadtrum watchdog driver") Signed-off-by: Lingling Xu Signed-off-by: Chunyan Zhang Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20201029023933.24548-3-zhang.lyra@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/sprd_wdt.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/watchdog/sprd_wdt.c b/drivers/watchdog/sprd_wdt.c index f3c90b4afead1..b9b1daa9e2a4c 100644 --- a/drivers/watchdog/sprd_wdt.c +++ b/drivers/watchdog/sprd_wdt.c @@ -108,18 +108,6 @@ static int sprd_wdt_load_value(struct sprd_wdt *wdt, u32 timeout, u32 tmr_step = timeout * SPRD_WDT_CNT_STEP; u32 prtmr_step = pretimeout * SPRD_WDT_CNT_STEP; - sprd_wdt_unlock(wdt->base); - writel_relaxed((tmr_step >> SPRD_WDT_CNT_HIGH_SHIFT) & - SPRD_WDT_LOW_VALUE_MASK, wdt->base + SPRD_WDT_LOAD_HIGH); - writel_relaxed((tmr_step & SPRD_WDT_LOW_VALUE_MASK), - wdt->base + SPRD_WDT_LOAD_LOW); - writel_relaxed((prtmr_step >> SPRD_WDT_CNT_HIGH_SHIFT) & - SPRD_WDT_LOW_VALUE_MASK, - wdt->base + SPRD_WDT_IRQ_LOAD_HIGH); - writel_relaxed(prtmr_step & SPRD_WDT_LOW_VALUE_MASK, - wdt->base + SPRD_WDT_IRQ_LOAD_LOW); - sprd_wdt_lock(wdt->base); - /* * Waiting the load value operation done, * it needs two or three RTC clock cycles. @@ -134,6 +122,19 @@ static int sprd_wdt_load_value(struct sprd_wdt *wdt, u32 timeout, if (delay_cnt >= SPRD_WDT_LOAD_TIMEOUT) return -EBUSY; + + sprd_wdt_unlock(wdt->base); + writel_relaxed((tmr_step >> SPRD_WDT_CNT_HIGH_SHIFT) & + SPRD_WDT_LOW_VALUE_MASK, wdt->base + SPRD_WDT_LOAD_HIGH); + writel_relaxed((tmr_step & SPRD_WDT_LOW_VALUE_MASK), + wdt->base + SPRD_WDT_LOAD_LOW); + writel_relaxed((prtmr_step >> SPRD_WDT_CNT_HIGH_SHIFT) & + SPRD_WDT_LOW_VALUE_MASK, + wdt->base + SPRD_WDT_IRQ_LOAD_HIGH); + writel_relaxed(prtmr_step & SPRD_WDT_LOW_VALUE_MASK, + wdt->base + SPRD_WDT_IRQ_LOAD_LOW); + sprd_wdt_lock(wdt->base); + return 0; } -- GitLab From 2a6c9c65b2fe1023f8bec543d3c70a107fd8b9fb Mon Sep 17 00:00:00 2001 From: Chunyan Zhang Date: Mon, 9 Nov 2020 11:00:55 +0800 Subject: [PATCH 0521/1894] watchdog: sprd: change to use usleep_range() instead of busy loop After changing to check busy bit for the previous loading operation instead of the current one, for most of cases, the busy bit is not set for the first time of read, so there's no need to check so frequently, so this patch use usleep_range() to replace cpu_relax() to avoid busy loop. Also this patch change the max times to 11 which would be enough, since according to the specification, the busy bit would be set after a new loading operation and last 2 or 3 RTC clock cycles (about 60us~92us). Fixes: 477603467009 ("watchdog: Add Spreadtrum watchdog driver") Original-by: Lingling Xu Signed-off-by: Chunyan Zhang Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20201029023933.24548-4-zhang.lyra@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/sprd_wdt.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/watchdog/sprd_wdt.c b/drivers/watchdog/sprd_wdt.c index b9b1daa9e2a4c..4e689b6ff1418 100644 --- a/drivers/watchdog/sprd_wdt.c +++ b/drivers/watchdog/sprd_wdt.c @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -53,7 +54,7 @@ #define SPRD_WDT_CNT_HIGH_SHIFT 16 #define SPRD_WDT_LOW_VALUE_MASK GENMASK(15, 0) -#define SPRD_WDT_LOAD_TIMEOUT 1000 +#define SPRD_WDT_LOAD_TIMEOUT 11 struct sprd_wdt { void __iomem *base; @@ -109,15 +110,17 @@ static int sprd_wdt_load_value(struct sprd_wdt *wdt, u32 timeout, u32 prtmr_step = pretimeout * SPRD_WDT_CNT_STEP; /* - * Waiting the load value operation done, - * it needs two or three RTC clock cycles. + * Checking busy bit to make sure the previous loading operation is + * done. According to the specification, the busy bit would be set + * after a new loading operation and last 2 or 3 RTC clock + * cycles (about 60us~92us). */ do { val = readl_relaxed(wdt->base + SPRD_WDT_INT_RAW); if (!(val & SPRD_WDT_LD_BUSY_BIT)) break; - cpu_relax(); + usleep_range(10, 100); } while (delay_cnt++ < SPRD_WDT_LOAD_TIMEOUT); if (delay_cnt >= SPRD_WDT_LOAD_TIMEOUT) -- GitLab From 7c7164f935c8190af7e3663f4e82edc0607dc3a4 Mon Sep 17 00:00:00 2001 From: Etienne Carriere Date: Fri, 6 Nov 2020 15:23:27 +0100 Subject: [PATCH 0522/1894] watchdog: stm32_iwdg: don't print an error on probe deferral Do not print an error trace when deferring probe for clock resources. Signed-off-by: Etienne Carriere Signed-off-by: Christophe Roullier Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20201106142327.3129-2-christophe.roullier@st.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/stm32_iwdg.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/watchdog/stm32_iwdg.c b/drivers/watchdog/stm32_iwdg.c index 25188d6bbe152..a3436c296c97b 100644 --- a/drivers/watchdog/stm32_iwdg.c +++ b/drivers/watchdog/stm32_iwdg.c @@ -162,18 +162,15 @@ static int stm32_iwdg_clk_init(struct platform_device *pdev, u32 ret; wdt->clk_lsi = devm_clk_get(dev, "lsi"); - if (IS_ERR(wdt->clk_lsi)) { - dev_err(dev, "Unable to get lsi clock\n"); - return PTR_ERR(wdt->clk_lsi); - } + if (IS_ERR(wdt->clk_lsi)) + return dev_err_probe(dev, PTR_ERR(wdt->clk_lsi), "Unable to get lsi clock\n"); /* optional peripheral clock */ if (wdt->data->has_pclk) { wdt->clk_pclk = devm_clk_get(dev, "pclk"); - if (IS_ERR(wdt->clk_pclk)) { - dev_err(dev, "Unable to get pclk clock\n"); - return PTR_ERR(wdt->clk_pclk); - } + if (IS_ERR(wdt->clk_pclk)) + return dev_err_probe(dev, PTR_ERR(wdt->clk_pclk), + "Unable to get pclk clock\n"); ret = clk_prepare_enable(wdt->clk_pclk); if (ret) { -- GitLab From 4600736f050f210bcdaafd2ef730ad736da9bc0c Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Fri, 6 Nov 2020 14:05:07 +0100 Subject: [PATCH 0523/1894] watchdog: remove pnx83xx driver Commit 625326ea9c84 ("MIPS: Remove PNX833x alias NXP_STB22x") removed support for PNX833x, so it's time to remove watchdog driver, too. Signed-off-by: Thomas Bogendoerfer Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20201106130508.103598-1-tsbogend@alpha.franken.de Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/Kconfig | 10 -- drivers/watchdog/Makefile | 1 - drivers/watchdog/pnx833x_wdt.c | 277 --------------------------------- 3 files changed, 288 deletions(-) delete mode 100644 drivers/watchdog/pnx833x_wdt.c diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index f8e9be65036ae..5d3c3ec918a80 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -1698,16 +1698,6 @@ config WDT_MTX1 Hardware driver for the MTX-1 boards. This is a watchdog timer that will reboot the machine after a 100 seconds timer expired. -config PNX833X_WDT - tristate "PNX833x Hardware Watchdog" - depends on SOC_PNX8335 - depends on BROKEN - help - Hardware driver for the PNX833x's watchdog. This is a - watchdog timer that will reboot the machine after a programmable - timer has expired and no process has written to /dev/watchdog during - that time. - config SIBYTE_WDOG tristate "Sibyte SoC hardware watchdog" depends on CPU_SB1 || (MIPS && COMPILE_TEST) diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile index 071a2e50be980..5c74ee19d4411 100644 --- a/drivers/watchdog/Makefile +++ b/drivers/watchdog/Makefile @@ -161,7 +161,6 @@ obj-$(CONFIG_RC32434_WDT) += rc32434_wdt.o obj-$(CONFIG_INDYDOG) += indydog.o obj-$(CONFIG_JZ4740_WDT) += jz4740_wdt.o obj-$(CONFIG_WDT_MTX1) += mtx-1_wdt.o -obj-$(CONFIG_PNX833X_WDT) += pnx833x_wdt.o obj-$(CONFIG_SIBYTE_WDOG) += sb_wdog.o obj-$(CONFIG_AR7_WDT) += ar7_wdt.o obj-$(CONFIG_TXX9_WDT) += txx9wdt.o diff --git a/drivers/watchdog/pnx833x_wdt.c b/drivers/watchdog/pnx833x_wdt.c deleted file mode 100644 index 4097d076aab87..0000000000000 --- a/drivers/watchdog/pnx833x_wdt.c +++ /dev/null @@ -1,277 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * PNX833x Hardware Watchdog Driver - * Copyright 2008 NXP Semiconductors - * Daniel Laird - * Andre McCurdy - * - * Heavily based upon - IndyDog 0.3 - * A Hardware Watchdog Device for SGI IP22 - * - * (c) Copyright 2002 Guido Guenther , All Rights Reserved. - * - * based on softdog.c by Alan Cox - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define WATCHDOG_TIMEOUT 30 /* 30 sec Maximum timeout */ -#define WATCHDOG_COUNT_FREQUENCY 68000000U /* Watchdog counts at 68MHZ. */ -#define PNX_WATCHDOG_TIMEOUT (WATCHDOG_TIMEOUT * WATCHDOG_COUNT_FREQUENCY) -#define PNX_TIMEOUT_VALUE 2040000000U - -/** CONFIG block */ -#define PNX833X_CONFIG (0x07000U) -#define PNX833X_CONFIG_CPU_WATCHDOG (0x54) -#define PNX833X_CONFIG_CPU_WATCHDOG_COMPARE (0x58) -#define PNX833X_CONFIG_CPU_COUNTERS_CONTROL (0x1c) - -/** RESET block */ -#define PNX833X_RESET (0x08000U) -#define PNX833X_RESET_CONFIG (0x08) - -static int pnx833x_wdt_alive; - -/* Set default timeout in MHZ.*/ -static int pnx833x_wdt_timeout = PNX_WATCHDOG_TIMEOUT; -module_param(pnx833x_wdt_timeout, int, 0); -MODULE_PARM_DESC(timeout, "Watchdog timeout in Mhz. (68Mhz clock), default=" - __MODULE_STRING(PNX_TIMEOUT_VALUE) "(30 seconds)."); - -static bool nowayout = WATCHDOG_NOWAYOUT; -module_param(nowayout, bool, 0); -MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default=" - __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); - -#define START_DEFAULT 1 -static int start_enabled = START_DEFAULT; -module_param(start_enabled, int, 0); -MODULE_PARM_DESC(start_enabled, "Watchdog is started on module insertion " - "(default=" __MODULE_STRING(START_DEFAULT) ")"); - -static void pnx833x_wdt_start(void) -{ - /* Enable watchdog causing reset. */ - PNX833X_REG(PNX833X_RESET + PNX833X_RESET_CONFIG) |= 0x1; - /* Set timeout.*/ - PNX833X_REG(PNX833X_CONFIG + - PNX833X_CONFIG_CPU_WATCHDOG_COMPARE) = pnx833x_wdt_timeout; - /* Enable watchdog. */ - PNX833X_REG(PNX833X_CONFIG + - PNX833X_CONFIG_CPU_COUNTERS_CONTROL) |= 0x1; - - pr_info("Started watchdog timer\n"); -} - -static void pnx833x_wdt_stop(void) -{ - /* Disable watchdog causing reset. */ - PNX833X_REG(PNX833X_RESET + PNX833X_CONFIG) &= 0xFFFFFFFE; - /* Disable watchdog.*/ - PNX833X_REG(PNX833X_CONFIG + - PNX833X_CONFIG_CPU_COUNTERS_CONTROL) &= 0xFFFFFFFE; - - pr_info("Stopped watchdog timer\n"); -} - -static void pnx833x_wdt_ping(void) -{ - PNX833X_REG(PNX833X_CONFIG + - PNX833X_CONFIG_CPU_WATCHDOG_COMPARE) = pnx833x_wdt_timeout; -} - -/* - * Allow only one person to hold it open - */ -static int pnx833x_wdt_open(struct inode *inode, struct file *file) -{ - if (test_and_set_bit(0, &pnx833x_wdt_alive)) - return -EBUSY; - - if (nowayout) - __module_get(THIS_MODULE); - - /* Activate timer */ - if (!start_enabled) - pnx833x_wdt_start(); - - pnx833x_wdt_ping(); - - pr_info("Started watchdog timer\n"); - - return stream_open(inode, file); -} - -static int pnx833x_wdt_release(struct inode *inode, struct file *file) -{ - /* Shut off the timer. - * Lock it in if it's a module and we defined ...NOWAYOUT */ - if (!nowayout) - pnx833x_wdt_stop(); /* Turn the WDT off */ - - clear_bit(0, &pnx833x_wdt_alive); - return 0; -} - -static ssize_t pnx833x_wdt_write(struct file *file, const char *data, size_t len, loff_t *ppos) -{ - /* Refresh the timer. */ - if (len) - pnx833x_wdt_ping(); - - return len; -} - -static long pnx833x_wdt_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) -{ - int options, new_timeout = 0; - uint32_t timeout, timeout_left = 0; - - static const struct watchdog_info ident = { - .options = WDIOF_KEEPALIVEPING | WDIOF_SETTIMEOUT, - .firmware_version = 0, - .identity = "Hardware Watchdog for PNX833x", - }; - - switch (cmd) { - default: - return -ENOTTY; - - case WDIOC_GETSUPPORT: - if (copy_to_user((struct watchdog_info *)arg, - &ident, sizeof(ident))) - return -EFAULT; - return 0; - - case WDIOC_GETSTATUS: - case WDIOC_GETBOOTSTATUS: - return put_user(0, (int *)arg); - - case WDIOC_SETOPTIONS: - if (get_user(options, (int *)arg)) - return -EFAULT; - - if (options & WDIOS_DISABLECARD) - pnx833x_wdt_stop(); - - if (options & WDIOS_ENABLECARD) - pnx833x_wdt_start(); - - return 0; - - case WDIOC_KEEPALIVE: - pnx833x_wdt_ping(); - return 0; - - case WDIOC_SETTIMEOUT: - { - if (get_user(new_timeout, (int *)arg)) - return -EFAULT; - - pnx833x_wdt_timeout = new_timeout; - PNX833X_REG(PNX833X_CONFIG + - PNX833X_CONFIG_CPU_WATCHDOG_COMPARE) = new_timeout; - return put_user(new_timeout, (int *)arg); - } - - case WDIOC_GETTIMEOUT: - timeout = PNX833X_REG(PNX833X_CONFIG + - PNX833X_CONFIG_CPU_WATCHDOG_COMPARE); - return put_user(timeout, (int *)arg); - - case WDIOC_GETTIMELEFT: - timeout_left = PNX833X_REG(PNX833X_CONFIG + - PNX833X_CONFIG_CPU_WATCHDOG); - return put_user(timeout_left, (int *)arg); - - } -} - -static int pnx833x_wdt_notify_sys(struct notifier_block *this, - unsigned long code, void *unused) -{ - if (code == SYS_DOWN || code == SYS_HALT) - pnx833x_wdt_stop(); /* Turn the WDT off */ - - return NOTIFY_DONE; -} - -static const struct file_operations pnx833x_wdt_fops = { - .owner = THIS_MODULE, - .llseek = no_llseek, - .write = pnx833x_wdt_write, - .unlocked_ioctl = pnx833x_wdt_ioctl, - .compat_ioctl = compat_ptr_ioctl, - .open = pnx833x_wdt_open, - .release = pnx833x_wdt_release, -}; - -static struct miscdevice pnx833x_wdt_miscdev = { - .minor = WATCHDOG_MINOR, - .name = "watchdog", - .fops = &pnx833x_wdt_fops, -}; - -static struct notifier_block pnx833x_wdt_notifier = { - .notifier_call = pnx833x_wdt_notify_sys, -}; - -static int __init watchdog_init(void) -{ - int ret, cause; - - /* Lets check the reason for the reset.*/ - cause = PNX833X_REG(PNX833X_RESET); - /*If bit 31 is set then watchdog was cause of reset.*/ - if (cause & 0x80000000) { - pr_info("The system was previously reset due to the watchdog firing - please investigate...\n"); - } - - ret = register_reboot_notifier(&pnx833x_wdt_notifier); - if (ret) { - pr_err("cannot register reboot notifier (err=%d)\n", ret); - return ret; - } - - ret = misc_register(&pnx833x_wdt_miscdev); - if (ret) { - pr_err("cannot register miscdev on minor=%d (err=%d)\n", - WATCHDOG_MINOR, ret); - unregister_reboot_notifier(&pnx833x_wdt_notifier); - return ret; - } - - pr_info("Hardware Watchdog Timer for PNX833x: Version 0.1\n"); - - if (start_enabled) - pnx833x_wdt_start(); - - return 0; -} - -static void __exit watchdog_exit(void) -{ - misc_deregister(&pnx833x_wdt_miscdev); - unregister_reboot_notifier(&pnx833x_wdt_notifier); -} - -module_init(watchdog_init); -module_exit(watchdog_exit); - -MODULE_AUTHOR("Daniel Laird/Andre McCurdy"); -MODULE_DESCRIPTION("Hardware Watchdog Device for PNX833x"); -MODULE_LICENSE("GPL"); -- GitLab From 8650d0f9e9334f2e1c209f1e2ac8341f91e30d75 Mon Sep 17 00:00:00 2001 From: Robert Marko Date: Sat, 31 Oct 2020 13:11:15 +0100 Subject: [PATCH 0524/1894] watchdog: qcom_wdt: set WDOG_HW_RUNNING bit when appropriate If the watchdog hardware is enabled/running during boot, e.g. due to a boot loader configuring it, we must tell the watchdog framework about this fact so that it can ping the watchdog until userspace opens the device and takes over control. Do so using the WDOG_HW_RUNNING flag that exists for exactly that use-case. Signed-off-by: Robert Marko Cc: Luka Perkov Reviewed-by: Guenter Roeck Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20201031121115.542752-1-robert.marko@sartura.hr Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/qcom-wdt.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/watchdog/qcom-wdt.c b/drivers/watchdog/qcom-wdt.c index ab7465d186fda..07d399c4edc4a 100644 --- a/drivers/watchdog/qcom-wdt.c +++ b/drivers/watchdog/qcom-wdt.c @@ -152,6 +152,13 @@ static int qcom_wdt_restart(struct watchdog_device *wdd, unsigned long action, return 0; } +static int qcom_wdt_is_running(struct watchdog_device *wdd) +{ + struct qcom_wdt *wdt = to_qcom_wdt(wdd); + + return (readl(wdt_addr(wdt, WDT_EN)) & QCOM_WDT_ENABLE); +} + static const struct watchdog_ops qcom_wdt_ops = { .start = qcom_wdt_start, .stop = qcom_wdt_stop, @@ -294,6 +301,17 @@ static int qcom_wdt_probe(struct platform_device *pdev) wdt->wdd.timeout = min(wdt->wdd.max_timeout, 30U); watchdog_init_timeout(&wdt->wdd, 0, dev); + /* + * If WDT is already running, call WDT start which + * will stop the WDT, set timeouts as bootloader + * might use different ones and set running bit + * to inform the WDT subsystem to ping the WDT + */ + if (qcom_wdt_is_running(&wdt->wdd)) { + qcom_wdt_start(&wdt->wdd); + set_bit(WDOG_HW_RUNNING, &wdt->wdd.status); + } + ret = devm_watchdog_register_device(dev, &wdt->wdd); if (ret) return ret; -- GitLab From 8711071e9700b67045fe5518161d63f7a03e3c9e Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Fri, 30 Oct 2020 23:49:09 +0800 Subject: [PATCH 0525/1894] watchdog: rti-wdt: fix reference leak in rti_wdt_probe pm_runtime_get_sync() will increment pm usage counter even it failed. Forgetting to call pm_runtime_put_noidle will result in reference leak in rti_wdt_probe, so we should fix it. Signed-off-by: Zhang Qilong Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20201030154909.100023-1-zhangqilong3@huawei.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/rti_wdt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/watchdog/rti_wdt.c b/drivers/watchdog/rti_wdt.c index 836319cbaca9d..359302f71f7ef 100644 --- a/drivers/watchdog/rti_wdt.c +++ b/drivers/watchdog/rti_wdt.c @@ -227,8 +227,10 @@ static int rti_wdt_probe(struct platform_device *pdev) pm_runtime_enable(dev); ret = pm_runtime_get_sync(dev); - if (ret) + if (ret) { + pm_runtime_put_noidle(dev); return dev_err_probe(dev, ret, "runtime pm failed\n"); + } platform_set_drvdata(pdev, wdt); -- GitLab From 9747f12b5be9f55bfba72a82b619355cd861bdfe Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Mon, 19 Oct 2020 10:53:42 -0700 Subject: [PATCH 0526/1894] watchdog: geodewdt: remove unneeded break A break is not needed if it is preceded by a return Signed-off-by: Tom Rix Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20201019175342.2646-1-trix@redhat.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/geodewdt.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/watchdog/geodewdt.c b/drivers/watchdog/geodewdt.c index 83418924e30af..0b699c783d575 100644 --- a/drivers/watchdog/geodewdt.c +++ b/drivers/watchdog/geodewdt.c @@ -150,8 +150,6 @@ static long geodewdt_ioctl(struct file *file, unsigned int cmd, case WDIOC_GETSUPPORT: return copy_to_user(argp, &ident, sizeof(ident)) ? -EFAULT : 0; - break; - case WDIOC_GETSTATUS: case WDIOC_GETBOOTSTATUS: return put_user(0, p); -- GitLab From 347755d2a88e54e7462be23f1e1a1018d9be4a4b Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Sun, 25 Oct 2020 13:45:18 +0100 Subject: [PATCH 0527/1894] watchdog: sbc_fitpc2_wdt: add __user annotations After a change to the put_user() macro on x86, kernel test robot has started sending me complaints (from sparse) about passing kernel pointers to put_user(). So add the __user annotations to the various casts in fitpc2_wdt_ioctl(), and while in here, also make the write method actually match the prototype of file_operations::write. Reported-by: kernel test robot Signed-off-by: Rasmus Villemoes Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20201025124518.31647-1-linux@rasmusvillemoes.dk Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/sbc_fitpc2_wdt.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/watchdog/sbc_fitpc2_wdt.c b/drivers/watchdog/sbc_fitpc2_wdt.c index 04483d6453d6a..13db71e165836 100644 --- a/drivers/watchdog/sbc_fitpc2_wdt.c +++ b/drivers/watchdog/sbc_fitpc2_wdt.c @@ -78,7 +78,7 @@ static int fitpc2_wdt_open(struct inode *inode, struct file *file) return stream_open(inode, file); } -static ssize_t fitpc2_wdt_write(struct file *file, const char *data, +static ssize_t fitpc2_wdt_write(struct file *file, const char __user *data, size_t len, loff_t *ppos) { size_t i; @@ -125,16 +125,16 @@ static long fitpc2_wdt_ioctl(struct file *file, unsigned int cmd, switch (cmd) { case WDIOC_GETSUPPORT: - ret = copy_to_user((struct watchdog_info *)arg, &ident, + ret = copy_to_user((struct watchdog_info __user *)arg, &ident, sizeof(ident)) ? -EFAULT : 0; break; case WDIOC_GETSTATUS: - ret = put_user(0, (int *)arg); + ret = put_user(0, (int __user *)arg); break; case WDIOC_GETBOOTSTATUS: - ret = put_user(0, (int *)arg); + ret = put_user(0, (int __user *)arg); break; case WDIOC_KEEPALIVE: @@ -143,7 +143,7 @@ static long fitpc2_wdt_ioctl(struct file *file, unsigned int cmd, break; case WDIOC_SETTIMEOUT: - ret = get_user(time, (int *)arg); + ret = get_user(time, (int __user *)arg); if (ret) break; @@ -157,7 +157,7 @@ static long fitpc2_wdt_ioctl(struct file *file, unsigned int cmd, fallthrough; case WDIOC_GETTIMEOUT: - ret = put_user(margin, (int *)arg); + ret = put_user(margin, (int __user *)arg); break; } -- GitLab From 42e967f3c6cb3828f07a3822d7249bccb55221a4 Mon Sep 17 00:00:00 2001 From: Zhao Qiang Date: Fri, 27 Nov 2020 15:52:17 +0800 Subject: [PATCH 0528/1894] wdt: sp805: add watchdog_stop on reboot Call watchdog_stop_on_reboot in probe func Signed-off-by: Zhao Qiang Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20201127075217.31312-1-qiang.zhao@nxp.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/sp805_wdt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/watchdog/sp805_wdt.c b/drivers/watchdog/sp805_wdt.c index 190d26e2e75f9..958dc32a708fb 100644 --- a/drivers/watchdog/sp805_wdt.c +++ b/drivers/watchdog/sp805_wdt.c @@ -291,6 +291,7 @@ sp805_wdt_probe(struct amba_device *adev, const struct amba_id *id) set_bit(WDOG_HW_RUNNING, &wdt->wdd.status); } + watchdog_stop_on_reboot(&wdt->wdd); ret = watchdog_register_device(&wdt->wdd); if (ret) goto err; -- GitLab From acc195bd2cc48445ea35d00036d8c0afcc4fcc9c Mon Sep 17 00:00:00 2001 From: Jerry Hoemann Date: Sun, 22 Nov 2020 19:08:39 -0700 Subject: [PATCH 0529/1894] watchdog/hpwdt: Disable NMI in Crash Kernel NMIs received during the crash path are problematic as hpwdt_pretimeout handling of the NMI would cause a reentry into kdump. The situation is complicated in that I/O errors can be signaled as NMI circumventing hpwdt_pretimeout's attempt to not claim NMI not associated with either the WDT or the iLO NMI switch. These NMI can additionally cause a secondary NMI which cause the system to hang. By disabling pretimeout and hpwdtimeout in crash path we both reduce the risk of receiving an NMI and simuletaneously leave the WDT running (if it was already in use) to allow the WDT to break the system out of hangs by the WDT reset. Signed-off-by: Jerry Hoemann Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/1606097320-56762-2-git-send-email-jerry.hoemann@hpe.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/hpwdt.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c index 7d34bcf1c45b4..eeb4df24a7e79 100644 --- a/drivers/watchdog/hpwdt.c +++ b/drivers/watchdog/hpwdt.c @@ -21,6 +21,7 @@ #include #include #include +#include #define HPWDT_VERSION "2.0.3" #define SECS_TO_TICKS(secs) ((secs) * 1000 / 128) @@ -334,6 +335,11 @@ static int hpwdt_init_one(struct pci_dev *dev, watchdog_set_nowayout(&hpwdt_dev, nowayout); watchdog_init_timeout(&hpwdt_dev, soft_margin, NULL); + if (is_kdump_kernel()) { + pretimeout = 0; + kdumptimeout = 0; + } + if (pretimeout && hpwdt_dev.timeout <= PRETIMEOUT_SEC) { dev_warn(&dev->dev, "timeout <= pretimeout. Setting pretimeout to zero\n"); pretimeout = 0; -- GitLab From 5674b74e52c052a34e0c4e8a14cdb0924f816d5e Mon Sep 17 00:00:00 2001 From: Jerry Hoemann Date: Sun, 22 Nov 2020 19:08:40 -0700 Subject: [PATCH 0530/1894] watchdog/hpwdt: Reflect changes Bump driver number to reflect recent changes. Signed-off-by: Jerry Hoemann Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/1606097320-56762-3-git-send-email-jerry.hoemann@hpe.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/hpwdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c index eeb4df24a7e79..cbd1498ff0155 100644 --- a/drivers/watchdog/hpwdt.c +++ b/drivers/watchdog/hpwdt.c @@ -23,7 +23,7 @@ #include #include -#define HPWDT_VERSION "2.0.3" +#define HPWDT_VERSION "2.0.4" #define SECS_TO_TICKS(secs) ((secs) * 1000 / 128) #define TICKS_TO_SECS(ticks) ((ticks) * 128 / 1000) #define HPWDT_MAX_TICKS 65535 -- GitLab From 24f98562bb5b1cd6184c583fb53a6068992bec4b Mon Sep 17 00:00:00 2001 From: Wong Vee Khee Date: Thu, 12 Nov 2020 01:22:05 +0800 Subject: [PATCH 0531/1894] watchdog: wdat_wdt: Fix missing kerneldoc reported by W=1 Fix the following warning while compiling with W=1. drivers/watchdog/wdat_wdt.c:48: warning: Function parameter or member 'instructions' not described in 'wdat_wdt' Signed-off-by: Wong Vee Khee Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20201111172205.17215-1-vee.khee.wong@intel.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/wdat_wdt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/watchdog/wdat_wdt.c b/drivers/watchdog/wdat_wdt.c index 3065dd670a182..cec7917790e5e 100644 --- a/drivers/watchdog/wdat_wdt.c +++ b/drivers/watchdog/wdat_wdt.c @@ -34,9 +34,9 @@ struct wdat_instruction { * @period: How long is one watchdog period in ms * @stopped_in_sleep: Is this watchdog stopped by the firmware in S1-S5 * @stopped: Was the watchdog stopped by the driver in suspend - * @actions: An array of instruction lists indexed by an action number from - * the WDAT table. There can be %NULL entries for not implemented - * actions. + * @instructions: An array of instruction lists indexed by an action number from + * the WDAT table. There can be %NULL entries for not implemented + * actions. */ struct wdat_wdt { struct platform_device *pdev; -- GitLab From 6f733cb2e7db38f8141b14740bcde577844a03b7 Mon Sep 17 00:00:00 2001 From: Wang Wensheng Date: Mon, 9 Nov 2020 13:05:12 +0000 Subject: [PATCH 0532/1894] watchdog: Fix potential dereferencing of null pointer A reboot notifier, which stops the WDT by calling the stop hook without any check, would be registered when we set WDOG_STOP_ON_REBOOT flag. Howerer we allow the WDT driver to omit the stop hook since commit "d0684c8a93549" ("watchdog: Make stop function optional") and provide a module parameter for user that controls the WDOG_STOP_ON_REBOOT flag in commit 9232c80659e94 ("watchdog: Add stop_on_reboot parameter to control reboot policy"). Together that commits make user potential to insert a watchdog driver that don't provide a stop hook but with the stop_on_reboot parameter set, then dereferencing of null pointer occurs on system reboot. Check the stop hook before registering the reboot notifier to fix the issue. Fixes: d0684c8a9354 ("watchdog: Make stop function optional") Signed-off-by: Wang Wensheng Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20201109130512.28121-1-wangwensheng4@huawei.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/watchdog_core.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/watchdog/watchdog_core.c b/drivers/watchdog/watchdog_core.c index 4238447578128..0e9a99559609c 100644 --- a/drivers/watchdog/watchdog_core.c +++ b/drivers/watchdog/watchdog_core.c @@ -267,15 +267,19 @@ static int __watchdog_register_device(struct watchdog_device *wdd) } if (test_bit(WDOG_STOP_ON_REBOOT, &wdd->status)) { - wdd->reboot_nb.notifier_call = watchdog_reboot_notifier; - - ret = register_reboot_notifier(&wdd->reboot_nb); - if (ret) { - pr_err("watchdog%d: Cannot register reboot notifier (%d)\n", - wdd->id, ret); - watchdog_dev_unregister(wdd); - ida_simple_remove(&watchdog_ida, id); - return ret; + if (!wdd->ops->stop) + pr_warn("watchdog%d: stop_on_reboot not supported\n", wdd->id); + else { + wdd->reboot_nb.notifier_call = watchdog_reboot_notifier; + + ret = register_reboot_notifier(&wdd->reboot_nb); + if (ret) { + pr_err("watchdog%d: Cannot register reboot notifier (%d)\n", + wdd->id, ret); + watchdog_dev_unregister(wdd); + ida_simple_remove(&watchdog_ida, id); + return ret; + } } } -- GitLab From 89c866f5a238f6f68b0f71fab55f77a07e8f3adb Mon Sep 17 00:00:00 2001 From: "Enrico Weigelt, metux IT consult" Date: Tue, 17 Nov 2020 16:22:12 +0100 Subject: [PATCH 0533/1894] watchdog: iTCO_wdt: use module_platform_device() macro Reducing init boilerplate by using the module_platform_device macro. Signed-off-by: Enrico Weigelt, metux IT consult Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20201117152214.32244-1-info@metux.net Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/iTCO_wdt.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/drivers/watchdog/iTCO_wdt.c b/drivers/watchdog/iTCO_wdt.c index a370a185a41c4..f2ddc8fc71cd6 100644 --- a/drivers/watchdog/iTCO_wdt.c +++ b/drivers/watchdog/iTCO_wdt.c @@ -651,21 +651,7 @@ static struct platform_driver iTCO_wdt_driver = { }, }; -static int __init iTCO_wdt_init_module(void) -{ - pr_info("Intel TCO WatchDog Timer Driver v%s\n", DRV_VERSION); - - return platform_driver_register(&iTCO_wdt_driver); -} - -static void __exit iTCO_wdt_cleanup_module(void) -{ - platform_driver_unregister(&iTCO_wdt_driver); - pr_info("Watchdog Module Unloaded\n"); -} - -module_init(iTCO_wdt_init_module); -module_exit(iTCO_wdt_cleanup_module); +module_platform_driver(iTCO_wdt_driver); MODULE_AUTHOR("Wim Van Sebroeck "); MODULE_DESCRIPTION("Intel TCO WatchDog Timer Driver"); -- GitLab From e0a6aa30504cb8179d07609fb6386705e8f00663 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 13 Dec 2020 09:39:40 +0100 Subject: [PATCH 0534/1894] efi: ia64: disable the capsule loader EFI capsule loading is a feature that was introduced into EFI long after its initial introduction on Itanium, and it is highly unlikely that IA64 systems are receiving firmware updates in the first place, let alone using EFI capsules. So let's disable capsule support altogether on IA64. This fixes a build error on IA64 due to a recent change that added an unconditional include of asm/efi.h, which IA64 does not provide. While at it, tweak the make rules a bit so that the EFI capsule component that is always builtin (even if the EFI capsule loader itself is built as a module) is omitted for all architectures if the module is not enabled in the build. Cc: Tony Luck Link: https://lore.kernel.org/linux-efi/20201214152200.38353-1-ardb@kernel.org Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/Kconfig | 2 +- drivers/firmware/efi/Makefile | 5 ++++- include/linux/efi.h | 10 ++++------ 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig index b452cfa2100b4..5ac2a37ed0253 100644 --- a/drivers/firmware/efi/Kconfig +++ b/drivers/firmware/efi/Kconfig @@ -147,7 +147,7 @@ config EFI_BOOTLOADER_CONTROL config EFI_CAPSULE_LOADER tristate "EFI capsule loader" - depends on EFI + depends on EFI && !IA64 help This option exposes a loader interface "/dev/efi_capsule_loader" for users to load EFI capsules. This driver requires working runtime diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile index d6ca2da19339a..467e94259679f 100644 --- a/drivers/firmware/efi/Makefile +++ b/drivers/firmware/efi/Makefile @@ -12,7 +12,10 @@ KASAN_SANITIZE_runtime-wrappers.o := n obj-$(CONFIG_ACPI_BGRT) += efi-bgrt.o obj-$(CONFIG_EFI) += efi.o vars.o reboot.o memattr.o tpm.o -obj-$(CONFIG_EFI) += capsule.o memmap.o +obj-$(CONFIG_EFI) += memmap.o +ifneq ($(CONFIG_EFI_CAPSULE_LOADER),) +obj-$(CONFIG_EFI) += capsule.o +endif obj-$(CONFIG_EFI_PARAMS_FROM_FDT) += fdtparams.o obj-$(CONFIG_EFI_VARS) += efivars.o obj-$(CONFIG_EFI_ESRT) += esrt.o diff --git a/include/linux/efi.h b/include/linux/efi.h index 1cd5d91d8ca18..763b816ba19ca 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -817,12 +817,6 @@ static inline bool efi_enabled(int feature) static inline void efi_reboot(enum reboot_mode reboot_mode, const char *__unused) {} -static inline bool -efi_capsule_pending(int *reset_type) -{ - return false; -} - static inline bool efi_soft_reserve_enabled(void) { return false; @@ -1038,6 +1032,7 @@ bool efivar_validate(efi_guid_t vendor, efi_char16_t *var_name, u8 *data, bool efivar_variable_is_removable(efi_guid_t vendor, const char *name, size_t len); +#if IS_ENABLED(CONFIG_EFI_CAPSULE_LOADER) extern bool efi_capsule_pending(int *reset_type); extern int efi_capsule_supported(efi_guid_t guid, u32 flags, @@ -1045,6 +1040,9 @@ extern int efi_capsule_supported(efi_guid_t guid, u32 flags, extern int efi_capsule_update(efi_capsule_header_t *capsule, phys_addr_t *pages); +#else +static inline bool efi_capsule_pending(int *reset_type) { return false; } +#endif #ifdef CONFIG_EFI_RUNTIME_MAP int efi_runtime_map_init(struct kobject *); -- GitLab From d72c8b0e1cacc39495cd413433d260e8ae59374a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 13 Dec 2020 16:07:03 +0100 Subject: [PATCH 0535/1894] efi: arm: force use of unsigned type for EFI_PHYS_ALIGN Ensure that EFI_PHYS_ALIGN is an unsigned type, to prevent spurious warnings from the type checks in the definition of the max() macro. Link: https://lore.kernel.org/linux-efi/20201213151306.73558-1-ardb@kernel.org Signed-off-by: Ard Biesheuvel --- arch/arm/include/asm/efi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h index abae071a02e16..9de7ab2ce05d9 100644 --- a/arch/arm/include/asm/efi.h +++ b/arch/arm/include/asm/efi.h @@ -71,7 +71,7 @@ static inline void efifb_setup_from_dmi(struct screen_info *si, const char *opt) * here throws off the memory allocation logic, so let's use the lowest power * of two greater than 2 MiB and greater than TEXT_OFFSET. */ -#define EFI_PHYS_ALIGN max(SZ_2M, roundup_pow_of_two(TEXT_OFFSET)) +#define EFI_PHYS_ALIGN max(UL(SZ_2M), roundup_pow_of_two(TEXT_OFFSET)) /* on ARM, the initrd should be loaded in a lowmem region */ static inline unsigned long efi_get_max_initrd_addr(unsigned long image_addr) -- GitLab From 3f1a18b9fa1c294802d2750d1ef6a1221b10b76b Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Thu, 29 Oct 2020 14:56:00 +0100 Subject: [PATCH 0536/1894] KVM/VMX/SVM: Move kvm_machine_check function to x86.h Move kvm_machine_check to x86.h to avoid two exact copies of the same function in kvm.c and svm.c. Cc: Paolo Bonzini Cc: Sean Christopherson Signed-off-by: Uros Bizjak Message-Id: <20201029135600.122392-1-ubizjak@gmail.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 20 -------------------- arch/x86/kvm/vmx/vmx.c | 20 -------------------- arch/x86/kvm/x86.h | 20 ++++++++++++++++++++ 3 files changed, 20 insertions(+), 40 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 0e52fac4f5ae9..544b6e362cf76 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include @@ -1933,25 +1932,6 @@ static bool is_erratum_383(void) return true; } -/* - * Trigger machine check on the host. We assume all the MSRs are already set up - * by the CPU and that we still run on the same CPU as the MCE occurred on. - * We pass a fake environment to the machine check handler because we want - * the guest to be always treated like user space, no matter what context - * it used internally. - */ -static void kvm_machine_check(void) -{ -#if defined(CONFIG_X86_MCE) - struct pt_regs regs = { - .cs = 3, /* Fake ring 3 no matter what the guest ran on */ - .flags = X86_EFLAGS_IF, - }; - - do_machine_check(®s); -#endif -} - static void svm_handle_mce(struct vcpu_svm *svm) { if (is_erratum_383()) { diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 4b854a197e44e..849be2a9f2605 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -40,7 +40,6 @@ #include #include #include -#include #include #include #include @@ -4716,25 +4715,6 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, return 1; } -/* - * Trigger machine check on the host. We assume all the MSRs are already set up - * by the CPU and that we still run on the same CPU as the MCE occurred on. - * We pass a fake environment to the machine check handler because we want - * the guest to be always treated like user space, no matter what context - * it used internally. - */ -static void kvm_machine_check(void) -{ -#if defined(CONFIG_X86_MCE) - struct pt_regs regs = { - .cs = 3, /* Fake ring 3 no matter what the guest ran on */ - .flags = X86_EFLAGS_IF, - }; - - do_machine_check(®s); -#endif -} - static int handle_machine_check(struct kvm_vcpu *vcpu) { /* handled by vmx_vcpu_run() */ diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 764c967a19933..bf812c89c2e36 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -3,6 +3,7 @@ #define ARCH_X86_KVM_X86_H #include +#include #include #include "kvm_cache_regs.h" #include "kvm_emulate.h" @@ -366,6 +367,25 @@ static inline bool kvm_dr6_valid(u64 data) return !(data >> 32); } +/* + * Trigger machine check on the host. We assume all the MSRs are already set up + * by the CPU and that we still run on the same CPU as the MCE occurred on. + * We pass a fake environment to the machine check handler because we want + * the guest to be always treated like user space, no matter what context + * it used internally. + */ +static inline void kvm_machine_check(void) +{ +#if defined(CONFIG_X86_MCE) + struct pt_regs regs = { + .cs = 3, /* Fake ring 3 no matter what the guest ran on */ + .flags = X86_EFLAGS_IF, + }; + + do_machine_check(®s); +#endif +} + void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu); void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu); int kvm_spec_ctrl_test_value(u64 value); -- GitLab From 69372cf01290b9587d2cee8fbe161d75d55c3adc Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:09:36 -0600 Subject: [PATCH 0537/1894] x86/cpu: Add VM page flush MSR availablility as a CPUID feature On systems that do not have hardware enforced cache coherency between encrypted and unencrypted mappings of the same physical page, the hypervisor can use the VM page flush MSR (0xc001011e) to flush the cache contents of an SEV guest page. When a small number of pages are being flushed, this can be used in place of issuing a WBINVD across all CPUs. CPUID 0x8000001f_eax[2] is used to determine if the VM page flush MSR is available. Add a CPUID feature to indicate it is supported and define the MSR. Signed-off-by: Tom Lendacky Message-Id: Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/msr-index.h | 1 + arch/x86/kernel/cpu/scattered.c | 1 + 3 files changed, 3 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index b9dc6a56d360a..9f9e9511f7cd8 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -237,6 +237,7 @@ #define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */ #define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */ #define X86_FEATURE_SEV_ES ( 8*32+20) /* AMD Secure Encrypted Virtualization - Encrypted State */ +#define X86_FEATURE_VM_PAGE_FLUSH ( 8*32+21) /* "" VM Page Flush MSR is supported */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ #define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 972a34d935059..abfc9b0fbd8de 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -470,6 +470,7 @@ #define MSR_AMD64_ICIBSEXTDCTL 0xc001103c #define MSR_AMD64_IBSOPDATA4 0xc001103d #define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */ +#define MSR_AMD64_VM_PAGE_FLUSH 0xc001011e #define MSR_AMD64_SEV_ES_GHCB 0xc0010130 #define MSR_AMD64_SEV 0xc0010131 #define MSR_AMD64_SEV_ENABLED_BIT 0 diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 866c9a9bcdee7..236924930bf06 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -44,6 +44,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_SEV, CPUID_EAX, 1, 0x8000001f, 0 }, { X86_FEATURE_SEV_ES, CPUID_EAX, 3, 0x8000001f, 0 }, { X86_FEATURE_SME_COHERENT, CPUID_EAX, 10, 0x8000001f, 0 }, + { X86_FEATURE_VM_PAGE_FLUSH, CPUID_EAX, 2, 0x8000001f, 0 }, { 0, 0, 0, 0, 0 } }; -- GitLab From 9d4747d02376aeb8de38afa25430de79129c5799 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:09:37 -0600 Subject: [PATCH 0538/1894] KVM: SVM: Remove the call to sev_platform_status() during setup When both KVM support and the CCP driver are built into the kernel instead of as modules, KVM initialization can happen before CCP initialization. As a result, sev_platform_status() will return a failure when it is called from sev_hardware_setup(), when this isn't really an error condition. Since sev_platform_status() doesn't need to be called at this time anyway, remove the invocation from sev_hardware_setup(). Signed-off-by: Tom Lendacky Message-Id: <618380488358b56af558f2682203786f09a49483.1607620209.git.thomas.lendacky@amd.com> Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 3418bb18dae7f..7166aec97ffb6 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1127,9 +1127,6 @@ void sev_vm_destroy(struct kvm *kvm) int __init sev_hardware_setup(void) { - struct sev_user_data_status *status; - int rc; - /* Maximum number of encrypted guests supported simultaneously */ max_sev_asid = cpuid_ecx(0x8000001F); @@ -1148,26 +1145,9 @@ int __init sev_hardware_setup(void) if (!sev_reclaim_asid_bitmap) return 1; - status = kmalloc(sizeof(*status), GFP_KERNEL); - if (!status) - return 1; - - /* - * Check SEV platform status. - * - * PLATFORM_STATUS can be called in any state, if we failed to query - * the PLATFORM status then either PSP firmware does not support SEV - * feature or SEV firmware is dead. - */ - rc = sev_platform_status(status, NULL); - if (rc) - goto err; - pr_info("SEV supported\n"); -err: - kfree(status); - return rc; + return 0; } void sev_hardware_teardown(void) -- GitLab From 916391a2d1dc225bfb68624352b1495ec529444e Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:09:38 -0600 Subject: [PATCH 0539/1894] KVM: SVM: Add support for SEV-ES capability in KVM Add support to KVM for determining if a system is capable of supporting SEV-ES as well as determining if a guest is an SEV-ES guest. Signed-off-by: Tom Lendacky Message-Id: Signed-off-by: Paolo Bonzini --- arch/x86/kvm/Kconfig | 3 ++- arch/x86/kvm/svm/sev.c | 47 ++++++++++++++++++++++++++++++++++-------- arch/x86/kvm/svm/svm.c | 20 +++++++++--------- arch/x86/kvm/svm/svm.h | 39 ++++++++++++++++++++++++----------- 4 files changed, 77 insertions(+), 32 deletions(-) diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index f92dfd8ef10db..7ac592664c520 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -100,7 +100,8 @@ config KVM_AMD_SEV depends on KVM_AMD && X86_64 depends on CRYPTO_DEV_SP_PSP && !(KVM_AMD=y && CRYPTO_DEV_CCP_DD=m) help - Provides support for launching Encrypted VMs on AMD processors. + Provides support for launching Encrypted VMs (SEV) and Encrypted VMs + with Encrypted State (SEV-ES) on AMD processors. config KVM_MMU_AUDIT bool "Audit KVM MMU" diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 7166aec97ffb6..a2b01cbd05113 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -932,7 +932,7 @@ int svm_mem_enc_op(struct kvm *kvm, void __user *argp) struct kvm_sev_cmd sev_cmd; int r; - if (!svm_sev_enabled()) + if (!svm_sev_enabled() || !sev) return -ENOTTY; if (!argp) @@ -1125,29 +1125,58 @@ void sev_vm_destroy(struct kvm *kvm) sev_asid_free(sev->asid); } -int __init sev_hardware_setup(void) +void __init sev_hardware_setup(void) { + unsigned int eax, ebx, ecx, edx; + bool sev_es_supported = false; + bool sev_supported = false; + + /* Does the CPU support SEV? */ + if (!boot_cpu_has(X86_FEATURE_SEV)) + goto out; + + /* Retrieve SEV CPUID information */ + cpuid(0x8000001f, &eax, &ebx, &ecx, &edx); + /* Maximum number of encrypted guests supported simultaneously */ - max_sev_asid = cpuid_ecx(0x8000001F); + max_sev_asid = ecx; if (!svm_sev_enabled()) - return 1; + goto out; /* Minimum ASID value that should be used for SEV guest */ - min_sev_asid = cpuid_edx(0x8000001F); + min_sev_asid = edx; /* Initialize SEV ASID bitmaps */ sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL); if (!sev_asid_bitmap) - return 1; + goto out; sev_reclaim_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL); if (!sev_reclaim_asid_bitmap) - return 1; + goto out; - pr_info("SEV supported\n"); + pr_info("SEV supported: %u ASIDs\n", max_sev_asid - min_sev_asid + 1); + sev_supported = true; - return 0; + /* SEV-ES support requested? */ + if (!sev_es) + goto out; + + /* Does the CPU support SEV-ES? */ + if (!boot_cpu_has(X86_FEATURE_SEV_ES)) + goto out; + + /* Has the system been allocated ASIDs for SEV-ES? */ + if (min_sev_asid == 1) + goto out; + + pr_info("SEV-ES supported: %u ASIDs\n", min_sev_asid - 1); + sev_es_supported = true; + +out: + sev = sev_supported; + sev_es = sev_es_supported; } void sev_hardware_teardown(void) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 544b6e362cf76..8cb9474b6a036 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -186,9 +186,13 @@ static int vgif = true; module_param(vgif, int, 0444); /* enable/disable SEV support */ -static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT); +int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT); module_param(sev, int, 0444); +/* enable/disable SEV-ES support */ +int sev_es = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT); +module_param(sev_es, int, 0444); + static bool __read_mostly dump_invalid_vmcb = 0; module_param(dump_invalid_vmcb, bool, 0644); @@ -958,15 +962,11 @@ static __init int svm_hardware_setup(void) kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE); } - if (sev) { - if (boot_cpu_has(X86_FEATURE_SEV) && - IS_ENABLED(CONFIG_KVM_AMD_SEV)) { - r = sev_hardware_setup(); - if (r) - sev = false; - } else { - sev = false; - } + if (IS_ENABLED(CONFIG_KVM_AMD_SEV) && sev) { + sev_hardware_setup(); + } else { + sev = false; + sev_es = false; } svm_adjust_mmio_mask(); diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index fdff76eb6ceb7..ef0f0dfabc692 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -61,6 +61,7 @@ enum { struct kvm_sev_info { bool active; /* SEV enabled guest */ + bool es_active; /* SEV-ES enabled guest */ unsigned int asid; /* ASID used for this guest */ unsigned int handle; /* SEV firmware handle */ int fd; /* SEV device fd */ @@ -194,6 +195,28 @@ static inline struct kvm_svm *to_kvm_svm(struct kvm *kvm) return container_of(kvm, struct kvm_svm, kvm); } +static inline bool sev_guest(struct kvm *kvm) +{ +#ifdef CONFIG_KVM_AMD_SEV + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; + + return sev->active; +#else + return false; +#endif +} + +static inline bool sev_es_guest(struct kvm *kvm) +{ +#ifdef CONFIG_KVM_AMD_SEV + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; + + return sev_guest(kvm) && sev->es_active; +#else + return false; +#endif +} + static inline void vmcb_mark_all_dirty(struct vmcb *vmcb) { vmcb->control.clean = 0; @@ -352,6 +375,9 @@ static inline bool gif_set(struct vcpu_svm *svm) #define MSR_CR3_LONG_MBZ_MASK 0xfff0000000000000U #define MSR_INVALID 0xffffffffU +extern int sev; +extern int sev_es; + u32 svm_msrpm_offset(u32 msr); u32 *svm_vcpu_alloc_msrpm(void); void svm_vcpu_init_msrpm(struct kvm_vcpu *vcpu, u32 *msrpm); @@ -473,17 +499,6 @@ void svm_vcpu_unblocking(struct kvm_vcpu *vcpu); extern unsigned int max_sev_asid; -static inline bool sev_guest(struct kvm *kvm) -{ -#ifdef CONFIG_KVM_AMD_SEV - struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; - - return sev->active; -#else - return false; -#endif -} - static inline bool svm_sev_enabled(void) { return IS_ENABLED(CONFIG_KVM_AMD_SEV) ? max_sev_asid : 0; @@ -496,7 +511,7 @@ int svm_register_enc_region(struct kvm *kvm, int svm_unregister_enc_region(struct kvm *kvm, struct kvm_enc_region *range); void pre_sev_run(struct vcpu_svm *svm, int cpu); -int __init sev_hardware_setup(void); +void __init sev_hardware_setup(void); void sev_hardware_teardown(void); #endif -- GitLab From 0f60bde15ee11d03b6143f567cf840d30bf1b588 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:09:39 -0600 Subject: [PATCH 0540/1894] KVM: SVM: Add GHCB accessor functions for retrieving fields Update the GHCB accessor functions to add functions for retrieve GHCB fields by name. Update existing code to use the new accessor functions. Signed-off-by: Tom Lendacky Message-Id: <664172c53a5fb4959914e1a45d88e805649af0ad.1607620209.git.thomas.lendacky@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/svm.h | 10 ++++++++++ arch/x86/kernel/cpu/vmware.c | 12 ++++++------ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 71d630bb5e086..1edf24f51b53d 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -379,6 +379,16 @@ struct vmcb { (unsigned long *)&ghcb->save.valid_bitmap); \ } \ \ + static inline u64 ghcb_get_##field(struct ghcb *ghcb) \ + { \ + return ghcb->save.field; \ + } \ + \ + static inline u64 ghcb_get_##field##_if_valid(struct ghcb *ghcb) \ + { \ + return ghcb_##field##_is_valid(ghcb) ? ghcb->save.field : 0; \ + } \ + \ static inline void ghcb_set_##field(struct ghcb *ghcb, u64 value) \ { \ __set_bit(GHCB_BITMAP_IDX(field), \ diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 924571fe5864b..c6ede3b3d302d 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -501,12 +501,12 @@ static bool vmware_sev_es_hcall_finish(struct ghcb *ghcb, struct pt_regs *regs) ghcb_rbp_is_valid(ghcb))) return false; - regs->bx = ghcb->save.rbx; - regs->cx = ghcb->save.rcx; - regs->dx = ghcb->save.rdx; - regs->si = ghcb->save.rsi; - regs->di = ghcb->save.rdi; - regs->bp = ghcb->save.rbp; + regs->bx = ghcb_get_rbx(ghcb); + regs->cx = ghcb_get_rcx(ghcb); + regs->dx = ghcb_get_rdx(ghcb); + regs->si = ghcb_get_rsi(ghcb); + regs->di = ghcb_get_rdi(ghcb); + regs->bp = ghcb_get_rbp(ghcb); return true; } -- GitLab From add5e2f045414523aa6dc29d69b21e8f82e5ffb8 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:09:40 -0600 Subject: [PATCH 0541/1894] KVM: SVM: Add support for the SEV-ES VMSA Allocate a page during vCPU creation to be used as the encrypted VM save area (VMSA) for the SEV-ES guest. Provide a flag in the kvm_vcpu_arch structure that indicates whether the guest state is protected. When freeing a VMSA page that has been encrypted, the cache contents must be flushed using the MSR_AMD64_VM_PAGE_FLUSH before freeing the page. [ i386 build warnings ] Reported-by: kernel test robot Signed-off-by: Tom Lendacky Message-Id: Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 3 ++ arch/x86/kvm/svm/sev.c | 67 +++++++++++++++++++++++++++++++++ arch/x86/kvm/svm/svm.c | 24 +++++++++++- arch/x86/kvm/svm/svm.h | 5 +++ 4 files changed, 97 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f002cdb13a0b1..8cf6b0493d492 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -805,6 +805,9 @@ struct kvm_vcpu_arch { */ bool enforce; } pv_cpuid; + + /* Protected Guests */ + bool guest_state_protected; }; struct kvm_lpage_info { diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index a2b01cbd05113..501adb43ece37 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "x86.h" #include "svm.h" @@ -1190,6 +1191,72 @@ void sev_hardware_teardown(void) sev_flush_asids(); } +/* + * Pages used by hardware to hold guest encrypted state must be flushed before + * returning them to the system. + */ +static void sev_flush_guest_memory(struct vcpu_svm *svm, void *va, + unsigned long len) +{ + /* + * If hardware enforced cache coherency for encrypted mappings of the + * same physical page is supported, nothing to do. + */ + if (boot_cpu_has(X86_FEATURE_SME_COHERENT)) + return; + + /* + * If the VM Page Flush MSR is supported, use it to flush the page + * (using the page virtual address and the guest ASID). + */ + if (boot_cpu_has(X86_FEATURE_VM_PAGE_FLUSH)) { + struct kvm_sev_info *sev; + unsigned long va_start; + u64 start, stop; + + /* Align start and stop to page boundaries. */ + va_start = (unsigned long)va; + start = (u64)va_start & PAGE_MASK; + stop = PAGE_ALIGN((u64)va_start + len); + + if (start < stop) { + sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info; + + while (start < stop) { + wrmsrl(MSR_AMD64_VM_PAGE_FLUSH, + start | sev->asid); + + start += PAGE_SIZE; + } + + return; + } + + WARN(1, "Address overflow, using WBINVD\n"); + } + + /* + * Hardware should always have one of the above features, + * but if not, use WBINVD and issue a warning. + */ + WARN_ONCE(1, "Using WBINVD to flush guest memory\n"); + wbinvd_on_all_cpus(); +} + +void sev_free_vcpu(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm; + + if (!sev_es_guest(vcpu->kvm)) + return; + + svm = to_svm(vcpu); + + if (vcpu->arch.guest_state_protected) + sev_flush_guest_memory(svm, svm->vmsa, PAGE_SIZE); + __free_page(virt_to_page(svm->vmsa)); +} + void pre_sev_run(struct vcpu_svm *svm, int cpu) { struct svm_cpu_data *sd = per_cpu(svm_data, cpu); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 8cb9474b6a036..801e0a641258f 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1288,6 +1288,7 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm; struct page *vmcb_page; + struct page *vmsa_page = NULL; int err; BUILD_BUG_ON(offsetof(struct vcpu_svm, vcpu) != 0); @@ -1298,9 +1299,19 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) if (!vmcb_page) goto out; + if (sev_es_guest(svm->vcpu.kvm)) { + /* + * SEV-ES guests require a separate VMSA page used to contain + * the encrypted register state of the guest. + */ + vmsa_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); + if (!vmsa_page) + goto error_free_vmcb_page; + } + err = avic_init_vcpu(svm); if (err) - goto error_free_vmcb_page; + goto error_free_vmsa_page; /* We initialize this flag to true to make sure that the is_running * bit would be set the first time the vcpu is loaded. @@ -1310,12 +1321,16 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) svm->msrpm = svm_vcpu_alloc_msrpm(); if (!svm->msrpm) - goto error_free_vmcb_page; + goto error_free_vmsa_page; svm_vcpu_init_msrpm(vcpu, svm->msrpm); svm->vmcb = page_address(vmcb_page); svm->vmcb_pa = __sme_set(page_to_pfn(vmcb_page) << PAGE_SHIFT); + + if (vmsa_page) + svm->vmsa = page_address(vmsa_page); + svm->asid_generation = 0; init_vmcb(svm); @@ -1324,6 +1339,9 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) return 0; +error_free_vmsa_page: + if (vmsa_page) + __free_page(vmsa_page); error_free_vmcb_page: __free_page(vmcb_page); out: @@ -1351,6 +1369,8 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) svm_free_nested(svm); + sev_free_vcpu(vcpu); + __free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT)); __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER); } diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index ef0f0dfabc692..f96a0a66ca35d 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -168,6 +168,10 @@ struct vcpu_svm { DECLARE_BITMAP(read, MAX_DIRECT_ACCESS_MSRS); DECLARE_BITMAP(write, MAX_DIRECT_ACCESS_MSRS); } shadow_msr_intercept; + + /* SEV-ES support */ + struct vmcb_save_area *vmsa; + struct ghcb *ghcb; }; struct svm_cpu_data { @@ -513,5 +517,6 @@ int svm_unregister_enc_region(struct kvm *kvm, void pre_sev_run(struct vcpu_svm *svm, int cpu); void __init sev_hardware_setup(void); void sev_hardware_teardown(void); +void sev_free_vcpu(struct kvm_vcpu *vcpu); #endif -- GitLab From 1c04d8c986567c27c56c05205dceadc92efb14ff Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:09:41 -0600 Subject: [PATCH 0542/1894] KVM: x86: Mark GPRs dirty when written When performing VMGEXIT processing for an SEV-ES guest, register values will be synced between KVM and the GHCB. Prepare for detecting when a GPR has been updated (marked dirty) in order to determine whether to sync the register to the GHCB. Signed-off-by: Tom Lendacky Message-Id: <7ca2a1cdb61456f2fe9c64193e34d601e395c133.1607620209.git.thomas.lendacky@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/kvm_cache_regs.h | 51 ++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index a889563ad02d5..f15bc16de07c3 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h @@ -9,6 +9,31 @@ (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ | X86_CR4_OSXMMEXCPT | X86_CR4_PGE | X86_CR4_TSD | X86_CR4_FSGSBASE) +static inline bool kvm_register_is_available(struct kvm_vcpu *vcpu, + enum kvm_reg reg) +{ + return test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); +} + +static inline bool kvm_register_is_dirty(struct kvm_vcpu *vcpu, + enum kvm_reg reg) +{ + return test_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty); +} + +static inline void kvm_register_mark_available(struct kvm_vcpu *vcpu, + enum kvm_reg reg) +{ + __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); +} + +static inline void kvm_register_mark_dirty(struct kvm_vcpu *vcpu, + enum kvm_reg reg) +{ + __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); + __set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty); +} + #define BUILD_KVM_GPR_ACCESSORS(lname, uname) \ static __always_inline unsigned long kvm_##lname##_read(struct kvm_vcpu *vcpu)\ { \ @@ -18,6 +43,7 @@ static __always_inline void kvm_##lname##_write(struct kvm_vcpu *vcpu, \ unsigned long val) \ { \ vcpu->arch.regs[VCPU_REGS_##uname] = val; \ + kvm_register_mark_dirty(vcpu, VCPU_REGS_##uname); \ } BUILD_KVM_GPR_ACCESSORS(rax, RAX) BUILD_KVM_GPR_ACCESSORS(rbx, RBX) @@ -37,31 +63,6 @@ BUILD_KVM_GPR_ACCESSORS(r14, R14) BUILD_KVM_GPR_ACCESSORS(r15, R15) #endif -static inline bool kvm_register_is_available(struct kvm_vcpu *vcpu, - enum kvm_reg reg) -{ - return test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); -} - -static inline bool kvm_register_is_dirty(struct kvm_vcpu *vcpu, - enum kvm_reg reg) -{ - return test_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty); -} - -static inline void kvm_register_mark_available(struct kvm_vcpu *vcpu, - enum kvm_reg reg) -{ - __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); -} - -static inline void kvm_register_mark_dirty(struct kvm_vcpu *vcpu, - enum kvm_reg reg) -{ - __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); - __set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty); -} - static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, int reg) { if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_REGS)) -- GitLab From f373a811fd9a69fc8bafb9bcb41d2cfa36c62665 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 11 Dec 2020 13:06:52 +0300 Subject: [PATCH 0543/1894] ASoC: Intel: fix error code cnl_set_dsp_D0() Return -ETIMEDOUT if the dsp boot times out instead of returning success. Fixes: cb6a55284629 ("ASoC: Intel: cnl: Add sst library functions for cnl platform") Signed-off-by: Dan Carpenter Reviewed-by: Cezary Rojewski Link: https://lore.kernel.org/r/X9NEvCzuN+IObnTN@mwanda Signed-off-by: Mark Brown --- sound/soc/intel/skylake/cnl-sst.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/intel/skylake/cnl-sst.c b/sound/soc/intel/skylake/cnl-sst.c index fcd8dff27ae8e..1275c149acc02 100644 --- a/sound/soc/intel/skylake/cnl-sst.c +++ b/sound/soc/intel/skylake/cnl-sst.c @@ -224,6 +224,7 @@ static int cnl_set_dsp_D0(struct sst_dsp *ctx, unsigned int core_id) "dsp boot timeout, status=%#x error=%#x\n", sst_dsp_shim_read(ctx, CNL_ADSP_FW_STATUS), sst_dsp_shim_read(ctx, CNL_ADSP_ERROR_CODE)); + ret = -ETIMEDOUT; goto err; } } else { -- GitLab From 1b04fa9900263b4e217ca2509fd778b32c2b4eb2 Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Wed, 9 Dec 2020 21:27:31 +0100 Subject: [PATCH 0544/1894] rcu-tasks: Move RCU-tasks initialization to before early_initcall() PowerPC testing encountered boot failures due to RCU Tasks not being fully initialized until core_initcall() time. This commit therefore initializes RCU Tasks (along with Rude RCU and RCU Tasks Trace) just before early_initcall() time, thus allowing waiting on RCU Tasks grace periods from early_initcall() handlers. Link: https://lore.kernel.org/rcu/87eekfh80a.fsf@dja-thinkpad.axtens.net/ Fixes: 36dadef23fcc ("kprobes: Init kprobes in early_initcall") Tested-by: Daniel Axtens Signed-off-by: Uladzislau Rezki (Sony) Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 6 ++++++ init/main.c | 1 + kernel/rcu/tasks.h | 25 +++++++++++++++++++++---- 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 6cdd0152c253a..5c119d6cecf14 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -86,6 +86,12 @@ void rcu_sched_clock_irq(int user); void rcu_report_dead(unsigned int cpu); void rcutree_migrate_callbacks(int cpu); +#ifdef CONFIG_TASKS_RCU_GENERIC +void rcu_init_tasks_generic(void); +#else +static inline void rcu_init_tasks_generic(void) { } +#endif + #ifdef CONFIG_RCU_STALL_COMMON void rcu_sysrq_start(void); void rcu_sysrq_end(void); diff --git a/init/main.c b/init/main.c index 32b2a8affafd1..9d964511fe0c2 100644 --- a/init/main.c +++ b/init/main.c @@ -1512,6 +1512,7 @@ static noinline void __init kernel_init_freeable(void) init_mm_internals(); + rcu_init_tasks_generic(); do_pre_smp_initcalls(); lockup_detector_init(); diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index d5d9f2d03e8a0..73bbe792fe1e8 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -241,7 +241,7 @@ static int __noreturn rcu_tasks_kthread(void *arg) } } -/* Spawn RCU-tasks grace-period kthread, e.g., at core_initcall() time. */ +/* Spawn RCU-tasks grace-period kthread. */ static void __init rcu_spawn_tasks_kthread_generic(struct rcu_tasks *rtp) { struct task_struct *t; @@ -569,7 +569,6 @@ static int __init rcu_spawn_tasks_kthread(void) rcu_spawn_tasks_kthread_generic(&rcu_tasks); return 0; } -core_initcall(rcu_spawn_tasks_kthread); #ifndef CONFIG_TINY_RCU static void show_rcu_tasks_classic_gp_kthread(void) @@ -697,7 +696,6 @@ static int __init rcu_spawn_tasks_rude_kthread(void) rcu_spawn_tasks_kthread_generic(&rcu_tasks_rude); return 0; } -core_initcall(rcu_spawn_tasks_rude_kthread); #ifndef CONFIG_TINY_RCU static void show_rcu_tasks_rude_gp_kthread(void) @@ -975,6 +973,11 @@ static void rcu_tasks_trace_pregp_step(void) static void rcu_tasks_trace_pertask(struct task_struct *t, struct list_head *hop) { + // During early boot when there is only the one boot CPU, there + // is no idle task for the other CPUs. Just return. + if (unlikely(t == NULL)) + return; + WRITE_ONCE(t->trc_reader_special.b.need_qs, false); WRITE_ONCE(t->trc_reader_checked, false); t->trc_ipi_to_cpu = -1; @@ -1200,7 +1203,6 @@ static int __init rcu_spawn_tasks_trace_kthread(void) rcu_spawn_tasks_kthread_generic(&rcu_tasks_trace); return 0; } -core_initcall(rcu_spawn_tasks_trace_kthread); #ifndef CONFIG_TINY_RCU static void show_rcu_tasks_trace_gp_kthread(void) @@ -1229,6 +1231,21 @@ void show_rcu_tasks_gp_kthreads(void) } #endif /* #ifndef CONFIG_TINY_RCU */ +void __init rcu_init_tasks_generic(void) +{ +#ifdef CONFIG_TASKS_RCU + rcu_spawn_tasks_kthread(); +#endif + +#ifdef CONFIG_TASKS_RUDE_RCU + rcu_spawn_tasks_rude_kthread(); +#endif + +#ifdef CONFIG_TASKS_TRACE_RCU + rcu_spawn_tasks_trace_kthread(); +#endif +} + #else /* #ifdef CONFIG_TASKS_RCU_GENERIC */ static inline void rcu_tasks_bootup_oddness(void) {} void show_rcu_tasks_gp_kthreads(void) {} -- GitLab From 9caec4bf1d0126fa5f2fcd21852958bccd2a4c18 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 14 Dec 2020 07:59:15 -0500 Subject: [PATCH 0545/1894] KVM: x86: remove bogus #GP injection There is no need to inject a #GP from kvm_mtrr_set_msr, kvm_emulate_wrmsr will handle it. Reviewed-by: Tom Lendacky Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mtrr.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c index 7f0059aa30e16..f472fdb6ae7e0 100644 --- a/arch/x86/kvm/mtrr.c +++ b/arch/x86/kvm/mtrr.c @@ -84,12 +84,8 @@ bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data) } else /* MTRR mask */ mask |= 0x7ff; - if (data & mask) { - kvm_inject_gp(vcpu, 0); - return false; - } - return true; + return (data & mask) == 0; } EXPORT_SYMBOL_GPL(kvm_mtrr_valid); -- GitLab From 8b474427cbeea05850fb32da65cc95eebcbad089 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 14 Dec 2020 07:44:46 -0500 Subject: [PATCH 0546/1894] KVM: x86: use kvm_complete_insn_gp in emulating RDMSR/WRMSR Simplify the four functions that handle {kernel,user} {rd,wr}msr, there is still some repetition between the two instances of rdmsr but the whole business of calling kvm_inject_gp and kvm_skip_emulated_instruction can be unified nicely. Because complete_emulated_wrmsr now becomes essentially a call to kvm_complete_insn_gp, remove complete_emulated_msr. Reviewed-by: Tom Lendacky Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 44 ++++++++++++++++---------------------------- 1 file changed, 16 insertions(+), 28 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a3fdc16cfd6f3..5c5a6aa8696df 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1634,27 +1634,20 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) } EXPORT_SYMBOL_GPL(kvm_set_msr); -static int complete_emulated_msr(struct kvm_vcpu *vcpu, bool is_read) +static int complete_emulated_rdmsr(struct kvm_vcpu *vcpu) { - if (vcpu->run->msr.error) { - kvm_inject_gp(vcpu, 0); - return 1; - } else if (is_read) { + int err = vcpu->run->msr.error; + if (!err) { kvm_rax_write(vcpu, (u32)vcpu->run->msr.data); kvm_rdx_write(vcpu, vcpu->run->msr.data >> 32); } - return kvm_skip_emulated_instruction(vcpu); -} - -static int complete_emulated_rdmsr(struct kvm_vcpu *vcpu) -{ - return complete_emulated_msr(vcpu, true); + return kvm_complete_insn_gp(vcpu, err); } static int complete_emulated_wrmsr(struct kvm_vcpu *vcpu) { - return complete_emulated_msr(vcpu, false); + return kvm_complete_insn_gp(vcpu, vcpu->run->msr.error); } static u64 kvm_msr_reason(int r) @@ -1717,18 +1710,16 @@ int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu) return 0; } - /* MSR read failed? Inject a #GP */ - if (r) { + if (!r) { + trace_kvm_msr_read(ecx, data); + + kvm_rax_write(vcpu, data & -1u); + kvm_rdx_write(vcpu, (data >> 32) & -1u); + } else { trace_kvm_msr_read_ex(ecx); - kvm_inject_gp(vcpu, 0); - return 1; } - trace_kvm_msr_read(ecx, data); - - kvm_rax_write(vcpu, data & -1u); - kvm_rdx_write(vcpu, (data >> 32) & -1u); - return kvm_skip_emulated_instruction(vcpu); + return kvm_complete_insn_gp(vcpu, r); } EXPORT_SYMBOL_GPL(kvm_emulate_rdmsr); @@ -1749,15 +1740,12 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu) if (r < 0) return r; - /* MSR write failed? Inject a #GP */ - if (r > 0) { + if (!r) + trace_kvm_msr_write(ecx, data); + else trace_kvm_msr_write_ex(ecx, data); - kvm_inject_gp(vcpu, 0); - return 1; - } - trace_kvm_msr_write(ecx, data); - return kvm_skip_emulated_instruction(vcpu); + return kvm_complete_insn_gp(vcpu, r); } EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr); -- GitLab From f9a4d621761a2c7db686cc47772a0688d389f2d7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 14 Dec 2020 10:26:51 -0500 Subject: [PATCH 0547/1894] KVM: x86: introduce complete_emulated_msr callback This will be used by SEV-ES to inject MSR failure via the GHCB. Reviewed-by: Tom Lendacky Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/svm/svm.c | 1 + arch/x86/kvm/vmx/vmx.c | 1 + arch/x86/kvm/x86.c | 8 ++++---- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 8cf6b0493d492..18aa15e6fadd0 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1285,6 +1285,7 @@ struct kvm_x86_ops { void (*migrate_timers)(struct kvm_vcpu *vcpu); void (*msr_filter_changed)(struct kvm_vcpu *vcpu); + int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err); }; struct kvm_x86_nested_ops { diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 801e0a641258f..4067d511be080 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4306,6 +4306,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .apic_init_signal_blocked = svm_apic_init_signal_blocked, .msr_filter_changed = svm_msr_filter_changed, + .complete_emulated_msr = kvm_complete_insn_gp, }; static struct kvm_x86_init_ops svm_init_ops __initdata = { diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 849be2a9f2605..55fa51c0cd9db 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7701,6 +7701,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .migrate_timers = vmx_migrate_timers, .msr_filter_changed = vmx_msr_filter_changed, + .complete_emulated_msr = kvm_complete_insn_gp, .cpu_dirty_log_size = vmx_cpu_dirty_log_size, }; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5c5a6aa8696df..75f10aee23b5d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1642,12 +1642,12 @@ static int complete_emulated_rdmsr(struct kvm_vcpu *vcpu) kvm_rdx_write(vcpu, vcpu->run->msr.data >> 32); } - return kvm_complete_insn_gp(vcpu, err); + return kvm_x86_ops.complete_emulated_msr(vcpu, err); } static int complete_emulated_wrmsr(struct kvm_vcpu *vcpu) { - return kvm_complete_insn_gp(vcpu, vcpu->run->msr.error); + return kvm_x86_ops.complete_emulated_msr(vcpu, vcpu->run->msr.error); } static u64 kvm_msr_reason(int r) @@ -1719,7 +1719,7 @@ int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu) trace_kvm_msr_read_ex(ecx); } - return kvm_complete_insn_gp(vcpu, r); + return kvm_x86_ops.complete_emulated_msr(vcpu, r); } EXPORT_SYMBOL_GPL(kvm_emulate_rdmsr); @@ -1745,7 +1745,7 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu) else trace_kvm_msr_write_ex(ecx, data); - return kvm_complete_insn_gp(vcpu, r); + return kvm_x86_ops.complete_emulated_msr(vcpu, r); } EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr); -- GitLab From f1c6366e304328de301be362eca905a3503ff33b Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 14 Dec 2020 10:29:50 -0500 Subject: [PATCH 0548/1894] KVM: SVM: Add required changes to support intercepts under SEV-ES When a guest is running under SEV-ES, the hypervisor cannot access the guest register state. There are numerous places in the KVM code where certain registers are accessed that are not allowed to be accessed (e.g. RIP, CR0, etc). Add checks to prevent register accesses and add intercept update support at various points within the KVM code. Also, when handling a VMGEXIT, exceptions are passed back through the GHCB. Since the RDMSR/WRMSR intercepts (may) inject a #GP on error, update the SVM intercepts to handle this for SEV-ES guests. Signed-off-by: Tom Lendacky [Redo MSR part using the .complete_emulated_msr callback. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/svm.h | 3 +- arch/x86/kvm/svm/svm.c | 83 +++++++++++++++++++++++++++++++++----- arch/x86/kvm/x86.c | 11 ++++- 3 files changed, 84 insertions(+), 13 deletions(-) diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 1edf24f51b53d..bce28482d63d9 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -178,7 +178,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area { #define LBR_CTL_ENABLE_MASK BIT_ULL(0) #define VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK BIT_ULL(1) -#define SVM_INTERRUPT_SHADOW_MASK 1 +#define SVM_INTERRUPT_SHADOW_MASK BIT_ULL(0) +#define SVM_GUEST_INTERRUPT_MASK BIT_ULL(1) #define SVM_IOIO_STR_SHIFT 2 #define SVM_IOIO_REP_SHIFT 3 diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 4067d511be080..db81fb131033d 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include "trace.h" @@ -339,6 +340,13 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + /* + * SEV-ES does not expose the next RIP. The RIP update is controlled by + * the type of exit and the #VC handler in the guest. + */ + if (sev_es_guest(vcpu->kvm)) + goto done; + if (nrips && svm->vmcb->control.next_rip != 0) { WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS)); svm->next_rip = svm->vmcb->control.next_rip; @@ -350,6 +358,8 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu) } else { kvm_rip_write(vcpu, svm->next_rip); } + +done: svm_set_interrupt_shadow(vcpu, 0); return 1; @@ -1651,9 +1661,18 @@ static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) static void update_cr0_intercept(struct vcpu_svm *svm) { - ulong gcr0 = svm->vcpu.arch.cr0; - u64 *hcr0 = &svm->vmcb->save.cr0; + ulong gcr0; + u64 *hcr0; + + /* + * SEV-ES guests must always keep the CR intercepts cleared. CR + * tracking is done using the CR write traps. + */ + if (sev_es_guest(svm->vcpu.kvm)) + return; + gcr0 = svm->vcpu.arch.cr0; + hcr0 = &svm->vmcb->save.cr0; *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK) | (gcr0 & SVM_CR0_SELECTIVE_MASK); @@ -1673,7 +1692,7 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) struct vcpu_svm *svm = to_svm(vcpu); #ifdef CONFIG_X86_64 - if (vcpu->arch.efer & EFER_LME) { + if (vcpu->arch.efer & EFER_LME && !vcpu->arch.guest_state_protected) { if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { vcpu->arch.efer |= EFER_LMA; svm->vmcb->save.efer |= EFER_LMA | EFER_LME; @@ -2583,6 +2602,20 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 0; } +static int svm_complete_emulated_msr(struct kvm_vcpu *vcpu, int err) +{ + struct vcpu_svm *svm = to_svm(vcpu); + if (!sev_es_guest(svm->vcpu.kvm) || !err) + return kvm_complete_insn_gp(&svm->vcpu, err); + + ghcb_set_sw_exit_info_1(svm->ghcb, 1); + ghcb_set_sw_exit_info_2(svm->ghcb, + X86_TRAP_GP | + SVM_EVTINJ_TYPE_EXEPT | + SVM_EVTINJ_VALID); + return 1; +} + static int rdmsr_interception(struct vcpu_svm *svm) { return kvm_emulate_rdmsr(&svm->vcpu); @@ -2801,7 +2834,14 @@ static int interrupt_window_interception(struct vcpu_svm *svm) static int pause_interception(struct vcpu_svm *svm) { struct kvm_vcpu *vcpu = &svm->vcpu; - bool in_kernel = (svm_get_cpl(vcpu) == 0); + bool in_kernel; + + /* + * CPL is not made available for an SEV-ES guest, therefore + * vcpu->arch.preempted_in_kernel can never be true. Just + * set in_kernel to false as well. + */ + in_kernel = !sev_es_guest(svm->vcpu.kvm) && svm_get_cpl(vcpu) == 0; if (!kvm_pause_in_guest(vcpu->kvm)) grow_ple_window(vcpu); @@ -3064,10 +3104,13 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM); - if (!svm_is_intercept(svm, INTERCEPT_CR0_WRITE)) - vcpu->arch.cr0 = svm->vmcb->save.cr0; - if (npt_enabled) - vcpu->arch.cr3 = svm->vmcb->save.cr3; + /* SEV-ES guests must use the CR write traps to track CR registers. */ + if (!sev_es_guest(vcpu->kvm)) { + if (!svm_is_intercept(svm, INTERCEPT_CR0_WRITE)) + vcpu->arch.cr0 = svm->vmcb->save.cr0; + if (npt_enabled) + vcpu->arch.cr3 = svm->vmcb->save.cr3; + } if (is_guest_mode(vcpu)) { int vmexit; @@ -3179,6 +3222,13 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) { struct vcpu_svm *svm = to_svm(vcpu); + /* + * SEV-ES guests must always keep the CR intercepts cleared. CR + * tracking is done using the CR write traps. + */ + if (sev_es_guest(vcpu->kvm)) + return; + if (nested_svm_virtualize_tpr(vcpu)) return; @@ -3250,7 +3300,14 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu) if (!gif_set(svm)) return true; - if (is_guest_mode(vcpu)) { + if (sev_es_guest(svm->vcpu.kvm)) { + /* + * SEV-ES guests to not expose RFLAGS. Use the VMCB interrupt mask + * bit to determine the state of the IF flag. + */ + if (!(vmcb->control.int_state & SVM_GUEST_INTERRUPT_MASK)) + return true; + } else if (is_guest_mode(vcpu)) { /* As long as interrupts are being delivered... */ if ((svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK) ? !(svm->nested.hsave->save.rflags & X86_EFLAGS_IF) @@ -3432,6 +3489,12 @@ static void svm_complete_interrupts(struct vcpu_svm *svm) svm->vcpu.arch.nmi_injected = true; break; case SVM_EXITINTINFO_TYPE_EXEPT: + /* + * Never re-inject a #VC exception. + */ + if (vector == X86_TRAP_VC) + break; + /* * In case of software exceptions, do not reinject the vector, * but re-execute the instruction instead. Rewind RIP first @@ -4306,7 +4369,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .apic_init_signal_blocked = svm_apic_init_signal_blocked, .msr_filter_changed = svm_msr_filter_changed, - .complete_emulated_msr = kvm_complete_insn_gp, + .complete_emulated_msr = svm_complete_emulated_msr, }; static struct kvm_x86_init_ops svm_init_ops __initdata = { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 75f10aee23b5d..9685d056d8089 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4006,7 +4006,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { int idx; - if (vcpu->preempted) + if (vcpu->preempted && !vcpu->arch.guest_state_protected) vcpu->arch.preempted_in_kernel = !kvm_x86_ops.get_cpl(vcpu); /* @@ -8149,7 +8149,14 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu) { struct kvm_run *kvm_run = vcpu->run; - kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0; + /* + * if_flag is obsolete and useless, so do not bother + * setting it for SEV-ES guests. Userspace can just + * use kvm_run->ready_for_interrupt_injection. + */ + kvm_run->if_flag = !vcpu->arch.guest_state_protected + && (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0; + kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0; kvm_run->cr8 = kvm_get_cr8(vcpu); kvm_run->apic_base = kvm_get_apic_base(vcpu); -- GitLab From 8d4846b9b15045598d760470789716fb08b9b317 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:09:43 -0600 Subject: [PATCH 0549/1894] KVM: SVM: Prevent debugging under SEV-ES Since the guest register state of an SEV-ES guest is encrypted, debugging is not supported. Update the code to prevent guest debugging when the guest has protected state. Additionally, an SEV-ES guest must only and always intercept DR7 reads and writes. Update set_dr_intercepts() and clr_dr_intercepts() to account for this. Signed-off-by: Tom Lendacky Message-Id: <8db966fa2f9803d6454ce773863025d0e2e7f3cc.1607620209.git.thomas.lendacky@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 9 +++++++++ arch/x86/kvm/svm/svm.h | 37 +++++++++++++++++++++++-------------- arch/x86/kvm/x86.c | 3 +++ 3 files changed, 35 insertions(+), 14 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index db81fb131033d..bec427acab208 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1805,6 +1805,9 @@ static void svm_set_dr6(struct vcpu_svm *svm, unsigned long value) { struct vmcb *vmcb = svm->vmcb; + if (svm->vcpu.arch.guest_state_protected) + return; + if (unlikely(value != vmcb->save.dr6)) { vmcb->save.dr6 = value; vmcb_mark_dirty(vmcb, VMCB_DR); @@ -1815,6 +1818,9 @@ static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + if (vcpu->arch.guest_state_protected) + return; + get_debugreg(vcpu->arch.db[0], 0); get_debugreg(vcpu->arch.db[1], 1); get_debugreg(vcpu->arch.db[2], 2); @@ -1833,6 +1839,9 @@ static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value) { struct vcpu_svm *svm = to_svm(vcpu); + if (vcpu->arch.guest_state_protected) + return; + svm->vmcb->save.dr7 = value; vmcb_mark_dirty(svm->vmcb, VMCB_DR); } diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index f96a0a66ca35d..abfe53d6b3dc8 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -272,21 +272,24 @@ static inline void set_dr_intercepts(struct vcpu_svm *svm) { struct vmcb *vmcb = get_host_vmcb(svm); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ); + if (!sev_es_guest(svm->vcpu.kvm)) { + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE); + } + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE); vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE); recalc_intercepts(svm); @@ -298,6 +301,12 @@ static inline void clr_dr_intercepts(struct vcpu_svm *svm) vmcb->control.intercepts[INTERCEPT_DR] = 0; + /* DR7 access must remain intercepted for an SEV-ES guest */ + if (sev_es_guest(svm->vcpu.kvm)) { + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE); + } + recalc_intercepts(svm); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9685d056d8089..d88e334b19e76 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9664,6 +9664,9 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, unsigned long rflags; int i, r; + if (vcpu->arch.guest_state_protected) + return -EINVAL; + vcpu_load(vcpu); if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) { -- GitLab From bc624d9f1bbbfd6ae7057437cd3fcfef17066399 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:09:44 -0600 Subject: [PATCH 0550/1894] KVM: SVM: Do not allow instruction emulation under SEV-ES When a guest is running as an SEV-ES guest, it is not possible to emulate instructions. Add support to prevent instruction emulation. Signed-off-by: Tom Lendacky Message-Id: Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index bec427acab208..cade703d6edbe 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4157,6 +4157,12 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int i bool smep, smap, is_user; unsigned long cr4; + /* + * When the guest is an SEV-ES guest, emulation is not possible. + */ + if (sev_es_guest(vcpu->kvm)) + return false; + /* * Detect and workaround Errata 1096 Fam_17h_00_0Fh. * -- GitLab From 8164a5ffe4c65291efecc03a590c978fd14c240f Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:09:45 -0600 Subject: [PATCH 0551/1894] KVM: SVM: Cannot re-initialize the VMCB after shutdown with SEV-ES When a SHUTDOWN VMEXIT is encountered, normally the VMCB is re-initialized so that the guest can be re-launched. But when a guest is running as an SEV-ES guest, the VMSA cannot be re-initialized because it has been encrypted. For now, just return -EINVAL to prevent a possible attempt at a guest reset. Signed-off-by: Tom Lendacky Message-Id: Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index cade703d6edbe..19d1c9707fb72 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2010,6 +2010,13 @@ static int shutdown_interception(struct vcpu_svm *svm) { struct kvm_run *kvm_run = svm->vcpu.run; + /* + * The VM save area has already been encrypted so it + * cannot be reinitialized - just terminate. + */ + if (sev_es_guest(svm->vcpu.kvm)) + return -EINVAL; + /* * VMCB is undefined after a SHUTDOWN intercept * so reinitialize it. -- GitLab From e9093fd49285ff7b5e4d3f8b528f5b43445c5f5d Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:09:46 -0600 Subject: [PATCH 0552/1894] KVM: SVM: Prepare for SEV-ES exit handling in the sev.c file This is a pre-patch to consolidate some exit handling code into callable functions. Follow-on patches for SEV-ES exit handling will then be able to use them from the sev.c file. Signed-off-by: Tom Lendacky Message-Id: <5b8b0ffca8137f3e1e257f83df9f5c881c8a96a3.1607620209.git.thomas.lendacky@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 64 +++++++++++++++++++++++++----------------- 1 file changed, 38 insertions(+), 26 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 19d1c9707fb72..06ea34d61924d 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3097,6 +3097,43 @@ static void dump_vmcb(struct kvm_vcpu *vcpu) "excp_to:", save->last_excp_to); } +static int svm_handle_invalid_exit(struct kvm_vcpu *vcpu, u64 exit_code) +{ + if (exit_code < ARRAY_SIZE(svm_exit_handlers) && + svm_exit_handlers[exit_code]) + return 0; + + vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%llx\n", exit_code); + dump_vmcb(vcpu); + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON; + vcpu->run->internal.ndata = 2; + vcpu->run->internal.data[0] = exit_code; + vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu; + + return -EINVAL; +} + +static int svm_invoke_exit_handler(struct vcpu_svm *svm, u64 exit_code) +{ + if (svm_handle_invalid_exit(&svm->vcpu, exit_code)) + return 0; + +#ifdef CONFIG_RETPOLINE + if (exit_code == SVM_EXIT_MSR) + return msr_interception(svm); + else if (exit_code == SVM_EXIT_VINTR) + return interrupt_window_interception(svm); + else if (exit_code == SVM_EXIT_INTR) + return intr_interception(svm); + else if (exit_code == SVM_EXIT_HLT) + return halt_interception(svm); + else if (exit_code == SVM_EXIT_NPF) + return npf_interception(svm); +#endif + return svm_exit_handlers[exit_code](svm); +} + static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2, u32 *intr_info, u32 *error_code) { @@ -3163,32 +3200,7 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) if (exit_fastpath != EXIT_FASTPATH_NONE) return 1; - if (exit_code >= ARRAY_SIZE(svm_exit_handlers) - || !svm_exit_handlers[exit_code]) { - vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%x\n", exit_code); - dump_vmcb(vcpu); - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = - KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON; - vcpu->run->internal.ndata = 2; - vcpu->run->internal.data[0] = exit_code; - vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu; - return 0; - } - -#ifdef CONFIG_RETPOLINE - if (exit_code == SVM_EXIT_MSR) - return msr_interception(svm); - else if (exit_code == SVM_EXIT_VINTR) - return interrupt_window_interception(svm); - else if (exit_code == SVM_EXIT_INTR) - return intr_interception(svm); - else if (exit_code == SVM_EXIT_HLT) - return halt_interception(svm); - else if (exit_code == SVM_EXIT_NPF) - return npf_interception(svm); -#endif - return svm_exit_handlers[exit_code](svm); + return svm_invoke_exit_handler(svm, exit_code); } static void reload_tss(struct kvm_vcpu *vcpu) -- GitLab From 291bd20d5d88814a73d43b55b9428feab2f28094 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:09:47 -0600 Subject: [PATCH 0553/1894] KVM: SVM: Add initial support for a VMGEXIT VMEXIT SEV-ES adds a new VMEXIT reason code, VMGEXIT. Initial support for a VMGEXIT includes mapping the GHCB based on the guest GPA, which is obtained from a new VMCB field, and then validating the required inputs for the VMGEXIT exit reason. Since many of the VMGEXIT exit reasons correspond to existing VMEXIT reasons, the information from the GHCB is copied into the VMCB control exit code areas and KVM register areas. The standard exit handlers are invoked, similar to standard VMEXIT processing. Before restarting the vCPU, the GHCB is updated with any registers that have been updated by the hypervisor. Signed-off-by: Tom Lendacky Message-Id: Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/svm.h | 2 +- arch/x86/include/uapi/asm/svm.h | 7 + arch/x86/kvm/svm/sev.c | 272 ++++++++++++++++++++++++++++++++ arch/x86/kvm/svm/svm.c | 6 +- arch/x86/kvm/svm/svm.h | 8 + 5 files changed, 292 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index bce28482d63d9..caa8628f5fba6 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -130,7 +130,7 @@ struct __attribute__ ((__packed__)) vmcb_control_area { u32 exit_int_info_err; u64 nested_ctl; u64 avic_vapic_bar; - u8 reserved_4[8]; + u64 ghcb_gpa; u32 event_inj; u32 event_inj_err; u64 nested_cr3; diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index f1d8307454e0c..09f7239454254 100644 --- a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h @@ -81,6 +81,7 @@ #define SVM_EXIT_NPF 0x400 #define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401 #define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402 +#define SVM_EXIT_VMGEXIT 0x403 /* SEV-ES software-defined VMGEXIT events */ #define SVM_VMGEXIT_MMIO_READ 0x80000001 @@ -187,6 +188,12 @@ { SVM_EXIT_NPF, "npf" }, \ { SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \ { SVM_EXIT_AVIC_UNACCELERATED_ACCESS, "avic_unaccelerated_access" }, \ + { SVM_EXIT_VMGEXIT, "vmgexit" }, \ + { SVM_VMGEXIT_MMIO_READ, "vmgexit_mmio_read" }, \ + { SVM_VMGEXIT_MMIO_WRITE, "vmgexit_mmio_write" }, \ + { SVM_VMGEXIT_NMI_COMPLETE, "vmgexit_nmi_complete" }, \ + { SVM_VMGEXIT_AP_HLT_LOOP, "vmgexit_ap_hlt_loop" }, \ + { SVM_VMGEXIT_AP_JUMP_TABLE, "vmgexit_ap_jump_table" }, \ { SVM_EXIT_ERR, "invalid_guest_state" } diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 501adb43ece37..0244f4f244b4a 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -18,6 +18,7 @@ #include "x86.h" #include "svm.h" +#include "cpuid.h" static int sev_flush_asids(void); static DECLARE_RWSEM(sev_deactivate_lock); @@ -1257,11 +1258,226 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu) __free_page(virt_to_page(svm->vmsa)); } +static void dump_ghcb(struct vcpu_svm *svm) +{ + struct ghcb *ghcb = svm->ghcb; + unsigned int nbits; + + /* Re-use the dump_invalid_vmcb module parameter */ + if (!dump_invalid_vmcb) { + pr_warn_ratelimited("set kvm_amd.dump_invalid_vmcb=1 to dump internal KVM state.\n"); + return; + } + + nbits = sizeof(ghcb->save.valid_bitmap) * 8; + + pr_err("GHCB (GPA=%016llx):\n", svm->vmcb->control.ghcb_gpa); + pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_code", + ghcb->save.sw_exit_code, ghcb_sw_exit_code_is_valid(ghcb)); + pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_1", + ghcb->save.sw_exit_info_1, ghcb_sw_exit_info_1_is_valid(ghcb)); + pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_2", + ghcb->save.sw_exit_info_2, ghcb_sw_exit_info_2_is_valid(ghcb)); + pr_err("%-20s%016llx is_valid: %u\n", "sw_scratch", + ghcb->save.sw_scratch, ghcb_sw_scratch_is_valid(ghcb)); + pr_err("%-20s%*pb\n", "valid_bitmap", nbits, ghcb->save.valid_bitmap); +} + +static void sev_es_sync_to_ghcb(struct vcpu_svm *svm) +{ + struct kvm_vcpu *vcpu = &svm->vcpu; + struct ghcb *ghcb = svm->ghcb; + + /* + * The GHCB protocol so far allows for the following data + * to be returned: + * GPRs RAX, RBX, RCX, RDX + * + * Copy their values to the GHCB if they are dirty. + */ + if (kvm_register_is_dirty(vcpu, VCPU_REGS_RAX)) + ghcb_set_rax(ghcb, vcpu->arch.regs[VCPU_REGS_RAX]); + if (kvm_register_is_dirty(vcpu, VCPU_REGS_RBX)) + ghcb_set_rbx(ghcb, vcpu->arch.regs[VCPU_REGS_RBX]); + if (kvm_register_is_dirty(vcpu, VCPU_REGS_RCX)) + ghcb_set_rcx(ghcb, vcpu->arch.regs[VCPU_REGS_RCX]); + if (kvm_register_is_dirty(vcpu, VCPU_REGS_RDX)) + ghcb_set_rdx(ghcb, vcpu->arch.regs[VCPU_REGS_RDX]); +} + +static void sev_es_sync_from_ghcb(struct vcpu_svm *svm) +{ + struct vmcb_control_area *control = &svm->vmcb->control; + struct kvm_vcpu *vcpu = &svm->vcpu; + struct ghcb *ghcb = svm->ghcb; + u64 exit_code; + + /* + * The GHCB protocol so far allows for the following data + * to be supplied: + * GPRs RAX, RBX, RCX, RDX + * XCR0 + * CPL + * + * VMMCALL allows the guest to provide extra registers. KVM also + * expects RSI for hypercalls, so include that, too. + * + * Copy their values to the appropriate location if supplied. + */ + memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs)); + + vcpu->arch.regs[VCPU_REGS_RAX] = ghcb_get_rax_if_valid(ghcb); + vcpu->arch.regs[VCPU_REGS_RBX] = ghcb_get_rbx_if_valid(ghcb); + vcpu->arch.regs[VCPU_REGS_RCX] = ghcb_get_rcx_if_valid(ghcb); + vcpu->arch.regs[VCPU_REGS_RDX] = ghcb_get_rdx_if_valid(ghcb); + vcpu->arch.regs[VCPU_REGS_RSI] = ghcb_get_rsi_if_valid(ghcb); + + svm->vmcb->save.cpl = ghcb_get_cpl_if_valid(ghcb); + + if (ghcb_xcr0_is_valid(ghcb)) { + vcpu->arch.xcr0 = ghcb_get_xcr0(ghcb); + kvm_update_cpuid_runtime(vcpu); + } + + /* Copy the GHCB exit information into the VMCB fields */ + exit_code = ghcb_get_sw_exit_code(ghcb); + control->exit_code = lower_32_bits(exit_code); + control->exit_code_hi = upper_32_bits(exit_code); + control->exit_info_1 = ghcb_get_sw_exit_info_1(ghcb); + control->exit_info_2 = ghcb_get_sw_exit_info_2(ghcb); + + /* Clear the valid entries fields */ + memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap)); +} + +static int sev_es_validate_vmgexit(struct vcpu_svm *svm) +{ + struct kvm_vcpu *vcpu; + struct ghcb *ghcb; + u64 exit_code = 0; + + ghcb = svm->ghcb; + + /* Only GHCB Usage code 0 is supported */ + if (ghcb->ghcb_usage) + goto vmgexit_err; + + /* + * Retrieve the exit code now even though is may not be marked valid + * as it could help with debugging. + */ + exit_code = ghcb_get_sw_exit_code(ghcb); + + if (!ghcb_sw_exit_code_is_valid(ghcb) || + !ghcb_sw_exit_info_1_is_valid(ghcb) || + !ghcb_sw_exit_info_2_is_valid(ghcb)) + goto vmgexit_err; + + switch (ghcb_get_sw_exit_code(ghcb)) { + case SVM_EXIT_READ_DR7: + break; + case SVM_EXIT_WRITE_DR7: + if (!ghcb_rax_is_valid(ghcb)) + goto vmgexit_err; + break; + case SVM_EXIT_RDTSC: + break; + case SVM_EXIT_RDPMC: + if (!ghcb_rcx_is_valid(ghcb)) + goto vmgexit_err; + break; + case SVM_EXIT_CPUID: + if (!ghcb_rax_is_valid(ghcb) || + !ghcb_rcx_is_valid(ghcb)) + goto vmgexit_err; + if (ghcb_get_rax(ghcb) == 0xd) + if (!ghcb_xcr0_is_valid(ghcb)) + goto vmgexit_err; + break; + case SVM_EXIT_INVD: + break; + case SVM_EXIT_IOIO: + if (!(ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_TYPE_MASK)) + if (!ghcb_rax_is_valid(ghcb)) + goto vmgexit_err; + break; + case SVM_EXIT_MSR: + if (!ghcb_rcx_is_valid(ghcb)) + goto vmgexit_err; + if (ghcb_get_sw_exit_info_1(ghcb)) { + if (!ghcb_rax_is_valid(ghcb) || + !ghcb_rdx_is_valid(ghcb)) + goto vmgexit_err; + } + break; + case SVM_EXIT_VMMCALL: + if (!ghcb_rax_is_valid(ghcb) || + !ghcb_cpl_is_valid(ghcb)) + goto vmgexit_err; + break; + case SVM_EXIT_RDTSCP: + break; + case SVM_EXIT_WBINVD: + break; + case SVM_EXIT_MONITOR: + if (!ghcb_rax_is_valid(ghcb) || + !ghcb_rcx_is_valid(ghcb) || + !ghcb_rdx_is_valid(ghcb)) + goto vmgexit_err; + break; + case SVM_EXIT_MWAIT: + if (!ghcb_rax_is_valid(ghcb) || + !ghcb_rcx_is_valid(ghcb)) + goto vmgexit_err; + break; + case SVM_VMGEXIT_UNSUPPORTED_EVENT: + break; + default: + goto vmgexit_err; + } + + return 0; + +vmgexit_err: + vcpu = &svm->vcpu; + + if (ghcb->ghcb_usage) { + vcpu_unimpl(vcpu, "vmgexit: ghcb usage %#x is not valid\n", + ghcb->ghcb_usage); + } else { + vcpu_unimpl(vcpu, "vmgexit: exit reason %#llx is not valid\n", + exit_code); + dump_ghcb(svm); + } + + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON; + vcpu->run->internal.ndata = 2; + vcpu->run->internal.data[0] = exit_code; + vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu; + + return -EINVAL; +} + +static void pre_sev_es_run(struct vcpu_svm *svm) +{ + if (!svm->ghcb) + return; + + sev_es_sync_to_ghcb(svm); + + kvm_vcpu_unmap(&svm->vcpu, &svm->ghcb_map, true); + svm->ghcb = NULL; +} + void pre_sev_run(struct vcpu_svm *svm, int cpu) { struct svm_cpu_data *sd = per_cpu(svm_data, cpu); int asid = sev_get_asid(svm->vcpu.kvm); + /* Perform any SEV-ES pre-run actions */ + pre_sev_es_run(svm); + /* Assign the asid allocated with this SEV guest */ svm->asid = asid; @@ -1279,3 +1495,59 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu) svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID; vmcb_mark_dirty(svm->vmcb, VMCB_ASID); } + +static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) +{ + return -EINVAL; +} + +int sev_handle_vmgexit(struct vcpu_svm *svm) +{ + struct vmcb_control_area *control = &svm->vmcb->control; + u64 ghcb_gpa, exit_code; + struct ghcb *ghcb; + int ret; + + /* Validate the GHCB */ + ghcb_gpa = control->ghcb_gpa; + if (ghcb_gpa & GHCB_MSR_INFO_MASK) + return sev_handle_vmgexit_msr_protocol(svm); + + if (!ghcb_gpa) { + vcpu_unimpl(&svm->vcpu, "vmgexit: GHCB gpa is not set\n"); + return -EINVAL; + } + + if (kvm_vcpu_map(&svm->vcpu, ghcb_gpa >> PAGE_SHIFT, &svm->ghcb_map)) { + /* Unable to map GHCB from guest */ + vcpu_unimpl(&svm->vcpu, "vmgexit: error mapping GHCB [%#llx] from guest\n", + ghcb_gpa); + return -EINVAL; + } + + svm->ghcb = svm->ghcb_map.hva; + ghcb = svm->ghcb_map.hva; + + exit_code = ghcb_get_sw_exit_code(ghcb); + + ret = sev_es_validate_vmgexit(svm); + if (ret) + return ret; + + sev_es_sync_from_ghcb(svm); + ghcb_set_sw_exit_info_1(ghcb, 0); + ghcb_set_sw_exit_info_2(ghcb, 0); + + ret = -EINVAL; + switch (exit_code) { + case SVM_VMGEXIT_UNSUPPORTED_EVENT: + vcpu_unimpl(&svm->vcpu, + "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n", + control->exit_info_1, control->exit_info_2); + break; + default: + ret = svm_invoke_exit_handler(svm, exit_code); + } + + return ret; +} diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 06ea34d61924d..310de05d24792 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -194,7 +194,7 @@ module_param(sev, int, 0444); int sev_es = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT); module_param(sev_es, int, 0444); -static bool __read_mostly dump_invalid_vmcb = 0; +bool __read_mostly dump_invalid_vmcb; module_param(dump_invalid_vmcb, bool, 0644); static u8 rsm_ins_bytes[] = "\x0f\xaa"; @@ -2977,6 +2977,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_RSM] = rsm_interception, [SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception, [SVM_EXIT_AVIC_UNACCELERATED_ACCESS] = avic_unaccelerated_access_interception, + [SVM_EXIT_VMGEXIT] = sev_handle_vmgexit, }; static void dump_vmcb(struct kvm_vcpu *vcpu) @@ -3018,6 +3019,7 @@ static void dump_vmcb(struct kvm_vcpu *vcpu) pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl); pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3); pr_err("%-20s%016llx\n", "avic_vapic_bar:", control->avic_vapic_bar); + pr_err("%-20s%016llx\n", "ghcb:", control->ghcb_gpa); pr_err("%-20s%08x\n", "event_inj:", control->event_inj); pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err); pr_err("%-20s%lld\n", "virt_ext:", control->virt_ext); @@ -3114,7 +3116,7 @@ static int svm_handle_invalid_exit(struct kvm_vcpu *vcpu, u64 exit_code) return -EINVAL; } -static int svm_invoke_exit_handler(struct vcpu_svm *svm, u64 exit_code) +int svm_invoke_exit_handler(struct vcpu_svm *svm, u64 exit_code) { if (svm_handle_invalid_exit(&svm->vcpu, exit_code)) return 0; diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index abfe53d6b3dc8..89bcb26977e56 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -17,6 +17,7 @@ #include #include +#include #include @@ -172,6 +173,7 @@ struct vcpu_svm { /* SEV-ES support */ struct vmcb_save_area *vmsa; struct ghcb *ghcb; + struct kvm_host_map ghcb_map; }; struct svm_cpu_data { @@ -390,6 +392,7 @@ static inline bool gif_set(struct vcpu_svm *svm) extern int sev; extern int sev_es; +extern bool dump_invalid_vmcb; u32 svm_msrpm_offset(u32 msr); u32 *svm_vcpu_alloc_msrpm(void); @@ -405,6 +408,7 @@ bool svm_smi_blocked(struct kvm_vcpu *vcpu); bool svm_nmi_blocked(struct kvm_vcpu *vcpu); bool svm_interrupt_blocked(struct kvm_vcpu *vcpu); void svm_set_gif(struct vcpu_svm *svm, bool value); +int svm_invoke_exit_handler(struct vcpu_svm *svm, u64 exit_code); /* nested.c */ @@ -510,6 +514,9 @@ void svm_vcpu_unblocking(struct kvm_vcpu *vcpu); /* sev.c */ +#define GHCB_MSR_INFO_POS 0 +#define GHCB_MSR_INFO_MASK (BIT_ULL(12) - 1) + extern unsigned int max_sev_asid; static inline bool svm_sev_enabled(void) @@ -527,5 +534,6 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu); void __init sev_hardware_setup(void); void sev_hardware_teardown(void); void sev_free_vcpu(struct kvm_vcpu *vcpu); +int sev_handle_vmgexit(struct vcpu_svm *svm); #endif -- GitLab From 1edc14599e06fdf23dcf7516f73f09091853eb9a Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:09:49 -0600 Subject: [PATCH 0554/1894] KVM: SVM: Add support for SEV-ES GHCB MSR protocol function 0x002 The GHCB specification defines a GHCB MSR protocol using the lower 12-bits of the GHCB MSR (in the hypervisor this corresponds to the GHCB GPA field in the VMCB). Function 0x002 is a request to set the GHCB MSR value to the SEV INFO as per the specification via the VMCB GHCB GPA field. Signed-off-by: Tom Lendacky Message-Id: Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 26 +++++++++++++++++++++++++- arch/x86/kvm/svm/svm.h | 17 +++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 0244f4f244b4a..2246e4f3e4f38 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -20,6 +20,7 @@ #include "svm.h" #include "cpuid.h" +static u8 sev_enc_bit; static int sev_flush_asids(void); static DECLARE_RWSEM(sev_deactivate_lock); static DEFINE_MUTEX(sev_bitmap_lock); @@ -1140,6 +1141,9 @@ void __init sev_hardware_setup(void) /* Retrieve SEV CPUID information */ cpuid(0x8000001f, &eax, &ebx, &ecx, &edx); + /* Set encryption bit location for SEV-ES guests */ + sev_enc_bit = ebx & 0x3f; + /* Maximum number of encrypted guests supported simultaneously */ max_sev_asid = ecx; @@ -1496,9 +1500,29 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu) vmcb_mark_dirty(svm->vmcb, VMCB_ASID); } +static void set_ghcb_msr(struct vcpu_svm *svm, u64 value) +{ + svm->vmcb->control.ghcb_gpa = value; +} + static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) { - return -EINVAL; + struct vmcb_control_area *control = &svm->vmcb->control; + u64 ghcb_info; + + ghcb_info = control->ghcb_gpa & GHCB_MSR_INFO_MASK; + + switch (ghcb_info) { + case GHCB_MSR_SEV_INFO_REQ: + set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX, + GHCB_VERSION_MIN, + sev_enc_bit)); + break; + default: + return -EINVAL; + } + + return 1; } int sev_handle_vmgexit(struct vcpu_svm *svm) diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 89bcb26977e56..546f8d05e81e2 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -514,9 +514,26 @@ void svm_vcpu_unblocking(struct kvm_vcpu *vcpu); /* sev.c */ +#define GHCB_VERSION_MAX 1ULL +#define GHCB_VERSION_MIN 1ULL + #define GHCB_MSR_INFO_POS 0 #define GHCB_MSR_INFO_MASK (BIT_ULL(12) - 1) +#define GHCB_MSR_SEV_INFO_RESP 0x001 +#define GHCB_MSR_SEV_INFO_REQ 0x002 +#define GHCB_MSR_VER_MAX_POS 48 +#define GHCB_MSR_VER_MAX_MASK 0xffff +#define GHCB_MSR_VER_MIN_POS 32 +#define GHCB_MSR_VER_MIN_MASK 0xffff +#define GHCB_MSR_CBIT_POS 24 +#define GHCB_MSR_CBIT_MASK 0xff +#define GHCB_MSR_SEV_INFO(_max, _min, _cbit) \ + ((((_max) & GHCB_MSR_VER_MAX_MASK) << GHCB_MSR_VER_MAX_POS) | \ + (((_min) & GHCB_MSR_VER_MIN_MASK) << GHCB_MSR_VER_MIN_POS) | \ + (((_cbit) & GHCB_MSR_CBIT_MASK) << GHCB_MSR_CBIT_POS) | \ + GHCB_MSR_SEV_INFO_RESP) + extern unsigned int max_sev_asid; static inline bool svm_sev_enabled(void) -- GitLab From d36946679ef6a6fb32b655265602c174feb0ce5e Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:09:50 -0600 Subject: [PATCH 0555/1894] KVM: SVM: Add support for SEV-ES GHCB MSR protocol function 0x004 The GHCB specification defines a GHCB MSR protocol using the lower 12-bits of the GHCB MSR (in the hypervisor this corresponds to the GHCB GPA field in the VMCB). Function 0x004 is a request for CPUID information. Only a single CPUID result register can be sent per invocation, so the protocol defines the register that is requested. The GHCB MSR value is set to the CPUID register value as per the specification via the VMCB GHCB GPA field. Signed-off-by: Tom Lendacky Message-Id: Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 56 ++++++++++++++++++++++++++++++++++++++++-- arch/x86/kvm/svm/svm.h | 9 +++++++ 2 files changed, 63 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 2246e4f3e4f38..fbb80b5828433 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1500,6 +1500,18 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu) vmcb_mark_dirty(svm->vmcb, VMCB_ASID); } +static void set_ghcb_msr_bits(struct vcpu_svm *svm, u64 value, u64 mask, + unsigned int pos) +{ + svm->vmcb->control.ghcb_gpa &= ~(mask << pos); + svm->vmcb->control.ghcb_gpa |= (value & mask) << pos; +} + +static u64 get_ghcb_msr_bits(struct vcpu_svm *svm, u64 mask, unsigned int pos) +{ + return (svm->vmcb->control.ghcb_gpa >> pos) & mask; +} + static void set_ghcb_msr(struct vcpu_svm *svm, u64 value) { svm->vmcb->control.ghcb_gpa = value; @@ -1508,7 +1520,9 @@ static void set_ghcb_msr(struct vcpu_svm *svm, u64 value) static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) { struct vmcb_control_area *control = &svm->vmcb->control; + struct kvm_vcpu *vcpu = &svm->vcpu; u64 ghcb_info; + int ret = 1; ghcb_info = control->ghcb_gpa & GHCB_MSR_INFO_MASK; @@ -1518,11 +1532,49 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) GHCB_VERSION_MIN, sev_enc_bit)); break; + case GHCB_MSR_CPUID_REQ: { + u64 cpuid_fn, cpuid_reg, cpuid_value; + + cpuid_fn = get_ghcb_msr_bits(svm, + GHCB_MSR_CPUID_FUNC_MASK, + GHCB_MSR_CPUID_FUNC_POS); + + /* Initialize the registers needed by the CPUID intercept */ + vcpu->arch.regs[VCPU_REGS_RAX] = cpuid_fn; + vcpu->arch.regs[VCPU_REGS_RCX] = 0; + + ret = svm_invoke_exit_handler(svm, SVM_EXIT_CPUID); + if (!ret) { + ret = -EINVAL; + break; + } + + cpuid_reg = get_ghcb_msr_bits(svm, + GHCB_MSR_CPUID_REG_MASK, + GHCB_MSR_CPUID_REG_POS); + if (cpuid_reg == 0) + cpuid_value = vcpu->arch.regs[VCPU_REGS_RAX]; + else if (cpuid_reg == 1) + cpuid_value = vcpu->arch.regs[VCPU_REGS_RBX]; + else if (cpuid_reg == 2) + cpuid_value = vcpu->arch.regs[VCPU_REGS_RCX]; + else + cpuid_value = vcpu->arch.regs[VCPU_REGS_RDX]; + + set_ghcb_msr_bits(svm, cpuid_value, + GHCB_MSR_CPUID_VALUE_MASK, + GHCB_MSR_CPUID_VALUE_POS); + + set_ghcb_msr_bits(svm, GHCB_MSR_CPUID_RESP, + GHCB_MSR_INFO_MASK, + GHCB_MSR_INFO_POS); + break; + } default: - return -EINVAL; + ret = -EINVAL; } - return 1; + return ret; } int sev_handle_vmgexit(struct vcpu_svm *svm) diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 546f8d05e81e2..9dd8429f2b276 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -534,6 +534,15 @@ void svm_vcpu_unblocking(struct kvm_vcpu *vcpu); (((_cbit) & GHCB_MSR_CBIT_MASK) << GHCB_MSR_CBIT_POS) | \ GHCB_MSR_SEV_INFO_RESP) +#define GHCB_MSR_CPUID_REQ 0x004 +#define GHCB_MSR_CPUID_RESP 0x005 +#define GHCB_MSR_CPUID_FUNC_POS 32 +#define GHCB_MSR_CPUID_FUNC_MASK 0xffffffff +#define GHCB_MSR_CPUID_VALUE_POS 32 +#define GHCB_MSR_CPUID_VALUE_MASK 0xffffffff +#define GHCB_MSR_CPUID_REG_POS 30 +#define GHCB_MSR_CPUID_REG_MASK 0x3 + extern unsigned int max_sev_asid; static inline bool svm_sev_enabled(void) -- GitLab From e1d71116b64a54c3948d2692d4338e643408c411 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:09:51 -0600 Subject: [PATCH 0556/1894] KVM: SVM: Add support for SEV-ES GHCB MSR protocol function 0x100 The GHCB specification defines a GHCB MSR protocol using the lower 12-bits of the GHCB MSR (in the hypervisor this corresponds to the GHCB GPA field in the VMCB). Function 0x100 is a request for termination of the guest. The guest has encountered some situation for which it has requested to be terminated. The GHCB MSR value contains the reason for the request. Signed-off-by: Tom Lendacky Message-Id: Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 13 +++++++++++++ arch/x86/kvm/svm/svm.h | 6 ++++++ 2 files changed, 19 insertions(+) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index fbb80b5828433..db123562e7d50 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1570,6 +1570,19 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) GHCB_MSR_INFO_POS); break; } + case GHCB_MSR_TERM_REQ: { + u64 reason_set, reason_code; + + reason_set = get_ghcb_msr_bits(svm, + GHCB_MSR_TERM_REASON_SET_MASK, + GHCB_MSR_TERM_REASON_SET_POS); + reason_code = get_ghcb_msr_bits(svm, + GHCB_MSR_TERM_REASON_MASK, + GHCB_MSR_TERM_REASON_POS); + pr_info("SEV-ES guest requested termination: %#llx:%#llx\n", + reason_set, reason_code); + fallthrough; + } default: ret = -EINVAL; } diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 9dd8429f2b276..fc69bc2e0cad7 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -543,6 +543,12 @@ void svm_vcpu_unblocking(struct kvm_vcpu *vcpu); #define GHCB_MSR_CPUID_REG_POS 30 #define GHCB_MSR_CPUID_REG_MASK 0x3 +#define GHCB_MSR_TERM_REQ 0x100 +#define GHCB_MSR_TERM_REASON_SET_POS 12 +#define GHCB_MSR_TERM_REASON_SET_MASK 0xf +#define GHCB_MSR_TERM_REASON_POS 16 +#define GHCB_MSR_TERM_REASON_MASK 0xff + extern unsigned int max_sev_asid; static inline bool svm_sev_enabled(void) -- GitLab From d523ab6ba2753bd41b4447ae48024182cb4da94f Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:09:48 -0600 Subject: [PATCH 0557/1894] KVM: SVM: Create trace events for VMGEXIT processing Add trace events for entry to and exit from VMGEXIT processing. The vCPU id and the exit reason will be common for the trace events. The exit info fields will represent the input and output values for the entry and exit events, respectively. Signed-off-by: Tom Lendacky Message-Id: <25357dca49a38372e8f483753fb0c1c2a70a6898.1607620209.git.thomas.lendacky@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 6 +++++ arch/x86/kvm/trace.h | 53 ++++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/x86.c | 2 ++ 3 files changed, 61 insertions(+) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index db123562e7d50..089951cbe28e9 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -15,10 +15,12 @@ #include #include #include +#include #include "x86.h" #include "svm.h" #include "cpuid.h" +#include "trace.h" static u8 sev_enc_bit; static int sev_flush_asids(void); @@ -1468,6 +1470,8 @@ static void pre_sev_es_run(struct vcpu_svm *svm) if (!svm->ghcb) return; + trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->ghcb); + sev_es_sync_to_ghcb(svm); kvm_vcpu_unmap(&svm->vcpu, &svm->ghcb_map, true); @@ -1617,6 +1621,8 @@ int sev_handle_vmgexit(struct vcpu_svm *svm) svm->ghcb = svm->ghcb_map.hva; ghcb = svm->ghcb_map.hva; + trace_kvm_vmgexit_enter(svm->vcpu.vcpu_id, ghcb); + exit_code = ghcb_get_sw_exit_code(ghcb); ret = sev_es_validate_vmgexit(svm); diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index aef960f90f26e..7da931a511c94 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -1578,6 +1578,59 @@ TRACE_EVENT(kvm_hv_syndbg_get_msr, __entry->vcpu_id, __entry->vp_index, __entry->msr, __entry->data) ); + +/* + * Tracepoint for the start of VMGEXIT processing + */ +TRACE_EVENT(kvm_vmgexit_enter, + TP_PROTO(unsigned int vcpu_id, struct ghcb *ghcb), + TP_ARGS(vcpu_id, ghcb), + + TP_STRUCT__entry( + __field(unsigned int, vcpu_id) + __field(u64, exit_reason) + __field(u64, info1) + __field(u64, info2) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->exit_reason = ghcb->save.sw_exit_code; + __entry->info1 = ghcb->save.sw_exit_info_1; + __entry->info2 = ghcb->save.sw_exit_info_2; + ), + + TP_printk("vcpu %u, exit_reason %llx, exit_info1 %llx, exit_info2 %llx", + __entry->vcpu_id, __entry->exit_reason, + __entry->info1, __entry->info2) +); + +/* + * Tracepoint for the end of VMGEXIT processing + */ +TRACE_EVENT(kvm_vmgexit_exit, + TP_PROTO(unsigned int vcpu_id, struct ghcb *ghcb), + TP_ARGS(vcpu_id, ghcb), + + TP_STRUCT__entry( + __field(unsigned int, vcpu_id) + __field(u64, exit_reason) + __field(u64, info1) + __field(u64, info2) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->exit_reason = ghcb->save.sw_exit_code; + __entry->info1 = ghcb->save.sw_exit_info_1; + __entry->info2 = ghcb->save.sw_exit_info_2; + ), + + TP_printk("vcpu %u, exit_reason %llx, exit_info1 %llx, exit_info2 %llx", + __entry->vcpu_id, __entry->exit_reason, + __entry->info1, __entry->info2) +); + #endif /* _TRACE_KVM_H */ #undef TRACE_INCLUDE_PATH diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d88e334b19e76..10704bd8c38a6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -11314,3 +11314,5 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_incomplete_ipi); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_ga_log); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_apicv_update_request); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_enter); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_exit); -- GitLab From 59e38b58de283f76c533a2da416abf93bfd9ea41 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:09:52 -0600 Subject: [PATCH 0558/1894] KVM: SVM: Create trace events for VMGEXIT MSR protocol processing Add trace events for entry to and exit from VMGEXIT MSR protocol processing. The vCPU will be common for the trace events. The MSR protocol processing is guided by the GHCB GPA in the VMCB, so the GHCB GPA will represent the input and output values for the entry and exit events, respectively. Additionally, the exit event will contain the return code for the event. Signed-off-by: Tom Lendacky Message-Id: Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 6 ++++++ arch/x86/kvm/trace.h | 44 ++++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/x86.c | 2 ++ 3 files changed, 52 insertions(+) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 089951cbe28e9..2a36efadf9a32 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1530,6 +1530,9 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) ghcb_info = control->ghcb_gpa & GHCB_MSR_INFO_MASK; + trace_kvm_vmgexit_msr_protocol_enter(svm->vcpu.vcpu_id, + control->ghcb_gpa); + switch (ghcb_info) { case GHCB_MSR_SEV_INFO_REQ: set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX, @@ -1591,6 +1594,9 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) ret = -EINVAL; } + trace_kvm_vmgexit_msr_protocol_exit(svm->vcpu.vcpu_id, + control->ghcb_gpa, ret); + return ret; } diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 7da931a511c94..2de30c20bc264 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -1631,6 +1631,50 @@ TRACE_EVENT(kvm_vmgexit_exit, __entry->info1, __entry->info2) ); +/* + * Tracepoint for the start of VMGEXIT MSR procotol processing + */ +TRACE_EVENT(kvm_vmgexit_msr_protocol_enter, + TP_PROTO(unsigned int vcpu_id, u64 ghcb_gpa), + TP_ARGS(vcpu_id, ghcb_gpa), + + TP_STRUCT__entry( + __field(unsigned int, vcpu_id) + __field(u64, ghcb_gpa) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->ghcb_gpa = ghcb_gpa; + ), + + TP_printk("vcpu %u, ghcb_gpa %016llx", + __entry->vcpu_id, __entry->ghcb_gpa) +); + +/* + * Tracepoint for the end of VMGEXIT MSR procotol processing + */ +TRACE_EVENT(kvm_vmgexit_msr_protocol_exit, + TP_PROTO(unsigned int vcpu_id, u64 ghcb_gpa, int result), + TP_ARGS(vcpu_id, ghcb_gpa, result), + + TP_STRUCT__entry( + __field(unsigned int, vcpu_id) + __field(u64, ghcb_gpa) + __field(int, result) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->ghcb_gpa = ghcb_gpa; + __entry->result = result; + ), + + TP_printk("vcpu %u, ghcb_gpa %016llx, result %d", + __entry->vcpu_id, __entry->ghcb_gpa, __entry->result) +); + #endif /* _TRACE_KVM_H */ #undef TRACE_INCLUDE_PATH diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 10704bd8c38a6..02c6fb166fc33 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -11316,3 +11316,5 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_ga_log); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_apicv_update_request); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_enter); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_exit); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_enter); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_exit); -- GitLab From 8f423a80d299a5b3964b8af005d1aab4e5e9106a Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:09:53 -0600 Subject: [PATCH 0559/1894] KVM: SVM: Support MMIO for an SEV-ES guest For an SEV-ES guest, MMIO is performed to a shared (un-encrypted) page so that both the hypervisor and guest can read or write to it and each see the contents. The GHCB specification provides software-defined VMGEXIT exit codes to indicate a request for an MMIO read or an MMIO write. Add support to recognize the MMIO requests and invoke SEV-ES specific routines that can complete the MMIO operation. These routines use common KVM support to complete the MMIO operation. Signed-off-by: Tom Lendacky Message-Id: Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 124 +++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/svm/svm.h | 6 ++ arch/x86/kvm/x86.c | 123 ++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/x86.h | 5 ++ 4 files changed, 258 insertions(+) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 2a36efadf9a32..dc8ccc14241d2 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1262,6 +1262,9 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu) if (vcpu->arch.guest_state_protected) sev_flush_guest_memory(svm, svm->vmsa, PAGE_SIZE); __free_page(virt_to_page(svm->vmsa)); + + if (svm->ghcb_sa_free) + kfree(svm->ghcb_sa); } static void dump_ghcb(struct vcpu_svm *svm) @@ -1436,6 +1439,11 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm) !ghcb_rcx_is_valid(ghcb)) goto vmgexit_err; break; + case SVM_VMGEXIT_MMIO_READ: + case SVM_VMGEXIT_MMIO_WRITE: + if (!ghcb_sw_scratch_is_valid(ghcb)) + goto vmgexit_err; + break; case SVM_VMGEXIT_UNSUPPORTED_EVENT: break; default: @@ -1470,6 +1478,24 @@ static void pre_sev_es_run(struct vcpu_svm *svm) if (!svm->ghcb) return; + if (svm->ghcb_sa_free) { + /* + * The scratch area lives outside the GHCB, so there is a + * buffer that, depending on the operation performed, may + * need to be synced, then freed. + */ + if (svm->ghcb_sa_sync) { + kvm_write_guest(svm->vcpu.kvm, + ghcb_get_sw_scratch(svm->ghcb), + svm->ghcb_sa, svm->ghcb_sa_len); + svm->ghcb_sa_sync = false; + } + + kfree(svm->ghcb_sa); + svm->ghcb_sa = NULL; + svm->ghcb_sa_free = false; + } + trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->ghcb); sev_es_sync_to_ghcb(svm); @@ -1504,6 +1530,86 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu) vmcb_mark_dirty(svm->vmcb, VMCB_ASID); } +#define GHCB_SCRATCH_AREA_LIMIT (16ULL * PAGE_SIZE) +static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) +{ + struct vmcb_control_area *control = &svm->vmcb->control; + struct ghcb *ghcb = svm->ghcb; + u64 ghcb_scratch_beg, ghcb_scratch_end; + u64 scratch_gpa_beg, scratch_gpa_end; + void *scratch_va; + + scratch_gpa_beg = ghcb_get_sw_scratch(ghcb); + if (!scratch_gpa_beg) { + pr_err("vmgexit: scratch gpa not provided\n"); + return false; + } + + scratch_gpa_end = scratch_gpa_beg + len; + if (scratch_gpa_end < scratch_gpa_beg) { + pr_err("vmgexit: scratch length (%#llx) not valid for scratch address (%#llx)\n", + len, scratch_gpa_beg); + return false; + } + + if ((scratch_gpa_beg & PAGE_MASK) == control->ghcb_gpa) { + /* Scratch area begins within GHCB */ + ghcb_scratch_beg = control->ghcb_gpa + + offsetof(struct ghcb, shared_buffer); + ghcb_scratch_end = control->ghcb_gpa + + offsetof(struct ghcb, reserved_1); + + /* + * If the scratch area begins within the GHCB, it must be + * completely contained in the GHCB shared buffer area. + */ + if (scratch_gpa_beg < ghcb_scratch_beg || + scratch_gpa_end > ghcb_scratch_end) { + pr_err("vmgexit: scratch area is outside of GHCB shared buffer area (%#llx - %#llx)\n", + scratch_gpa_beg, scratch_gpa_end); + return false; + } + + scratch_va = (void *)svm->ghcb; + scratch_va += (scratch_gpa_beg - control->ghcb_gpa); + } else { + /* + * The guest memory must be read into a kernel buffer, so + * limit the size + */ + if (len > GHCB_SCRATCH_AREA_LIMIT) { + pr_err("vmgexit: scratch area exceeds KVM limits (%#llx requested, %#llx limit)\n", + len, GHCB_SCRATCH_AREA_LIMIT); + return false; + } + scratch_va = kzalloc(len, GFP_KERNEL); + if (!scratch_va) + return false; + + if (kvm_read_guest(svm->vcpu.kvm, scratch_gpa_beg, scratch_va, len)) { + /* Unable to copy scratch area from guest */ + pr_err("vmgexit: kvm_read_guest for scratch area failed\n"); + + kfree(scratch_va); + return false; + } + + /* + * The scratch area is outside the GHCB. The operation will + * dictate whether the buffer needs to be synced before running + * the vCPU next time (i.e. a read was requested so the data + * must be written back to the guest memory). + */ + svm->ghcb_sa_sync = sync; + svm->ghcb_sa_free = true; + } + + svm->ghcb_sa = scratch_va; + svm->ghcb_sa_len = len; + + return true; +} + static void set_ghcb_msr_bits(struct vcpu_svm *svm, u64 value, u64 mask, unsigned int pos) { @@ -1641,6 +1747,24 @@ int sev_handle_vmgexit(struct vcpu_svm *svm) ret = -EINVAL; switch (exit_code) { + case SVM_VMGEXIT_MMIO_READ: + if (!setup_vmgexit_scratch(svm, true, control->exit_info_2)) + break; + + ret = kvm_sev_es_mmio_read(&svm->vcpu, + control->exit_info_1, + control->exit_info_2, + svm->ghcb_sa); + break; + case SVM_VMGEXIT_MMIO_WRITE: + if (!setup_vmgexit_scratch(svm, false, control->exit_info_2)) + break; + + ret = kvm_sev_es_mmio_write(&svm->vcpu, + control->exit_info_1, + control->exit_info_2, + svm->ghcb_sa); + break; case SVM_VMGEXIT_UNSUPPORTED_EVENT: vcpu_unimpl(&svm->vcpu, "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n", diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index fc69bc2e0cad7..9019ad6a81387 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -174,6 +174,12 @@ struct vcpu_svm { struct vmcb_save_area *vmsa; struct ghcb *ghcb; struct kvm_host_map ghcb_map; + + /* SEV-ES scratch area support */ + void *ghcb_sa; + u64 ghcb_sa_len; + bool ghcb_sa_sync; + bool ghcb_sa_free; }; struct svm_cpu_data { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 02c6fb166fc33..fd4c47b7c71c9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -11292,6 +11292,129 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva) } EXPORT_SYMBOL_GPL(kvm_handle_invpcid); +static int complete_sev_es_emulated_mmio(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + struct kvm_mmio_fragment *frag; + unsigned int len; + + BUG_ON(!vcpu->mmio_needed); + + /* Complete previous fragment */ + frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment]; + len = min(8u, frag->len); + if (!vcpu->mmio_is_write) + memcpy(frag->data, run->mmio.data, len); + + if (frag->len <= 8) { + /* Switch to the next fragment. */ + frag++; + vcpu->mmio_cur_fragment++; + } else { + /* Go forward to the next mmio piece. */ + frag->data += len; + frag->gpa += len; + frag->len -= len; + } + + if (vcpu->mmio_cur_fragment >= vcpu->mmio_nr_fragments) { + vcpu->mmio_needed = 0; + + // VMG change, at this point, we're always done + // RIP has already been advanced + return 1; + } + + // More MMIO is needed + run->mmio.phys_addr = frag->gpa; + run->mmio.len = min(8u, frag->len); + run->mmio.is_write = vcpu->mmio_is_write; + if (run->mmio.is_write) + memcpy(run->mmio.data, frag->data, min(8u, frag->len)); + run->exit_reason = KVM_EXIT_MMIO; + + vcpu->arch.complete_userspace_io = complete_sev_es_emulated_mmio; + + return 0; +} + +int kvm_sev_es_mmio_write(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes, + void *data) +{ + int handled; + struct kvm_mmio_fragment *frag; + + if (!data) + return -EINVAL; + + handled = write_emultor.read_write_mmio(vcpu, gpa, bytes, data); + if (handled == bytes) + return 1; + + bytes -= handled; + gpa += handled; + data += handled; + + /*TODO: Check if need to increment number of frags */ + frag = vcpu->mmio_fragments; + vcpu->mmio_nr_fragments = 1; + frag->len = bytes; + frag->gpa = gpa; + frag->data = data; + + vcpu->mmio_needed = 1; + vcpu->mmio_cur_fragment = 0; + + vcpu->run->mmio.phys_addr = gpa; + vcpu->run->mmio.len = min(8u, frag->len); + vcpu->run->mmio.is_write = 1; + memcpy(vcpu->run->mmio.data, frag->data, min(8u, frag->len)); + vcpu->run->exit_reason = KVM_EXIT_MMIO; + + vcpu->arch.complete_userspace_io = complete_sev_es_emulated_mmio; + + return 0; +} +EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_write); + +int kvm_sev_es_mmio_read(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes, + void *data) +{ + int handled; + struct kvm_mmio_fragment *frag; + + if (!data) + return -EINVAL; + + handled = read_emultor.read_write_mmio(vcpu, gpa, bytes, data); + if (handled == bytes) + return 1; + + bytes -= handled; + gpa += handled; + data += handled; + + /*TODO: Check if need to increment number of frags */ + frag = vcpu->mmio_fragments; + vcpu->mmio_nr_fragments = 1; + frag->len = bytes; + frag->gpa = gpa; + frag->data = data; + + vcpu->mmio_needed = 1; + vcpu->mmio_cur_fragment = 0; + + vcpu->run->mmio.phys_addr = gpa; + vcpu->run->mmio.len = min(8u, frag->len); + vcpu->run->mmio.is_write = 0; + vcpu->run->exit_reason = KVM_EXIT_MMIO; + + vcpu->arch.complete_userspace_io = complete_sev_es_emulated_mmio; + + return 0; +} +EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_read); + EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index bf812c89c2e36..046709f16abe6 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -427,4 +427,9 @@ bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type); __reserved_bits; \ }) +int kvm_sev_es_mmio_write(struct kvm_vcpu *vcpu, gpa_t src, unsigned int bytes, + void *dst); +int kvm_sev_es_mmio_read(struct kvm_vcpu *vcpu, gpa_t src, unsigned int bytes, + void *dst); + #endif -- GitLab From 7ed9abfe8e9f62384f9b11c9fca19e551dbec5bd Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:09:54 -0600 Subject: [PATCH 0560/1894] KVM: SVM: Support string IO operations for an SEV-ES guest For an SEV-ES guest, string-based port IO is performed to a shared (un-encrypted) page so that both the hypervisor and guest can read or write to it and each see the contents. For string-based port IO operations, invoke SEV-ES specific routines that can complete the operation using common KVM port IO support. Signed-off-by: Tom Lendacky Message-Id: <9d61daf0ffda496703717218f415cdc8fd487100.1607620209.git.thomas.lendacky@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/svm/sev.c | 18 +++++++++-- arch/x86/kvm/svm/svm.c | 11 +++++-- arch/x86/kvm/svm/svm.h | 1 + arch/x86/kvm/x86.c | 54 +++++++++++++++++++++++++++++++++ arch/x86/kvm/x86.h | 3 ++ 6 files changed, 83 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 18aa15e6fadd0..1c8c59d4a5722 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -614,6 +614,7 @@ struct kvm_vcpu_arch { struct kvm_pio_request pio; void *pio_data; + void *guest_ins_data; u8 event_exit_inst_len; diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index dc8ccc14241d2..154ac7601d026 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1406,9 +1406,14 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm) case SVM_EXIT_INVD: break; case SVM_EXIT_IOIO: - if (!(ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_TYPE_MASK)) - if (!ghcb_rax_is_valid(ghcb)) + if (ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_STR_MASK) { + if (!ghcb_sw_scratch_is_valid(ghcb)) goto vmgexit_err; + } else { + if (!(ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_TYPE_MASK)) + if (!ghcb_rax_is_valid(ghcb)) + goto vmgexit_err; + } break; case SVM_EXIT_MSR: if (!ghcb_rcx_is_valid(ghcb)) @@ -1776,3 +1781,12 @@ int sev_handle_vmgexit(struct vcpu_svm *svm) return ret; } + +int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in) +{ + if (!setup_vmgexit_scratch(svm, in, svm->vmcb->control.exit_info_2)) + return -EINVAL; + + return kvm_sev_es_string_io(&svm->vcpu, size, port, + svm->ghcb_sa, svm->ghcb_sa_len, in); +} diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 310de05d24792..18a46847bffc5 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2038,11 +2038,16 @@ static int io_interception(struct vcpu_svm *svm) ++svm->vcpu.stat.io_exits; string = (io_info & SVM_IOIO_STR_MASK) != 0; in = (io_info & SVM_IOIO_TYPE_MASK) != 0; - if (string) - return kvm_emulate_instruction(vcpu, 0); - port = io_info >> 16; size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; + + if (string) { + if (sev_es_guest(vcpu->kvm)) + return sev_es_string_io(svm, size, port, in); + else + return kvm_emulate_instruction(vcpu, 0); + } + svm->next_rip = svm->vmcb->control.exit_info_2; return kvm_fast_pio(&svm->vcpu, size, port, in); diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 9019ad6a81387..b3f03dede6ac1 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -573,5 +573,6 @@ void __init sev_hardware_setup(void); void sev_hardware_teardown(void); void sev_free_vcpu(struct kvm_vcpu *vcpu); int sev_handle_vmgexit(struct vcpu_svm *svm); +int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in); #endif diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index fd4c47b7c71c9..536399fdbc4b6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10783,6 +10783,10 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu) { + /* Can't read the RIP when guest state is protected, just return 0 */ + if (vcpu->arch.guest_state_protected) + return 0; + if (is_64_bit_mode(vcpu)) return kvm_rip_read(vcpu); return (u32)(get_segment_base(vcpu, VCPU_SREG_CS) + @@ -11415,6 +11419,56 @@ int kvm_sev_es_mmio_read(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes, } EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_read); +static int complete_sev_es_emulated_ins(struct kvm_vcpu *vcpu) +{ + memcpy(vcpu->arch.guest_ins_data, vcpu->arch.pio_data, + vcpu->arch.pio.count * vcpu->arch.pio.size); + vcpu->arch.pio.count = 0; + + return 1; +} + +static int kvm_sev_es_outs(struct kvm_vcpu *vcpu, unsigned int size, + unsigned int port, void *data, unsigned int count) +{ + int ret; + + ret = emulator_pio_out_emulated(vcpu->arch.emulate_ctxt, size, port, + data, count); + if (ret) + return ret; + + vcpu->arch.pio.count = 0; + + return 0; +} + +static int kvm_sev_es_ins(struct kvm_vcpu *vcpu, unsigned int size, + unsigned int port, void *data, unsigned int count) +{ + int ret; + + ret = emulator_pio_in_emulated(vcpu->arch.emulate_ctxt, size, port, + data, count); + if (ret) { + vcpu->arch.pio.count = 0; + } else { + vcpu->arch.guest_ins_data = data; + vcpu->arch.complete_userspace_io = complete_sev_es_emulated_ins; + } + + return 0; +} + +int kvm_sev_es_string_io(struct kvm_vcpu *vcpu, unsigned int size, + unsigned int port, void *data, unsigned int count, + int in) +{ + return in ? kvm_sev_es_ins(vcpu, size, port, data, count) + : kvm_sev_es_outs(vcpu, size, port, data, count); +} +EXPORT_SYMBOL_GPL(kvm_sev_es_string_io); + EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 046709f16abe6..fe7f3df91b2c3 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -431,5 +431,8 @@ int kvm_sev_es_mmio_write(struct kvm_vcpu *vcpu, gpa_t src, unsigned int bytes, void *dst); int kvm_sev_es_mmio_read(struct kvm_vcpu *vcpu, gpa_t src, unsigned int bytes, void *dst); +int kvm_sev_es_string_io(struct kvm_vcpu *vcpu, unsigned int size, + unsigned int port, void *data, unsigned int count, + int in); #endif -- GitLab From 2985afbcdbb1957a8d31992cebbc4e49d2ad8a77 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:09:55 -0600 Subject: [PATCH 0561/1894] KVM: SVM: Add support for EFER write traps for an SEV-ES guest For SEV-ES guests, the interception of EFER write access is not recommended. EFER interception occurs prior to EFER being modified and the hypervisor is unable to modify EFER itself because the register is located in the encrypted register state. SEV-ES support introduces a new EFER write trap. This trap provides intercept support of an EFER write after it has been modified. The new EFER value is provided in the VMCB EXITINFO1 field, allowing the hypervisor to track the setting of the guest EFER. Add support to track the value of the guest EFER value using the EFER write trap so that the hypervisor understands the guest operating mode. Signed-off-by: Tom Lendacky Message-Id: <8993149352a3a87cd0625b3b61bfd31ab28977e1.1607620209.git.thomas.lendacky@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/uapi/asm/svm.h | 2 ++ arch/x86/kvm/svm/svm.c | 20 ++++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index 09f7239454254..6e3f92e176558 100644 --- a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h @@ -77,6 +77,7 @@ #define SVM_EXIT_MWAIT_COND 0x08c #define SVM_EXIT_XSETBV 0x08d #define SVM_EXIT_RDPRU 0x08e +#define SVM_EXIT_EFER_WRITE_TRAP 0x08f #define SVM_EXIT_INVPCID 0x0a2 #define SVM_EXIT_NPF 0x400 #define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401 @@ -184,6 +185,7 @@ { SVM_EXIT_MONITOR, "monitor" }, \ { SVM_EXIT_MWAIT, "mwait" }, \ { SVM_EXIT_XSETBV, "xsetbv" }, \ + { SVM_EXIT_EFER_WRITE_TRAP, "write_efer_trap" }, \ { SVM_EXIT_INVPCID, "invpcid" }, \ { SVM_EXIT_NPF, "npf" }, \ { SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \ diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 18a46847bffc5..983b993c949bb 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2503,6 +2503,25 @@ static int cr8_write_interception(struct vcpu_svm *svm) return 0; } +static int efer_trap(struct vcpu_svm *svm) +{ + struct msr_data msr_info; + int ret; + + /* + * Clear the EFER_SVME bit from EFER. The SVM code always sets this + * bit in svm_set_efer(), but __kvm_valid_efer() checks it against + * whether the guest has X86_FEATURE_SVM - this avoids a failure if + * the guest doesn't have X86_FEATURE_SVM. + */ + msr_info.host_initiated = false; + msr_info.index = MSR_EFER; + msr_info.data = svm->vmcb->control.exit_info_1 & ~EFER_SVME; + ret = kvm_set_msr_common(&svm->vcpu, &msr_info); + + return kvm_complete_insn_gp(&svm->vcpu, ret); +} + static int svm_get_msr_feature(struct kvm_msr_entry *msr) { msr->data = 0; @@ -2977,6 +2996,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_MWAIT] = mwait_interception, [SVM_EXIT_XSETBV] = xsetbv_interception, [SVM_EXIT_RDPRU] = rdpru_interception, + [SVM_EXIT_EFER_WRITE_TRAP] = efer_trap, [SVM_EXIT_INVPCID] = invpcid_interception, [SVM_EXIT_NPF] = npf_interception, [SVM_EXIT_RSM] = rsm_interception, -- GitLab From f27ad38aac23263c40fe26c0188182c129a8f8dd Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:09:56 -0600 Subject: [PATCH 0562/1894] KVM: SVM: Add support for CR0 write traps for an SEV-ES guest For SEV-ES guests, the interception of control register write access is not recommended. Control register interception occurs prior to the control register being modified and the hypervisor is unable to modify the control register itself because the register is located in the encrypted register state. SEV-ES support introduces new control register write traps. These traps provide intercept support of a control register write after the control register has been modified. The new control register value is provided in the VMCB EXITINFO1 field, allowing the hypervisor to track the setting of the guest control registers. Add support to track the value of the guest CR0 register using the control register write trap so that the hypervisor understands the guest operating mode. Signed-off-by: Tom Lendacky Message-Id: <182c9baf99df7e40ad9617ff90b84542705ef0d7.1607620209.git.thomas.lendacky@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/include/uapi/asm/svm.h | 17 +++++++++++++++++ arch/x86/kvm/svm/svm.c | 26 ++++++++++++++++++++++++++ arch/x86/kvm/x86.c | 33 ++++++++++++++++++++------------- 4 files changed, 64 insertions(+), 13 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 1c8c59d4a5722..f04d4c6f28f00 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1477,6 +1477,7 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector); int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, int reason, bool has_error_code, u32 error_code); +void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0); int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index 6e3f92e176558..14b0d97b50e2a 100644 --- a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h @@ -78,6 +78,22 @@ #define SVM_EXIT_XSETBV 0x08d #define SVM_EXIT_RDPRU 0x08e #define SVM_EXIT_EFER_WRITE_TRAP 0x08f +#define SVM_EXIT_CR0_WRITE_TRAP 0x090 +#define SVM_EXIT_CR1_WRITE_TRAP 0x091 +#define SVM_EXIT_CR2_WRITE_TRAP 0x092 +#define SVM_EXIT_CR3_WRITE_TRAP 0x093 +#define SVM_EXIT_CR4_WRITE_TRAP 0x094 +#define SVM_EXIT_CR5_WRITE_TRAP 0x095 +#define SVM_EXIT_CR6_WRITE_TRAP 0x096 +#define SVM_EXIT_CR7_WRITE_TRAP 0x097 +#define SVM_EXIT_CR8_WRITE_TRAP 0x098 +#define SVM_EXIT_CR9_WRITE_TRAP 0x099 +#define SVM_EXIT_CR10_WRITE_TRAP 0x09a +#define SVM_EXIT_CR11_WRITE_TRAP 0x09b +#define SVM_EXIT_CR12_WRITE_TRAP 0x09c +#define SVM_EXIT_CR13_WRITE_TRAP 0x09d +#define SVM_EXIT_CR14_WRITE_TRAP 0x09e +#define SVM_EXIT_CR15_WRITE_TRAP 0x09f #define SVM_EXIT_INVPCID 0x0a2 #define SVM_EXIT_NPF 0x400 #define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401 @@ -186,6 +202,7 @@ { SVM_EXIT_MWAIT, "mwait" }, \ { SVM_EXIT_XSETBV, "xsetbv" }, \ { SVM_EXIT_EFER_WRITE_TRAP, "write_efer_trap" }, \ + { SVM_EXIT_CR0_WRITE_TRAP, "write_cr0_trap" }, \ { SVM_EXIT_INVPCID, "invpcid" }, \ { SVM_EXIT_NPF, "npf" }, \ { SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \ diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 983b993c949bb..ddcb7390bb0e7 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2450,6 +2450,31 @@ static int cr_interception(struct vcpu_svm *svm) return kvm_complete_insn_gp(&svm->vcpu, err); } +static int cr_trap(struct vcpu_svm *svm) +{ + struct kvm_vcpu *vcpu = &svm->vcpu; + unsigned long old_value, new_value; + unsigned int cr; + + new_value = (unsigned long)svm->vmcb->control.exit_info_1; + + cr = svm->vmcb->control.exit_code - SVM_EXIT_CR0_WRITE_TRAP; + switch (cr) { + case 0: + old_value = kvm_read_cr0(vcpu); + svm_set_cr0(vcpu, new_value); + + kvm_post_set_cr0(vcpu, old_value, new_value); + break; + default: + WARN(1, "unhandled CR%d write trap", cr); + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; + } + + return kvm_complete_insn_gp(vcpu, 0); +} + static int dr_interception(struct vcpu_svm *svm) { int reg, dr; @@ -2997,6 +3022,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_XSETBV] = xsetbv_interception, [SVM_EXIT_RDPRU] = rdpru_interception, [SVM_EXIT_EFER_WRITE_TRAP] = efer_trap, + [SVM_EXIT_CR0_WRITE_TRAP] = cr_trap, [SVM_EXIT_INVPCID] = invpcid_interception, [SVM_EXIT_NPF] = npf_interception, [SVM_EXIT_RSM] = rsm_interception, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 536399fdbc4b6..efa70e30d23f7 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -804,11 +804,29 @@ bool pdptrs_changed(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(pdptrs_changed); +void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0) +{ + unsigned long update_bits = X86_CR0_PG | X86_CR0_WP; + + if ((cr0 ^ old_cr0) & X86_CR0_PG) { + kvm_clear_async_pf_completion_queue(vcpu); + kvm_async_pf_hash_reset(vcpu); + } + + if ((cr0 ^ old_cr0) & update_bits) + kvm_mmu_reset_context(vcpu); + + if (((cr0 ^ old_cr0) & X86_CR0_CD) && + kvm_arch_has_noncoherent_dma(vcpu->kvm) && + !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED)) + kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL); +} +EXPORT_SYMBOL_GPL(kvm_post_set_cr0); + int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { unsigned long old_cr0 = kvm_read_cr0(vcpu); unsigned long pdptr_bits = X86_CR0_CD | X86_CR0_NW | X86_CR0_PG; - unsigned long update_bits = X86_CR0_PG | X86_CR0_WP; cr0 |= X86_CR0_ET; @@ -847,18 +865,7 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) kvm_x86_ops.set_cr0(vcpu, cr0); - if ((cr0 ^ old_cr0) & X86_CR0_PG) { - kvm_clear_async_pf_completion_queue(vcpu); - kvm_async_pf_hash_reset(vcpu); - } - - if ((cr0 ^ old_cr0) & update_bits) - kvm_mmu_reset_context(vcpu); - - if (((cr0 ^ old_cr0) & X86_CR0_CD) && - kvm_arch_has_noncoherent_dma(vcpu->kvm) && - !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED)) - kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL); + kvm_post_set_cr0(vcpu, old_cr0, cr0); return 0; } -- GitLab From 5b51cb13160ae0ba10645bd0a84e7847677fb6a0 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:09:57 -0600 Subject: [PATCH 0563/1894] KVM: SVM: Add support for CR4 write traps for an SEV-ES guest For SEV-ES guests, the interception of control register write access is not recommended. Control register interception occurs prior to the control register being modified and the hypervisor is unable to modify the control register itself because the register is located in the encrypted register state. SEV-ES guests introduce new control register write traps. These traps provide intercept support of a control register write after the control register has been modified. The new control register value is provided in the VMCB EXITINFO1 field, allowing the hypervisor to track the setting of the guest control registers. Add support to track the value of the guest CR4 register using the control register write trap so that the hypervisor understands the guest operating mode. Signed-off-by: Tom Lendacky Message-Id: Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/include/uapi/asm/svm.h | 1 + arch/x86/kvm/svm/svm.c | 7 +++++++ arch/x86/kvm/x86.c | 16 ++++++++++++---- 4 files changed, 21 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f04d4c6f28f00..8ae099b48f004 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1478,6 +1478,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, int reason, bool has_error_code, u32 error_code); void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0); +void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long cr4); int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index 14b0d97b50e2a..c4152689ea936 100644 --- a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h @@ -203,6 +203,7 @@ { SVM_EXIT_XSETBV, "xsetbv" }, \ { SVM_EXIT_EFER_WRITE_TRAP, "write_efer_trap" }, \ { SVM_EXIT_CR0_WRITE_TRAP, "write_cr0_trap" }, \ + { SVM_EXIT_CR4_WRITE_TRAP, "write_cr4_trap" }, \ { SVM_EXIT_INVPCID, "invpcid" }, \ { SVM_EXIT_NPF, "npf" }, \ { SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \ diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index ddcb7390bb0e7..4b3d935a1325e 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2466,6 +2466,12 @@ static int cr_trap(struct vcpu_svm *svm) kvm_post_set_cr0(vcpu, old_value, new_value); break; + case 4: + old_value = kvm_read_cr4(vcpu); + svm_set_cr4(vcpu, new_value); + + kvm_post_set_cr4(vcpu, old_value, new_value); + break; default: WARN(1, "unhandled CR%d write trap", cr); kvm_queue_exception(vcpu, UD_VECTOR); @@ -3023,6 +3029,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_RDPRU] = rdpru_interception, [SVM_EXIT_EFER_WRITE_TRAP] = efer_trap, [SVM_EXIT_CR0_WRITE_TRAP] = cr_trap, + [SVM_EXIT_CR4_WRITE_TRAP] = cr_trap, [SVM_EXIT_INVPCID] = invpcid_interception, [SVM_EXIT_NPF] = npf_interception, [SVM_EXIT_RSM] = rsm_interception, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index efa70e30d23f7..c3686233508b8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -983,12 +983,22 @@ bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) } EXPORT_SYMBOL_GPL(kvm_is_valid_cr4); +void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long cr4) +{ + unsigned long mmu_role_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE | + X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE; + + if (((cr4 ^ old_cr4) & mmu_role_bits) || + (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE))) + kvm_mmu_reset_context(vcpu); +} +EXPORT_SYMBOL_GPL(kvm_post_set_cr4); + int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { unsigned long old_cr4 = kvm_read_cr4(vcpu); unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_SMEP; - unsigned long mmu_role_bits = pdptr_bits | X86_CR4_SMAP | X86_CR4_PKE; if (!kvm_is_valid_cr4(vcpu, cr4)) return 1; @@ -1015,9 +1025,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) kvm_x86_ops.set_cr4(vcpu, cr4); - if (((cr4 ^ old_cr4) & mmu_role_bits) || - (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE))) - kvm_mmu_reset_context(vcpu); + kvm_post_set_cr4(vcpu, old_cr4, cr4); return 0; } -- GitLab From d1949b93c60504b338c89cf8b3873c0d11feb7ed Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:09:58 -0600 Subject: [PATCH 0564/1894] KVM: SVM: Add support for CR8 write traps for an SEV-ES guest For SEV-ES guests, the interception of control register write access is not recommended. Control register interception occurs prior to the control register being modified and the hypervisor is unable to modify the control register itself because the register is located in the encrypted register state. SEV-ES guests introduce new control register write traps. These traps provide intercept support of a control register write after the control register has been modified. The new control register value is provided in the VMCB EXITINFO1 field, allowing the hypervisor to track the setting of the guest control registers. Add support to track the value of the guest CR8 register using the control register write trap so that the hypervisor understands the guest operating mode. Signed-off-by: Tom Lendacky Message-Id: <5a01033f4c8b3106ca9374b7cadf8e33da852df1.1607620209.git.thomas.lendacky@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/uapi/asm/svm.h | 1 + arch/x86/kvm/svm/svm.c | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index c4152689ea936..554f75fe013cf 100644 --- a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h @@ -204,6 +204,7 @@ { SVM_EXIT_EFER_WRITE_TRAP, "write_efer_trap" }, \ { SVM_EXIT_CR0_WRITE_TRAP, "write_cr0_trap" }, \ { SVM_EXIT_CR4_WRITE_TRAP, "write_cr4_trap" }, \ + { SVM_EXIT_CR8_WRITE_TRAP, "write_cr8_trap" }, \ { SVM_EXIT_INVPCID, "invpcid" }, \ { SVM_EXIT_NPF, "npf" }, \ { SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \ diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 4b3d935a1325e..0f4b496399556 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2455,6 +2455,7 @@ static int cr_trap(struct vcpu_svm *svm) struct kvm_vcpu *vcpu = &svm->vcpu; unsigned long old_value, new_value; unsigned int cr; + int ret = 0; new_value = (unsigned long)svm->vmcb->control.exit_info_1; @@ -2472,13 +2473,16 @@ static int cr_trap(struct vcpu_svm *svm) kvm_post_set_cr4(vcpu, old_value, new_value); break; + case 8: + ret = kvm_set_cr8(&svm->vcpu, new_value); + break; default: WARN(1, "unhandled CR%d write trap", cr); kvm_queue_exception(vcpu, UD_VECTOR); return 1; } - return kvm_complete_insn_gp(vcpu, 0); + return kvm_complete_insn_gp(vcpu, ret); } static int dr_interception(struct vcpu_svm *svm) @@ -3030,6 +3034,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_EFER_WRITE_TRAP] = efer_trap, [SVM_EXIT_CR0_WRITE_TRAP] = cr_trap, [SVM_EXIT_CR4_WRITE_TRAP] = cr_trap, + [SVM_EXIT_CR8_WRITE_TRAP] = cr_trap, [SVM_EXIT_INVPCID] = invpcid_interception, [SVM_EXIT_NPF] = npf_interception, [SVM_EXIT_RSM] = rsm_interception, -- GitLab From 5265713a073754605108b3aba17619a0bbbae3c4 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:09:59 -0600 Subject: [PATCH 0565/1894] KVM: x86: Update __get_sregs() / __set_sregs() to support SEV-ES Since many of the registers used by the SEV-ES are encrypted and cannot be read or written, adjust the __get_sregs() / __set_sregs() to take into account whether the VMSA/guest state is encrypted. For __get_sregs(), return the actual value that is in use by the guest for all registers being tracked using the write trap support. For __set_sregs(), skip setting of all guest registers values. Signed-off-by: Tom Lendacky Message-Id: <23051868db76400a9b07a2020525483a1e62dbcf.1607620209.git.thomas.lendacky@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c3686233508b8..86947e757dced 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9439,6 +9439,9 @@ static void __get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { struct desc_ptr dt; + if (vcpu->arch.guest_state_protected) + goto skip_protected_regs; + kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS); kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS); kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES); @@ -9456,9 +9459,11 @@ static void __get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) sregs->gdt.limit = dt.size; sregs->gdt.base = dt.address; - sregs->cr0 = kvm_read_cr0(vcpu); sregs->cr2 = vcpu->arch.cr2; sregs->cr3 = kvm_read_cr3(vcpu); + +skip_protected_regs: + sregs->cr0 = kvm_read_cr0(vcpu); sregs->cr4 = kvm_read_cr4(vcpu); sregs->cr8 = kvm_get_cr8(vcpu); sregs->efer = vcpu->arch.efer; @@ -9595,6 +9600,9 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) if (kvm_set_apic_base(vcpu, &apic_base_msr)) goto out; + if (vcpu->arch.guest_state_protected) + goto skip_protected_regs; + dt.size = sregs->idt.limit; dt.address = sregs->idt.base; kvm_x86_ops.set_idt(vcpu, &dt); @@ -9629,14 +9637,6 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) if (mmu_reset_needed) kvm_mmu_reset_context(vcpu); - max_bits = KVM_NR_INTERRUPTS; - pending_vec = find_first_bit( - (const unsigned long *)sregs->interrupt_bitmap, max_bits); - if (pending_vec < max_bits) { - kvm_queue_interrupt(vcpu, pending_vec, false); - pr_debug("Set back pending irq %d\n", pending_vec); - } - kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS); kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES); @@ -9655,6 +9655,15 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) !is_protmode(vcpu)) vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; +skip_protected_regs: + max_bits = KVM_NR_INTERRUPTS; + pending_vec = find_first_bit( + (const unsigned long *)sregs->interrupt_bitmap, max_bits); + if (pending_vec < max_bits) { + kvm_queue_interrupt(vcpu, pending_vec, false); + pr_debug("Set back pending irq %d\n", pending_vec); + } + kvm_make_request(KVM_REQ_EVENT, vcpu); ret = 0; -- GitLab From 5719455fbd952a69ebc860d47bb0287e9198fe12 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:10:00 -0600 Subject: [PATCH 0566/1894] KVM: SVM: Do not report support for SMM for an SEV-ES guest SEV-ES guests do not currently support SMM. Update the has_emulated_msr() kvm_x86_ops function to take a struct kvm parameter so that the capability can be reported at a VM level. Since this op is also called during KVM initialization and before a struct kvm instance is available, comments will be added to each implementation of has_emulated_msr() to indicate the kvm parameter can be null. Signed-off-by: Tom Lendacky Message-Id: <75de5138e33b945d2fb17f81ae507bda381808e3.1607620209.git.thomas.lendacky@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/svm/svm.c | 11 ++++++++++- arch/x86/kvm/vmx/vmx.c | 6 +++++- arch/x86/kvm/x86.c | 4 ++-- 4 files changed, 18 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 8ae099b48f004..2b6f168436c8d 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1092,7 +1092,7 @@ struct kvm_x86_ops { void (*hardware_disable)(void); void (*hardware_unsetup)(void); bool (*cpu_has_accelerated_tpr)(void); - bool (*has_emulated_msr)(u32 index); + bool (*has_emulated_msr)(struct kvm *kvm, u32 index); void (*vcpu_after_set_cpuid)(struct kvm_vcpu *vcpu); unsigned int vm_size; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 0f4b496399556..6064a6035dbbd 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3878,12 +3878,21 @@ static bool svm_cpu_has_accelerated_tpr(void) return false; } -static bool svm_has_emulated_msr(u32 index) +/* + * The kvm parameter can be NULL (module initialization, or invocation before + * VM creation). Be sure to check the kvm parameter before using it. + */ +static bool svm_has_emulated_msr(struct kvm *kvm, u32 index) { switch (index) { case MSR_IA32_MCG_EXT_CTL: case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: return false; + case MSR_IA32_SMBASE: + /* SEV-ES guests do not support SMM, so report false */ + if (kvm && sev_es_guest(kvm)) + return false; + break; default: break; } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 55fa51c0cd9db..75c9c6a0a3a45 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6379,7 +6379,11 @@ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu) handle_exception_nmi_irqoff(vmx); } -static bool vmx_has_emulated_msr(u32 index) +/* + * The kvm parameter can be NULL (module initialization, or invocation before + * VM creation). Be sure to check the kvm parameter before using it. + */ +static bool vmx_has_emulated_msr(struct kvm *kvm, u32 index) { switch (index) { case MSR_IA32_SMBASE: diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 86947e757dced..ba8e6a9b1a770 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3783,7 +3783,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) * fringe case that is not enabled except via specific settings * of the module parameters. */ - r = kvm_x86_ops.has_emulated_msr(MSR_IA32_SMBASE); + r = kvm_x86_ops.has_emulated_msr(kvm, MSR_IA32_SMBASE); break; case KVM_CAP_VAPIC: r = !kvm_x86_ops.cpu_has_accelerated_tpr(); @@ -5782,7 +5782,7 @@ static void kvm_init_msr_list(void) } for (i = 0; i < ARRAY_SIZE(emulated_msrs_all); i++) { - if (!kvm_x86_ops.has_emulated_msr(emulated_msrs_all[i])) + if (!kvm_x86_ops.has_emulated_msr(NULL, emulated_msrs_all[i])) continue; emulated_msrs[num_emulated_msrs++] = emulated_msrs_all[i]; -- GitLab From ed02b213098a90c2a415a0da18f05841f8cf0a81 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:10:01 -0600 Subject: [PATCH 0567/1894] KVM: SVM: Guest FPU state save/restore not needed for SEV-ES guest The guest FPU state is automatically restored on VMRUN and saved on VMEXIT by the hardware, so there is no reason to do this in KVM. Eliminate the allocation of the guest_fpu save area and key off that to skip operations related to the guest FPU state. Signed-off-by: Tom Lendacky Message-Id: <173e429b4d0d962c6a443c4553ffdaf31b7665a4.1607620209.git.thomas.lendacky@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/svm/svm.c | 8 +++++ arch/x86/kvm/x86.c | 56 +++++++++++++++++++++++++++------ 3 files changed, 56 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 2b6f168436c8d..39707e72b062f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1477,6 +1477,8 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector); int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, int reason, bool has_error_code, u32 error_code); +void kvm_free_guest_fpu(struct kvm_vcpu *vcpu); + void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0); void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long cr4); int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 6064a6035dbbd..f1c32cdc9d490 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1317,6 +1317,14 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) vmsa_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); if (!vmsa_page) goto error_free_vmcb_page; + + /* + * SEV-ES guests maintain an encrypted version of their FPU + * state which is restored and saved on VMRUN and VMEXIT. + * Free the fpu structure to prevent KVM from attempting to + * access the FPU state. + */ + kvm_free_guest_fpu(vcpu); } err = avic_init_vcpu(svm); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ba8e6a9b1a770..eea0e9fed62a9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4503,6 +4503,9 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src) static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, struct kvm_xsave *guest_xsave) { + if (!vcpu->arch.guest_fpu) + return; + if (boot_cpu_has(X86_FEATURE_XSAVE)) { memset(guest_xsave, 0, sizeof(struct kvm_xsave)); fill_xsave((u8 *) guest_xsave->region, vcpu); @@ -4520,9 +4523,14 @@ static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, struct kvm_xsave *guest_xsave) { - u64 xstate_bv = - *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)]; - u32 mxcsr = *(u32 *)&guest_xsave->region[XSAVE_MXCSR_OFFSET / sizeof(u32)]; + u64 xstate_bv; + u32 mxcsr; + + if (!vcpu->arch.guest_fpu) + return 0; + + xstate_bv = *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)]; + mxcsr = *(u32 *)&guest_xsave->region[XSAVE_MXCSR_OFFSET / sizeof(u32)]; if (boot_cpu_has(X86_FEATURE_XSAVE)) { /* @@ -9245,9 +9253,14 @@ static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) kvm_save_current_fpu(vcpu->arch.user_fpu); - /* PKRU is separately restored in kvm_x86_ops.run. */ - __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state, - ~XFEATURE_MASK_PKRU); + /* + * Guests with protected state can't have it set by the hypervisor, + * so skip trying to set it. + */ + if (vcpu->arch.guest_fpu) + /* PKRU is separately restored in kvm_x86_ops.run. */ + __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state, + ~XFEATURE_MASK_PKRU); fpregs_mark_activate(); fpregs_unlock(); @@ -9260,7 +9273,12 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) { fpregs_lock(); - kvm_save_current_fpu(vcpu->arch.guest_fpu); + /* + * Guests with protected state can't have it read by the hypervisor, + * so skip trying to save it. + */ + if (vcpu->arch.guest_fpu) + kvm_save_current_fpu(vcpu->arch.guest_fpu); copy_kernel_to_fpregs(&vcpu->arch.user_fpu->state); @@ -9770,6 +9788,9 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { struct fxregs_state *fxsave; + if (!vcpu->arch.guest_fpu) + return 0; + vcpu_load(vcpu); fxsave = &vcpu->arch.guest_fpu->state.fxsave; @@ -9790,6 +9811,9 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { struct fxregs_state *fxsave; + if (!vcpu->arch.guest_fpu) + return 0; + vcpu_load(vcpu); fxsave = &vcpu->arch.guest_fpu->state.fxsave; @@ -9848,6 +9872,9 @@ static int sync_regs(struct kvm_vcpu *vcpu) static void fx_init(struct kvm_vcpu *vcpu) { + if (!vcpu->arch.guest_fpu) + return; + fpstate_init(&vcpu->arch.guest_fpu->state); if (boot_cpu_has(X86_FEATURE_XSAVES)) vcpu->arch.guest_fpu->state.xsave.header.xcomp_bv = @@ -9861,6 +9888,15 @@ static void fx_init(struct kvm_vcpu *vcpu) vcpu->arch.cr0 |= X86_CR0_ET; } +void kvm_free_guest_fpu(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.guest_fpu) { + kmem_cache_free(x86_fpu_cache, vcpu->arch.guest_fpu); + vcpu->arch.guest_fpu = NULL; + } +} +EXPORT_SYMBOL_GPL(kvm_free_guest_fpu); + int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) { if (kvm_check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0) @@ -9956,7 +9992,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) return 0; free_guest_fpu: - kmem_cache_free(x86_fpu_cache, vcpu->arch.guest_fpu); + kvm_free_guest_fpu(vcpu); free_user_fpu: kmem_cache_free(x86_fpu_cache, vcpu->arch.user_fpu); free_emulate_ctxt: @@ -10010,7 +10046,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) kmem_cache_free(x86_emulator_cache, vcpu->arch.emulate_ctxt); free_cpumask_var(vcpu->arch.wbinvd_dirty_mask); kmem_cache_free(x86_fpu_cache, vcpu->arch.user_fpu); - kmem_cache_free(x86_fpu_cache, vcpu->arch.guest_fpu); + kvm_free_guest_fpu(vcpu); kvm_hv_vcpu_uninit(vcpu); kvm_pmu_destroy(vcpu); @@ -10058,7 +10094,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) kvm_async_pf_hash_reset(vcpu); vcpu->arch.apf.halted = false; - if (kvm_mpx_supported()) { + if (vcpu->arch.guest_fpu && kvm_mpx_supported()) { void *mpx_state_buffer; /* -- GitLab From 4444dfe4050b79964d7bb9b86a99e2bb21a972b0 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 14 Dec 2020 11:16:03 -0500 Subject: [PATCH 0568/1894] KVM: SVM: Add NMI support for an SEV-ES guest The GHCB specification defines how NMIs are to be handled for an SEV-ES guest. To detect the completion of an NMI the hypervisor must not intercept the IRET instruction (because a #VC while running the NMI will issue an IRET) and, instead, must receive an NMI Complete exit event from the guest. Update the KVM support for detecting the completion of NMIs in the guest to follow the GHCB specification. When an SEV-ES guest is active, the IRET instruction will no longer be intercepted. Now, when the NMI Complete exit event is received, the iret_interception() function will be called to simulate the completion of the NMI. Signed-off-by: Tom Lendacky Message-Id: <5ea3dd69b8d4396cefdc9048ebc1ab7caa70a847.1607620209.git.thomas.lendacky@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 4 ++++ arch/x86/kvm/svm/svm.c | 20 +++++++++++++------- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 154ac7601d026..c3006e9ef5ae7 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1449,6 +1449,7 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm) if (!ghcb_sw_scratch_is_valid(ghcb)) goto vmgexit_err; break; + case SVM_VMGEXIT_NMI_COMPLETE: case SVM_VMGEXIT_UNSUPPORTED_EVENT: break; default: @@ -1770,6 +1771,9 @@ int sev_handle_vmgexit(struct vcpu_svm *svm) control->exit_info_2, svm->ghcb_sa); break; + case SVM_VMGEXIT_NMI_COMPLETE: + ret = svm_invoke_exit_handler(svm, SVM_EXIT_IRET); + break; case SVM_VMGEXIT_UNSUPPORTED_EVENT: vcpu_unimpl(&svm->vcpu, "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n", diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index f1c32cdc9d490..2e43f79c27030 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2319,9 +2319,11 @@ static int cpuid_interception(struct vcpu_svm *svm) static int iret_interception(struct vcpu_svm *svm) { ++svm->vcpu.stat.nmi_window_exits; - svm_clr_intercept(svm, INTERCEPT_IRET); svm->vcpu.arch.hflags |= HF_IRET_MASK; - svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu); + if (!sev_es_guest(svm->vcpu.kvm)) { + svm_clr_intercept(svm, INTERCEPT_IRET); + svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu); + } kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); return 1; } @@ -3302,7 +3304,8 @@ static void svm_inject_nmi(struct kvm_vcpu *vcpu) svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI; vcpu->arch.hflags |= HF_NMI_MASK; - svm_set_intercept(svm, INTERCEPT_IRET); + if (!sev_es_guest(svm->vcpu.kvm)) + svm_set_intercept(svm, INTERCEPT_IRET); ++vcpu->stat.nmi_injections; } @@ -3386,10 +3389,12 @@ static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) if (masked) { svm->vcpu.arch.hflags |= HF_NMI_MASK; - svm_set_intercept(svm, INTERCEPT_IRET); + if (!sev_es_guest(svm->vcpu.kvm)) + svm_set_intercept(svm, INTERCEPT_IRET); } else { svm->vcpu.arch.hflags &= ~HF_NMI_MASK; - svm_clr_intercept(svm, INTERCEPT_IRET); + if (!sev_es_guest(svm->vcpu.kvm)) + svm_clr_intercept(svm, INTERCEPT_IRET); } } @@ -3567,8 +3572,9 @@ static void svm_complete_interrupts(struct vcpu_svm *svm) * If we've made progress since setting HF_IRET_MASK, we've * executed an IRET and can allow NMI injection. */ - if ((svm->vcpu.arch.hflags & HF_IRET_MASK) - && kvm_rip_read(&svm->vcpu) != svm->nmi_iret_rip) { + if ((svm->vcpu.arch.hflags & HF_IRET_MASK) && + (sev_es_guest(svm->vcpu.kvm) || + kvm_rip_read(&svm->vcpu) != svm->nmi_iret_rip)) { svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK); kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); } -- GitLab From 85ca8be938c0e693b5ed5392279d5ecedf42901e Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:10:04 -0600 Subject: [PATCH 0569/1894] KVM: SVM: Set the encryption mask for the SVM host save area The SVM host save area is used to restore some host state on VMEXIT of an SEV-ES guest. After allocating the save area, clear it and add the encryption mask to the SVM host save area physical address that is programmed into the VM_HSAVE_PA MSR. Signed-off-by: Tom Lendacky Message-Id: Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 1 - arch/x86/kvm/svm/svm.c | 3 ++- arch/x86/kvm/svm/svm.h | 2 ++ 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index c3006e9ef5ae7..0e922741023bc 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -30,7 +30,6 @@ unsigned int max_sev_asid; static unsigned int min_sev_asid; static unsigned long *sev_asid_bitmap; static unsigned long *sev_reclaim_asid_bitmap; -#define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT) struct enc_region { struct list_head list; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 2e43f79c27030..4be7d13d4462b 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -497,7 +497,7 @@ static int svm_hardware_enable(void) wrmsrl(MSR_EFER, efer | EFER_SVME); - wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT); + wrmsrl(MSR_VM_HSAVE_PA, __sme_page_pa(sd->save_area)); if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) { wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT); @@ -565,6 +565,7 @@ static int svm_cpu_init(int cpu) sd->save_area = alloc_page(GFP_KERNEL); if (!sd->save_area) goto free_cpu_data; + clear_page(page_address(sd->save_area)); if (svm_sev_enabled()) { sd->sev_vmcbs = kmalloc_array(max_sev_asid + 1, diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index b3f03dede6ac1..b85c162a8a1e7 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -21,6 +21,8 @@ #include +#define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT) + static const u32 host_save_user_msrs[] = { #ifdef CONFIG_X86_64 MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE, -- GitLab From 80675b3ad45f79d97ce47a0faac3a6d22ab7e876 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:10:05 -0600 Subject: [PATCH 0570/1894] KVM: SVM: Update ASID allocation to support SEV-ES guests SEV and SEV-ES guests each have dedicated ASID ranges. Update the ASID allocation routine to return an ASID in the respective range. Signed-off-by: Tom Lendacky Message-Id: Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 0e922741023bc..b81d12f1bd37e 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -61,19 +61,19 @@ static int sev_flush_asids(void) } /* Must be called with the sev_bitmap_lock held */ -static bool __sev_recycle_asids(void) +static bool __sev_recycle_asids(int min_asid, int max_asid) { int pos; /* Check if there are any ASIDs to reclaim before performing a flush */ - pos = find_next_bit(sev_reclaim_asid_bitmap, - max_sev_asid, min_sev_asid - 1); - if (pos >= max_sev_asid) + pos = find_next_bit(sev_reclaim_asid_bitmap, max_sev_asid, min_asid); + if (pos >= max_asid) return false; if (sev_flush_asids()) return false; + /* The flush process will flush all reclaimable SEV and SEV-ES ASIDs */ bitmap_xor(sev_asid_bitmap, sev_asid_bitmap, sev_reclaim_asid_bitmap, max_sev_asid); bitmap_zero(sev_reclaim_asid_bitmap, max_sev_asid); @@ -81,20 +81,23 @@ static bool __sev_recycle_asids(void) return true; } -static int sev_asid_new(void) +static int sev_asid_new(struct kvm_sev_info *sev) { + int pos, min_asid, max_asid; bool retry = true; - int pos; mutex_lock(&sev_bitmap_lock); /* - * SEV-enabled guest must use asid from min_sev_asid to max_sev_asid. + * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid. + * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1. */ + min_asid = sev->es_active ? 0 : min_sev_asid - 1; + max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid; again: - pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_sev_asid - 1); - if (pos >= max_sev_asid) { - if (retry && __sev_recycle_asids()) { + pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_asid); + if (pos >= max_asid) { + if (retry && __sev_recycle_asids(min_asid, max_asid)) { retry = false; goto again; } @@ -176,7 +179,7 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) if (unlikely(sev->active)) return ret; - asid = sev_asid_new(); + asid = sev_asid_new(sev); if (asid < 0) return ret; -- GitLab From 376c6d285017419e35c7177bc60abe7915fb7497 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:10:06 -0600 Subject: [PATCH 0571/1894] KVM: SVM: Provide support for SEV-ES vCPU creation/loading An SEV-ES vCPU requires additional VMCB initialization requirements for vCPU creation and vCPU load/put requirements. This includes: General VMCB initialization changes: - Set a VMCB control bit to enable SEV-ES support on the vCPU. - Set the VMCB encrypted VM save area address. - CRx registers are part of the encrypted register state and cannot be updated. Remove the CRx register read and write intercepts and replace them with CRx register write traps to track the CRx register values. - Certain MSR values are part of the encrypted register state and cannot be updated. Remove certain MSR intercepts (EFER, CR_PAT, etc.). - Remove the #GP intercept (no support for "enable_vmware_backdoor"). - Remove the XSETBV intercept since the hypervisor cannot modify XCR0. General vCPU creation changes: - Set the initial GHCB gpa value as per the GHCB specification. Signed-off-by: Tom Lendacky Message-Id: <3a8aef366416eddd5556dfa3fdc212aafa1ad0a2.1607620209.git.thomas.lendacky@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/svm.h | 15 +++++++++- arch/x86/kvm/svm/sev.c | 56 ++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/svm/svm.c | 20 ++++++++++++-- arch/x86/kvm/svm/svm.h | 6 +++- 4 files changed, 92 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index caa8628f5fba6..a57331de59e22 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -98,6 +98,16 @@ enum { INTERCEPT_MWAIT_COND, INTERCEPT_XSETBV, INTERCEPT_RDPRU, + TRAP_EFER_WRITE, + TRAP_CR0_WRITE, + TRAP_CR1_WRITE, + TRAP_CR2_WRITE, + TRAP_CR3_WRITE, + TRAP_CR4_WRITE, + TRAP_CR5_WRITE, + TRAP_CR6_WRITE, + TRAP_CR7_WRITE, + TRAP_CR8_WRITE, /* Byte offset 014h (word 5) */ INTERCEPT_INVLPGB = 160, INTERCEPT_INVLPGB_ILLEGAL, @@ -144,6 +154,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area { u8 reserved_6[8]; /* Offset 0xe8 */ u64 avic_logical_id; /* Offset 0xf0 */ u64 avic_physical_id; /* Offset 0xf8 */ + u8 reserved_7[8]; + u64 vmsa_pa; /* Used for an SEV-ES guest */ }; @@ -198,6 +210,7 @@ struct __attribute__ ((__packed__)) vmcb_control_area { #define SVM_NESTED_CTL_NP_ENABLE BIT(0) #define SVM_NESTED_CTL_SEV_ENABLE BIT(1) +#define SVM_NESTED_CTL_SEV_ES_ENABLE BIT(2) struct vmcb_seg { u16 selector; @@ -295,7 +308,7 @@ struct ghcb { #define EXPECTED_VMCB_SAVE_AREA_SIZE 1032 -#define EXPECTED_VMCB_CONTROL_AREA_SIZE 256 +#define EXPECTED_VMCB_CONTROL_AREA_SIZE 272 #define EXPECTED_GHCB_SIZE PAGE_SIZE static inline void __unused_size_checks(void) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index b81d12f1bd37e..584fede0b733f 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1796,3 +1796,59 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in) return kvm_sev_es_string_io(&svm->vcpu, size, port, svm->ghcb_sa, svm->ghcb_sa_len, in); } + +void sev_es_init_vmcb(struct vcpu_svm *svm) +{ + struct kvm_vcpu *vcpu = &svm->vcpu; + + svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE; + svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK; + + /* + * An SEV-ES guest requires a VMSA area that is a separate from the + * VMCB page. Do not include the encryption mask on the VMSA physical + * address since hardware will access it using the guest key. + */ + svm->vmcb->control.vmsa_pa = __pa(svm->vmsa); + + /* Can't intercept CR register access, HV can't modify CR registers */ + svm_clr_intercept(svm, INTERCEPT_CR0_READ); + svm_clr_intercept(svm, INTERCEPT_CR4_READ); + svm_clr_intercept(svm, INTERCEPT_CR8_READ); + svm_clr_intercept(svm, INTERCEPT_CR0_WRITE); + svm_clr_intercept(svm, INTERCEPT_CR4_WRITE); + svm_clr_intercept(svm, INTERCEPT_CR8_WRITE); + + svm_clr_intercept(svm, INTERCEPT_SELECTIVE_CR0); + + /* Track EFER/CR register changes */ + svm_set_intercept(svm, TRAP_EFER_WRITE); + svm_set_intercept(svm, TRAP_CR0_WRITE); + svm_set_intercept(svm, TRAP_CR4_WRITE); + svm_set_intercept(svm, TRAP_CR8_WRITE); + + /* No support for enable_vmware_backdoor */ + clr_exception_intercept(svm, GP_VECTOR); + + /* Can't intercept XSETBV, HV can't modify XCR0 directly */ + svm_clr_intercept(svm, INTERCEPT_XSETBV); + + /* Clear intercepts on selected MSRs */ + set_msr_interception(vcpu, svm->msrpm, MSR_EFER, 1, 1); + set_msr_interception(vcpu, svm->msrpm, MSR_IA32_CR_PAT, 1, 1); + set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1); + set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1); + set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1); + set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1); +} + +void sev_es_create_vcpu(struct vcpu_svm *svm) +{ + /* + * Set the GHCB MSR value as per the GHCB specification when creating + * a vCPU for an SEV-ES guest. + */ + set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX, + GHCB_VERSION_MIN, + sev_enc_bit)); +} diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 4be7d13d4462b..091f792498a25 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -90,7 +90,7 @@ static DEFINE_PER_CPU(u64, current_tsc_ratio); static const struct svm_direct_access_msrs { u32 index; /* Index of the MSR */ - bool always; /* True if intercept is always on */ + bool always; /* True if intercept is initially cleared */ } direct_access_msrs[MAX_DIRECT_ACCESS_MSRS] = { { .index = MSR_STAR, .always = true }, { .index = MSR_IA32_SYSENTER_CS, .always = true }, @@ -108,6 +108,9 @@ static const struct svm_direct_access_msrs { { .index = MSR_IA32_LASTBRANCHTOIP, .always = false }, { .index = MSR_IA32_LASTINTFROMIP, .always = false }, { .index = MSR_IA32_LASTINTTOIP, .always = false }, + { .index = MSR_EFER, .always = false }, + { .index = MSR_IA32_CR_PAT, .always = false }, + { .index = MSR_AMD64_SEV_ES_GHCB, .always = true }, { .index = MSR_INVALID, .always = false }, }; @@ -676,8 +679,8 @@ static void set_msr_interception_bitmap(struct kvm_vcpu *vcpu, u32 *msrpm, msrpm[offset] = tmp; } -static void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr, - int read, int write) +void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr, + int read, int write) { set_shadow_msr_intercept(vcpu, msr, read, write); set_msr_interception_bitmap(vcpu, msrpm, msr, read, write); @@ -1263,6 +1266,11 @@ static void init_vmcb(struct vcpu_svm *svm) if (sev_guest(svm->vcpu.kvm)) { svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE; clr_exception_intercept(svm, UD_VECTOR); + + if (sev_es_guest(svm->vcpu.kvm)) { + /* Perform SEV-ES specific VMCB updates */ + sev_es_init_vmcb(svm); + } } vmcb_mark_all_dirty(svm->vmcb); @@ -1356,6 +1364,10 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) svm_init_osvw(vcpu); vcpu->arch.microcode_version = 0x01000065; + if (sev_es_guest(svm->vcpu.kvm)) + /* Perform SEV-ES specific VMCB creation updates */ + sev_es_create_vcpu(svm); + return 0; error_free_vmsa_page: @@ -1451,6 +1463,7 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu) loadsegment(gs, svm->host.gs); #endif #endif + for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); } @@ -3101,6 +3114,7 @@ static void dump_vmcb(struct kvm_vcpu *vcpu) pr_err("%-20s%016llx\n", "avic_backing_page:", control->avic_backing_page); pr_err("%-20s%016llx\n", "avic_logical_id:", control->avic_logical_id); pr_err("%-20s%016llx\n", "avic_physical_id:", control->avic_physical_id); + pr_err("%-20s%016llx\n", "vmsa_pa:", control->vmsa_pa); pr_err("VMCB State Save Area:\n"); pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", "es:", diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index b85c162a8a1e7..03359185331c2 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -34,7 +34,7 @@ static const u32 host_save_user_msrs[] = { #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs) -#define MAX_DIRECT_ACCESS_MSRS 15 +#define MAX_DIRECT_ACCESS_MSRS 18 #define MSRPM_OFFSETS 16 extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly; extern bool npt_enabled; @@ -417,6 +417,8 @@ bool svm_nmi_blocked(struct kvm_vcpu *vcpu); bool svm_interrupt_blocked(struct kvm_vcpu *vcpu); void svm_set_gif(struct vcpu_svm *svm, bool value); int svm_invoke_exit_handler(struct vcpu_svm *svm, u64 exit_code); +void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr, + int read, int write); /* nested.c */ @@ -576,5 +578,7 @@ void sev_hardware_teardown(void); void sev_free_vcpu(struct kvm_vcpu *vcpu); int sev_handle_vmgexit(struct vcpu_svm *svm); int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in); +void sev_es_init_vmcb(struct vcpu_svm *svm); +void sev_es_create_vcpu(struct vcpu_svm *svm); #endif -- GitLab From 861377730aa9db4cbaa0f3bd3f4d295c152732c4 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:10:07 -0600 Subject: [PATCH 0572/1894] KVM: SVM: Provide support for SEV-ES vCPU loading An SEV-ES vCPU requires additional VMCB vCPU load/put requirements. SEV-ES hardware will restore certain registers on VMEXIT, but not save them on VMRUN (see Table B-3 and Table B-4 of the AMD64 APM Volume 2), so make the following changes: General vCPU load changes: - During vCPU loading, perform a VMSAVE to the per-CPU SVM save area and save the current values of XCR0, XSS and PKRU to the per-CPU SVM save area as these registers will be restored on VMEXIT. General vCPU put changes: - Do not attempt to restore registers that SEV-ES hardware has already restored on VMEXIT. Signed-off-by: Tom Lendacky Message-Id: <019390e9cb5e93cd73014fa5a040c17d42588733.1607620209.git.thomas.lendacky@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/svm.h | 10 ++++--- arch/x86/kvm/svm/sev.c | 54 ++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/svm/svm.c | 36 ++++++++++++++++--------- arch/x86/kvm/svm/svm.h | 22 +++++++++++----- arch/x86/kvm/x86.c | 3 ++- arch/x86/kvm/x86.h | 1 + 6 files changed, 103 insertions(+), 23 deletions(-) diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index a57331de59e22..1c561945b4268 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -234,7 +234,8 @@ struct vmcb_save_area { u8 cpl; u8 reserved_2[4]; u64 efer; - u8 reserved_3[112]; + u8 reserved_3[104]; + u64 xss; /* Valid for SEV-ES only */ u64 cr4; u64 cr3; u64 cr0; @@ -265,9 +266,12 @@ struct vmcb_save_area { /* * The following part of the save area is valid only for - * SEV-ES guests when referenced through the GHCB. + * SEV-ES guests when referenced through the GHCB or for + * saving to the host save area. */ - u8 reserved_7[104]; + u8 reserved_7[80]; + u32 pkru; + u8 reserved_7a[20]; u64 reserved_8; /* rax already available at 0x01f8 */ u64 rcx; u64 rdx; diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 584fede0b733f..0ddae41e48658 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -16,12 +16,15 @@ #include #include #include +#include #include "x86.h" #include "svm.h" #include "cpuid.h" #include "trace.h" +#define __ex(x) __kvm_handle_fault_on_reboot(x) + static u8 sev_enc_bit; static int sev_flush_asids(void); static DECLARE_RWSEM(sev_deactivate_lock); @@ -1852,3 +1855,54 @@ void sev_es_create_vcpu(struct vcpu_svm *svm) GHCB_VERSION_MIN, sev_enc_bit)); } + +void sev_es_vcpu_load(struct vcpu_svm *svm, int cpu) +{ + struct svm_cpu_data *sd = per_cpu(svm_data, cpu); + struct vmcb_save_area *hostsa; + unsigned int i; + + /* + * As an SEV-ES guest, hardware will restore the host state on VMEXIT, + * of which one step is to perform a VMLOAD. Since hardware does not + * perform a VMSAVE on VMRUN, the host savearea must be updated. + */ + asm volatile(__ex("vmsave") : : "a" (__sme_page_pa(sd->save_area)) : "memory"); + + /* + * Certain MSRs are restored on VMEXIT, only save ones that aren't + * restored. + */ + for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) { + if (host_save_user_msrs[i].sev_es_restored) + continue; + + rdmsrl(host_save_user_msrs[i].index, svm->host_user_msrs[i]); + } + + /* XCR0 is restored on VMEXIT, save the current host value */ + hostsa = (struct vmcb_save_area *)(page_address(sd->save_area) + 0x400); + hostsa->xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); + + /* PKRU is restored on VMEXIT, save the curent host value */ + hostsa->pkru = read_pkru(); + + /* MSR_IA32_XSS is restored on VMEXIT, save the currnet host value */ + hostsa->xss = host_xss; +} + +void sev_es_vcpu_put(struct vcpu_svm *svm) +{ + unsigned int i; + + /* + * Certain MSRs are restored on VMEXIT and were saved with vmsave in + * sev_es_vcpu_load() above. Only restore ones that weren't. + */ + for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) { + if (host_save_user_msrs[i].sev_es_restored) + continue; + + wrmsrl(host_save_user_msrs[i].index, svm->host_user_msrs[i]); + } +} diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 091f792498a25..3a2e48a8d05c0 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1417,15 +1417,20 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) vmcb_mark_all_dirty(svm->vmcb); } + if (sev_es_guest(svm->vcpu.kvm)) { + sev_es_vcpu_load(svm, cpu); + } else { #ifdef CONFIG_X86_64 - rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host.gs_base); + rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host.gs_base); #endif - savesegment(fs, svm->host.fs); - savesegment(gs, svm->host.gs); - svm->host.ldt = kvm_read_ldt(); + savesegment(fs, svm->host.fs); + savesegment(gs, svm->host.gs); + svm->host.ldt = kvm_read_ldt(); - for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) - rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); + for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) + rdmsrl(host_save_user_msrs[i].index, + svm->host_user_msrs[i]); + } if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) { u64 tsc_ratio = vcpu->arch.tsc_scaling_ratio; @@ -1453,19 +1458,24 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu) avic_vcpu_put(vcpu); ++vcpu->stat.host_state_reload; - kvm_load_ldt(svm->host.ldt); + if (sev_es_guest(svm->vcpu.kvm)) { + sev_es_vcpu_put(svm); + } else { + kvm_load_ldt(svm->host.ldt); #ifdef CONFIG_X86_64 - loadsegment(fs, svm->host.fs); - wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gsbase); - load_gs_index(svm->host.gs); + loadsegment(fs, svm->host.fs); + wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gsbase); + load_gs_index(svm->host.gs); #else #ifdef CONFIG_X86_32_LAZY_GS - loadsegment(gs, svm->host.gs); + loadsegment(gs, svm->host.gs); #endif #endif - for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) - wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); + for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) + wrmsrl(host_save_user_msrs[i].index, + svm->host_user_msrs[i]); + } } static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 03359185331c2..df9fe11a632c2 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -23,15 +23,23 @@ #define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT) -static const u32 host_save_user_msrs[] = { +static const struct svm_host_save_msrs { + u32 index; /* Index of the MSR */ + bool sev_es_restored; /* True if MSR is restored on SEV-ES VMEXIT */ +} host_save_user_msrs[] = { #ifdef CONFIG_X86_64 - MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE, - MSR_FS_BASE, + { .index = MSR_STAR, .sev_es_restored = true }, + { .index = MSR_LSTAR, .sev_es_restored = true }, + { .index = MSR_CSTAR, .sev_es_restored = true }, + { .index = MSR_SYSCALL_MASK, .sev_es_restored = true }, + { .index = MSR_KERNEL_GS_BASE, .sev_es_restored = true }, + { .index = MSR_FS_BASE, .sev_es_restored = true }, #endif - MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, - MSR_TSC_AUX, + { .index = MSR_IA32_SYSENTER_CS, .sev_es_restored = true }, + { .index = MSR_IA32_SYSENTER_ESP, .sev_es_restored = true }, + { .index = MSR_IA32_SYSENTER_EIP, .sev_es_restored = true }, + { .index = MSR_TSC_AUX, .sev_es_restored = false }, }; - #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs) #define MAX_DIRECT_ACCESS_MSRS 18 @@ -580,5 +588,7 @@ int sev_handle_vmgexit(struct vcpu_svm *svm); int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in); void sev_es_init_vmcb(struct vcpu_svm *svm); void sev_es_create_vcpu(struct vcpu_svm *svm); +void sev_es_vcpu_load(struct vcpu_svm *svm, int cpu); +void sev_es_vcpu_put(struct vcpu_svm *svm); #endif diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index eea0e9fed62a9..3e58612babfe0 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -197,7 +197,8 @@ EXPORT_SYMBOL_GPL(host_efer); bool __read_mostly allow_smaller_maxphyaddr = 0; EXPORT_SYMBOL_GPL(allow_smaller_maxphyaddr); -static u64 __read_mostly host_xss; +u64 __read_mostly host_xss; +EXPORT_SYMBOL_GPL(host_xss); u64 __read_mostly supported_xss; EXPORT_SYMBOL_GPL(supported_xss); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index fe7f3df91b2c3..c5ee0f5ce0f13 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -279,6 +279,7 @@ fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu); extern u64 host_xcr0; extern u64 supported_xcr0; +extern u64 host_xss; extern u64 supported_xss; static inline bool kvm_mpx_supported(void) -- GitLab From 16809ecdc1e8ab7278f1d60021ac809edd17d060 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:10:08 -0600 Subject: [PATCH 0573/1894] KVM: SVM: Provide an updated VMRUN invocation for SEV-ES guests The run sequence is different for an SEV-ES guest compared to a legacy or even an SEV guest. The guest vCPU register state of an SEV-ES guest will be restored on VMRUN and saved on VMEXIT. There is no need to restore the guest registers directly and through VMLOAD before VMRUN and no need to save the guest registers directly and through VMSAVE on VMEXIT. Update the svm_vcpu_run() function to skip register state saving and restoring and provide an alternative function for running an SEV-ES guest in vmenter.S Additionally, certain host state is restored across an SEV-ES VMRUN. As a result certain register states are not required to be restored upon VMEXIT (e.g. FS, GS, etc.), so only do that if the guest is not an SEV-ES guest. Signed-off-by: Tom Lendacky Message-Id: Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 25 ++++++++++++------- arch/x86/kvm/svm/svm.h | 5 ++++ arch/x86/kvm/svm/vmenter.S | 50 ++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/x86.c | 6 +++++ 4 files changed, 77 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 3a2e48a8d05c0..941e5251e13fe 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3700,16 +3700,20 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, guest_enter_irqoff(); lockdep_hardirqs_on(CALLER_ADDR0); - __svm_vcpu_run(svm->vmcb_pa, (unsigned long *)&svm->vcpu.arch.regs); + if (sev_es_guest(svm->vcpu.kvm)) { + __svm_sev_es_vcpu_run(svm->vmcb_pa); + } else { + __svm_vcpu_run(svm->vmcb_pa, (unsigned long *)&svm->vcpu.arch.regs); #ifdef CONFIG_X86_64 - native_wrmsrl(MSR_GS_BASE, svm->host.gs_base); + native_wrmsrl(MSR_GS_BASE, svm->host.gs_base); #else - loadsegment(fs, svm->host.fs); + loadsegment(fs, svm->host.fs); #ifndef CONFIG_X86_32_LAZY_GS - loadsegment(gs, svm->host.gs); + loadsegment(gs, svm->host.gs); #endif #endif + } /* * VMEXIT disables interrupts (host state), but tracing and lockdep @@ -3807,14 +3811,17 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); - reload_tss(vcpu); + if (!sev_es_guest(svm->vcpu.kvm)) + reload_tss(vcpu); x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl); - vcpu->arch.cr2 = svm->vmcb->save.cr2; - vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax; - vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; - vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; + if (!sev_es_guest(svm->vcpu.kvm)) { + vcpu->arch.cr2 = svm->vmcb->save.cr2; + vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax; + vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; + vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; + } if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) kvm_before_interrupt(&svm->vcpu); diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index df9fe11a632c2..a5067f776ce0c 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -591,4 +591,9 @@ void sev_es_create_vcpu(struct vcpu_svm *svm); void sev_es_vcpu_load(struct vcpu_svm *svm, int cpu); void sev_es_vcpu_put(struct vcpu_svm *svm); +/* vmenter.S */ + +void __svm_sev_es_vcpu_run(unsigned long vmcb_pa); +void __svm_vcpu_run(unsigned long vmcb_pa, unsigned long *regs); + #endif diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index 1ec1ac40e3280..6feb8c08f45ab 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -168,3 +168,53 @@ SYM_FUNC_START(__svm_vcpu_run) pop %_ASM_BP ret SYM_FUNC_END(__svm_vcpu_run) + +/** + * __svm_sev_es_vcpu_run - Run a SEV-ES vCPU via a transition to SVM guest mode + * @vmcb_pa: unsigned long + */ +SYM_FUNC_START(__svm_sev_es_vcpu_run) + push %_ASM_BP +#ifdef CONFIG_X86_64 + push %r15 + push %r14 + push %r13 + push %r12 +#else + push %edi + push %esi +#endif + push %_ASM_BX + + /* Enter guest mode */ + mov %_ASM_ARG1, %_ASM_AX + sti + +1: vmrun %_ASM_AX + jmp 3f +2: cmpb $0, kvm_rebooting + jne 3f + ud2 + _ASM_EXTABLE(1b, 2b) + +3: cli + +#ifdef CONFIG_RETPOLINE + /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ + FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE +#endif + + pop %_ASM_BX + +#ifdef CONFIG_X86_64 + pop %r12 + pop %r13 + pop %r14 + pop %r15 +#else + pop %esi + pop %edi +#endif + pop %_ASM_BP + ret +SYM_FUNC_END(__svm_sev_es_vcpu_run) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3e58612babfe0..648c677b12e94 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -880,6 +880,9 @@ EXPORT_SYMBOL_GPL(kvm_lmsw); void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu) { + if (vcpu->arch.guest_state_protected) + return; + if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) { if (vcpu->arch.xcr0 != host_xcr0) @@ -900,6 +903,9 @@ EXPORT_SYMBOL_GPL(kvm_load_guest_xsave_state); void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu) { + if (vcpu->arch.guest_state_protected) + return; + if (static_cpu_has(X86_FEATURE_PKU) && (kvm_read_cr4_bits(vcpu, X86_CR4_PKE) || (vcpu->arch.xcr0 & XFEATURE_MASK_PKRU))) { -- GitLab From ad73109ae7ec30d5bfb76be108e304f9f0af4829 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:10:09 -0600 Subject: [PATCH 0574/1894] KVM: SVM: Provide support to launch and run an SEV-ES guest An SEV-ES guest is started by invoking a new SEV initialization ioctl, KVM_SEV_ES_INIT. This identifies the guest as an SEV-ES guest, which is used to drive the appropriate ASID allocation, VMSA encryption, etc. Before being able to run an SEV-ES vCPU, the vCPU VMSA must be encrypted and measured. This is done using the LAUNCH_UPDATE_VMSA command after all calls to LAUNCH_UPDATE_DATA have been performed, but before LAUNCH_MEASURE has been performed. In order to establish the encrypted VMSA, the current (traditional) VMSA and the GPRs are synced to the page that will hold the encrypted VMSA and then LAUNCH_UPDATE_VMSA is invoked. The vCPU is then marked as having protected guest state. Signed-off-by: Tom Lendacky Message-Id: Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 104 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 0ddae41e48658..6eb097714d43f 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -201,6 +201,16 @@ e_free: return ret; } +static int sev_es_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) +{ + if (!sev_es) + return -ENOTTY; + + to_kvm_svm(kvm)->sev_info.es_active = true; + + return sev_guest_init(kvm, argp); +} + static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error) { struct sev_data_activate *data; @@ -500,6 +510,94 @@ e_free: return ret; } +static int sev_es_sync_vmsa(struct vcpu_svm *svm) +{ + struct vmcb_save_area *save = &svm->vmcb->save; + + /* Check some debug related fields before encrypting the VMSA */ + if (svm->vcpu.guest_debug || (save->dr7 & ~DR7_FIXED_1)) + return -EINVAL; + + /* Sync registgers */ + save->rax = svm->vcpu.arch.regs[VCPU_REGS_RAX]; + save->rbx = svm->vcpu.arch.regs[VCPU_REGS_RBX]; + save->rcx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; + save->rdx = svm->vcpu.arch.regs[VCPU_REGS_RDX]; + save->rsp = svm->vcpu.arch.regs[VCPU_REGS_RSP]; + save->rbp = svm->vcpu.arch.regs[VCPU_REGS_RBP]; + save->rsi = svm->vcpu.arch.regs[VCPU_REGS_RSI]; + save->rdi = svm->vcpu.arch.regs[VCPU_REGS_RDI]; + save->r8 = svm->vcpu.arch.regs[VCPU_REGS_R8]; + save->r9 = svm->vcpu.arch.regs[VCPU_REGS_R9]; + save->r10 = svm->vcpu.arch.regs[VCPU_REGS_R10]; + save->r11 = svm->vcpu.arch.regs[VCPU_REGS_R11]; + save->r12 = svm->vcpu.arch.regs[VCPU_REGS_R12]; + save->r13 = svm->vcpu.arch.regs[VCPU_REGS_R13]; + save->r14 = svm->vcpu.arch.regs[VCPU_REGS_R14]; + save->r15 = svm->vcpu.arch.regs[VCPU_REGS_R15]; + save->rip = svm->vcpu.arch.regs[VCPU_REGS_RIP]; + + /* Sync some non-GPR registers before encrypting */ + save->xcr0 = svm->vcpu.arch.xcr0; + save->pkru = svm->vcpu.arch.pkru; + save->xss = svm->vcpu.arch.ia32_xss; + + /* + * SEV-ES will use a VMSA that is pointed to by the VMCB, not + * the traditional VMSA that is part of the VMCB. Copy the + * traditional VMSA as it has been built so far (in prep + * for LAUNCH_UPDATE_VMSA) to be the initial SEV-ES state. + */ + memcpy(svm->vmsa, save, sizeof(*save)); + + return 0; +} + +static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp) +{ + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; + struct sev_data_launch_update_vmsa *vmsa; + int i, ret; + + if (!sev_es_guest(kvm)) + return -ENOTTY; + + vmsa = kzalloc(sizeof(*vmsa), GFP_KERNEL); + if (!vmsa) + return -ENOMEM; + + for (i = 0; i < kvm->created_vcpus; i++) { + struct vcpu_svm *svm = to_svm(kvm->vcpus[i]); + + /* Perform some pre-encryption checks against the VMSA */ + ret = sev_es_sync_vmsa(svm); + if (ret) + goto e_free; + + /* + * The LAUNCH_UPDATE_VMSA command will perform in-place + * encryption of the VMSA memory content (i.e it will write + * the same memory region with the guest's key), so invalidate + * it first. + */ + clflush_cache_range(svm->vmsa, PAGE_SIZE); + + vmsa->handle = sev->handle; + vmsa->address = __sme_pa(svm->vmsa); + vmsa->len = PAGE_SIZE; + ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, vmsa, + &argp->error); + if (ret) + goto e_free; + + svm->vcpu.arch.guest_state_protected = true; + } + +e_free: + kfree(vmsa); + return ret; +} + static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp) { void __user *measure = (void __user *)(uintptr_t)argp->data; @@ -957,12 +1055,18 @@ int svm_mem_enc_op(struct kvm *kvm, void __user *argp) case KVM_SEV_INIT: r = sev_guest_init(kvm, &sev_cmd); break; + case KVM_SEV_ES_INIT: + r = sev_es_guest_init(kvm, &sev_cmd); + break; case KVM_SEV_LAUNCH_START: r = sev_launch_start(kvm, &sev_cmd); break; case KVM_SEV_LAUNCH_UPDATE_DATA: r = sev_launch_update_data(kvm, &sev_cmd); break; + case KVM_SEV_LAUNCH_UPDATE_VMSA: + r = sev_launch_update_vmsa(kvm, &sev_cmd); + break; case KVM_SEV_LAUNCH_MEASURE: r = sev_launch_measure(kvm, &sev_cmd); break; -- GitLab From ae7927023243dcc7389b2d59b16c09cbbeaecc36 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 10 Dec 2020 17:14:08 +0100 Subject: [PATCH 0575/1894] sched: Optimize finish_lock_switch() The kernel test robot measured a -1.6% performance regression on will-it-scale/sched_yield due to commit: 2558aacff858 ("sched/hotplug: Ensure only per-cpu kthreads run during hotplug") Even though we were careful to replace a single load with another single load from the same cacheline. Restore finish_lock_switch() to the exact state before the offending patch and solve the problem differently. Fixes: 2558aacff858 ("sched/hotplug: Ensure only per-cpu kthreads run during hotplug") Reported-by: kernel test robot Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201210161408.GX3021@hirez.programming.kicks-ass.net --- kernel/sched/core.c | 40 +++++++++++++++------------------------- kernel/sched/sched.h | 13 +++++-------- 2 files changed, 20 insertions(+), 33 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 7af80c3fce12b..0ca7d2dc16d58 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3985,15 +3985,20 @@ static void do_balance_callbacks(struct rq *rq, struct callback_head *head) } } +static void balance_push(struct rq *rq); + +struct callback_head balance_push_callback = { + .next = NULL, + .func = (void (*)(struct callback_head *))balance_push, +}; + static inline struct callback_head *splice_balance_callbacks(struct rq *rq) { struct callback_head *head = rq->balance_callback; lockdep_assert_held(&rq->lock); - if (head) { + if (head) rq->balance_callback = NULL; - rq->balance_flags &= ~BALANCE_WORK; - } return head; } @@ -4014,21 +4019,6 @@ static inline void balance_callbacks(struct rq *rq, struct callback_head *head) } } -static void balance_push(struct rq *rq); - -static inline void balance_switch(struct rq *rq) -{ - if (likely(!rq->balance_flags)) - return; - - if (rq->balance_flags & BALANCE_PUSH) { - balance_push(rq); - return; - } - - __balance_callbacks(rq); -} - #else static inline void __balance_callbacks(struct rq *rq) @@ -4044,10 +4034,6 @@ static inline void balance_callbacks(struct rq *rq, struct callback_head *head) { } -static inline void balance_switch(struct rq *rq) -{ -} - #endif static inline void @@ -4075,7 +4061,7 @@ static inline void finish_lock_switch(struct rq *rq) * prev into current: */ spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_); - balance_switch(rq); + __balance_callbacks(rq); raw_spin_unlock_irq(&rq->lock); } @@ -7256,6 +7242,10 @@ static void balance_push(struct rq *rq) lockdep_assert_held(&rq->lock); SCHED_WARN_ON(rq->cpu != smp_processor_id()); + /* + * Ensure the thing is persistent until balance_push_set(.on = false); + */ + rq->balance_callback = &balance_push_callback; /* * Both the cpu-hotplug and stop task are in this case and are @@ -7305,9 +7295,9 @@ static void balance_push_set(int cpu, bool on) rq_lock_irqsave(rq, &rf); if (on) - rq->balance_flags |= BALANCE_PUSH; + rq->balance_callback = &balance_push_callback; else - rq->balance_flags &= ~BALANCE_PUSH; + rq->balance_callback = NULL; rq_unlock_irqrestore(rq, &rf); } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index f5acb6c5ce493..12ada79d40f33 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -975,7 +975,6 @@ struct rq { unsigned long cpu_capacity_orig; struct callback_head *balance_callback; - unsigned char balance_flags; unsigned char nohz_idle_balance; unsigned char idle_balance; @@ -1226,6 +1225,8 @@ struct rq_flags { #endif }; +extern struct callback_head balance_push_callback; + /* * Lockdep annotation that avoids accidental unlocks; it's like a * sticky/continuous lockdep_assert_held(). @@ -1243,9 +1244,9 @@ static inline void rq_pin_lock(struct rq *rq, struct rq_flags *rf) #ifdef CONFIG_SCHED_DEBUG rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP); rf->clock_update_flags = 0; -#endif #ifdef CONFIG_SMP - SCHED_WARN_ON(rq->balance_callback); + SCHED_WARN_ON(rq->balance_callback && rq->balance_callback != &balance_push_callback); +#endif #endif } @@ -1408,9 +1409,6 @@ init_numa_balancing(unsigned long clone_flags, struct task_struct *p) #ifdef CONFIG_SMP -#define BALANCE_WORK 0x01 -#define BALANCE_PUSH 0x02 - static inline void queue_balance_callback(struct rq *rq, struct callback_head *head, @@ -1418,13 +1416,12 @@ queue_balance_callback(struct rq *rq, { lockdep_assert_held(&rq->lock); - if (unlikely(head->next || (rq->balance_flags & BALANCE_PUSH))) + if (unlikely(head->next || rq->balance_callback == &balance_push_callback)) return; head->func = (void (*)(struct callback_head *))func; head->next = rq->balance_callback; rq->balance_callback = head; - rq->balance_flags |= BALANCE_WORK; } #define rcu_dereference_check_sched_domain(p) \ -- GitLab From fe6ce6c394fb1ef1d8a6384c5180e70893157f22 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 15 Dec 2020 15:05:11 +0200 Subject: [PATCH 0576/1894] MAINTAINERS: Update email address for TI ASoC and twl4030 codec drivers My employment with TI is coming to an end, it is my intention to look after the drivers I have worked with over the years. Signed-off-by: Peter Ujfalusi Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20201215130512.8753-2-peter.ujfalusi@ti.com Signed-off-by: Mark Brown --- MAINTAINERS | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 3da6d8c154e48..666b76a634a8d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12644,7 +12644,7 @@ F: include/misc/ocxl* F: include/uapi/misc/ocxl.h OMAP AUDIO SUPPORT -M: Peter Ujfalusi +M: Peter Ujfalusi M: Jarkko Nikula L: alsa-devel@alsa-project.org (moderated for non-subscribers) L: linux-omap@vger.kernel.org @@ -17280,7 +17280,7 @@ F: arch/xtensa/ F: drivers/irqchip/irq-xtensa-* TEXAS INSTRUMENTS ASoC DRIVERS -M: Peter Ujfalusi +M: Peter Ujfalusi L: alsa-devel@alsa-project.org (moderated for non-subscribers) S: Maintained F: sound/soc/ti/ @@ -17571,7 +17571,7 @@ F: Documentation/devicetree/bindings/net/nfc/trf7970a.txt F: drivers/nfc/trf7970a.c TI TWL4030 SERIES SOC CODEC DRIVER -M: Peter Ujfalusi +M: Peter Ujfalusi L: alsa-devel@alsa-project.org (moderated for non-subscribers) S: Maintained F: sound/soc/codecs/twl4030* -- GitLab From 61fc03b6512b18f27a25002426d595f5a36645ed Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 15 Dec 2020 15:05:12 +0200 Subject: [PATCH 0577/1894] ASoC: dt-bindings: ti, j721e: Update maintainer and author information My employment with TI is coming to an end, add the copyright and author comments as they due and change the maintainer mail address. Signed-off-by: Peter Ujfalusi Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20201215130512.8753-3-peter.ujfalusi@ti.com Signed-off-by: Mark Brown --- .../devicetree/bindings/sound/ti,j721e-cpb-audio.yaml | 4 +++- .../devicetree/bindings/sound/ti,j721e-cpb-ivi-audio.yaml | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/sound/ti,j721e-cpb-audio.yaml b/Documentation/devicetree/bindings/sound/ti,j721e-cpb-audio.yaml index 805da4d6a88ed..ec06789b21dfc 100644 --- a/Documentation/devicetree/bindings/sound/ti,j721e-cpb-audio.yaml +++ b/Documentation/devicetree/bindings/sound/ti,j721e-cpb-audio.yaml @@ -1,4 +1,6 @@ # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +# Copyright (C) 2020 Texas Instruments Incorporated +# Author: Peter Ujfalusi %YAML 1.2 --- $id: http://devicetree.org/schemas/sound/ti,j721e-cpb-audio.yaml# @@ -7,7 +9,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Texas Instruments J721e Common Processor Board Audio Support maintainers: - - Peter Ujfalusi + - Peter Ujfalusi description: | The audio support on the board is using pcm3168a codec connected to McASP10 diff --git a/Documentation/devicetree/bindings/sound/ti,j721e-cpb-ivi-audio.yaml b/Documentation/devicetree/bindings/sound/ti,j721e-cpb-ivi-audio.yaml index bb780f6216287..ee9f960de36b7 100644 --- a/Documentation/devicetree/bindings/sound/ti,j721e-cpb-ivi-audio.yaml +++ b/Documentation/devicetree/bindings/sound/ti,j721e-cpb-ivi-audio.yaml @@ -1,4 +1,6 @@ # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +# Copyright (C) 2020 Texas Instruments Incorporated +# Author: Peter Ujfalusi %YAML 1.2 --- $id: http://devicetree.org/schemas/sound/ti,j721e-cpb-ivi-audio.yaml# @@ -7,7 +9,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Texas Instruments J721e Common Processor Board Audio Support maintainers: - - Peter Ujfalusi + - Peter Ujfalusi description: | The Infotainment board plugs into the Common Processor Board, the support of the -- GitLab From 5c6679b5cb120f07652418524ab186ac47680b49 Mon Sep 17 00:00:00 2001 From: Thomas Hebb Date: Sat, 12 Dec 2020 17:20:12 -0800 Subject: [PATCH 0578/1894] ASoC: dapm: remove widget from dirty list on free A widget's "dirty" list_head, much like its "list" list_head, eventually chains back to a list_head on the snd_soc_card itself. This means that the list can stick around even after the widget (or all widgets) have been freed. Currently, however, widgets that are in the dirty list when freed remain there, corrupting the entire list and leading to memory errors and undefined behavior when the list is next accessed or modified. I encountered this issue when a component failed to probe relatively late in snd_soc_bind_card(), causing it to bail out and call soc_cleanup_card_resources(), which eventually called snd_soc_dapm_free() with widgets that were still dirty from when they'd been added. Fixes: db432b414e20 ("ASoC: Do DAPM power checks only for widgets changed since last run") Cc: stable@vger.kernel.org Signed-off-by: Thomas Hebb Reviewed-by: Charles Keepax Link: https://lore.kernel.org/r/f8b5f031d50122bf1a9bfc9cae046badf4a7a31a.1607822410.git.tommyhebb@gmail.com Signed-off-by: Mark Brown --- sound/soc/soc-dapm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 9f0c86cbdcca2..2b75d0139e478 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -2486,6 +2486,7 @@ void snd_soc_dapm_free_widget(struct snd_soc_dapm_widget *w) enum snd_soc_dapm_direction dir; list_del(&w->list); + list_del(&w->dirty); /* * remove source and sink paths associated to this widget. * While removing the path, remove reference to it from both -- GitLab From b2ce5dbc15819ea4bef47dbd368239cb1e965158 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Thu, 19 Nov 2020 20:54:11 +0530 Subject: [PATCH 0579/1894] perf test: Fix metric parsing test Commit e1c92a7fbbc5 ("perf tests: Add another metric parsing test") add another test for metric parsing. The test goes through all metrics compiled for arch within pmu events and try to parse them. Right now this test is failing in powerpc machine. Result in power9 platform: [command]# ./perf test 10 10: PMU events : 10.1: PMU event table sanity : Ok 10.2: PMU event map aliases : Ok 10.3: Parsing of PMU event table metrics : Skip (some metrics failed) 10.4: Parsing of PMU event table metrics with fake PMUs : FAILED! Issue is we are passing different runtime parameter value in "expr__find_other" and "expr__parse" function which is called from function `metric_parse_fake`. And because of this parsing of hv-24x7 metrics is failing. [command]# ./perf test 10 -vv ..... hv_24x7/pm_mcs01_128b_rd_disp_port01,chip=1/ not found expr__parse failed test child finished with -1 ---- end ---- PMU events subtest 4: FAILED! This patch fix this issue and change runtime parameter value to '0' in expr__parse function. Result in power9 platform after this patch: [command]# ./perf test 10 10: PMU events : 10.1: PMU event table sanity : Ok 10.2: PMU event map aliases : Ok 10.3: Parsing of PMU event table metrics : Skip (some metrics failed) 10.4: Parsing of PMU event table metrics with fake PMUs : Ok Fixes: e1c92a7fbbc5 ("perf tests: Add another metric parsing test") Signed-off-by: Kajol Jain Acked-by: Ian Rogers Acked-by: Jiri Olsa Cc: Madhavan Srinivasan Cc: Ravi Bangoria Link: http://lore.kernel.org/lkml/20201119152411.46041-1-kjain@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/pmu-events.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c index ad2b21591275b..0ca6a5a535237 100644 --- a/tools/perf/tests/pmu-events.c +++ b/tools/perf/tests/pmu-events.c @@ -575,7 +575,7 @@ static int metric_parse_fake(const char *str) } } - if (expr__parse(&result, &ctx, str, 1)) + if (expr__parse(&result, &ctx, str, 0)) pr_err("expr__parse failed\n"); else ret = 0; -- GitLab From a313357e704f2617f298333e3e617a38b1719760 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2020 20:25:37 +0100 Subject: [PATCH 0580/1894] genirq: Move irq_has_action() into core code This function uses irq_to_desc() and is going to be used by modules to replace the open coded irq_to_desc() (ab)usage. The final goal is to remove the export of irq_to_desc() so driver cannot fiddle with it anymore. Move it into the core code and fixup the usage sites to include the proper header. Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201210194042.548936472@linutronix.de --- arch/alpha/kernel/sys_jensen.c | 2 +- arch/x86/kernel/topology.c | 1 + include/linux/interrupt.h | 1 + include/linux/irqdesc.h | 7 +------ kernel/irq/manage.c | 17 +++++++++++++++++ 5 files changed, 21 insertions(+), 7 deletions(-) diff --git a/arch/alpha/kernel/sys_jensen.c b/arch/alpha/kernel/sys_jensen.c index 0a2ab6cb18db4..e5d870ff225f4 100644 --- a/arch/alpha/kernel/sys_jensen.c +++ b/arch/alpha/kernel/sys_jensen.c @@ -7,7 +7,7 @@ * * Code supporting the Jensen. */ - +#include #include #include #include diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c index 0a2ec801b63fc..f5477eab5692b 100644 --- a/arch/x86/kernel/topology.c +++ b/arch/x86/kernel/topology.c @@ -25,6 +25,7 @@ * * Send feedback to */ +#include #include #include #include diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 870b3251e174e..bb8ff9083e7db 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -232,6 +232,7 @@ extern void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id); # define local_irq_enable_in_hardirq() local_irq_enable() #endif +bool irq_has_action(unsigned int irq); extern void disable_irq_nosync(unsigned int irq); extern bool disable_hardirq(unsigned int irq); extern void disable_irq(unsigned int irq); diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 5745491303e03..385a4fafe6311 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -179,12 +179,7 @@ int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq, /* Test to see if a driver has successfully requested an irq */ static inline int irq_desc_has_action(struct irq_desc *desc) { - return desc->action != NULL; -} - -static inline int irq_has_action(unsigned int irq) -{ - return irq_desc_has_action(irq_to_desc(irq)); + return desc && desc->action != NULL; } /** diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index c826ba4141fef..a5a1cde5c1a28 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -2822,3 +2822,20 @@ out_unlock: return err; } EXPORT_SYMBOL_GPL(irq_set_irqchip_state); + +/** + * irq_has_action - Check whether an interrupt is requested + * @irq: The linux irq number + * + * Returns: A snapshot of the current state + */ +bool irq_has_action(unsigned int irq) +{ + bool res; + + rcu_read_lock(); + res = irq_desc_has_action(irq_to_desc(irq)); + rcu_read_unlock(); + return res; +} +EXPORT_SYMBOL_GPL(irq_has_action); -- GitLab From fdd029630434b434b127efc7fba337da28f45658 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2020 20:25:38 +0100 Subject: [PATCH 0581/1894] genirq: Move status flag checks to core These checks are used by modules and prevent the removal of the export of irq_to_desc(). Move the accessor into the core. Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201210194042.703779349@linutronix.de --- include/linux/irqdesc.h | 17 +++++------------ kernel/irq/manage.c | 20 ++++++++++++++++++++ 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 385a4fafe6311..308d7db8991f3 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -223,28 +223,21 @@ irq_set_chip_handler_name_locked(struct irq_data *data, struct irq_chip *chip, data->chip = chip; } +bool irq_check_status_bit(unsigned int irq, unsigned int bitmask); + static inline bool irq_balancing_disabled(unsigned int irq) { - struct irq_desc *desc; - - desc = irq_to_desc(irq); - return desc->status_use_accessors & IRQ_NO_BALANCING_MASK; + return irq_check_status_bit(irq, IRQ_NO_BALANCING_MASK); } static inline bool irq_is_percpu(unsigned int irq) { - struct irq_desc *desc; - - desc = irq_to_desc(irq); - return desc->status_use_accessors & IRQ_PER_CPU; + return irq_check_status_bit(irq, IRQ_PER_CPU); } static inline bool irq_is_percpu_devid(unsigned int irq) { - struct irq_desc *desc; - - desc = irq_to_desc(irq); - return desc->status_use_accessors & IRQ_PER_CPU_DEVID; + return irq_check_status_bit(irq, IRQ_PER_CPU_DEVID); } static inline void diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index a5a1cde5c1a28..ab8567f32501f 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -2839,3 +2839,23 @@ bool irq_has_action(unsigned int irq) return res; } EXPORT_SYMBOL_GPL(irq_has_action); + +/** + * irq_check_status_bit - Check whether bits in the irq descriptor status are set + * @irq: The linux irq number + * @bitmask: The bitmask to evaluate + * + * Returns: True if one of the bits in @bitmask is set + */ +bool irq_check_status_bit(unsigned int irq, unsigned int bitmask) +{ + struct irq_desc *desc; + bool res = false; + + rcu_read_lock(); + desc = irq_to_desc(irq); + if (desc) + res = !!(desc->status_use_accessors & bitmask); + rcu_read_unlock(); + return res; +} -- GitLab From f1c6306c0d6b50844ba02c8a53e35405e9c0db05 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2020 20:25:39 +0100 Subject: [PATCH 0582/1894] genirq: Move irq_set_lockdep_class() to core irq_set_lockdep_class() is used from modules and requires irq_to_desc() to be exported. Move it into the core code which lifts another requirement for the export. Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201210194042.860029489@linutronix.de --- include/linux/irqdesc.h | 10 ++++------ kernel/irq/irqdesc.c | 14 ++++++++++++++ 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 308d7db8991f3..4a1d016716f4e 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -240,16 +240,14 @@ static inline bool irq_is_percpu_devid(unsigned int irq) return irq_check_status_bit(irq, IRQ_PER_CPU_DEVID); } +void __irq_set_lockdep_class(unsigned int irq, struct lock_class_key *lock_class, + struct lock_class_key *request_class); static inline void irq_set_lockdep_class(unsigned int irq, struct lock_class_key *lock_class, struct lock_class_key *request_class) { - struct irq_desc *desc = irq_to_desc(irq); - - if (desc) { - lockdep_set_class(&desc->lock, lock_class); - lockdep_set_class(&desc->request_mutex, request_class); - } + if (IS_ENABLED(CONFIG_LOCKDEP)) + __irq_set_lockdep_class(irq, lock_class, request_class); } #endif diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index e810eb9906ea1..20a54fa7cd302 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -968,3 +968,17 @@ unsigned int kstat_irqs_usr(unsigned int irq) rcu_read_unlock(); return sum; } + +#ifdef CONFIG_LOCKDEP +void __irq_set_lockdep_class(unsigned int irq, struct lock_class_key *lock_class, + struct lock_class_key *request_class) +{ + struct irq_desc *desc = irq_to_desc(irq); + + if (desc) { + lockdep_set_class(&desc->lock, lock_class); + lockdep_set_class(&desc->request_mutex, request_class); + } +} +EXPORT_SYMBOL_GPL(__irq_set_lockdep_class); +#endif -- GitLab From 3e2380123fb96987ce958f623207010c667ffa7c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2020 20:25:40 +0100 Subject: [PATCH 0583/1894] genirq: Provide irq_get_effective_affinity() Provide an accessor to the effective interrupt affinity mask. Going to be used to replace open coded fiddling with the irq descriptor. Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201210194042.967177918@linutronix.de --- include/linux/irq.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/linux/irq.h b/include/linux/irq.h index c332871d59da9..4aeb1c4c7e07b 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -906,6 +906,13 @@ struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d) } #endif +static inline struct cpumask *irq_get_effective_affinity_mask(unsigned int irq) +{ + struct irq_data *d = irq_get_irq_data(irq); + + return d ? irq_data_get_effective_affinity_mask(d) : NULL; +} + unsigned int arch_dynirq_lower_bound(unsigned int from); int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, -- GitLab From 9e42ad10cedf0632fc39860381375806092212bd Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2020 20:25:41 +0100 Subject: [PATCH 0584/1894] genirq: Annotate irq stats data races Both the per cpu stats and the accumulated count are accessed lockless and can be concurrently modified. That's intentional and the stats are a rough estimate anyway. Annotate them with data_race(). Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201210194043.067097663@linutronix.de --- kernel/irq/irqdesc.c | 4 ++-- kernel/irq/proc.c | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 20a54fa7cd302..02b446a21ce64 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -943,10 +943,10 @@ unsigned int kstat_irqs(unsigned int irq) if (!irq_settings_is_per_cpu_devid(desc) && !irq_settings_is_per_cpu(desc) && !irq_is_nmi(desc)) - return desc->tot_count; + return data_race(desc->tot_count); for_each_possible_cpu(cpu) - sum += *per_cpu_ptr(desc->kstat_irqs, cpu); + sum += data_race(*per_cpu_ptr(desc->kstat_irqs, cpu)); return sum; } diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 72513ed2a5fc6..98138788cb04d 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -488,9 +488,10 @@ int show_interrupts(struct seq_file *p, void *v) if (!desc || irq_settings_is_hidden(desc)) goto outsparse; - if (desc->kstat_irqs) + if (desc->kstat_irqs) { for_each_online_cpu(j) - any_count |= *per_cpu_ptr(desc->kstat_irqs, j); + any_count |= data_race(*per_cpu_ptr(desc->kstat_irqs, j)); + } if ((!desc->action || irq_desc_is_chained(desc)) && !any_count) goto outsparse; -- GitLab From bb0e5192f59875031a0ad060bef2ea0f6c657474 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2020 20:25:42 +0100 Subject: [PATCH 0585/1894] parisc/irq: Simplify irq count output for /proc/interrupts The SMP variant works perfectly fine on UP as well. Signed-off-by: Thomas Gleixner Cc: linux-parisc@vger.kernel.org Link: https://lore.kernel.org/r/20201210194043.172893840@linutronix.de --- arch/parisc/kernel/irq.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c index e76c866199493..15c6207cf3d4e 100644 --- a/arch/parisc/kernel/irq.c +++ b/arch/parisc/kernel/irq.c @@ -216,12 +216,9 @@ int show_interrupts(struct seq_file *p, void *v) if (!action) goto skip; seq_printf(p, "%3d: ", i); -#ifdef CONFIG_SMP + for_each_online_cpu(j) seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); -#else - seq_printf(p, "%10u ", kstat_irqs(i)); -#endif seq_printf(p, " %14s", irq_desc_get_chip(desc)->name); #ifndef PARISC_IRQ_CR16_COUNTS -- GitLab From 26c19d0a8610fb233b31730fe26a31145f2d9796 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2020 20:25:43 +0100 Subject: [PATCH 0586/1894] genirq: Make kstat_irqs() static No more users outside the core code. Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201210194043.268774449@linutronix.de --- include/linux/kernel_stat.h | 1 - kernel/irq/irqdesc.c | 19 ++++++------------- 2 files changed, 6 insertions(+), 14 deletions(-) diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 89f0745c096d4..44ae1a7eb9e39 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -67,7 +67,6 @@ static inline unsigned int kstat_softirqs_cpu(unsigned int irq, int cpu) /* * Number of interrupts per specific IRQ source, since bootup */ -extern unsigned int kstat_irqs(unsigned int irq); extern unsigned int kstat_irqs_usr(unsigned int irq); /* diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 02b446a21ce64..2eb076f4a5660 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -924,15 +924,7 @@ static bool irq_is_nmi(struct irq_desc *desc) return desc->istate & IRQS_NMI; } -/** - * kstat_irqs - Get the statistics for an interrupt - * @irq: The interrupt number - * - * Returns the sum of interrupt counts on all cpus since boot for - * @irq. The caller must ensure that the interrupt is not removed - * concurrently. - */ -unsigned int kstat_irqs(unsigned int irq) +static unsigned int kstat_irqs(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); unsigned int sum = 0; @@ -951,13 +943,14 @@ unsigned int kstat_irqs(unsigned int irq) } /** - * kstat_irqs_usr - Get the statistics for an interrupt + * kstat_irqs_usr - Get the statistics for an interrupt from thread context * @irq: The interrupt number * * Returns the sum of interrupt counts on all cpus since boot for @irq. - * Contrary to kstat_irqs() this can be called from any context. - * It uses rcu since a concurrent removal of an interrupt descriptor is - * observing an rcu grace period before delayed_free_desc()/irq_kobj_release(). + * + * It uses rcu to protect the access since a concurrent removal of an + * interrupt descriptor is observing an rcu grace period before + * delayed_free_desc()/irq_kobj_release(). */ unsigned int kstat_irqs_usr(unsigned int irq) { -- GitLab From 501e2db67fa4264b517de5c7934e94cca89b3a1e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2020 20:25:44 +0100 Subject: [PATCH 0587/1894] genirq: Provide kstat_irqdesc_cpu() Most users of kstat_irqs_cpu() have the irq descriptor already. No point in calling into the core code and looking it up once more. Use it in per_cpu_count_show() to start with. Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201210194043.362094758@linutronix.de --- include/linux/irqdesc.h | 6 ++++++ kernel/irq/irqdesc.c | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 4a1d016716f4e..891b323266dfc 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -113,6 +113,12 @@ static inline void irq_unlock_sparse(void) { } extern struct irq_desc irq_desc[NR_IRQS]; #endif +static inline unsigned int irq_desc_kstat_cpu(struct irq_desc *desc, + unsigned int cpu) +{ + return desc->kstat_irqs ? *per_cpu_ptr(desc->kstat_irqs, cpu) : 0; +} + static inline struct irq_desc *irq_data_to_desc(struct irq_data *data) { return container_of(data->common, struct irq_desc, irq_common_data); diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 2eb076f4a5660..f509c4db20293 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -147,12 +147,12 @@ static ssize_t per_cpu_count_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj); - int cpu, irq = desc->irq_data.irq; ssize_t ret = 0; char *p = ""; + int cpu; for_each_possible_cpu(cpu) { - unsigned int c = kstat_irqs_cpu(irq, cpu); + unsigned int c = irq_desc_kstat_cpu(desc, cpu); ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s%u", p, c); p = ","; -- GitLab From 88c637748e3176dcfaa36185e5eaafe6098d43e0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2020 20:25:45 +0100 Subject: [PATCH 0588/1894] ARM: smp: Use irq_desc_kstat_cpu() in show_ipi_list() The irq descriptor is already there, no need to look it up again. Signed-off-by: Thomas Gleixner Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20201210194043.454288890@linutronix.de --- arch/arm/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 48099c6e1e4a6..e66c46aba5b44 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -550,7 +550,7 @@ void show_ipi_list(struct seq_file *p, int prec) seq_printf(p, "%*s%u: ", prec - 1, "IPI", i); for_each_online_cpu(cpu) - seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu)); + seq_printf(p, "%10u ", irq_desc_kstat_cpu(ipi_desc[i], cpu)); seq_printf(p, " %s\n", ipi_types[i]); } -- GitLab From 5089bc51f81f05ad7f0e46db2107be2311343852 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2020 20:25:46 +0100 Subject: [PATCH 0589/1894] arm64/smp: Use irq_desc_kstat_cpu() in arch_show_interrupts() The irq descriptor is already there, no need to look it up again. Signed-off-by: Thomas Gleixner Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20201210194043.546326568@linutronix.de --- arch/arm64/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 18e9727d3f645..b2e5dc11abedb 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -810,7 +810,7 @@ int arch_show_interrupts(struct seq_file *p, int prec) seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i, prec >= 4 ? " " : ""); for_each_online_cpu(cpu) - seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu)); + seq_printf(p, "%10u ", irq_desc_kstat_cpu(ipi_desc[i], cpu)); seq_printf(p, " %s\n", ipi_types[i]); } -- GitLab From 7435248e6d66e4e853da093c939c28a9f4b92765 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2020 20:25:47 +0100 Subject: [PATCH 0590/1894] parisc/irq: Use irq_desc_kstat_cpu() in show_interrupts() The irq descriptor is already there, no need to look it up again. Signed-off-by: Thomas Gleixner Cc: linux-parisc@vger.kernel.org Link: https://lore.kernel.org/r/20201210194043.659522455@linutronix.de --- arch/parisc/kernel/irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c index 15c6207cf3d4e..49cd6d2caefb7 100644 --- a/arch/parisc/kernel/irq.c +++ b/arch/parisc/kernel/irq.c @@ -218,7 +218,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_printf(p, "%3d: ", i); for_each_online_cpu(j) - seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); + seq_printf(p, "%10u ", irq_desc_kstat_cpu(desc, j)); seq_printf(p, " %14s", irq_desc_get_chip(desc)->name); #ifndef PARISC_IRQ_CR16_COUNTS -- GitLab From ba22d0ede31779485f0d86d7dcf51387ba810a17 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2020 20:25:48 +0100 Subject: [PATCH 0591/1894] s390/irq: Use irq_desc_kstat_cpu() in show_msi_interrupt() The irq descriptor is already there, no need to look it up again. Signed-off-by: Thomas Gleixner Acked-by: Heiko Carstens Link: https://lore.kernel.org/r/20201210194043.769108348@linutronix.de --- arch/s390/kernel/irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 3514420f02599..f8a8b9428ae27 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -124,7 +124,7 @@ static void show_msi_interrupt(struct seq_file *p, int irq) raw_spin_lock_irqsave(&desc->lock, flags); seq_printf(p, "%3d: ", irq); for_each_online_cpu(cpu) - seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu)); + seq_printf(p, "%10u ", irq_desc_kstat_cpu(desc, cpu)); if (desc->irq_data.chip) seq_printf(p, " %8s", desc->irq_data.chip->name); -- GitLab From 3afba095158269c281c49518f49da5a702878919 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2020 20:25:49 +0100 Subject: [PATCH 0592/1894] drm/i915/lpe_audio: Remove pointless irq_to_desc() usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Nothing uses the result and nothing should ever use it in driver code. Signed-off-by: Thomas Gleixner Reviewed-by: Ville Syrjälä Acked-by: Jani Nikula Link: https://lore.kernel.org/r/20201210194043.862572239@linutronix.de --- drivers/gpu/drm/i915/display/intel_lpe_audio.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_lpe_audio.c b/drivers/gpu/drm/i915/display/intel_lpe_audio.c index ad5cc13037ae1..1c939f9c9bc9e 100644 --- a/drivers/gpu/drm/i915/display/intel_lpe_audio.c +++ b/drivers/gpu/drm/i915/display/intel_lpe_audio.c @@ -297,13 +297,9 @@ int intel_lpe_audio_init(struct drm_i915_private *dev_priv) */ void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv) { - struct irq_desc *desc; - if (!HAS_LPE_AUDIO(dev_priv)) return; - desc = irq_to_desc(dev_priv->lpe_audio.irq); - lpe_audio_platdev_destroy(dev_priv); irq_free_desc(dev_priv->lpe_audio.irq); -- GitLab From 9c6508b9d2091d14a8fde5d478e19e053bf46552 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2020 20:25:50 +0100 Subject: [PATCH 0593/1894] drm/i915/pmu: Replace open coded kstat_irqs() copy Driver code has no business with the internals of the irq descriptor. Aside of that the count is per interrupt line and therefore takes interrupts from other devices into account which share the interrupt line and are not handled by the graphics driver. Replace it with a pmu private count which only counts interrupts which originate from the graphics card. To avoid atomics or heuristics of some sort make the counter field 'unsigned long'. That limits the count to 4e9 on 32bit which is a lot and postprocessing can easily deal with the occasional wraparound. Signed-off-by: Thomas Gleixner Acked-by: Jani Nikula Cc: Tvrtko Ursulin Link: https://lore.kernel.org/r/20201210194043.957046529@linutronix.de --- drivers/gpu/drm/i915/i915_irq.c | 34 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_pmu.c | 19 +----------------- drivers/gpu/drm/i915/i915_pmu.h | 8 ++++++++ 3 files changed, 43 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 759f523c6a6bf..e741cd7f7fc66 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -60,6 +60,24 @@ * and related files, but that will be described in separate chapters. */ +/* + * Interrupt statistic for PMU. Increments the counter only if the + * interrupt originated from the the GPU so interrupts from a device which + * shares the interrupt line are not accounted. + */ +static inline void pmu_irq_stats(struct drm_i915_private *i915, + irqreturn_t res) +{ + if (unlikely(res != IRQ_HANDLED)) + return; + + /* + * A clever compiler translates that into INC. A not so clever one + * should at least prevent store tearing. + */ + WRITE_ONCE(i915->pmu.irq_count, i915->pmu.irq_count + 1); +} + typedef bool (*long_pulse_detect_func)(enum hpd_pin pin, u32 val); static const u32 hpd_ilk[HPD_NUM_PINS] = { @@ -1599,6 +1617,8 @@ static irqreturn_t valleyview_irq_handler(int irq, void *arg) valleyview_pipestat_irq_handler(dev_priv, pipe_stats); } while (0); + pmu_irq_stats(dev_priv, ret); + enable_rpm_wakeref_asserts(&dev_priv->runtime_pm); return ret; @@ -1676,6 +1696,8 @@ static irqreturn_t cherryview_irq_handler(int irq, void *arg) valleyview_pipestat_irq_handler(dev_priv, pipe_stats); } while (0); + pmu_irq_stats(dev_priv, ret); + enable_rpm_wakeref_asserts(&dev_priv->runtime_pm); return ret; @@ -2103,6 +2125,8 @@ static irqreturn_t ilk_irq_handler(int irq, void *arg) if (sde_ier) raw_reg_write(regs, SDEIER, sde_ier); + pmu_irq_stats(i915, ret); + /* IRQs are synced during runtime_suspend, we don't require a wakeref */ enable_rpm_wakeref_asserts(&i915->runtime_pm); @@ -2419,6 +2443,8 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg) gen8_master_intr_enable(regs); + pmu_irq_stats(dev_priv, IRQ_HANDLED); + return IRQ_HANDLED; } @@ -2514,6 +2540,8 @@ __gen11_irq_handler(struct drm_i915_private * const i915, gen11_gu_misc_irq_handler(gt, gu_misc_iir); + pmu_irq_stats(i915, IRQ_HANDLED); + return IRQ_HANDLED; } @@ -3688,6 +3716,8 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg) i8xx_pipestat_irq_handler(dev_priv, iir, pipe_stats); } while (0); + pmu_irq_stats(dev_priv, ret); + enable_rpm_wakeref_asserts(&dev_priv->runtime_pm); return ret; @@ -3796,6 +3826,8 @@ static irqreturn_t i915_irq_handler(int irq, void *arg) i915_pipestat_irq_handler(dev_priv, iir, pipe_stats); } while (0); + pmu_irq_stats(dev_priv, ret); + enable_rpm_wakeref_asserts(&dev_priv->runtime_pm); return ret; @@ -3941,6 +3973,8 @@ static irqreturn_t i965_irq_handler(int irq, void *arg) i965_pipestat_irq_handler(dev_priv, iir, pipe_stats); } while (0); + pmu_irq_stats(dev_priv, IRQ_HANDLED); + enable_rpm_wakeref_asserts(&dev_priv->runtime_pm); return ret; diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 69c0fa20eba17..3b88cb01b4da7 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -4,7 +4,6 @@ * Copyright © 2017-2018 Intel Corporation */ -#include #include #include "gt/intel_engine.h" @@ -423,22 +422,6 @@ static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer) return HRTIMER_RESTART; } -static u64 count_interrupts(struct drm_i915_private *i915) -{ - /* open-coded kstat_irqs() */ - struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq); - u64 sum = 0; - int cpu; - - if (!desc || !desc->kstat_irqs) - return 0; - - for_each_possible_cpu(cpu) - sum += *per_cpu_ptr(desc->kstat_irqs, cpu); - - return sum; -} - static void i915_pmu_event_destroy(struct perf_event *event) { struct drm_i915_private *i915 = @@ -581,7 +564,7 @@ static u64 __i915_pmu_event_read(struct perf_event *event) USEC_PER_SEC /* to MHz */); break; case I915_PMU_INTERRUPTS: - val = count_interrupts(i915); + val = READ_ONCE(pmu->irq_count); break; case I915_PMU_RC6_RESIDENCY: val = get_rc6(&i915->gt); diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h index 941f0c14037ca..9e49c64907806 100644 --- a/drivers/gpu/drm/i915/i915_pmu.h +++ b/drivers/gpu/drm/i915/i915_pmu.h @@ -107,6 +107,14 @@ struct i915_pmu { * @sleep_last: Last time GT parked for RC6 estimation. */ ktime_t sleep_last; + /** + * @irq_count: Number of interrupts + * + * Intentionally unsigned long to avoid atomics or heuristics on 32bit. + * 4e9 interrupts are a lot and postprocessing can really deal with an + * occasional wraparound easily. It's 32bit after all. + */ + unsigned long irq_count; /** * @events_attr_group: Device events attribute group. */ -- GitLab From f3925032d7fd4aa627ff10e780430269b3829f83 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2020 20:25:51 +0100 Subject: [PATCH 0594/1894] pinctrl: nomadik: Use irq_has_action() Let the core code do the fiddling with irq_desc. Signed-off-by: Thomas Gleixner Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20201210194044.065003856@linutronix.de --- drivers/pinctrl/nomadik/pinctrl-nomadik.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/pinctrl/nomadik/pinctrl-nomadik.c b/drivers/pinctrl/nomadik/pinctrl-nomadik.c index 657e35a75d84a..d4ea10803fd90 100644 --- a/drivers/pinctrl/nomadik/pinctrl-nomadik.c +++ b/drivers/pinctrl/nomadik/pinctrl-nomadik.c @@ -948,8 +948,8 @@ static void nmk_gpio_dbg_show_one(struct seq_file *s, (mode < 0) ? "unknown" : modes[mode]); } else { int irq = chip->to_irq(chip, offset); - struct irq_desc *desc = irq_to_desc(irq); const int pullidx = pull ? 1 : 0; + bool wake; int val; static const char * const pulls[] = { "none ", @@ -969,8 +969,9 @@ static void nmk_gpio_dbg_show_one(struct seq_file *s, * This races with request_irq(), set_irq_type(), * and set_irq_wake() ... but those are "rare". */ - if (irq > 0 && desc && desc->action) { + if (irq > 0 && irq_has_action(irq)) { char *trigger; + bool wake; if (nmk_chip->edge_rising & BIT(offset)) trigger = "edge-rising"; @@ -979,10 +980,10 @@ static void nmk_gpio_dbg_show_one(struct seq_file *s, else trigger = "edge-undefined"; + wake = !!(nmk_chip->real_wake & BIT(offset)); + seq_printf(s, " irq-%d %s%s", - irq, trigger, - irqd_is_wakeup_set(&desc->irq_data) - ? " wakeup" : ""); + irq, trigger, wake ? " wakeup" : ""); } } clk_disable(nmk_chip->clk); -- GitLab From 886c8121659dddb6dbfab4cdeb58d75e2d928731 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2020 20:25:52 +0100 Subject: [PATCH 0595/1894] mfd: ab8500-debugfs: Remove the racy fiddling with irq_desc First of all drivers have absolutely no business to dig into the internals of an irq descriptor. That's core code and subject to change. All of this information is readily available to /proc/interrupts in a safe and race free way. Remove the inspection code which is a blatant violation of subsystem boundaries and racy against concurrent modifications of the interrupt descriptor. Print the irq line instead so the information can be looked up in a sane way in /proc/interrupts. Signed-off-by: Thomas Gleixner Reviewed-by: Linus Walleij Acked-by: Lee Jones Link: https://lore.kernel.org/r/20201210194044.157283633@linutronix.de --- drivers/mfd/ab8500-debugfs.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/drivers/mfd/ab8500-debugfs.c b/drivers/mfd/ab8500-debugfs.c index 6d1bf7c3ca3b1..a32039366a93a 100644 --- a/drivers/mfd/ab8500-debugfs.c +++ b/drivers/mfd/ab8500-debugfs.c @@ -1513,24 +1513,14 @@ static int ab8500_interrupts_show(struct seq_file *s, void *p) { int line; - seq_puts(s, "name: number: number of: wake:\n"); + seq_puts(s, "name: number: irq: number of: wake:\n"); for (line = 0; line < num_interrupt_lines; line++) { - struct irq_desc *desc = irq_to_desc(line + irq_first); - - seq_printf(s, "%3i: %6i %4i", + seq_printf(s, "%3i: %4i %6i %4i\n", line, + line + irq_first, num_interrupts[line], num_wake_interrupts[line]); - - if (desc && desc->name) - seq_printf(s, "-%-8s", desc->name); - if (desc && desc->action) { - struct irqaction *action = desc->action; - - seq_printf(s, " %s", action->name); - while ((action = action->next) != NULL) - seq_printf(s, ", %s", action->name); } seq_putc(s, '\n'); } -- GitLab From 1110918e439fde69fdf2fe869f6499d56157fec9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2020 20:25:53 +0100 Subject: [PATCH 0596/1894] NTB/msi: Use irq_has_action() Use the proper core function. Signed-off-by: Thomas Gleixner Reviewed-by: Logan Gunthorpe Link: https://lore.kernel.org/r/20201210194044.255887860@linutronix.de --- drivers/ntb/msi.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/ntb/msi.c b/drivers/ntb/msi.c index 0a5e884a920c0..3f05cfbc73afb 100644 --- a/drivers/ntb/msi.c +++ b/drivers/ntb/msi.c @@ -282,15 +282,13 @@ int ntbm_msi_request_threaded_irq(struct ntb_dev *ntb, irq_handler_t handler, struct ntb_msi_desc *msi_desc) { struct msi_desc *entry; - struct irq_desc *desc; int ret; if (!ntb->msi) return -EINVAL; for_each_pci_msi_entry(entry, ntb->pdev) { - desc = irq_to_desc(entry->irq); - if (desc->action) + if (irq_has_action(entry->irq)) continue; ret = devm_request_threaded_irq(&ntb->dev, entry->irq, handler, -- GitLab From e56427068a8d796bb7b8e297f2b6e947380e383f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2020 20:25:54 +0100 Subject: [PATCH 0597/1894] PCI: xilinx-nwl: Use irq_data_get_irq_chip_data() Going through a full irq descriptor lookup instead of just using the proper helper function which provides direct access is suboptimal. In fact it _is_ wrong because the chip callback needs to get the chip data which is relevant for the chip while using the irq descriptor variant returns the irq chip data of the top level chip of a hierarchy. It does not matter in this case because the chip is the top level chip, but that doesn't make it more correct. Signed-off-by: Thomas Gleixner Reviewed-by: Rob Herring Cc: Bjorn Helgaas Link: https://lore.kernel.org/r/20201210194044.364211860@linutronix.de --- drivers/pci/controller/pcie-xilinx-nwl.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/pci/controller/pcie-xilinx-nwl.c b/drivers/pci/controller/pcie-xilinx-nwl.c index f3cf7d61924f1..22135df79d83b 100644 --- a/drivers/pci/controller/pcie-xilinx-nwl.c +++ b/drivers/pci/controller/pcie-xilinx-nwl.c @@ -379,13 +379,11 @@ static void nwl_pcie_msi_handler_low(struct irq_desc *desc) static void nwl_mask_leg_irq(struct irq_data *data) { - struct irq_desc *desc = irq_to_desc(data->irq); - struct nwl_pcie *pcie; + struct nwl_pcie *pcie = irq_data_get_irq_chip_data(data); unsigned long flags; u32 mask; u32 val; - pcie = irq_desc_get_chip_data(desc); mask = 1 << (data->hwirq - 1); raw_spin_lock_irqsave(&pcie->leg_mask_lock, flags); val = nwl_bridge_readl(pcie, MSGF_LEG_MASK); @@ -395,13 +393,11 @@ static void nwl_mask_leg_irq(struct irq_data *data) static void nwl_unmask_leg_irq(struct irq_data *data) { - struct irq_desc *desc = irq_to_desc(data->irq); - struct nwl_pcie *pcie; + struct nwl_pcie *pcie = irq_data_get_irq_chip_data(data); unsigned long flags; u32 mask; u32 val; - pcie = irq_desc_get_chip_data(desc); mask = 1 << (data->hwirq - 1); raw_spin_lock_irqsave(&pcie->leg_mask_lock, flags); val = nwl_bridge_readl(pcie, MSGF_LEG_MASK); -- GitLab From b8fecfdfb08dcbabf3d46cfaf7c2fed0e6802ce8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2020 20:25:55 +0100 Subject: [PATCH 0598/1894] PCI: mobiveil: Use irq_data_get_irq_chip_data() Going through a full irq descriptor lookup instead of just using the proper helper function which provides direct access is suboptimal. In fact it _is_ wrong because the chip callback needs to get the chip data which is relevant for the chip while using the irq descriptor variant returns the irq chip data of the top level chip of a hierarchy. It does not matter in this case because the chip is the top level chip, but that doesn't make it more correct. Signed-off-by: Thomas Gleixner Reviewed-by: Rob Herring Cc: Bjorn Helgaas Link: https://lore.kernel.org/r/20201210194044.473308721@linutronix.de --- drivers/pci/controller/mobiveil/pcie-mobiveil-host.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/pci/controller/mobiveil/pcie-mobiveil-host.c b/drivers/pci/controller/mobiveil/pcie-mobiveil-host.c index a2632d02ce8f8..c637de3a389bc 100644 --- a/drivers/pci/controller/mobiveil/pcie-mobiveil-host.c +++ b/drivers/pci/controller/mobiveil/pcie-mobiveil-host.c @@ -306,13 +306,11 @@ int mobiveil_host_init(struct mobiveil_pcie *pcie, bool reinit) static void mobiveil_mask_intx_irq(struct irq_data *data) { - struct irq_desc *desc = irq_to_desc(data->irq); - struct mobiveil_pcie *pcie; + struct mobiveil_pcie *pcie = irq_data_get_irq_chip_data(data); struct mobiveil_root_port *rp; unsigned long flags; u32 mask, shifted_val; - pcie = irq_desc_get_chip_data(desc); rp = &pcie->rp; mask = 1 << ((data->hwirq + PAB_INTX_START) - 1); raw_spin_lock_irqsave(&rp->intx_mask_lock, flags); @@ -324,13 +322,11 @@ static void mobiveil_mask_intx_irq(struct irq_data *data) static void mobiveil_unmask_intx_irq(struct irq_data *data) { - struct irq_desc *desc = irq_to_desc(data->irq); - struct mobiveil_pcie *pcie; + struct mobiveil_pcie *pcie = irq_data_get_irq_chip_data(data); struct mobiveil_root_port *rp; unsigned long flags; u32 shifted_val, mask; - pcie = irq_desc_get_chip_data(desc); rp = &pcie->rp; mask = 1 << ((data->hwirq + PAB_INTX_START) - 1); raw_spin_lock_irqsave(&rp->intx_mask_lock, flags); -- GitLab From 80a62deedf9d449cb65655df39d34b7ef9321d79 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2020 20:25:56 +0100 Subject: [PATCH 0599/1894] net/mlx4: Replace irq_to_desc() abuse No driver has any business with the internals of an interrupt descriptor. Storing a pointer to it just to use yet another helper at the actual usage site to retrieve the affinity mask is creative at best. Just because C does not allow encapsulation does not mean that the kernel has no limits. Retrieve a pointer to the affinity mask itself and use that. It's still using an interface which is usually not for random drivers, but definitely less hideous than the previous hack. Signed-off-by: Thomas Gleixner Cc: Tariq Toukan Link: https://lore.kernel.org/r/20201210194044.580936243@linutronix.de --- drivers/net/ethernet/mellanox/mlx4/en_cq.c | 8 +++----- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 6 +----- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 3 ++- 3 files changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index 74d466796b7c7..2a250f32d5cba 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -90,7 +90,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, int cq_idx) { struct mlx4_en_dev *mdev = priv->mdev; - int err = 0; + int irq, err = 0; int timestamp_en = 0; bool assigned_eq = false; @@ -116,10 +116,8 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, assigned_eq = true; } - - cq->irq_desc = - irq_to_desc(mlx4_eq_get_irq(mdev->dev, - cq->vector)); + irq = mlx4_eq_get_irq(mdev->dev, cq->vector); + cq->aff_mask = irq_get_affinity_mask(irq); } else { /* For TX we use the same irq per ring we assigned for the RX */ diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 502d1b97855ca..399459adee7dc 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -959,8 +959,6 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget) /* If we used up all the quota - we're probably not done yet... */ if (done == budget || !clean_complete) { - const struct cpumask *aff; - struct irq_data *idata; int cpu_curr; /* in case we got here because of !clean_complete */ @@ -969,10 +967,8 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget) INC_PERF_COUNTER(priv->pstats.napi_quota); cpu_curr = smp_processor_id(); - idata = irq_desc_get_irq_data(cq->irq_desc); - aff = irq_data_get_affinity_mask(idata); - if (likely(cpumask_test_cpu(cpu_curr, aff))) + if (likely(cpumask_test_cpu(cpu_curr, cq->aff_mask))) return budget; /* Current cpu is not according to smp_irq_affinity - diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index a46efe37cfa90..48d71e0c71e4a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -46,6 +46,7 @@ #endif #include #include +#include #include #include @@ -380,7 +381,7 @@ struct mlx4_en_cq { struct mlx4_cqe *buf; #define MLX4_EN_OPCODE_ERROR 0x1e - struct irq_desc *irq_desc; + const struct cpumask *aff_mask; }; struct mlx4_en_port_profile { -- GitLab From 197d237077295793a3e4ea0abcbea106f8b4217c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2020 20:25:57 +0100 Subject: [PATCH 0600/1894] net/mlx4: Use effective interrupt affinity Using the interrupt affinity mask for checking locality is not really working well on architectures which support effective affinity masks. The affinity mask is either the system wide default or set by user space, but the architecture can or even must reduce the mask to the effective set, which means that checking the affinity mask itself does not really tell about the actual target CPUs. Signed-off-by: Thomas Gleixner Cc: Tariq Toukan Link: https://lore.kernel.org/r/20201210194044.672935978@linutronix.de --- drivers/net/ethernet/mellanox/mlx4/en_cq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index 2a250f32d5cba..d5fc72b1a36fb 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -117,7 +117,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, assigned_eq = true; } irq = mlx4_eq_get_irq(mdev->dev, cq->vector); - cq->aff_mask = irq_get_affinity_mask(irq); + cq->aff_mask = irq_get_effective_affinity_mask(irq); } else { /* For TX we use the same irq per ring we assigned for the RX */ -- GitLab From 6e745db4ddd072c7f67b37d850bc5aaedcf35400 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2020 20:25:58 +0100 Subject: [PATCH 0601/1894] net/mlx5: Replace irq_to_desc() abuse No driver has any business with the internals of an interrupt descriptor. Storing a pointer to it just to use yet another helper at the actual usage site to retrieve the affinity mask is creative at best. Just because C does not allow encapsulation does not mean that the kernel has no limits. Retrieve a pointer to the affinity mask itself and use that. It's still using an interface which is usually not for random drivers, but definitely less hideous than the previous hack. Signed-off-by: Thomas Gleixner Cc: Saeed Mahameed Link: https://lore.kernel.org/r/20201210194044.769458162@linutronix.de --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c | 6 +----- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 2f05b0f9de019..45fd585d101b5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -669,7 +669,7 @@ struct mlx5e_channel { spinlock_t async_icosq_lock; /* data path - accessed per napi poll */ - struct irq_desc *irq_desc; + const struct cpumask *aff_mask; struct mlx5e_ch_stats *stats; /* control */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index ebce97921e03c..4345bc41b2be5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1998,7 +1998,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, c->num_tc = params->num_tc; c->xdp = !!params->xdp_prog; c->stats = &priv->channel_stats[ix].ch; - c->irq_desc = irq_to_desc(irq); + c->aff_mask = irq_get_affinity_mask(irq); c->lag_port = mlx5e_enumerate_lag_port(priv->mdev, ix); netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index d5868670f8a58..793e313dcb8b0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -40,12 +40,8 @@ static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c) { int current_cpu = smp_processor_id(); - const struct cpumask *aff; - struct irq_data *idata; - idata = irq_desc_get_irq_data(c->irq_desc); - aff = irq_data_get_affinity_mask(idata); - return cpumask_test_cpu(current_cpu, aff); + return cpumask_test_cpu(current_cpu, c->aff_mask); } static void mlx5e_handle_tx_dim(struct mlx5e_txqsq *sq) -- GitLab From ec7b37b6f08fac3eb9a733efa3d8eae5c3fb0383 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2020 20:25:59 +0100 Subject: [PATCH 0602/1894] net/mlx5: Use effective interrupt affinity Using the interrupt affinity mask for checking locality is not really working well on architectures which support effective affinity masks. The affinity mask is either the system wide default or set by user space, but the architecture can or even must reduce the mask to the effective set, which means that checking the affinity mask itself does not really tell about the actual target CPUs. Signed-off-by: Thomas Gleixner Cc: Saeed Mahameed Link: https://lore.kernel.org/r/20201210194044.876342330@linutronix.de --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 4345bc41b2be5..9bcf73f417d0a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1998,7 +1998,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, c->num_tc = params->num_tc; c->xdp = !!params->xdp_prog; c->stats = &priv->channel_stats[ix].ch; - c->aff_mask = irq_get_affinity_mask(irq); + c->aff_mask = irq_get_effective_affinity_mask(irq); c->lag_port = mlx5e_enumerate_lag_port(priv->mdev, ix); netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64); -- GitLab From 3bd5371a4da68613fb3d4aaf961ed8244bcbd741 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2020 20:26:00 +0100 Subject: [PATCH 0603/1894] xen/events: Remove unused bind_evtchn_to_irq_lateeoi() Signed-off-by: Thomas Gleixner Cc: Boris Ostrovsky Cc: Juergen Gross Cc: Stefano Stabellini Link: https://lore.kernel.org/r/20201210194044.972064156@linutronix.de --- drivers/xen/events/events_base.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 6038c4c35db5a..d6458e439c782 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -1132,12 +1132,6 @@ int bind_evtchn_to_irq(evtchn_port_t evtchn) } EXPORT_SYMBOL_GPL(bind_evtchn_to_irq); -int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn) -{ - return bind_evtchn_to_irq_chip(evtchn, &xen_lateeoi_chip); -} -EXPORT_SYMBOL_GPL(bind_evtchn_to_irq_lateeoi); - static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) { struct evtchn_bind_ipi bind_ipi; -- GitLab From 67473b8194bc3ecc42d60a4f5dc1ed479f28ed6e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2020 20:26:01 +0100 Subject: [PATCH 0604/1894] xen/events: Remove disfunct affinity spreading This function can only ever work when the event channels: - are already established - interrupts assigned to them - the affinity has been set by user space already because any newly set up event channel is forced to be bound to CPU0 and the affinity mask of the interrupt is forced to contain cpumask_of(0). As the CPU0 enforcement was in place _before_ this was implemented it's entirely unclear how that can ever have worked at all. Remove it as preparation for doing it proper. Signed-off-by: Thomas Gleixner Cc: Boris Ostrovsky Cc: Juergen Gross Cc: Stefano Stabellini Link: https://lore.kernel.org/r/20201210194045.065115500@linutronix.de --- drivers/xen/events/events_base.c | 9 --------- drivers/xen/evtchn.c | 34 +------------------------------- 2 files changed, 1 insertion(+), 42 deletions(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index d6458e439c782..9cade1994785b 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -1696,15 +1696,6 @@ static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest, return ret; } -/* To be called with desc->lock held. */ -int xen_set_affinity_evtchn(struct irq_desc *desc, unsigned int tcpu) -{ - struct irq_data *d = irq_desc_get_irq_data(desc); - - return set_affinity_irq(d, cpumask_of(tcpu), false); -} -EXPORT_SYMBOL_GPL(xen_set_affinity_evtchn); - static void enable_dynirq(struct irq_data *data) { evtchn_port_t evtchn = evtchn_from_irq(data->irq); diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index 5dc016d68f833..a7a85719a8c8a 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -421,36 +421,6 @@ static void evtchn_unbind_from_user(struct per_user_data *u, del_evtchn(u, evtchn); } -static DEFINE_PER_CPU(int, bind_last_selected_cpu); - -static void evtchn_bind_interdom_next_vcpu(evtchn_port_t evtchn) -{ - unsigned int selected_cpu, irq; - struct irq_desc *desc; - unsigned long flags; - - irq = irq_from_evtchn(evtchn); - desc = irq_to_desc(irq); - - if (!desc) - return; - - raw_spin_lock_irqsave(&desc->lock, flags); - selected_cpu = this_cpu_read(bind_last_selected_cpu); - selected_cpu = cpumask_next_and(selected_cpu, - desc->irq_common_data.affinity, cpu_online_mask); - - if (unlikely(selected_cpu >= nr_cpu_ids)) - selected_cpu = cpumask_first_and(desc->irq_common_data.affinity, - cpu_online_mask); - - this_cpu_write(bind_last_selected_cpu, selected_cpu); - - /* unmask expects irqs to be disabled */ - xen_set_affinity_evtchn(desc, selected_cpu); - raw_spin_unlock_irqrestore(&desc->lock, flags); -} - static long evtchn_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -508,10 +478,8 @@ static long evtchn_ioctl(struct file *file, break; rc = evtchn_bind_to_user(u, bind_interdomain.local_port); - if (rc == 0) { + if (rc == 0) rc = bind_interdomain.local_port; - evtchn_bind_interdom_next_vcpu(rc); - } break; } -- GitLab From 1ca1b4e2c0cbc88ce3939910ac36dca51d326fe4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2020 20:26:02 +0100 Subject: [PATCH 0605/1894] xen/events: Use immediate affinity setting There is absolutely no reason to mimic the x86 deferred affinity setting. This mechanism is required to handle the hardware induced issues of IO/APIC and MSI and is not in use when the interrupts are remapped. XEN does not need this and can simply change the affinity from the calling context. The core code invokes this with the interrupt descriptor lock held so it is fully serialized against any other operation. Mark the interrupts with IRQ_MOVE_PCNTXT to disable the deferred affinity setting. The conditional mask/unmask operation is already handled in xen_rebind_evtchn_to_cpu(). This makes XEN on x86 use the same mechanics as on e.g. ARM64 where deferred affinity setting is not required and not implemented and the code path in the ack functions is compiled out. Signed-off-by: Thomas Gleixner Cc: Boris Ostrovsky Cc: Juergen Gross Cc: Stefano Stabellini Link: https://lore.kernel.org/r/20201210194045.157601122@linutronix.de --- drivers/xen/events/events_base.c | 35 ++++++++------------------------ 1 file changed, 9 insertions(+), 26 deletions(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 9cade1994785b..eaba42a89c8ff 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -628,6 +628,11 @@ static void xen_irq_init(unsigned irq) info->refcnt = -1; set_info_for_irq(irq, info); + /* + * Interrupt affinity setting can be immediate. No point + * in delaying it until an interrupt is handled. + */ + irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); INIT_LIST_HEAD(&info->eoi_list); list_add_tail(&info->list, &xen_irq_list_head); @@ -739,18 +744,7 @@ static void eoi_pirq(struct irq_data *data) if (!VALID_EVTCHN(evtchn)) return; - if (unlikely(irqd_is_setaffinity_pending(data)) && - likely(!irqd_irq_disabled(data))) { - int masked = test_and_set_mask(evtchn); - - clear_evtchn(evtchn); - - irq_move_masked_irq(data); - - if (!masked) - unmask_evtchn(evtchn); - } else - clear_evtchn(evtchn); + clear_evtchn(evtchn); if (pirq_needs_eoi(data->irq)) { rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi); @@ -1641,7 +1635,6 @@ void rebind_evtchn_irq(evtchn_port_t evtchn, int irq) mutex_unlock(&irq_mapping_update_lock); bind_evtchn_to_cpu(evtchn, info->cpu); - /* This will be deferred until interrupt is processed */ irq_set_affinity(irq, cpumask_of(info->cpu)); /* Unmask the event channel. */ @@ -1688,8 +1681,9 @@ static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest, bool force) { unsigned tcpu = cpumask_first_and(dest, cpu_online_mask); - int ret = xen_rebind_evtchn_to_cpu(evtchn_from_irq(data->irq), tcpu); + int ret; + ret = xen_rebind_evtchn_to_cpu(evtchn_from_irq(data->irq), tcpu); if (!ret) irq_data_update_effective_affinity(data, cpumask_of(tcpu)); @@ -1719,18 +1713,7 @@ static void ack_dynirq(struct irq_data *data) if (!VALID_EVTCHN(evtchn)) return; - if (unlikely(irqd_is_setaffinity_pending(data)) && - likely(!irqd_irq_disabled(data))) { - int masked = test_and_set_mask(evtchn); - - clear_evtchn(evtchn); - - irq_move_masked_irq(data); - - if (!masked) - unmask_evtchn(evtchn); - } else - clear_evtchn(evtchn); + clear_evtchn(evtchn); } static void mask_ack_dynirq(struct irq_data *data) -- GitLab From f7a6f994b4f0ee69c656dda3da11431d92d6b08f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2020 20:26:03 +0100 Subject: [PATCH 0606/1894] xen/events: Only force affinity mask for percpu interrupts All event channel setups bind the interrupt on CPU0 or the target CPU for percpu interrupts and overwrite the affinity mask with the corresponding cpumask. That does not make sense. The XEN implementation of irqchip::irq_set_affinity() already picks a single target CPU out of the affinity mask and the actual target is stored in the effective CPU mask, so destroying the user chosen affinity mask which might contain more than one CPU is wrong. Change the implementation so that the channel is bound to CPU0 at the XEN level and leave the affinity mask alone. At startup of the interrupt affinity will be assigned out of the affinity mask and the XEN binding will be updated. Only keep the enforcement for real percpu interrupts. On resume the overwrite is not required either because info->cpu and the affinity mask are still the same as at the time of suspend. Same for rebind_evtchn_irq(). This also prepares for proper interrupt spreading. Signed-off-by: Thomas Gleixner Cc: Boris Ostrovsky Cc: Juergen Gross Cc: Stefano Stabellini Link: https://lore.kernel.org/r/20201210194045.250321315@linutronix.de --- drivers/xen/events/events_base.c | 48 ++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index eaba42a89c8ff..679b2cbd2de4c 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -433,15 +433,20 @@ static bool pirq_needs_eoi_flag(unsigned irq) return info->u.pirq.flags & PIRQ_NEEDS_EOI; } -static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu) +static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu, + bool force_affinity) { int irq = get_evtchn_to_irq(evtchn); struct irq_info *info = info_for_irq(irq); BUG_ON(irq == -1); -#ifdef CONFIG_SMP - cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(cpu)); -#endif + + if (IS_ENABLED(CONFIG_SMP) && force_affinity) { + cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(cpu)); + cpumask_copy(irq_get_effective_affinity_mask(irq), + cpumask_of(cpu)); + } + xen_evtchn_port_bind_to_cpu(evtchn, cpu, info->cpu); info->cpu = cpu; @@ -788,7 +793,7 @@ static unsigned int __startup_pirq(unsigned int irq) goto err; info->evtchn = evtchn; - bind_evtchn_to_cpu(evtchn, 0); + bind_evtchn_to_cpu(evtchn, 0, false); rc = xen_evtchn_port_setup(evtchn); if (rc) @@ -1107,8 +1112,14 @@ static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip) irq = ret; goto out; } - /* New interdomain events are bound to VCPU 0. */ - bind_evtchn_to_cpu(evtchn, 0); + /* + * New interdomain events are initially bound to vCPU0 This + * is required to setup the event channel in the first + * place and also important for UP guests because the + * affinity setting is not invoked on them so nothing would + * bind the channel. + */ + bind_evtchn_to_cpu(evtchn, 0, false); } else { struct irq_info *info = info_for_irq(irq); WARN_ON(info == NULL || info->type != IRQT_EVTCHN); @@ -1156,7 +1167,11 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) irq = ret; goto out; } - bind_evtchn_to_cpu(evtchn, cpu); + /* + * Force the affinity mask to the target CPU so proc shows + * the correct target. + */ + bind_evtchn_to_cpu(evtchn, cpu, true); } else { struct irq_info *info = info_for_irq(irq); WARN_ON(info == NULL || info->type != IRQT_IPI); @@ -1269,7 +1284,11 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu) goto out; } - bind_evtchn_to_cpu(evtchn, cpu); + /* + * Force the affinity mask for percpu interrupts so proc + * shows the correct target. + */ + bind_evtchn_to_cpu(evtchn, cpu, percpu); } else { struct irq_info *info = info_for_irq(irq); WARN_ON(info == NULL || info->type != IRQT_VIRQ); @@ -1634,8 +1653,7 @@ void rebind_evtchn_irq(evtchn_port_t evtchn, int irq) mutex_unlock(&irq_mapping_update_lock); - bind_evtchn_to_cpu(evtchn, info->cpu); - irq_set_affinity(irq, cpumask_of(info->cpu)); + bind_evtchn_to_cpu(evtchn, info->cpu, false); /* Unmask the event channel. */ enable_irq(irq); @@ -1669,7 +1687,7 @@ static int xen_rebind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int tcpu) * it, but don't do the xenlinux-level rebind in that case. */ if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0) - bind_evtchn_to_cpu(evtchn, tcpu); + bind_evtchn_to_cpu(evtchn, tcpu, false); if (!masked) unmask_evtchn(evtchn); @@ -1798,7 +1816,8 @@ static void restore_cpu_virqs(unsigned int cpu) /* Record the new mapping. */ (void)xen_irq_info_virq_setup(cpu, irq, evtchn, virq); - bind_evtchn_to_cpu(evtchn, cpu); + /* The affinity mask is still valid */ + bind_evtchn_to_cpu(evtchn, cpu, false); } } @@ -1823,7 +1842,8 @@ static void restore_cpu_ipis(unsigned int cpu) /* Record the new mapping. */ (void)xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi); - bind_evtchn_to_cpu(evtchn, cpu); + /* The affinity mask is still valid */ + bind_evtchn_to_cpu(evtchn, cpu, false); } } -- GitLab From 62ebcda8a8dfa4aeaa3288020a082787910afebc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2020 20:26:04 +0100 Subject: [PATCH 0607/1894] xen/events: Reduce irq_info:: Spurious_cnt storage size To prepare for interrupt spreading reduce the storage size of irq_info::spurious_cnt to u8 so the required flag for the spreading logic will not increase the storage size. Protect the usage site against overruns. Signed-off-by: Thomas Gleixner Cc: Boris Ostrovsky Cc: Juergen Gross Cc: Stefano Stabellini Link: https://lore.kernel.org/r/20201210194045.360198201@linutronix.de --- drivers/xen/events/events_base.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 679b2cbd2de4c..b352440963ee6 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -95,7 +95,7 @@ struct irq_info { struct list_head list; struct list_head eoi_list; short refcnt; - short spurious_cnt; + u8 spurious_cnt; enum xen_irq_type type; /* type */ unsigned irq; evtchn_port_t evtchn; /* event channel */ @@ -528,8 +528,10 @@ static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious) return; if (spurious) { - if ((1 << info->spurious_cnt) < (HZ << 2)) - info->spurious_cnt++; + if ((1 << info->spurious_cnt) < (HZ << 2)) { + if (info->spurious_cnt != 0xFF) + info->spurious_cnt++; + } if (info->spurious_cnt > 1) { delay = 1 << (info->spurious_cnt - 2); if (delay > HZ) -- GitLab From 88f0a9d066443118261adf7e049781476f09dac1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2020 20:26:05 +0100 Subject: [PATCH 0608/1894] xen/events: Implement irq distribution Keep track of the assignments of event channels to CPUs and select the online CPU with the least assigned channels in the affinity mask which is handed to irq_chip::irq_set_affinity() from the core code. Signed-off-by: Thomas Gleixner Cc: Boris Ostrovsky Cc: Juergen Gross Cc: Stefano Stabellini Link: https://lore.kernel.org/r/20201210194045.457218278@linutronix.de --- drivers/xen/events/events_base.c | 76 ++++++++++++++++++++++++++++---- 1 file changed, 68 insertions(+), 8 deletions(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index b352440963ee6..a8030332a1916 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -96,6 +96,7 @@ struct irq_info { struct list_head eoi_list; short refcnt; u8 spurious_cnt; + u8 is_accounted; enum xen_irq_type type; /* type */ unsigned irq; evtchn_port_t evtchn; /* event channel */ @@ -161,6 +162,9 @@ static DEFINE_PER_CPU(int [NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1}; /* IRQ <-> IPI mapping */ static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1}; +/* Event channel distribution data */ +static atomic_t channels_on_cpu[NR_CPUS]; + static int **evtchn_to_irq; #ifdef CONFIG_X86 static unsigned long *pirq_eoi_map; @@ -257,6 +261,32 @@ static void set_info_for_irq(unsigned int irq, struct irq_info *info) irq_set_chip_data(irq, info); } +/* Per CPU channel accounting */ +static void channels_on_cpu_dec(struct irq_info *info) +{ + if (!info->is_accounted) + return; + + info->is_accounted = 0; + + if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids)) + return; + + WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], -1 , 0)); +} + +static void channels_on_cpu_inc(struct irq_info *info) +{ + if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids)) + return; + + if (WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], 1, + INT_MAX))) + return; + + info->is_accounted = 1; +} + /* Constructors for packed IRQ information. */ static int xen_irq_info_common_setup(struct irq_info *info, unsigned irq, @@ -339,6 +369,7 @@ static void xen_irq_info_cleanup(struct irq_info *info) { set_evtchn_to_irq(info->evtchn, -1); info->evtchn = 0; + channels_on_cpu_dec(info); } /* @@ -449,7 +480,9 @@ static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu, xen_evtchn_port_bind_to_cpu(evtchn, cpu, info->cpu); + channels_on_cpu_dec(info); info->cpu = cpu; + channels_on_cpu_inc(info); } /** @@ -622,11 +655,6 @@ static void xen_irq_init(unsigned irq) { struct irq_info *info; -#ifdef CONFIG_SMP - /* By default all event channels notify CPU#0. */ - cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(0)); -#endif - info = kzalloc(sizeof(*info), GFP_KERNEL); if (info == NULL) panic("Unable to allocate metadata for IRQ%d\n", irq); @@ -1697,10 +1725,38 @@ static int xen_rebind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int tcpu) return 0; } +/* + * Find the CPU within @dest mask which has the least number of channels + * assigned. This is not precise as the per cpu counts can be modified + * concurrently. + */ +static unsigned int select_target_cpu(const struct cpumask *dest) +{ + unsigned int cpu, best_cpu = UINT_MAX, minch = UINT_MAX; + + for_each_cpu_and(cpu, dest, cpu_online_mask) { + unsigned int curch = atomic_read(&channels_on_cpu[cpu]); + + if (curch < minch) { + minch = curch; + best_cpu = cpu; + } + } + + /* + * Catch the unlikely case that dest contains no online CPUs. Can't + * recurse. + */ + if (best_cpu == UINT_MAX) + return select_target_cpu(cpu_online_mask); + + return best_cpu; +} + static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest, bool force) { - unsigned tcpu = cpumask_first_and(dest, cpu_online_mask); + unsigned int tcpu = select_target_cpu(dest); int ret; ret = xen_rebind_evtchn_to_cpu(evtchn_from_irq(data->irq), tcpu); @@ -1928,8 +1984,12 @@ void xen_irq_resume(void) xen_evtchn_resume(); /* No IRQ <-> event-channel mappings. */ - list_for_each_entry(info, &xen_irq_list_head, list) - info->evtchn = 0; /* zap event-channel binding */ + list_for_each_entry(info, &xen_irq_list_head, list) { + /* Zap event-channel binding */ + info->evtchn = 0; + /* Adjust accounting */ + channels_on_cpu_dec(info); + } clear_evtchn_to_irq_all(); -- GitLab From 64a1b95bb9fe3ec76e1a2cd803eff06389341ae4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2020 20:26:06 +0100 Subject: [PATCH 0609/1894] genirq: Restrict export of irq_to_desc() No more (ab)use in drivers finally. There is still the modular build of PPC/KVM which needs it, so restrict it to this case which still makes it unavailable for most drivers. Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201210194045.551428291@linutronix.de --- kernel/irq/irqdesc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index f509c4db20293..3d0bc38a0bcfc 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -352,7 +352,9 @@ struct irq_desc *irq_to_desc(unsigned int irq) { return radix_tree_lookup(&irq_desc_tree, irq); } -EXPORT_SYMBOL(irq_to_desc); +#ifdef CONFIG_KVM_BOOK3S_64_HV +EXPORT_SYMBOL_GPL(irq_to_desc); +#endif static void delete_irq_desc(unsigned int irq) { -- GitLab From 8640ca588b032166d6be6b4d3632d565d6d88e89 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Tue, 15 Dec 2020 12:44:07 -0500 Subject: [PATCH 0610/1894] KVM: SVM: Add AP_JUMP_TABLE support in prep for AP booting The GHCB specification requires the hypervisor to save the address of an AP Jump Table so that, for example, vCPUs that have been parked by UEFI can be started by the OS. Provide support for the AP Jump Table set/get exit code. Signed-off-by: Tom Lendacky Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 28 ++++++++++++++++++++++++++++ arch/x86/kvm/svm/svm.h | 1 + 2 files changed, 29 insertions(+) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 6eb097714d43f..8b5ef0fe44901 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -18,6 +18,8 @@ #include #include +#include + #include "x86.h" #include "svm.h" #include "cpuid.h" @@ -1559,6 +1561,7 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm) goto vmgexit_err; break; case SVM_VMGEXIT_NMI_COMPLETE: + case SVM_VMGEXIT_AP_JUMP_TABLE: case SVM_VMGEXIT_UNSUPPORTED_EVENT: break; default: @@ -1883,6 +1886,31 @@ int sev_handle_vmgexit(struct vcpu_svm *svm) case SVM_VMGEXIT_NMI_COMPLETE: ret = svm_invoke_exit_handler(svm, SVM_EXIT_IRET); break; + case SVM_VMGEXIT_AP_JUMP_TABLE: { + struct kvm_sev_info *sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info; + + switch (control->exit_info_1) { + case 0: + /* Set AP jump table address */ + sev->ap_jump_table = control->exit_info_2; + break; + case 1: + /* Get AP jump table address */ + ghcb_set_sw_exit_info_2(ghcb, sev->ap_jump_table); + break; + default: + pr_err("svm: vmgexit: unsupported AP jump table request - exit_info_1=%#llx\n", + control->exit_info_1); + ghcb_set_sw_exit_info_1(ghcb, 1); + ghcb_set_sw_exit_info_2(ghcb, + X86_TRAP_UD | + SVM_EVTINJ_TYPE_EXEPT | + SVM_EVTINJ_VALID); + } + + ret = 1; + break; + } case SVM_VMGEXIT_UNSUPPORTED_EVENT: vcpu_unimpl(&svm->vcpu, "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n", diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index a5067f776ce0c..5431e6335e2e8 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -78,6 +78,7 @@ struct kvm_sev_info { int fd; /* SEV device fd */ unsigned long pages_locked; /* Number of pages locked */ struct list_head regions_list; /* List of registered regions */ + u64 ap_jump_table; /* SEV-ES AP Jump Table address */ }; struct kvm_svm { -- GitLab From bca3e43c903f5c58daeab1fea0af566233ea003c Mon Sep 17 00:00:00 2001 From: Ionela Voinescu Date: Mon, 14 Dec 2020 12:07:40 +0000 Subject: [PATCH 0611/1894] ACPI: processor: fix NONE coordination for domain mapping failure For errors parsing the _PSD domains, a separate domain is returned for each CPU in the failed _PSD domain with no coordination (as per previous comment). But contrary to the intention, the code was setting CPUFREQ_SHARED_TYPE_ALL as coordination type. Change shared_type to CPUFREQ_SHARED_TYPE_NONE in case of errors parsing the domain information. The function still returns the error and the caller is free to bail out the domain initialisation altogether in that case. Given that both functions return domains with a single CPU, this change does not affect the functionality, but clarifies the intention. Signed-off-by: Ionela Voinescu Acked-by: Viresh Kumar [ rjw: Subject edit ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/cppc_acpi.c | 2 +- drivers/acpi/processor_perflib.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 7a99b19bb893d..9e335f0d2595f 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -511,7 +511,7 @@ err_ret: /* Assume no coordination on any error parsing domain info */ cpumask_clear(pr->shared_cpu_map); cpumask_set_cpu(i, pr->shared_cpu_map); - pr->shared_type = CPUFREQ_SHARED_TYPE_ALL; + pr->shared_type = CPUFREQ_SHARED_TYPE_NONE; } out: free_cpumask_var(covered_cpus); diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index b0d320f181637..1338925c93598 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -709,7 +709,7 @@ err_ret: if (retval) { cpumask_clear(pr->performance->shared_cpu_map); cpumask_set_cpu(i, pr->performance->shared_cpu_map); - pr->performance->shared_type = CPUFREQ_SHARED_TYPE_ALL; + pr->performance->shared_type = CPUFREQ_SHARED_TYPE_NONE; } pr->performance = NULL; /* Will be set for real in register */ } -- GitLab From d2641a5c3d5ecaa1078225e493c7fed821715a04 Mon Sep 17 00:00:00 2001 From: Ionela Voinescu Date: Mon, 14 Dec 2020 12:38:20 +0000 Subject: [PATCH 0612/1894] cppc_cpufreq: use policy->cpu as driver of frequency setting Considering only the currently supported coordination types (ANY, HW, NONE), this change only makes a difference for the ANY type, when policy->cpu is hotplugged out. In that case the new policy->cpu will be different from ((struct cppc_cpudata *)policy->driver_data)->cpu. While in this case the controls of *ANY* CPU could be used to drive frequency changes, it's more consistent to use policy->cpu as the leading CPU, as used in all other cppc_cpufreq functions. Additionally, the debug prints in cppc_set_perf() would no longer create confusion when referring to a CPU that is hotplugged out. Signed-off-by: Ionela Voinescu Acked-by: Viresh Kumar Tested-by: Mian Yousaf Kaukab Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cppc_cpufreq.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 7cc9bd8568dee..2700fc71d4e8d 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -150,6 +150,7 @@ static int cppc_cpufreq_set_target(struct cpufreq_policy *policy, unsigned int relation) { struct cppc_cpudata *cpu_data = all_cpu_data[policy->cpu]; + unsigned int cpu = policy->cpu; struct cpufreq_freqs freqs; u32 desired_perf; int ret = 0; @@ -164,12 +165,12 @@ static int cppc_cpufreq_set_target(struct cpufreq_policy *policy, freqs.new = target_freq; cpufreq_freq_transition_begin(policy, &freqs); - ret = cppc_set_perf(cpu_data->cpu, &cpu_data->perf_ctrls); + ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls); cpufreq_freq_transition_end(policy, &freqs, ret != 0); if (ret) pr_debug("Failed to set target on CPU:%d. ret:%d\n", - cpu_data->cpu, ret); + cpu, ret); return ret; } -- GitLab From bf76bb208f2b653306f2fc8f9c2a22f9890702bd Mon Sep 17 00:00:00 2001 From: Ionela Voinescu Date: Mon, 14 Dec 2020 12:38:21 +0000 Subject: [PATCH 0613/1894] cppc_cpufreq: clarify support for coordination types The previous coordination type handling in the cppc_cpufreq init code created some confusion: the comment mentioned "Support only SW_ANY for now" while only the SW_ALL/ALL case resulted in a failure. The other coordination types (HW_ALL/HW, NONE) were silently supported. Clarify support for coordination types while describing in comments the intended behavior. Signed-off-by: Ionela Voinescu Acked-by: Viresh Kumar Tested-by: Mian Yousaf Kaukab Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cppc_cpufreq.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 2700fc71d4e8d..f15a44c8b6b71 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -244,7 +244,7 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) struct cppc_cpudata *cpu_data = all_cpu_data[policy->cpu]; struct cppc_perf_caps *caps = &cpu_data->perf_caps; unsigned int cpu = policy->cpu; - int ret = 0; + int i, ret = 0; cpu_data->cpu = cpu; ret = cppc_get_perf_caps(cpu, caps); @@ -281,9 +281,13 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) policy->transition_delay_us = cppc_cpufreq_get_transition_delay_us(cpu); policy->shared_type = cpu_data->shared_type; - if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { - int i; - + switch (policy->shared_type) { + case CPUFREQ_SHARED_TYPE_HW: + case CPUFREQ_SHARED_TYPE_NONE: + /* Nothing to be done - we'll have a policy for each CPU */ + break; + case CPUFREQ_SHARED_TYPE_ANY: + /* All CPUs in the domain will share a policy */ cpumask_copy(policy->cpus, cpu_data->shared_cpu_map); for_each_cpu(i, policy->cpus) { @@ -293,9 +297,10 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) memcpy(&all_cpu_data[i]->perf_caps, caps, sizeof(cpu_data->perf_caps)); } - } else if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL) { - /* Support only SW_ANY for now. */ - pr_debug("Unsupported CPU co-ord type\n"); + break; + default: + pr_debug("Unsupported CPU co-ord type: %d\n", + policy->shared_type); return -EFAULT; } -- GitLab From cfdc589f4b5f94bf1a975b4a67d8163d533f6e9b Mon Sep 17 00:00:00 2001 From: Ionela Voinescu Date: Mon, 14 Dec 2020 12:38:22 +0000 Subject: [PATCH 0614/1894] cppc_cpufreq: expose information on frequency domains Use the existing sysfs attribute "freqdomain_cpus" to expose information to userspace about CPUs in the same frequency domain. Signed-off-by: Ionela Voinescu Acked-by: Viresh Kumar Tested-by: Mian Yousaf Kaukab Signed-off-by: Rafael J. Wysocki --- Documentation/ABI/testing/sysfs-devices-system-cpu | 3 ++- drivers/cpufreq/cppc_cpufreq.c | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 1a04ca8162ad8..0eee30b27ab60 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -264,7 +264,8 @@ Description: Discover CPUs in the same CPU frequency coordination domain attribute is useful for user space DVFS controllers to get better power/performance results for platforms using acpi-cpufreq. - This file is only present if the acpi-cpufreq driver is in use. + This file is only present if the acpi-cpufreq or the cppc-cpufreq + drivers are in use. What: /sys/devices/system/cpu/cpu*/cache/index3/cache_disable_{0,1} diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index f15a44c8b6b71..40b58d2dbbc6a 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -402,6 +402,19 @@ static int cppc_cpufreq_set_boost(struct cpufreq_policy *policy, int state) return 0; } +static ssize_t show_freqdomain_cpus(struct cpufreq_policy *policy, char *buf) +{ + unsigned int cpu = policy->cpu; + + return cpufreq_show_cpus(all_cpu_data[cpu]->shared_cpu_map, buf); +} +cpufreq_freq_attr_ro(freqdomain_cpus); + +static struct freq_attr *cppc_cpufreq_attr[] = { + &freqdomain_cpus, + NULL, +}; + static struct cpufreq_driver cppc_cpufreq_driver = { .flags = CPUFREQ_CONST_LOOPS, .verify = cppc_verify_policy, @@ -410,6 +423,7 @@ static struct cpufreq_driver cppc_cpufreq_driver = { .init = cppc_cpufreq_cpu_init, .stop_cpu = cppc_cpufreq_stop_cpu, .set_boost = cppc_cpufreq_set_boost, + .attr = cppc_cpufreq_attr, .name = "cppc_cpufreq", }; -- GitLab From a28b2bfc099c6b9caa6ef697660408e076a32019 Mon Sep 17 00:00:00 2001 From: Ionela Voinescu Date: Mon, 14 Dec 2020 12:38:23 +0000 Subject: [PATCH 0615/1894] cppc_cpufreq: replace per-cpu data array with a list The cppc_cpudata per-cpu storage was inefficient (1) additional to causing functional issues (2) when CPUs are hotplugged out, due to per-cpu data being improperly initialised. (1) The amount of information needed for CPPC performance control in its cpufreq driver depends on the domain (PSD) coordination type: ANY: One set of CPPC control and capability data (e.g desired performance, highest/lowest performance, etc) applies to all CPUs in the domain. ALL: Same as ANY. To be noted that this type is not currently supported. When supported, information about which CPUs belong to a domain is needed in order for frequency change requests to be sent to each of them. HW: It's necessary to store CPPC control and capability information for all the CPUs. HW will then coordinate the performance state based on their limitations and requests. NONE: Same as HW. No HW coordination is expected. Despite this, the previous initialisation code would indiscriminately allocate memory for all CPUs (all_cpu_data) and unnecessarily duplicate performance capabilities and the domain sharing mask and type for each possible CPU. (2) With the current per-cpu structure, when having ANY coordination, the cppc_cpudata cpu information is not initialised (will remain 0) for all CPUs in a policy, other than policy->cpu. When policy->cpu is hotplugged out, the driver will incorrectly use the uninitialised (0) value of the other CPUs when making frequency changes. Additionally, the previous values stored in the perf_ctrls.desired_perf will be lost when policy->cpu changes. Therefore replace the array of per cpu data with a list. The memory for each structure is allocated at policy init, where a single structure can be allocated per policy, not per cpu. In order to accommodate the struct list_head node in the cppc_cpudata structure, the now unused cpu and cur_policy variables are removed. For example, on a arm64 Juno platform with 6 CPUs: (0, 1, 2, 3) in PSD1, (4, 5) in PSD2 - ANY coordination, the memory allocation comparison shows: Before patch: - ANY coordination: total slack req alloc/free caller 0 0 0 0/1 _kernel_size_le_hi32+0x0xffff800008ff7810 0 0 0 0/6 _kernel_size_le_hi32+0x0xffff800008ff7808 128 80 48 1/0 _kernel_size_le_hi32+0x0xffff800008ffc070 768 0 768 6/0 _kernel_size_le_hi32+0x0xffff800008ffc0e4 After patch: - ANY coordination: total slack req alloc/free caller 256 0 256 2/0 _kernel_size_le_hi32+0x0xffff800008fed410 0 0 0 0/2 _kernel_size_le_hi32+0x0xffff800008fed274 Additional notes: - A pointer to the policy's cppc_cpudata is stored in policy->driver_data - Driver registration is skipped if _CPC entries are not present. Signed-off-by: Ionela Voinescu Tested-by: Mian Yousaf Kaukab Signed-off-by: Rafael J. Wysocki --- drivers/acpi/cppc_acpi.c | 141 ++++++++++++-------------- drivers/cpufreq/cppc_cpufreq.c | 174 +++++++++++++++++---------------- include/acpi/cppc_acpi.h | 6 +- 3 files changed, 154 insertions(+), 167 deletions(-) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 9e335f0d2595f..8c1d62e88c465 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -413,109 +413,88 @@ end: return result; } +bool acpi_cpc_valid(void) +{ + struct cpc_desc *cpc_ptr; + int cpu; + + for_each_possible_cpu(cpu) { + cpc_ptr = per_cpu(cpc_desc_ptr, cpu); + if (!cpc_ptr) + return false; + } + + return true; +} +EXPORT_SYMBOL_GPL(acpi_cpc_valid); + /** - * acpi_get_psd_map - Map the CPUs in a common freq domain. - * @all_cpu_data: Ptrs to CPU specific CPPC data including PSD info. + * acpi_get_psd_map - Map the CPUs in the freq domain of a given cpu + * @cpu: Find all CPUs that share a domain with cpu. + * @cpu_data: Pointer to CPU specific CPPC data including PSD info. * * Return: 0 for success or negative value for err. */ -int acpi_get_psd_map(struct cppc_cpudata **all_cpu_data) +int acpi_get_psd_map(unsigned int cpu, struct cppc_cpudata *cpu_data) { - int count_target; - int retval = 0; - unsigned int i, j; - cpumask_var_t covered_cpus; - struct cppc_cpudata *pr, *match_pr; - struct acpi_psd_package *pdomain; - struct acpi_psd_package *match_pdomain; struct cpc_desc *cpc_ptr, *match_cpc_ptr; - - if (!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL)) - return -ENOMEM; + struct acpi_psd_package *match_pdomain; + struct acpi_psd_package *pdomain; + int count_target, i; /* * Now that we have _PSD data from all CPUs, let's setup P-state * domain info. */ - for_each_possible_cpu(i) { - if (cpumask_test_cpu(i, covered_cpus)) - continue; - - pr = all_cpu_data[i]; - cpc_ptr = per_cpu(cpc_desc_ptr, i); - if (!cpc_ptr) { - retval = -EFAULT; - goto err_ret; - } + cpc_ptr = per_cpu(cpc_desc_ptr, cpu); + if (!cpc_ptr) + return -EFAULT; - pdomain = &(cpc_ptr->domain_info); - cpumask_set_cpu(i, pr->shared_cpu_map); - cpumask_set_cpu(i, covered_cpus); - if (pdomain->num_processors <= 1) - continue; + pdomain = &(cpc_ptr->domain_info); + cpumask_set_cpu(cpu, cpu_data->shared_cpu_map); + if (pdomain->num_processors <= 1) + return 0; - /* Validate the Domain info */ - count_target = pdomain->num_processors; - if (pdomain->coord_type == DOMAIN_COORD_TYPE_SW_ALL) - pr->shared_type = CPUFREQ_SHARED_TYPE_ALL; - else if (pdomain->coord_type == DOMAIN_COORD_TYPE_HW_ALL) - pr->shared_type = CPUFREQ_SHARED_TYPE_HW; - else if (pdomain->coord_type == DOMAIN_COORD_TYPE_SW_ANY) - pr->shared_type = CPUFREQ_SHARED_TYPE_ANY; - - for_each_possible_cpu(j) { - if (i == j) - continue; - - match_cpc_ptr = per_cpu(cpc_desc_ptr, j); - if (!match_cpc_ptr) { - retval = -EFAULT; - goto err_ret; - } + /* Validate the Domain info */ + count_target = pdomain->num_processors; + if (pdomain->coord_type == DOMAIN_COORD_TYPE_SW_ALL) + cpu_data->shared_type = CPUFREQ_SHARED_TYPE_ALL; + else if (pdomain->coord_type == DOMAIN_COORD_TYPE_HW_ALL) + cpu_data->shared_type = CPUFREQ_SHARED_TYPE_HW; + else if (pdomain->coord_type == DOMAIN_COORD_TYPE_SW_ANY) + cpu_data->shared_type = CPUFREQ_SHARED_TYPE_ANY; - match_pdomain = &(match_cpc_ptr->domain_info); - if (match_pdomain->domain != pdomain->domain) - continue; + for_each_possible_cpu(i) { + if (i == cpu) + continue; - /* Here i and j are in the same domain */ - if (match_pdomain->num_processors != count_target) { - retval = -EFAULT; - goto err_ret; - } + match_cpc_ptr = per_cpu(cpc_desc_ptr, i); + if (!match_cpc_ptr) + goto err_fault; - if (pdomain->coord_type != match_pdomain->coord_type) { - retval = -EFAULT; - goto err_ret; - } + match_pdomain = &(match_cpc_ptr->domain_info); + if (match_pdomain->domain != pdomain->domain) + continue; - cpumask_set_cpu(j, covered_cpus); - cpumask_set_cpu(j, pr->shared_cpu_map); - } + /* Here i and cpu are in the same domain */ + if (match_pdomain->num_processors != count_target) + goto err_fault; - for_each_cpu(j, pr->shared_cpu_map) { - if (i == j) - continue; + if (pdomain->coord_type != match_pdomain->coord_type) + goto err_fault; - match_pr = all_cpu_data[j]; - match_pr->shared_type = pr->shared_type; - cpumask_copy(match_pr->shared_cpu_map, - pr->shared_cpu_map); - } + cpumask_set_cpu(i, cpu_data->shared_cpu_map); } - goto out; -err_ret: - for_each_possible_cpu(i) { - pr = all_cpu_data[i]; + return 0; - /* Assume no coordination on any error parsing domain info */ - cpumask_clear(pr->shared_cpu_map); - cpumask_set_cpu(i, pr->shared_cpu_map); - pr->shared_type = CPUFREQ_SHARED_TYPE_NONE; - } -out: - free_cpumask_var(covered_cpus); - return retval; +err_fault: + /* Assume no coordination on any error parsing domain info */ + cpumask_clear(cpu_data->shared_cpu_map); + cpumask_set_cpu(cpu, cpu_data->shared_cpu_map); + cpu_data->shared_type = CPUFREQ_SHARED_TYPE_NONE; + + return -EFAULT; } EXPORT_SYMBOL_GPL(acpi_get_psd_map); diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 40b58d2dbbc6a..8a482c434ea6a 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -30,13 +30,13 @@ #define DMI_PROCESSOR_MAX_SPEED 0x14 /* - * These structs contain information parsed from per CPU - * ACPI _CPC structures. - * e.g. For each CPU the highest, lowest supported - * performance capabilities, desired performance level - * requested etc. + * This list contains information parsed from per CPU ACPI _CPC and _PSD + * structures: e.g. the highest and lowest supported performance, capabilities, + * desired performance, level requested etc. Depending on the share_type, not + * all CPUs will have an entry in the list. */ -static struct cppc_cpudata **all_cpu_data; +static LIST_HEAD(cpu_data_list); + static bool boost_supported; struct cppc_workaround_oem_info { @@ -148,8 +148,9 @@ static unsigned int cppc_cpufreq_khz_to_perf(struct cppc_cpudata *cpu_data, static int cppc_cpufreq_set_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) + { - struct cppc_cpudata *cpu_data = all_cpu_data[policy->cpu]; + struct cppc_cpudata *cpu_data = policy->driver_data; unsigned int cpu = policy->cpu; struct cpufreq_freqs freqs; u32 desired_perf; @@ -183,7 +184,7 @@ static int cppc_verify_policy(struct cpufreq_policy_data *policy) static void cppc_cpufreq_stop_cpu(struct cpufreq_policy *policy) { - struct cppc_cpudata *cpu_data = all_cpu_data[policy->cpu]; + struct cppc_cpudata *cpu_data = policy->driver_data; struct cppc_perf_caps *caps = &cpu_data->perf_caps; unsigned int cpu = policy->cpu; int ret; @@ -194,6 +195,12 @@ static void cppc_cpufreq_stop_cpu(struct cpufreq_policy *policy) if (ret) pr_debug("Err setting perf value:%d on CPU:%d. ret:%d\n", caps->lowest_perf, cpu, ret); + + /* Remove CPU node from list and free driver data for policy */ + free_cpumask_var(cpu_data->shared_cpu_map); + list_del(&cpu_data->node); + kfree(policy->driver_data); + policy->driver_data = NULL; } /* @@ -239,25 +246,61 @@ static unsigned int cppc_cpufreq_get_transition_delay_us(unsigned int cpu) } #endif -static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) + +static struct cppc_cpudata *cppc_cpufreq_get_cpu_data(unsigned int cpu) { - struct cppc_cpudata *cpu_data = all_cpu_data[policy->cpu]; - struct cppc_perf_caps *caps = &cpu_data->perf_caps; - unsigned int cpu = policy->cpu; - int i, ret = 0; + struct cppc_cpudata *cpu_data; + int ret; + + cpu_data = kzalloc(sizeof(struct cppc_cpudata), GFP_KERNEL); + if (!cpu_data) + goto out; - cpu_data->cpu = cpu; - ret = cppc_get_perf_caps(cpu, caps); + if (!zalloc_cpumask_var(&cpu_data->shared_cpu_map, GFP_KERNEL)) + goto free_cpu; + ret = acpi_get_psd_map(cpu, cpu_data); if (ret) { - pr_debug("Err reading CPU%d perf capabilities. ret:%d\n", - cpu, ret); - return ret; + pr_debug("Err parsing CPU%d PSD data: ret:%d\n", cpu, ret); + goto free_mask; + } + + ret = cppc_get_perf_caps(cpu, &cpu_data->perf_caps); + if (ret) { + pr_debug("Err reading CPU%d perf caps: ret:%d\n", cpu, ret); + goto free_mask; } /* Convert the lowest and nominal freq from MHz to KHz */ - caps->lowest_freq *= 1000; - caps->nominal_freq *= 1000; + cpu_data->perf_caps.lowest_freq *= 1000; + cpu_data->perf_caps.nominal_freq *= 1000; + + list_add(&cpu_data->node, &cpu_data_list); + + return cpu_data; + +free_mask: + free_cpumask_var(cpu_data->shared_cpu_map); +free_cpu: + kfree(cpu_data); +out: + return NULL; +} + +static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) +{ + unsigned int cpu = policy->cpu; + struct cppc_cpudata *cpu_data; + struct cppc_perf_caps *caps; + int ret; + + cpu_data = cppc_cpufreq_get_cpu_data(cpu); + if (!cpu_data) { + pr_err("Error in acquiring _CPC/_PSD data for CPU%d.\n", cpu); + return -ENODEV; + } + caps = &cpu_data->perf_caps; + policy->driver_data = cpu_data; /* * Set min to lowest nonlinear perf to avoid any efficiency penalty (see @@ -287,16 +330,12 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) /* Nothing to be done - we'll have a policy for each CPU */ break; case CPUFREQ_SHARED_TYPE_ANY: - /* All CPUs in the domain will share a policy */ + /* + * All CPUs in the domain will share a policy and all cpufreq + * operations will use a single cppc_cpudata structure stored + * in policy->driver_data. + */ cpumask_copy(policy->cpus, cpu_data->shared_cpu_map); - - for_each_cpu(i, policy->cpus) { - if (unlikely(i == cpu)) - continue; - - memcpy(&all_cpu_data[i]->perf_caps, caps, - sizeof(cpu_data->perf_caps)); - } break; default: pr_debug("Unsupported CPU co-ord type: %d\n", @@ -304,8 +343,6 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) return -EFAULT; } - cpu_data->cur_policy = policy; - /* * If 'highest_perf' is greater than 'nominal_perf', we assume CPU Boost * is supported. @@ -360,9 +397,12 @@ static int cppc_get_rate_from_fbctrs(struct cppc_cpudata *cpu_data, static unsigned int cppc_cpufreq_get_rate(unsigned int cpu) { struct cppc_perf_fb_ctrs fb_ctrs_t0 = {0}, fb_ctrs_t1 = {0}; - struct cppc_cpudata *cpu_data = all_cpu_data[cpu]; + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + struct cppc_cpudata *cpu_data = policy->driver_data; int ret; + cpufreq_cpu_put(policy); + ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t0); if (ret) return ret; @@ -378,7 +418,7 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu) static int cppc_cpufreq_set_boost(struct cpufreq_policy *policy, int state) { - struct cppc_cpudata *cpu_data = all_cpu_data[policy->cpu]; + struct cppc_cpudata *cpu_data = policy->driver_data; struct cppc_perf_caps *caps = &cpu_data->perf_caps; int ret; @@ -404,9 +444,9 @@ static int cppc_cpufreq_set_boost(struct cpufreq_policy *policy, int state) static ssize_t show_freqdomain_cpus(struct cpufreq_policy *policy, char *buf) { - unsigned int cpu = policy->cpu; + struct cppc_cpudata *cpu_data = policy->driver_data; - return cpufreq_show_cpus(all_cpu_data[cpu]->shared_cpu_map, buf); + return cpufreq_show_cpus(cpu_data->shared_cpu_map, buf); } cpufreq_freq_attr_ro(freqdomain_cpus); @@ -435,10 +475,13 @@ static struct cpufreq_driver cppc_cpufreq_driver = { */ static unsigned int hisi_cppc_cpufreq_get_rate(unsigned int cpu) { - struct cppc_cpudata *cpu_data = all_cpu_data[cpu]; + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + struct cppc_cpudata *cpu_data = policy->driver_data; u64 desired_perf; int ret; + cpufreq_cpu_put(policy); + ret = cppc_get_desired_perf(cpu, &desired_perf); if (ret < 0) return -EIO; @@ -471,68 +514,33 @@ static void cppc_check_hisi_workaround(void) static int __init cppc_cpufreq_init(void) { - struct cppc_cpudata *cpu_data; - int i, ret = 0; - - if (acpi_disabled) + if ((acpi_disabled) || !acpi_cpc_valid()) return -ENODEV; - all_cpu_data = kcalloc(num_possible_cpus(), sizeof(void *), - GFP_KERNEL); - if (!all_cpu_data) - return -ENOMEM; - - for_each_possible_cpu(i) { - all_cpu_data[i] = kzalloc(sizeof(struct cppc_cpudata), GFP_KERNEL); - if (!all_cpu_data[i]) - goto out; - - cpu_data = all_cpu_data[i]; - if (!zalloc_cpumask_var(&cpu_data->shared_cpu_map, GFP_KERNEL)) - goto out; - } - - ret = acpi_get_psd_map(all_cpu_data); - if (ret) { - pr_debug("Error parsing PSD data. Aborting cpufreq registration.\n"); - goto out; - } + INIT_LIST_HEAD(&cpu_data_list); cppc_check_hisi_workaround(); - ret = cpufreq_register_driver(&cppc_cpufreq_driver); - if (ret) - goto out; + return cpufreq_register_driver(&cppc_cpufreq_driver); +} - return ret; +static inline void free_cpu_data(void) +{ + struct cppc_cpudata *iter, *tmp; -out: - for_each_possible_cpu(i) { - cpu_data = all_cpu_data[i]; - if (!cpu_data) - break; - free_cpumask_var(cpu_data->shared_cpu_map); - kfree(cpu_data); + list_for_each_entry_safe(iter, tmp, &cpu_data_list, node) { + free_cpumask_var(iter->shared_cpu_map); + list_del(&iter->node); + kfree(iter); } - kfree(all_cpu_data); - return -ENODEV; } static void __exit cppc_cpufreq_exit(void) { - struct cppc_cpudata *cpu_data; - int i; - cpufreq_unregister_driver(&cppc_cpufreq_driver); - for_each_possible_cpu(i) { - cpu_data = all_cpu_data[i]; - free_cpumask_var(cpu_data->shared_cpu_map); - kfree(cpu_data); - } - - kfree(all_cpu_data); + free_cpu_data(); } module_exit(cppc_cpufreq_exit); diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index a6a9373ab8634..232838d28f506 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -124,11 +124,10 @@ struct cppc_perf_fb_ctrs { /* Per CPU container for runtime CPPC management. */ struct cppc_cpudata { - int cpu; + struct list_head node; struct cppc_perf_caps perf_caps; struct cppc_perf_ctrls perf_ctrls; struct cppc_perf_fb_ctrs perf_fb_ctrs; - struct cpufreq_policy *cur_policy; unsigned int shared_type; cpumask_var_t shared_cpu_map; }; @@ -137,7 +136,8 @@ extern int cppc_get_desired_perf(int cpunum, u64 *desired_perf); extern int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs); extern int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls); extern int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps); -extern int acpi_get_psd_map(struct cppc_cpudata **); +extern bool acpi_cpc_valid(void); +extern int acpi_get_psd_map(unsigned int cpu, struct cppc_cpudata *cpu_data); extern unsigned int cppc_get_transition_latency(int cpu); extern bool cpc_ffh_supported(void); extern int cpc_read_ffh(int cpunum, struct cpc_reg *reg, u64 *val); -- GitLab From ca6827de4b67367e73fdf43d2ea0a0064423edfb Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 14 Dec 2020 21:04:11 +0100 Subject: [PATCH 0616/1894] cpufreq: schedutil: Add util to struct sg_cpu Instead of passing util and max between functions while computing the utilization and capacity, store the former in struct sg_cpu (along with the latter and bw_dl). This will allow the current utilization value to be compared with the one obtained previously (which is requisite for some code changes to follow this one), but also it causes the code to look slightly more consistent and cleaner. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- kernel/sched/cpufreq_schedutil.c | 42 +++++++++++++++----------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 77736058d8e4d..319a270d13c12 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -53,6 +53,7 @@ struct sugov_cpu { unsigned int iowait_boost; u64 last_update; + unsigned long util; unsigned long bw_dl; unsigned long max; @@ -276,16 +277,15 @@ unsigned long schedutil_cpu_util(int cpu, unsigned long util_cfs, return min(max, util); } -static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu) +static void sugov_get_util(struct sugov_cpu *sg_cpu) { struct rq *rq = cpu_rq(sg_cpu->cpu); - unsigned long util = cpu_util_cfs(rq); unsigned long max = arch_scale_cpu_capacity(sg_cpu->cpu); sg_cpu->max = max; sg_cpu->bw_dl = cpu_bw_dl(rq); - - return schedutil_cpu_util(sg_cpu->cpu, util, max, FREQUENCY_UTIL, NULL); + sg_cpu->util = schedutil_cpu_util(sg_cpu->cpu, cpu_util_cfs(rq), max, + FREQUENCY_UTIL, NULL); } /** @@ -362,8 +362,6 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, * sugov_iowait_apply() - Apply the IO boost to a CPU. * @sg_cpu: the sugov data for the cpu to boost * @time: the update time from the caller - * @util: the utilization to (eventually) boost - * @max: the maximum value the utilization can be boosted to * * A CPU running a task which woken up after an IO operation can have its * utilization boosted to speed up the completion of those IO operations. @@ -377,18 +375,17 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, * This mechanism is designed to boost high frequently IO waiting tasks, while * being more conservative on tasks which does sporadic IO operations. */ -static unsigned long sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time, - unsigned long util, unsigned long max) +static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time) { unsigned long boost; /* No boost currently required */ if (!sg_cpu->iowait_boost) - return util; + return; /* Reset boost if the CPU appears to have been idle enough */ if (sugov_iowait_reset(sg_cpu, time, false)) - return util; + return; if (!sg_cpu->iowait_boost_pending) { /* @@ -397,18 +394,19 @@ static unsigned long sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time, sg_cpu->iowait_boost >>= 1; if (sg_cpu->iowait_boost < IOWAIT_BOOST_MIN) { sg_cpu->iowait_boost = 0; - return util; + return; } } sg_cpu->iowait_boost_pending = false; /* - * @util is already in capacity scale; convert iowait_boost + * sg_cpu->util is already in capacity scale; convert iowait_boost * into the same scale so we can compare. */ - boost = (sg_cpu->iowait_boost * max) >> SCHED_CAPACITY_SHIFT; - return max(boost, util); + boost = (sg_cpu->iowait_boost * sg_cpu->max) >> SCHED_CAPACITY_SHIFT; + if (sg_cpu->util < boost) + sg_cpu->util = boost; } #ifdef CONFIG_NO_HZ_COMMON @@ -439,9 +437,8 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, { struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); struct sugov_policy *sg_policy = sg_cpu->sg_policy; - unsigned long util, max; - unsigned int next_f; unsigned int cached_freq = sg_policy->cached_raw_freq; + unsigned int next_f; sugov_iowait_boost(sg_cpu, time, flags); sg_cpu->last_update = time; @@ -451,10 +448,10 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, if (!sugov_should_update_freq(sg_policy, time)) return; - util = sugov_get_util(sg_cpu); - max = sg_cpu->max; - util = sugov_iowait_apply(sg_cpu, time, util, max); - next_f = get_next_freq(sg_policy, util, max); + sugov_get_util(sg_cpu); + sugov_iowait_apply(sg_cpu, time); + + next_f = get_next_freq(sg_policy, sg_cpu->util, sg_cpu->max); /* * Do not reduce the frequency if the CPU has not been idle * recently, as the reduction is likely to be premature then. @@ -491,9 +488,10 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time) struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j); unsigned long j_util, j_max; - j_util = sugov_get_util(j_sg_cpu); + sugov_get_util(j_sg_cpu); + sugov_iowait_apply(j_sg_cpu, time); + j_util = j_sg_cpu->util; j_max = j_sg_cpu->max; - j_util = sugov_iowait_apply(j_sg_cpu, time, j_util, j_max); if (j_util * max > j_max * util) { util = j_util; -- GitLab From ee2cc4276ba4909438f5894a218877660e1536d9 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 14 Dec 2020 21:08:00 +0100 Subject: [PATCH 0617/1894] cpufreq: Add special-purpose fast-switching callback for drivers First off, some cpufreq drivers (eg. intel_pstate) can pass hints beyond the current target frequency to the hardware and there are no provisions for doing that in the cpufreq framework. In particular, today the driver has to assume that it should not allow the frequency to fall below the one requested by the governor (or the required capacity may not be provided) which may not be the case and which may lead to excessive energy usage in some scenarios. Second, the hints passed by these drivers to the hardware need not be in terms of the frequency, so representing the utilization numbers coming from the scheduler as frequency before passing them to those drivers is not really useful. Address the two points above by adding a special-purpose replacement for the ->fast_switch callback, called ->adjust_perf, allowing the governor to pass abstract performance level (rather than frequency) values for the minimum (required) and target (desired) performance along with the CPU capacity to compare them to. Also update the schedutil governor to use the new callback instead of ->fast_switch if present and if the utilization mertics are frequency-invariant (that is requisite for the direct mapping between the utilization and the CPU performance levels to be a reasonable approximation). Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq.c | 40 +++++++++++++++++++ include/linux/cpufreq.h | 14 +++++++ include/linux/sched/cpufreq.h | 5 +++ kernel/sched/cpufreq_schedutil.c | 68 +++++++++++++++++++++++++++----- 4 files changed, 117 insertions(+), 10 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index c17aa2973c443..d0a3525ce27f8 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2097,6 +2097,46 @@ unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy, } EXPORT_SYMBOL_GPL(cpufreq_driver_fast_switch); +/** + * cpufreq_driver_adjust_perf - Adjust CPU performance level in one go. + * @cpu: Target CPU. + * @min_perf: Minimum (required) performance level (units of @capacity). + * @target_perf: Terget (desired) performance level (units of @capacity). + * @capacity: Capacity of the target CPU. + * + * Carry out a fast performance level switch of @cpu without sleeping. + * + * The driver's ->adjust_perf() callback invoked by this function must be + * suitable for being called from within RCU-sched read-side critical sections + * and it is expected to select a suitable performance level equal to or above + * @min_perf and preferably equal to or below @target_perf. + * + * This function must not be called if policy->fast_switch_enabled is unset. + * + * Governors calling this function must guarantee that it will never be invoked + * twice in parallel for the same CPU and that it will never be called in + * parallel with either ->target() or ->target_index() or ->fast_switch() for + * the same CPU. + */ +void cpufreq_driver_adjust_perf(unsigned int cpu, + unsigned long min_perf, + unsigned long target_perf, + unsigned long capacity) +{ + cpufreq_driver->adjust_perf(cpu, min_perf, target_perf, capacity); +} + +/** + * cpufreq_driver_has_adjust_perf - Check "direct fast switch" callback. + * + * Return 'true' if the ->adjust_perf callback is present for the + * current driver or 'false' otherwise. + */ +bool cpufreq_driver_has_adjust_perf(void) +{ + return !!cpufreq_driver->adjust_perf; +} + /* Must set freqs->new to intermediate frequency */ static int __target_intermediate(struct cpufreq_policy *policy, struct cpufreq_freqs *freqs, int index) diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 584fccd4fcabb..9c8b7437b6cd3 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -320,6 +320,15 @@ struct cpufreq_driver { unsigned int index); unsigned int (*fast_switch)(struct cpufreq_policy *policy, unsigned int target_freq); + /* + * ->fast_switch() replacement for drivers that use an internal + * representation of performance levels and can pass hints other than + * the target performance level to the hardware. + */ + void (*adjust_perf)(unsigned int cpu, + unsigned long min_perf, + unsigned long target_perf, + unsigned long capacity); /* * Caches and returns the lowest driver-supported frequency greater than @@ -588,6 +597,11 @@ struct cpufreq_governor { /* Pass a target to the cpufreq driver */ unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy, unsigned int target_freq); +void cpufreq_driver_adjust_perf(unsigned int cpu, + unsigned long min_perf, + unsigned long target_perf, + unsigned long capacity); +bool cpufreq_driver_has_adjust_perf(void); int cpufreq_driver_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation); diff --git a/include/linux/sched/cpufreq.h b/include/linux/sched/cpufreq.h index 3ed5aa18593f2..6205578ab6ee6 100644 --- a/include/linux/sched/cpufreq.h +++ b/include/linux/sched/cpufreq.h @@ -28,6 +28,11 @@ static inline unsigned long map_util_freq(unsigned long util, { return (freq + (freq >> 2)) * util / cap; } + +static inline unsigned long map_util_perf(unsigned long util) +{ + return util + (util >> 2); +} #endif /* CONFIG_CPU_FREQ */ #endif /* _LINUX_SCHED_CPUFREQ_H */ diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 319a270d13c12..803bcb30db27b 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -432,13 +432,10 @@ static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu, struct sugov_p sg_policy->limits_changed = true; } -static void sugov_update_single(struct update_util_data *hook, u64 time, - unsigned int flags) +static inline bool sugov_update_single_common(struct sugov_cpu *sg_cpu, + u64 time, unsigned int flags) { - struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); struct sugov_policy *sg_policy = sg_cpu->sg_policy; - unsigned int cached_freq = sg_policy->cached_raw_freq; - unsigned int next_f; sugov_iowait_boost(sg_cpu, time, flags); sg_cpu->last_update = time; @@ -446,11 +443,25 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, ignore_dl_rate_limit(sg_cpu, sg_policy); if (!sugov_should_update_freq(sg_policy, time)) - return; + return false; sugov_get_util(sg_cpu); sugov_iowait_apply(sg_cpu, time); + return true; +} + +static void sugov_update_single_freq(struct update_util_data *hook, u64 time, + unsigned int flags) +{ + struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); + struct sugov_policy *sg_policy = sg_cpu->sg_policy; + unsigned int cached_freq = sg_policy->cached_raw_freq; + unsigned int next_f; + + if (!sugov_update_single_common(sg_cpu, time, flags)) + return; + next_f = get_next_freq(sg_policy, sg_cpu->util, sg_cpu->max); /* * Do not reduce the frequency if the CPU has not been idle @@ -477,6 +488,38 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, } } +static void sugov_update_single_perf(struct update_util_data *hook, u64 time, + unsigned int flags) +{ + struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); + unsigned long prev_util = sg_cpu->util; + + /* + * Fall back to the "frequency" path if frequency invariance is not + * supported, because the direct mapping between the utilization and + * the performance levels depends on the frequency invariance. + */ + if (!arch_scale_freq_invariant()) { + sugov_update_single_freq(hook, time, flags); + return; + } + + if (!sugov_update_single_common(sg_cpu, time, flags)) + return; + + /* + * Do not reduce the target performance level if the CPU has not been + * idle recently, as the reduction is likely to be premature then. + */ + if (sugov_cpu_is_busy(sg_cpu) && sg_cpu->util < prev_util) + sg_cpu->util = prev_util; + + cpufreq_driver_adjust_perf(sg_cpu->cpu, map_util_perf(sg_cpu->bw_dl), + map_util_perf(sg_cpu->util), sg_cpu->max); + + sg_cpu->sg_policy->last_freq_update_time = time; +} + static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time) { struct sugov_policy *sg_policy = sg_cpu->sg_policy; @@ -815,6 +858,7 @@ static void sugov_exit(struct cpufreq_policy *policy) static int sugov_start(struct cpufreq_policy *policy) { struct sugov_policy *sg_policy = policy->governor_data; + void (*uu)(struct update_util_data *data, u64 time, unsigned int flags); unsigned int cpu; sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC; @@ -834,13 +878,17 @@ static int sugov_start(struct cpufreq_policy *policy) sg_cpu->sg_policy = sg_policy; } + if (policy_is_shared(policy)) + uu = sugov_update_shared; + else if (policy->fast_switch_enabled && cpufreq_driver_has_adjust_perf()) + uu = sugov_update_single_perf; + else + uu = sugov_update_single_freq; + for_each_cpu(cpu, policy->cpus) { struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu); - cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, - policy_is_shared(policy) ? - sugov_update_shared : - sugov_update_single); + cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, uu); } return 0; } -- GitLab From a365ab6b9dfbaf8fb4fb4cd5d8a4c55dc4fb8b1c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 14 Dec 2020 21:09:26 +0100 Subject: [PATCH 0618/1894] cpufreq: intel_pstate: Implement the ->adjust_perf() callback Make intel_pstate expose the ->adjust_perf() callback when it operates in the passive mode with HWP enabled which causes the schedutil governor to use that callback instead of ->fast_switch(). The minimum and target performance-level values passed by the governor to ->adjust_perf() are converted to HWP.REQ.MIN and HWP.REQ.DESIRED, respectively, which allows the processor to adjust its configuration to maximize energy-efficiency while providing sufficient capacity. Signed-off-by: Rafael J. Wysocki Acked-by: Srinivas Pandruvada Acked-by: Viresh Kumar --- drivers/cpufreq/intel_pstate.c | 70 ++++++++++++++++++++++++++++------ 1 file changed, 58 insertions(+), 12 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 2a4db856222fb..d5ec0c962ec52 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2526,20 +2526,19 @@ static void intel_cpufreq_trace(struct cpudata *cpu, unsigned int trace_type, in fp_toint(cpu->iowait_boost * 100)); } -static void intel_cpufreq_adjust_hwp(struct cpudata *cpu, u32 target_pstate, - bool strict, bool fast_switch) +static void intel_cpufreq_adjust_hwp(struct cpudata *cpu, u32 min, u32 max, + u32 desired, bool fast_switch) { u64 prev = READ_ONCE(cpu->hwp_req_cached), value = prev; value &= ~HWP_MIN_PERF(~0L); - value |= HWP_MIN_PERF(target_pstate); + value |= HWP_MIN_PERF(min); - /* - * The entire MSR needs to be updated in order to update the HWP min - * field in it, so opportunistically update the max too if needed. - */ value &= ~HWP_MAX_PERF(~0L); - value |= HWP_MAX_PERF(strict ? target_pstate : cpu->max_perf_ratio); + value |= HWP_MAX_PERF(max); + + value &= ~HWP_DESIRED_PERF(~0L); + value |= HWP_DESIRED_PERF(desired); if (value == prev) return; @@ -2569,11 +2568,15 @@ static int intel_cpufreq_update_pstate(struct cpufreq_policy *policy, int old_pstate = cpu->pstate.current_pstate; target_pstate = intel_pstate_prepare_request(cpu, target_pstate); - if (hwp_active) - intel_cpufreq_adjust_hwp(cpu, target_pstate, - policy->strict_target, fast_switch); - else if (target_pstate != old_pstate) + if (hwp_active) { + int max_pstate = policy->strict_target ? + target_pstate : cpu->max_perf_ratio; + + intel_cpufreq_adjust_hwp(cpu, target_pstate, max_pstate, 0, + fast_switch); + } else if (target_pstate != old_pstate) { intel_cpufreq_adjust_perf_ctl(cpu, target_pstate, fast_switch); + } cpu->pstate.current_pstate = target_pstate; @@ -2634,6 +2637,47 @@ static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy, return target_pstate * cpu->pstate.scaling; } +static void intel_cpufreq_adjust_perf(unsigned int cpunum, + unsigned long min_perf, + unsigned long target_perf, + unsigned long capacity) +{ + struct cpudata *cpu = all_cpu_data[cpunum]; + int old_pstate = cpu->pstate.current_pstate; + int cap_pstate, min_pstate, max_pstate, target_pstate; + + update_turbo_state(); + cap_pstate = global.turbo_disabled ? cpu->pstate.max_pstate : + cpu->pstate.turbo_pstate; + + /* Optimization: Avoid unnecessary divisions. */ + + target_pstate = cap_pstate; + if (target_perf < capacity) + target_pstate = DIV_ROUND_UP(cap_pstate * target_perf, capacity); + + min_pstate = cap_pstate; + if (min_perf < capacity) + min_pstate = DIV_ROUND_UP(cap_pstate * min_perf, capacity); + + if (min_pstate < cpu->pstate.min_pstate) + min_pstate = cpu->pstate.min_pstate; + + if (min_pstate < cpu->min_perf_ratio) + min_pstate = cpu->min_perf_ratio; + + max_pstate = min(cap_pstate, cpu->max_perf_ratio); + if (max_pstate < min_pstate) + max_pstate = min_pstate; + + target_pstate = clamp_t(int, target_pstate, min_pstate, max_pstate); + + intel_cpufreq_adjust_hwp(cpu, min_pstate, max_pstate, target_pstate, true); + + cpu->pstate.current_pstate = target_pstate; + intel_cpufreq_trace(cpu, INTEL_PSTATE_TRACE_FAST_SWITCH, old_pstate); +} + static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy) { int max_state, turbo_max, min_freq, max_freq, ret; @@ -3032,6 +3076,8 @@ static int __init intel_pstate_init(void) intel_pstate.attr = hwp_cpufreq_attrs; intel_cpufreq.attr = hwp_cpufreq_attrs; intel_cpufreq.flags |= CPUFREQ_NEED_UPDATE_LIMITS; + intel_cpufreq.fast_switch = NULL; + intel_cpufreq.adjust_perf = intel_cpufreq_adjust_perf; if (!default_driver) default_driver = &intel_pstate; -- GitLab From b08221c40febcbda9309dd70c61cf1b0ebb0e351 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Fri, 11 Dec 2020 10:18:14 +0800 Subject: [PATCH 0619/1894] ACPI: PNP: compare the string length in the matching_id() Recently we met a touchscreen problem on some Thinkpad machines, the touchscreen driver (i2c-hid) is not loaded and the touchscreen can't work. An i2c ACPI device with the name WACF2200 is defined in the BIOS, with the current rule in matching_id(), this device will be regarded as a PNP device since there is WACFXXX in the acpi_pnp_device_ids[] and this PNP device is attached to the acpi device as the 1st physical_node, this will make the i2c bus match fail when i2c bus calls acpi_companion_match() to match the acpi_id_table in the i2c-hid driver. WACF2200 is an i2c device instead of a PNP device, after adding the string length comparing, the matching_id() will return false when matching WACF2200 and WACFXXX, and it is reasonable to compare the string length when matching two IDs. Suggested-by: Rafael J. Wysocki Signed-off-by: Hui Wang Cc: All applicable Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_pnp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/acpi/acpi_pnp.c b/drivers/acpi/acpi_pnp.c index 4ed755a963aa5..8f2dc176bb412 100644 --- a/drivers/acpi/acpi_pnp.c +++ b/drivers/acpi/acpi_pnp.c @@ -319,6 +319,9 @@ static bool matching_id(const char *idstr, const char *list_id) { int i; + if (strlen(idstr) != strlen(list_id)) + return false; + if (memcmp(idstr, list_id, 3)) return false; -- GitLab From b784c77075023e1a71bc06e6b4f711acb99e9c73 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Tue, 15 Dec 2020 13:24:59 +0100 Subject: [PATCH 0620/1894] coccinnelle: Remove ptr_ret script The ptr_ret script script addresses a number of situations where we end up testing an error pointer, and if it's an error returning it, or return 0 otherwise to transform it into a PTR_ERR_OR_ZERO call. So it will convert a block like this: if (IS_ERR(err)) return PTR_ERR(err); return 0; into return PTR_ERR_OR_ZERO(err); While this is technically correct, it has a number of drawbacks. First, it merges the error and success path, which will make it harder for a reviewer or reader to grasp. It's also more difficult to extend if we were to add some code between the error check and the function return, making the author essentially revert that patch before adding new lines, while it would have been a trivial addition otherwise for the rewiever. Therefore, since that script is only about cosmetic in the first place, let's remove it since it's not worth it. Acked-by: Jani Nikula Acked-by: Thierry Reding Acked-by: Julia Lawall Reviewed-by: Wolfram Sang Reviewed-by: Mark Brown Signed-off-by: Maxime Ripard Signed-off-by: Julia Lawall --- scripts/coccinelle/api/ptr_ret.cocci | 97 ---------------------------- 1 file changed, 97 deletions(-) delete mode 100644 scripts/coccinelle/api/ptr_ret.cocci diff --git a/scripts/coccinelle/api/ptr_ret.cocci b/scripts/coccinelle/api/ptr_ret.cocci deleted file mode 100644 index e76cd5d90a8a9..0000000000000 --- a/scripts/coccinelle/api/ptr_ret.cocci +++ /dev/null @@ -1,97 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/// -/// Use PTR_ERR_OR_ZERO rather than if(IS_ERR(...)) + PTR_ERR -/// -// Confidence: High -// Copyright: (C) 2012 Julia Lawall, INRIA/LIP6. -// Copyright: (C) 2012 Gilles Muller, INRIA/LiP6. -// URL: http://coccinelle.lip6.fr/ -// Options: --no-includes --include-headers -// -// Keywords: ERR_PTR, PTR_ERR, PTR_ERR_OR_ZERO -// Version min: 2.6.39 -// - -virtual context -virtual patch -virtual org -virtual report - -@depends on patch@ -expression ptr; -@@ - -- if (IS_ERR(ptr)) return PTR_ERR(ptr); else return 0; -+ return PTR_ERR_OR_ZERO(ptr); - -@depends on patch@ -expression ptr; -@@ - -- if (IS_ERR(ptr)) return PTR_ERR(ptr); return 0; -+ return PTR_ERR_OR_ZERO(ptr); - -@depends on patch@ -expression ptr; -@@ - -- (IS_ERR(ptr) ? PTR_ERR(ptr) : 0) -+ PTR_ERR_OR_ZERO(ptr) - -@r1 depends on !patch@ -expression ptr; -position p1; -@@ - -* if@p1 (IS_ERR(ptr)) return PTR_ERR(ptr); else return 0; - -@r2 depends on !patch@ -expression ptr; -position p2; -@@ - -* if@p2 (IS_ERR(ptr)) return PTR_ERR(ptr); return 0; - -@r3 depends on !patch@ -expression ptr; -position p3; -@@ - -* IS_ERR@p3(ptr) ? PTR_ERR(ptr) : 0 - -@script:python depends on org@ -p << r1.p1; -@@ - -coccilib.org.print_todo(p[0], "WARNING: PTR_ERR_OR_ZERO can be used") - - -@script:python depends on org@ -p << r2.p2; -@@ - -coccilib.org.print_todo(p[0], "WARNING: PTR_ERR_OR_ZERO can be used") - -@script:python depends on org@ -p << r3.p3; -@@ - -coccilib.org.print_todo(p[0], "WARNING: PTR_ERR_OR_ZERO can be used") - -@script:python depends on report@ -p << r1.p1; -@@ - -coccilib.report.print_report(p[0], "WARNING: PTR_ERR_OR_ZERO can be used") - -@script:python depends on report@ -p << r2.p2; -@@ - -coccilib.report.print_report(p[0], "WARNING: PTR_ERR_OR_ZERO can be used") - -@script:python depends on report@ -p << r3.p3; -@@ - -coccilib.report.print_report(p[0], "WARNING: PTR_ERR_OR_ZERO can be used") -- GitLab From 57f04815fd95bb8c46f6ec5c9d25430bb52d419f Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 10 Dec 2020 09:40:28 -0800 Subject: [PATCH 0621/1894] drm/msm: Fix WARN_ON() splat in _free_object() [ 192.062000] ------------[ cut here ]------------ [ 192.062498] WARNING: CPU: 3 PID: 2039 at drivers/gpu/drm/msm/msm_gem.c:381 put_iova_vmas+0x94/0xa0 [msm] [ 192.062870] Modules linked in: snd_hrtimer snd_seq snd_seq_device rfcomm algif_hash algif_skcipher af_alg bnep xt_CHECKSUM nft_chain_nat xt_MASQUERADE nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nft_counter xt_tcpudp nft_compat cpufreq_powersave cpufreq_conservative q6asm_dai q6routing q6afe_dai q6adm bridge q6afe q6asm q6dsp_common q6core stp llc nf_tables libcrc32c nfnetlink snd_soc_wsa881x regmap_sdw soundwire_qcom gpio_wcd934x snd_soc_wcd934x wcd934x regmap_slimbus venus_enc venus_dec apr videobuf2_dma_sg qrtr_smd uvcvideo videobuf2_vmalloc videobuf2_memops ath10k_snoc ath10k_core hci_uart btqca btbcm mac80211 bluetooth snd_soc_sdm845 ath snd_soc_rt5663 snd_soc_qcom_common snd_soc_rl6231 soundwire_bus ecdh_generic ecc qcom_spmi_adc5 venus_core qcom_pon qcom_spmi_temp_alarm qcom_vadc_common v4l2_mem2mem videobuf2_v4l2 cfg80211 videobuf2_common hid_multitouch reset_qcom_pdc qcrypto qcom_rng rfkill qcom_q6v5_mss libarc4 libdes qrtr ns qcom_wdt socinfo slim_qcom_ngd_ctrl [ 192.065739] pdr_interface qcom_q6v5_pas slimbus qcom_pil_info qcom_q6v5 qcom_sysmon qcom_common qcom_glink_smem qmi_helpers rmtfs_mem tcp_bbr sch_fq fuse ip_tables x_tables ipv6 crc_ccitt ti_sn65dsi86 i2c_hid msm mdt_loader llcc_qcom rtc_pm8xxx ocmem drm_kms_helper crct10dif_ce phy_qcom_qusb2 i2c_qcom_geni panel_simple drm pwm_bl [ 192.066066] CPU: 3 PID: 2039 Comm: gnome-shell Tainted: G W 5.10.0-rc7-next-20201208 #1 [ 192.066068] Hardware name: LENOVO 81JL/LNVNB161216, BIOS 9UCN33WW(V2.06) 06/ 4/2019 [ 192.066072] pstate: 40400005 (nZcv daif +PAN -UAO -TCO BTYPE=--) [ 192.066099] pc : put_iova_vmas+0x94/0xa0 [msm] [ 192.066262] lr : put_iova_vmas+0x1c/0xa0 [msm] [ 192.066403] sp : ffff800019efbbb0 [ 192.066405] x29: ffff800019efbbb0 x28: ffff800019efbd88 [ 192.066411] x27: 0000000000000000 x26: ffff109582efa400 [ 192.066417] x25: 0000000000000009 x24: 000000000000012b [ 192.066422] x23: ffff109582efa438 x22: ffff109582efa450 [ 192.066427] x21: ffff109582efa528 x20: ffff1095cbd4f200 [ 192.066432] x19: ffff1095cbd4f200 x18: 0000000000000000 [ 192.066438] x17: 0000000000000000 x16: ffffc26c200ca750 [ 192.066727] x15: 0000000000000000 x14: 0000000000000000 [ 192.066741] x13: ffff1096fb8c9100 x12: 0000000000000002 [ 192.066754] x11: ffffffffffffffff x10: 0000000000000002 [ 192.067046] x9 : 0000000000000001 x8 : 0000000000000a36 [ 192.067060] x7 : ffff4e2ad9f11000 x6 : ffffc26c216d4000 [ 192.067212] x5 : ffffc26c2022661c x4 : ffff1095c2b98000 [ 192.067367] x3 : ffff1095cbd4f300 x2 : 0000000000000000 [ 192.067380] x1 : ffff1095c2b98000 x0 : 0000000000000000 [ 192.067667] Call trace: [ 192.067734] put_iova_vmas+0x94/0xa0 [msm] [ 192.068078] msm_gem_free_object+0xb4/0x110 [msm] [ 192.068399] drm_gem_object_free+0x1c/0x30 [drm] [ 192.068717] drm_gem_object_handle_put_unlocked+0xf0/0xf8 [drm] [ 192.069032] drm_gem_object_release_handle+0x6c/0x88 [drm] [ 192.069349] drm_gem_handle_delete+0x68/0xc0 [drm] [ 192.069666] drm_gem_close_ioctl+0x30/0x48 [drm] [ 192.069984] drm_ioctl_kernel+0xc0/0x110 [drm] [ 192.070303] drm_ioctl+0x210/0x440 [drm] [ 192.070588] __arm64_sys_ioctl+0xa8/0xf0 [ 192.070599] el0_svc_common.constprop.0+0x74/0x190 [ 192.070608] do_el0_svc+0x24/0x90 [ 192.070618] el0_svc+0x14/0x20 [ 192.070903] el0_sync_handler+0xb0/0xb8 [ 192.070911] el0_sync+0x174/0x180 [ 192.070918] ---[ end trace bee6b12a899001a3 ]--- [ 192.072140] ------------[ cut here ]------------ Fixes: 9b73bde39cf2 ("drm/msm: Fix use-after-free in msm_gem with carveout") Signed-off-by: Rob Clark Acked-by: Iskren Chernev --- drivers/gpu/drm/msm/msm_gem.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 68a6c7eacc0a7..a21be5b910ff6 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -990,6 +990,8 @@ void msm_gem_free_object(struct drm_gem_object *obj) if (msm_obj->pages) kvfree(msm_obj->pages); + put_iova_vmas(obj); + /* dma_buf_detach() grabs resv lock, so we need to unlock * prior to drm_prime_gem_destroy */ @@ -999,11 +1001,10 @@ void msm_gem_free_object(struct drm_gem_object *obj) } else { msm_gem_vunmap(obj); put_pages(obj); + put_iova_vmas(obj); msm_gem_unlock(obj); } - put_iova_vmas(obj); - drm_gem_object_release(obj); kfree(msm_obj); -- GitLab From f70cc33029fca3cf62bffb15102ea42eb4d097ac Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Sun, 6 Dec 2020 00:14:48 +0100 Subject: [PATCH 0622/1894] rtc: fix RTC removal Since the rtc_register_device, removing an RTC device will end with a refcount_t: underflow; use-after-free warning since put_device is called twice in the device tear down path. Fixes: fdcfd854333b ("rtc: rework rtc_register_device() resource management") Signed-off-by: Alexandre Belloni Reviewed-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20201205231449.610980-1-alexandre.belloni@bootlin.com --- drivers/rtc/class.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c index e6b44b7c4ad3a..5c6748dfa55df 100644 --- a/drivers/rtc/class.c +++ b/drivers/rtc/class.c @@ -335,7 +335,6 @@ static void devm_rtc_unregister_device(void *data) cdev_device_del(&rtc->char_dev, &rtc->dev); rtc->ops = NULL; mutex_unlock(&rtc->ops_lock); - put_device(&rtc->dev); } static void devm_rtc_release_device(void *res) -- GitLab From a00a3f29b2a6572108572359558c441da51dad70 Mon Sep 17 00:00:00 2001 From: Zong Li Date: Thu, 26 Nov 2020 11:00:43 +0800 Subject: [PATCH 0623/1894] dt-bindings: fu740: prci: add YAML documentation for the FU740 PRCI Add YAML DT binding documentation for the SiFive FU740 PRCI. The link of unmatched board as follow, the U740-C000 manual would be present in the same page later. https://www.sifive.com/boards/hifive-unmatched Passes dt_binding_check. Signed-off-by: Zong Li Link: https://lore.kernel.org/r/20201126030043.67390-1-zong.li@sifive.com Reviewed-by: Rob Herring Signed-off-by: Stephen Boyd --- .../bindings/clock/sifive/fu740-prci.yaml | 60 +++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 Documentation/devicetree/bindings/clock/sifive/fu740-prci.yaml diff --git a/Documentation/devicetree/bindings/clock/sifive/fu740-prci.yaml b/Documentation/devicetree/bindings/clock/sifive/fu740-prci.yaml new file mode 100644 index 0000000000000..e17143cac316f --- /dev/null +++ b/Documentation/devicetree/bindings/clock/sifive/fu740-prci.yaml @@ -0,0 +1,60 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +# Copyright (C) 2020 SiFive, Inc. +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/sifive/fu740-prci.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: SiFive FU740 Power Reset Clock Interrupt Controller (PRCI) + +maintainers: + - Zong Li + - Paul Walmsley + +description: + On the FU740 family of SoCs, most system-wide clock and reset integration + is via the PRCI IP block. + The clock consumer should specify the desired clock via the clock ID + macros defined in include/dt-bindings/clock/sifive-fu740-prci.h. + These macros begin with PRCI_CLK_. + + The hfclk and rtcclk nodes are required, and represent physical + crystals or resonators located on the PCB. These nodes should be present + underneath /, rather than /soc. + +properties: + compatible: + const: sifive,fu740-c000-prci + + reg: + maxItems: 1 + + clocks: + items: + - description: high frequency clock. + - description: RTL clock. + + clock-names: + items: + - const: hfclk + - const: rtcclk + + "#clock-cells": + const: 1 + +required: + - compatible + - reg + - clocks + - "#clock-cells" + +additionalProperties: false + +examples: + - | + prci: clock-controller@10000000 { + compatible = "sifive,fu740-c000-prci"; + reg = <0x10000000 0x1000>; + clocks = <&hfclk>, <&rtcclk>; + #clock-cells = <1>; + }; -- GitLab From 5ae96d779ff3eeb2977919ff311a6c8849943c2d Mon Sep 17 00:00:00 2001 From: Enrico Weigelt Date: Tue, 15 Dec 2020 17:35:31 +0100 Subject: [PATCH 0624/1894] libnvdimm: Cleanup include of badblocks.h * drivers/nvdimm/core.c doesn't use anything from badblocks.h on its own, thus including it isn't needed. There's indeed indirect use, via funcs in nd.h, but this one already includes badblocks.h. * drivers/nvdimm/claim.c calls stuff from badblocks.h and therefore should include it on its own (instead of relying any other header doing that) * drivers/nvdimm/btt.h doesn't really need anything from badblocks.h and can easily live with a forward declaration of struct badblocks (just having pointers to it, but not dereferencing it anywhere) Signed-off-by: Enrico Weigelt Link: https://lore.kernel.org/r/20201215163531.21446-1-info@metux.net Signed-off-by: Dan Williams --- drivers/nvdimm/btt.h | 3 ++- drivers/nvdimm/claim.c | 1 + drivers/nvdimm/core.c | 1 - 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/nvdimm/btt.h b/drivers/nvdimm/btt.h index 2e258bee7db24..aa53e0b769bd3 100644 --- a/drivers/nvdimm/btt.h +++ b/drivers/nvdimm/btt.h @@ -7,7 +7,6 @@ #ifndef _LINUX_BTT_H #define _LINUX_BTT_H -#include #include #define BTT_SIG_LEN 16 @@ -197,6 +196,8 @@ struct arena_info { int log_index[2]; }; +struct badblocks; + /** * struct btt - handle for a BTT instance * @btt_disk: Pointer to the gendisk for BTT device diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c index 5a7c80053c624..030dbde6b0882 100644 --- a/drivers/nvdimm/claim.c +++ b/drivers/nvdimm/claim.c @@ -4,6 +4,7 @@ */ #include #include +#include #include "nd-core.h" #include "pmem.h" #include "pfn.h" diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c index c21ba0602029c..7de592d7eff45 100644 --- a/drivers/nvdimm/core.c +++ b/drivers/nvdimm/core.c @@ -3,7 +3,6 @@ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. */ #include -#include #include #include #include -- GitLab From 4e6a7b3bbd5a6f9e6f0c5c3ad976ed116c7ade79 Mon Sep 17 00:00:00 2001 From: Zheng Yongjun Date: Mon, 14 Dec 2020 21:45:06 +0800 Subject: [PATCH 0625/1894] device-dax/pmem: Convert comma to semicolon Replace a comma between expression statements by a semicolon. Signed-off-by: Zheng Yongjun Link: https://lore.kernel.org/r/20201214134506.4831-1-zhengyongjun3@huawei.com Signed-off-by: Dan Williams --- drivers/dax/pmem/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dax/pmem/core.c b/drivers/dax/pmem/core.c index 62b26bfceab12..062e8bc14223c 100644 --- a/drivers/dax/pmem/core.c +++ b/drivers/dax/pmem/core.c @@ -52,7 +52,7 @@ struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys) /* adjust the dax_region range to the start of data */ range = pgmap.range; - range.start += offset, + range.start += offset; dax_region = alloc_dax_region(dev, region_id, &range, nd_region->target_node, le32_to_cpu(pfn_sb->align), IORESOURCE_DAX_STATIC); -- GitLab From 1aa574312518ef1d60d2dc62d58f7021db3b163a Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Tue, 1 Dec 2020 21:59:29 +0800 Subject: [PATCH 0626/1894] device-dax/core: Fix memory leak when rmmod dax.ko When I repeatedly modprobe and rmmod dax.ko, kmemleak report a memory leak as follows: unreferenced object 0xffff9a5588c05088 (size 8): comm "modprobe", pid 261, jiffies 4294693644 (age 42.063s) ... backtrace: [<00000000e007ced0>] kstrdup+0x35/0x70 [<000000002ae73897>] kstrdup_const+0x3d/0x50 [<000000002b00c9c3>] kvasprintf_const+0xbc/0xf0 [<000000008023282f>] kobject_set_name_vargs+0x3b/0xd0 [<00000000d2cbaa4e>] kobject_set_name+0x62/0x90 [<00000000202e7a22>] bus_register+0x7f/0x2b0 [<000000000b77792c>] 0xffffffffc02840f7 [<000000002d5be5ac>] 0xffffffffc02840b4 [<00000000dcafb7cd>] do_one_initcall+0x58/0x240 [<00000000049fe480>] do_init_module+0x56/0x1e2 [<0000000022671491>] load_module+0x2517/0x2840 [<000000001a2201cb>] __do_sys_finit_module+0x9c/0xe0 [<000000003eb304e7>] do_syscall_64+0x33/0x40 [<0000000051c5fd06>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 When rmmod dax is executed, dax_bus_exit() is missing. This patch can fix this bug. Fixes: 9567da0b408a ("device-dax: Introduce bus + driver model") Cc: Reported-by: Hulk Robot Signed-off-by: Wang Hai Link: https://lore.kernel.org/r/20201201135929.66530-1-wanghai38@huawei.com Signed-off-by: Dan Williams --- drivers/dax/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dax/super.c b/drivers/dax/super.c index edc279be3e596..cadbd0a1a1ef0 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -752,6 +752,7 @@ err_chrdev: static void __exit dax_core_exit(void) { + dax_bus_exit(); unregister_chrdev_region(dax_devt, MINORMASK+1); ida_destroy(&dax_minor_ida); dax_fs_exit(); -- GitLab From 161b838e25c6f83495e27e3f546b893622d442bf Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 14 Dec 2020 23:40:15 +0000 Subject: [PATCH 0627/1894] netfilter: nftables: fix incorrect increment of loop counter The intention of the err_expr cleanup path is to iterate over the allocated expr_array objects and free them, starting from i - 1 and working down to the start of the array. Currently the loop counter is being incremented instead of decremented and also the index i is being used instead of k, repeatedly destroying the same expr_array element. Fix this by decrementing k and using k as the index into expr_array. Addresses-Coverity: ("Infinite loop") Fixes: 8cfd9b0f8515 ("netfilter: nftables: generalize set expressions support") Signed-off-by: Colin Ian King Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 8d5aa0ac45f4d..4186b1e52d584 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5254,8 +5254,8 @@ static int nft_set_elem_expr_clone(const struct nft_ctx *ctx, return 0; err_expr: - for (k = i - 1; k >= 0; k++) - nft_expr_destroy(ctx, expr_array[i]); + for (k = i - 1; k >= 0; k--) + nft_expr_destroy(ctx, expr_array[k]); return -ENOMEM; } -- GitLab From f12ad423c4af877b2e4b5a80928b95195fccab04 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 6 Dec 2020 22:12:54 +0100 Subject: [PATCH 0628/1894] tick: Remove pointless cpu valid check in hotplug code tick_handover_do_timer() which is invoked when a CPU is unplugged has a check for cpumask_first(cpu_online_mask) when it tries to hand over the tick update duty. Checking the result of cpumask_first() there is pointless because if the online mask is empty at this point, then this would be the last CPU in the system going offline, which is impossible. There is always at least one CPU remaining. If online mask would be really empty then the timer duty would be the least of the resulting problems. Remove the well meant check simply because it is pointless and confusing. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20201206212002.582579516@linutronix.de --- kernel/time/tick-common.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index a03764df5366e..9d3a22510babb 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -407,17 +407,13 @@ EXPORT_SYMBOL_GPL(tick_broadcast_oneshot_control); /* * Transfer the do_timer job away from a dying cpu. * - * Called with interrupts disabled. Not locking required. If + * Called with interrupts disabled. No locking required. If * tick_do_timer_cpu is owned by this cpu, nothing can change it. */ void tick_handover_do_timer(void) { - if (tick_do_timer_cpu == smp_processor_id()) { - int cpu = cpumask_first(cpu_online_mask); - - tick_do_timer_cpu = (cpu < nr_cpu_ids) ? cpu : - TICK_DO_TIMER_NONE; - } + if (tick_do_timer_cpu == smp_processor_id()) + tick_do_timer_cpu = cpumask_first(cpu_online_mask); } /* -- GitLab From ba8ea8e7dd6e1662e34e730eadfc52aa6816f9dd Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 6 Dec 2020 22:12:55 +0100 Subject: [PATCH 0629/1894] tick/sched: Remove bogus boot "safety" check can_stop_idle_tick() checks whether the do_timer() duty has been taken over by a CPU on boot. That's silly because the boot CPU always takes over with the initial clockevent device. But even if no CPU would have installed a clockevent and taken over the duty then the question whether the tick on the current CPU can be stopped or not is moot. In that case the current CPU would have no clockevent either, so there would be nothing to keep ticking. Remove it. Signed-off-by: Thomas Gleixner Acked-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20201206212002.725238293@linutronix.de --- kernel/time/tick-sched.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index a9e68936822d4..5fbc748f00580 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -991,13 +991,6 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) */ if (tick_do_timer_cpu == cpu) return false; - /* - * Boot safety: make sure the timekeeping duty has been - * assigned before entering dyntick-idle mode, - * tick_do_timer_cpu is TICK_DO_TIMER_BOOT - */ - if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_BOOT)) - return false; /* Should not happen for nohz-full */ if (WARN_ON_ONCE(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) -- GitLab From 44f6a7c0755d8dd453c70557e11687bb080a6f21 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 14 Dec 2020 16:04:20 -0600 Subject: [PATCH 0630/1894] objtool: Fix seg fault with Clang non-section symbols The Clang assembler likes to strip section symbols, which means objtool can't reference some text code by its section. This confuses objtool greatly, causing it to seg fault. The fix is similar to what was done before, for ORC reloc generation: e81e07244325 ("objtool: Support Clang non-section symbols in ORC generation") Factor out that code into a common helper and use it for static call reloc generation as well. Reported-by: Arnd Bergmann Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Nick Desaulniers Reviewed-by: Miroslav Benes Link: https://github.com/ClangBuiltLinux/linux/issues/1207 Link: https://lkml.kernel.org/r/ba6b6c0f0dd5acbba66e403955a967d9fdd1726a.1607983452.git.jpoimboe@redhat.com --- tools/objtool/check.c | 11 +++++++++-- tools/objtool/elf.c | 26 ++++++++++++++++++++++++++ tools/objtool/elf.h | 2 ++ tools/objtool/orc_gen.c | 29 +++++------------------------ 4 files changed, 42 insertions(+), 26 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index c6ab44543c92a..5f8d3eed78a18 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -467,13 +467,20 @@ static int create_static_call_sections(struct objtool_file *file) /* populate reloc for 'addr' */ reloc = malloc(sizeof(*reloc)); + if (!reloc) { perror("malloc"); return -1; } memset(reloc, 0, sizeof(*reloc)); - reloc->sym = insn->sec->sym; - reloc->addend = insn->offset; + + insn_to_reloc_sym_addend(insn->sec, insn->offset, reloc); + if (!reloc->sym) { + WARN_FUNC("static call tramp: missing containing symbol", + insn->sec, insn->offset); + return -1; + } + reloc->type = R_X86_64_PC32; reloc->offset = idx * sizeof(struct static_call_site); reloc->sec = reloc_sec; diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 4e1d7460574b4..be89c741ba9a0 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -262,6 +262,32 @@ struct reloc *find_reloc_by_dest(const struct elf *elf, struct section *sec, uns return find_reloc_by_dest_range(elf, sec, offset, 1); } +void insn_to_reloc_sym_addend(struct section *sec, unsigned long offset, + struct reloc *reloc) +{ + if (sec->sym) { + reloc->sym = sec->sym; + reloc->addend = offset; + return; + } + + /* + * The Clang assembler strips section symbols, so we have to reference + * the function symbol instead: + */ + reloc->sym = find_symbol_containing(sec, offset); + if (!reloc->sym) { + /* + * Hack alert. This happens when we need to reference the NOP + * pad insn immediately after the function. + */ + reloc->sym = find_symbol_containing(sec, offset - 1); + } + + if (reloc->sym) + reloc->addend = offset - reloc->sym->offset; +} + static int read_sections(struct elf *elf) { Elf_Scn *s = NULL; diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h index 807f8c6700974..e6890cc70a25b 100644 --- a/tools/objtool/elf.h +++ b/tools/objtool/elf.h @@ -140,6 +140,8 @@ struct reloc *find_reloc_by_dest(const struct elf *elf, struct section *sec, uns struct reloc *find_reloc_by_dest_range(const struct elf *elf, struct section *sec, unsigned long offset, unsigned int len); struct symbol *find_func_containing(struct section *sec, unsigned long offset); +void insn_to_reloc_sym_addend(struct section *sec, unsigned long offset, + struct reloc *reloc); int elf_rebuild_reloc_section(struct elf *elf, struct section *sec); #define for_each_sec(file, sec) \ diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c index 235663b96adc7..9ce68b385a1b8 100644 --- a/tools/objtool/orc_gen.c +++ b/tools/objtool/orc_gen.c @@ -105,30 +105,11 @@ static int create_orc_entry(struct elf *elf, struct section *u_sec, struct secti } memset(reloc, 0, sizeof(*reloc)); - if (insn_sec->sym) { - reloc->sym = insn_sec->sym; - reloc->addend = insn_off; - } else { - /* - * The Clang assembler doesn't produce section symbols, so we - * have to reference the function symbol instead: - */ - reloc->sym = find_symbol_containing(insn_sec, insn_off); - if (!reloc->sym) { - /* - * Hack alert. This happens when we need to reference - * the NOP pad insn immediately after the function. - */ - reloc->sym = find_symbol_containing(insn_sec, - insn_off - 1); - } - if (!reloc->sym) { - WARN("missing symbol for insn at offset 0x%lx\n", - insn_off); - return -1; - } - - reloc->addend = insn_off - reloc->sym->offset; + insn_to_reloc_sym_addend(insn_sec, insn_off, reloc); + if (!reloc->sym) { + WARN("missing symbol for insn at offset 0x%lx", + insn_off); + return -1; } reloc->type = R_X86_64_PC32; -- GitLab From 4ad2d3cf2a299645bdc6d72e5b8ee11b2ed147ac Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 16 Dec 2020 11:28:59 +0000 Subject: [PATCH 0631/1894] ASoC: codecs: fix spelling mistake in Kconfig "comunicate" -> "communicate" There is a spelling mistake in the Kconfig help text. Fix it. Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20201216112859.11564-1-colin.king@canonical.com Signed-off-by: Mark Brown --- sound/soc/codecs/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index 5e4e681127912..ac63e7c176c1a 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -458,7 +458,7 @@ config SND_SOC_ADAU7118_HW help Enable support for the Analog Devices ADAU7118 8 Channel PDM-to-I2S/TDM Converter. In this mode, the device works in standalone mode which - means that there is no bus to comunicate with it. Stereo mode is not + means that there is no bus to communicate with it. Stereo mode is not supported in this mode. To compile this driver as a module, choose M here: the module -- GitLab From e49037ad12e47cd34239b99b010c5438844923af Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 16 Dec 2020 12:59:13 +0000 Subject: [PATCH 0632/1894] ASoC: SOF: Fix spelling mistake in Kconfig "ond" -> "and" There is a spelling mistake in the Kconfig help text. Fix it. Signed-off-by: Colin Ian King Acked-by: Kai Vehmanen Link: https://lore.kernel.org/r/20201216125913.16041-1-colin.king@canonical.com Signed-off-by: Mark Brown --- sound/soc/sof/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/sof/Kconfig b/sound/soc/sof/Kconfig index 031dad5fc4c70..3e8b6c035ce3f 100644 --- a/sound/soc/sof/Kconfig +++ b/sound/soc/sof/Kconfig @@ -122,7 +122,7 @@ config SND_SOC_SOF_DEBUG_XRUN_STOP bool "SOF stop on XRUN" help This option forces PCMs to stop on any XRUN event. This is useful to - preserve any trace data ond pipeline status prior to the XRUN. + preserve any trace data and pipeline status prior to the XRUN. Say Y if you are debugging SOF FW pipeline XRUNs. If unsure select "N". -- GitLab From acd894aee3149c15847bc4f0690fccba59ced5e7 Mon Sep 17 00:00:00 2001 From: shengjiu wang Date: Wed, 16 Dec 2020 18:44:24 +0800 Subject: [PATCH 0633/1894] ASoC: imx-hdmi: Fix warning of the uninitialized variable ret When condition ((hdmi_out && hdmi_in) || (!hdmi_out && !hdmi_in)) is true, then goto fail, the uninitialized variable ret will be returned. Signed-off-by: shengjiu wang Reported-by: kernel test robot Acked-by: Nicolin Chen Fixes: 6a5f850aa83a ("ASoC: fsl: Add imx-hdmi machine driver") Reviewed-by: Fabio Estevam Link: https://lore.kernel.org/r/1608115464-18710-1-git-send-email-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- sound/soc/fsl/imx-hdmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/fsl/imx-hdmi.c b/sound/soc/fsl/imx-hdmi.c index 2c2a76a719401..ede4a9ad1054c 100644 --- a/sound/soc/fsl/imx-hdmi.c +++ b/sound/soc/fsl/imx-hdmi.c @@ -164,6 +164,7 @@ static int imx_hdmi_probe(struct platform_device *pdev) if ((hdmi_out && hdmi_in) || (!hdmi_out && !hdmi_in)) { dev_err(&pdev->dev, "Invalid HDMI DAI link\n"); + ret = -EINVAL; goto fail; } -- GitLab From 13733775326ea9eb81c6148ad60c43b8d231a343 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 16 Dec 2020 11:26:08 +0000 Subject: [PATCH 0634/1894] ASoC: atmel: fix spelling mistake in Kconfig "programable" -> "programmable" There are a couple of spelling mistakes in the Kconfig help text. Fix them. Signed-off-by: Colin Ian King Reviewed-by: Codrin Ciubotariu Link: https://lore.kernel.org/r/20201216112608.11385-1-colin.king@canonical.com Signed-off-by: Mark Brown --- sound/soc/atmel/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/atmel/Kconfig b/sound/soc/atmel/Kconfig index 142373ec411ad..9fe9471f4514d 100644 --- a/sound/soc/atmel/Kconfig +++ b/sound/soc/atmel/Kconfig @@ -143,7 +143,7 @@ config SND_MCHP_SOC_SPDIFTX - sama7g5 This S/PDIF TX driver is compliant with IEC-60958 standard and - includes programable User Data and Channel Status fields. + includes programmable User Data and Channel Status fields. config SND_MCHP_SOC_SPDIFRX tristate "Microchip ASoC driver for boards using S/PDIF RX" @@ -157,5 +157,5 @@ config SND_MCHP_SOC_SPDIFRX - sama7g5 This S/PDIF RX driver is compliant with IEC-60958 standard and - includes programable User Data and Channel Status fields. + includes programmable User Data and Channel Status fields. endif -- GitLab From df9716ec9ade3d2e190a2aac199557d30a3a8416 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 15 Dec 2020 13:20:24 +0000 Subject: [PATCH 0635/1894] regulator: pf8x00: Use specific compatible strings for devices The pf8x00 driver supports three devices, the DT compatible strings and I2C IDs should enumerate these specifically rather than using a wildcard so that we don't collide with anything incompatible in the same ID range in the future and so that we can handle any software visible differences between the variants we find. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20201215132024.13356-1-broonie@kernel.org Signed-off-by: Mark Brown --- .../bindings/regulator/nxp,pf8x00-regulator.yaml | 6 ++++-- drivers/regulator/pf8x00-regulator.c | 8 ++++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/regulator/nxp,pf8x00-regulator.yaml b/Documentation/devicetree/bindings/regulator/nxp,pf8x00-regulator.yaml index a6c259ce97850..956156fe52a3e 100644 --- a/Documentation/devicetree/bindings/regulator/nxp,pf8x00-regulator.yaml +++ b/Documentation/devicetree/bindings/regulator/nxp,pf8x00-regulator.yaml @@ -19,7 +19,9 @@ description: | properties: compatible: enum: - - nxp,pf8x00 + - nxp,pf8100 + - nxp,pf8121a + - nxp,pf8200 reg: maxItems: 1 @@ -118,7 +120,7 @@ examples: #size-cells = <0>; pmic@8 { - compatible = "nxp,pf8x00"; + compatible = "nxp,pf8100"; reg = <0x08>; regulators { diff --git a/drivers/regulator/pf8x00-regulator.c b/drivers/regulator/pf8x00-regulator.c index 308c27fa6ea80..af9918cd27aa4 100644 --- a/drivers/regulator/pf8x00-regulator.c +++ b/drivers/regulator/pf8x00-regulator.c @@ -469,13 +469,17 @@ static int pf8x00_i2c_probe(struct i2c_client *client) } static const struct of_device_id pf8x00_dt_ids[] = { - { .compatible = "nxp,pf8x00",}, + { .compatible = "nxp,pf8100",}, + { .compatible = "nxp,pf8121a",}, + { .compatible = "nxp,pf8200",}, { } }; MODULE_DEVICE_TABLE(of, pf8x00_dt_ids); static const struct i2c_device_id pf8x00_i2c_id[] = { - { "pf8x00", 0 }, + { "pf8100", 0 }, + { "pf8121a", 0 }, + { "pf8200", 0 }, {}, }; MODULE_DEVICE_TABLE(i2c, pf8x00_i2c_id); -- GitLab From 2d18e54dd8662442ef5898c6bdadeaf90b3cebbc Mon Sep 17 00:00:00 2001 From: Qinglang Miao Date: Thu, 10 Dec 2020 09:29:43 +0800 Subject: [PATCH 0636/1894] cgroup: Fix memory leak when parsing multiple source parameters A memory leak is found in cgroup1_parse_param() when multiple source parameters overwrite fc->source in the fs_context struct without free. unreferenced object 0xffff888100d930e0 (size 16): comm "mount", pid 520, jiffies 4303326831 (age 152.783s) hex dump (first 16 bytes): 74 65 73 74 6c 65 61 6b 00 00 00 00 00 00 00 00 testleak........ backtrace: [<000000003e5023ec>] kmemdup_nul+0x2d/0xa0 [<00000000377dbdaa>] vfs_parse_fs_string+0xc0/0x150 [<00000000cb2b4882>] generic_parse_monolithic+0x15a/0x1d0 [<000000000f750198>] path_mount+0xee1/0x1820 [<0000000004756de2>] do_mount+0xea/0x100 [<0000000094cafb0a>] __x64_sys_mount+0x14b/0x1f0 Fix this bug by permitting a single source parameter and rejecting with an error all subsequent ones. Fixes: 8d2451f4994f ("cgroup1: switch to option-by-option parsing") Reported-by: Hulk Robot Signed-off-by: Qinglang Miao Reviewed-by: Zefan Li Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup-v1.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 191c329e482ad..32596fdbcd5b8 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -908,6 +908,8 @@ int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param) opt = fs_parse(fc, cgroup1_fs_parameters, param, &result); if (opt == -ENOPARAM) { if (strcmp(param->key, "source") == 0) { + if (fc->source) + return invalf(fc, "Multiple sources not supported"); fc->source = param->string; param->string = NULL; return 0; -- GitLab From 846f151d03f796bf1b303784edaf3a22e3f51377 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 4 Dec 2020 17:51:52 +0100 Subject: [PATCH 0637/1894] drm/ttm: fix unused function warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ttm_pool_type_count() is not used when debugfs is disabled: drivers/gpu/drm/ttm/ttm_pool.c:243:21: error: unused function 'ttm_pool_type_count' [-Werror,-Wunused-function] static unsigned int ttm_pool_type_count(struct ttm_pool_type *pt) Move the definition into the #ifdef block. Fixes: d099fc8f540a ("drm/ttm: new TT backend allocation pool v3") Signed-off-by: Arnd Bergmann Reviewed-by: Martin Peres Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/405695/ Signed-off-by: Christian König --- drivers/gpu/drm/ttm/ttm_pool.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 5455b20447594..7b2f60616750d 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -239,21 +239,6 @@ static struct page *ttm_pool_type_take(struct ttm_pool_type *pt) return p; } -/* Count the number of pages available in a pool_type */ -static unsigned int ttm_pool_type_count(struct ttm_pool_type *pt) -{ - unsigned int count = 0; - struct page *p; - - spin_lock(&pt->lock); - /* Only used for debugfs, the overhead doesn't matter */ - list_for_each_entry(p, &pt->pages, lru) - ++count; - spin_unlock(&pt->lock); - - return count; -} - /* Initialize and add a pool type to the global shrinker list */ static void ttm_pool_type_init(struct ttm_pool_type *pt, struct ttm_pool *pool, enum ttm_caching caching, unsigned int order) @@ -543,6 +528,20 @@ void ttm_pool_fini(struct ttm_pool *pool) EXPORT_SYMBOL(ttm_pool_fini); #ifdef CONFIG_DEBUG_FS +/* Count the number of pages available in a pool_type */ +static unsigned int ttm_pool_type_count(struct ttm_pool_type *pt) +{ + unsigned int count = 0; + struct page *p; + + spin_lock(&pt->lock); + /* Only used for debugfs, the overhead doesn't matter */ + list_for_each_entry(p, &pt->pages, lru) + ++count; + spin_unlock(&pt->lock); + + return count; +} /* Dump information about the different pool types */ static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt, -- GitLab From 34cdf405aa5de827b8bef79a6c82c39120b3729b Mon Sep 17 00:00:00 2001 From: Chris Chiu Date: Wed, 16 Dec 2020 20:52:00 +0800 Subject: [PATCH 0638/1894] ALSA: hda/realtek: Remove dummy lineout on Acer TravelMate P648/P658 Acer TravelMate laptops P648/P658 series with codec ALC282 only have one physical jack for headset but there's a confusing lineout pin on NID 0x1b reported. Audio applications hence misunderstand that there are a speaker and a lineout, and take the lineout as the default audio output. Add a new quirk to remove the useless lineout and enable the pin 0x18 for jack sensing and headset microphone. Signed-off-by: Chris Chiu Signed-off-by: Jian-Hong Pan Cc: Link: https://lore.kernel.org/r/20201216125200.27053-1-chiu@endlessos.org Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 41cc64036f227..71da03f950cc0 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6368,6 +6368,7 @@ enum { ALC287_FIXUP_HP_GPIO_LED, ALC256_FIXUP_HP_HEADSET_MIC, ALC236_FIXUP_DELL_AIO_HEADSET_MIC, + ALC282_FIXUP_ACER_DISABLE_LINEOUT, }; static const struct hda_fixup alc269_fixups[] = { @@ -7791,6 +7792,16 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE }, + [ALC282_FIXUP_ACER_DISABLE_LINEOUT] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x1b, 0x411111f0 }, + { 0x18, 0x01a1913c }, /* use as headset mic, without its own jack detect */ + { }, + }, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MODE + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -8564,6 +8575,22 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x12, 0x90a60140}, {0x19, 0x04a11030}, {0x21, 0x04211020}), + SND_HDA_PIN_QUIRK(0x10ec0282, 0x1025, "Acer", ALC282_FIXUP_ACER_DISABLE_LINEOUT, + ALC282_STANDARD_PINS, + {0x12, 0x90a609c0}, + {0x18, 0x03a11830}, + {0x19, 0x04a19831}, + {0x1a, 0x0481303f}, + {0x1b, 0x04211020}, + {0x21, 0x0321101f}), + SND_HDA_PIN_QUIRK(0x10ec0282, 0x1025, "Acer", ALC282_FIXUP_ACER_DISABLE_LINEOUT, + ALC282_STANDARD_PINS, + {0x12, 0x90a60940}, + {0x18, 0x03a11830}, + {0x19, 0x04a19831}, + {0x1a, 0x0481303f}, + {0x1b, 0x04211020}, + {0x21, 0x0321101f}), SND_HDA_PIN_QUIRK(0x10ec0283, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, ALC282_STANDARD_PINS, {0x12, 0x90a60130}, -- GitLab From 8075c3005e4b1efa12dbbf6e84bc412a713de92c Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 16 Dec 2020 00:49:31 +0000 Subject: [PATCH 0639/1894] dma-buf: cma_heap: Include linux/vmalloc.h to fix build failures on MIPS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to include in order for MIPS to find vmap(), as it doesn't otherwise get included there. Without this patch, one can hit the following build error: drivers/dma-buf/heaps/cma_heap.c: In function 'cma_heap_do_vmap': drivers/dma-buf/heaps/cma_heap.c:195:10: error: implicit declaration of function 'vmap' Cc: Sumit Semwal Cc: Liam Mark Cc: Laura Abbott Cc: Brian Starkey Cc: Hridya Valsaraju Cc: Suren Baghdasaryan Cc: Sandeep Patil Cc: Daniel Mentz Cc: Chris Goldsworthy Cc: Ørjan Eide Cc: Robin Murphy Cc: Ezequiel Garcia Cc: Simon Ser Cc: James Jones Cc: linux-media@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Fixes: a5d2d29e24be ("dma-buf: heaps: Move heap-helper logic into the cma_heap implementation") Reported-by: Guenter Roeck Signed-off-by: John Stultz Signed-off-by: Sumit Semwal Link: https://patchwork.freedesktop.org/patch/msgid/20201216004931.113505-1-john.stultz@linaro.org --- drivers/dma-buf/heaps/cma_heap.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma-buf/heaps/cma_heap.c b/drivers/dma-buf/heaps/cma_heap.c index 5e7c3436310c9..3c4e343011721 100644 --- a/drivers/dma-buf/heaps/cma_heap.c +++ b/drivers/dma-buf/heaps/cma_heap.c @@ -20,6 +20,7 @@ #include #include #include +#include struct cma_heap { -- GitLab From 1a3449c19407a28f7019a887cdf0d6ba2444751a Mon Sep 17 00:00:00 2001 From: Kamal Mostafa Date: Tue, 15 Dec 2020 10:20:10 -0800 Subject: [PATCH 0640/1894] selftests/bpf: Clarify build error if no vmlinux If Makefile cannot find any of the vmlinux's in its VMLINUX_BTF_PATHS list, it tries to run btftool incorrectly, with VMLINUX_BTF unset: bpftool btf dump file $(VMLINUX_BTF) format c Such that the keyword 'format' is misinterpreted as the path to vmlinux. The resulting build error message is fairly cryptic: GEN vmlinux.h Error: failed to load BTF from format: No such file or directory This patch makes the failure reason clearer by yielding this instead: Makefile:...: *** Cannot find a vmlinux for VMLINUX_BTF at any of "{paths}". Stop. Fixes: acbd06206bbb ("selftests/bpf: Add vmlinux.h selftest exercising tracing of syscalls") Signed-off-by: Kamal Mostafa Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201215182011.15755-1-kamal@canonical.com --- tools/testing/selftests/bpf/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 8c33e999319a9..c51df6b91befe 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -121,6 +121,9 @@ VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ /sys/kernel/btf/vmlinux \ /boot/vmlinux-$(shell uname -r) VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) +ifeq ($(VMLINUX_BTF),) +$(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)") +endif # Define simple and short `make test_progs`, `make test_sysctl`, etc targets # to build individual tests. -- GitLab From d45f89f7437d0f2c8275b4434096164db106384d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 16 Dec 2020 13:08:21 -0500 Subject: [PATCH 0641/1894] KVM: SVM: fix 32-bit compilation VCPU_REGS_R8...VCPU_REGS_R15 are not defined on 32-bit x86, so cull them from the synchronization of the VMSA. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 8b5ef0fe44901..e57847ff8bd20 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -529,6 +529,7 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm) save->rbp = svm->vcpu.arch.regs[VCPU_REGS_RBP]; save->rsi = svm->vcpu.arch.regs[VCPU_REGS_RSI]; save->rdi = svm->vcpu.arch.regs[VCPU_REGS_RDI]; +#ifdef CONFIG_X86_64 save->r8 = svm->vcpu.arch.regs[VCPU_REGS_R8]; save->r9 = svm->vcpu.arch.regs[VCPU_REGS_R9]; save->r10 = svm->vcpu.arch.regs[VCPU_REGS_R10]; @@ -537,6 +538,7 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm) save->r13 = svm->vcpu.arch.regs[VCPU_REGS_R13]; save->r14 = svm->vcpu.arch.regs[VCPU_REGS_R14]; save->r15 = svm->vcpu.arch.regs[VCPU_REGS_R15]; +#endif save->rip = svm->vcpu.arch.regs[VCPU_REGS_RIP]; /* Sync some non-GPR registers before encrypting */ -- GitLab From 146f1ed852a87b802ed6e71c31e189c64871383c Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Fri, 20 Nov 2020 11:37:52 +0530 Subject: [PATCH 0642/1894] ACPI: PM: s2idle: Add AMD support to handle _DSM Initial support for S2Idle based on the Intel implementation [1] does not work for AMD as the BIOS implementation for ACPI methods like the _DSM are not standardized. So, the way in which the UUID's were parsed and the ACPI packages were retrieved out of the ACPI objects are not the same between Intel and AMD. Add AMD support for S2Idle to parse the UUID, evaluate the _DSM methods, prepare the Idle constraint list etc. Link: https://uefi.org/sites/default/files/resources/Intel_ACPI_Low_Power_S0_Idle.pdf # [1] Signed-off-by: Shyam Sundar S K [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/sleep.c | 166 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 157 insertions(+), 9 deletions(-) diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index aff13bf4d9470..b929fd0d2e047 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -710,6 +710,11 @@ static const struct acpi_device_id lps0_device_ids[] = { #define ACPI_LPS0_ENTRY 5 #define ACPI_LPS0_EXIT 6 +/* AMD */ +#define ACPI_LPS0_DSM_UUID_AMD "e3f32452-febc-43ce-9039-932122d37721" +#define ACPI_LPS0_SCREEN_OFF_AMD 4 +#define ACPI_LPS0_SCREEN_ON_AMD 5 + static acpi_handle lps0_device_handle; static guid_t lps0_dsm_guid; static char lps0_dsm_func_mask; @@ -733,8 +738,124 @@ struct lpi_constraints { int min_dstate; }; +/* AMD */ +/* Device constraint entry structure */ +struct lpi_device_info_amd { + int revision; + int count; + union acpi_object *package; +}; + +/* Constraint package structure */ +struct lpi_device_constraint_amd { + char *name; + int enabled; + int function_states; + int min_dstate; +}; + static struct lpi_constraints *lpi_constraints_table; static int lpi_constraints_table_size; +static int rev_id; + +static void lpi_device_get_constraints_amd(void) +{ + union acpi_object *out_obj; + int i, j, k; + + out_obj = acpi_evaluate_dsm_typed(lps0_device_handle, &lps0_dsm_guid, + 1, ACPI_LPS0_GET_DEVICE_CONSTRAINTS, + NULL, ACPI_TYPE_PACKAGE); + + if (!out_obj) + return; + + acpi_handle_debug(lps0_device_handle, "_DSM function 1 eval %s\n", + out_obj ? "successful" : "failed"); + + for (i = 0; i < out_obj->package.count; i++) { + union acpi_object *package = &out_obj->package.elements[i]; + struct lpi_device_info_amd info = { }; + + if (package->type == ACPI_TYPE_INTEGER) { + switch (i) { + case 0: + info.revision = package->integer.value; + break; + case 1: + info.count = package->integer.value; + break; + } + } else if (package->type == ACPI_TYPE_PACKAGE) { + lpi_constraints_table = kcalloc(package->package.count, + sizeof(*lpi_constraints_table), + GFP_KERNEL); + + if (!lpi_constraints_table) + goto free_acpi_buffer; + + acpi_handle_info(lps0_device_handle, + "LPI: constraints list begin:\n"); + + for (j = 0; j < package->package.count; ++j) { + union acpi_object *info_obj = &package->package.elements[j]; + struct lpi_device_constraint_amd dev_info = {}; + struct lpi_constraints *list; + acpi_status status; + + for (k = 0; k < info_obj->package.count; ++k) { + union acpi_object *obj = &info_obj->package.elements[k]; + union acpi_object *obj_new; + + list = &lpi_constraints_table[lpi_constraints_table_size]; + list->min_dstate = -1; + + obj_new = &obj[k]; + switch (k) { + case 0: + dev_info.enabled = obj->integer.value; + break; + case 1: + dev_info.name = obj->string.pointer; + break; + case 2: + dev_info.function_states = obj->integer.value; + break; + case 3: + dev_info.min_dstate = obj->integer.value; + break; + } + + if (!dev_info.enabled || !dev_info.name || + !dev_info.min_dstate) + continue; + + status = acpi_get_handle(NULL, dev_info.name, + &list->handle); + if (ACPI_FAILURE(status)) + continue; + + acpi_handle_info(lps0_device_handle, + "Name:%s\n", dev_info.name); + + list->min_dstate = dev_info.min_dstate; + + if (list->min_dstate < 0) { + acpi_handle_info(lps0_device_handle, + "Incomplete constraint defined\n"); + continue; + } + } + lpi_constraints_table_size++; + } + } + } + + acpi_handle_info(lps0_device_handle, "LPI: constraints list end\n"); + +free_acpi_buffer: + ACPI_FREE(out_obj); +} static void lpi_device_get_constraints(void) { @@ -883,13 +1004,22 @@ static void acpi_sleep_run_lps0_dsm(unsigned int func) if (!(lps0_dsm_func_mask & (1 << func))) return; - out_obj = acpi_evaluate_dsm(lps0_device_handle, &lps0_dsm_guid, 1, func, NULL); + out_obj = acpi_evaluate_dsm(lps0_device_handle, &lps0_dsm_guid, rev_id, func, NULL); ACPI_FREE(out_obj); acpi_handle_debug(lps0_device_handle, "_DSM function %u evaluation %s\n", func, out_obj ? "successful" : "failed"); } +static bool acpi_match_vendor_name(void) +{ +#ifdef CONFIG_X86 + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + return true; +#endif + return false; +} + static int lps0_device_attach(struct acpi_device *adev, const struct acpi_device_id *not_used) { @@ -901,9 +1031,17 @@ static int lps0_device_attach(struct acpi_device *adev, if (!(acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0)) return 0; - guid_parse(ACPI_LPS0_DSM_UUID, &lps0_dsm_guid); + if (acpi_match_vendor_name()) { + guid_parse(ACPI_LPS0_DSM_UUID_AMD, &lps0_dsm_guid); + out_obj = acpi_evaluate_dsm(adev->handle, &lps0_dsm_guid, 0, 0, NULL); + rev_id = 0; + } else { + guid_parse(ACPI_LPS0_DSM_UUID, &lps0_dsm_guid); + out_obj = acpi_evaluate_dsm(adev->handle, &lps0_dsm_guid, 1, 0, NULL); + rev_id = 1; + } + /* Check if the _DSM is present and as expected. */ - out_obj = acpi_evaluate_dsm(adev->handle, &lps0_dsm_guid, 1, 0, NULL); if (!out_obj || out_obj->type != ACPI_TYPE_BUFFER) { acpi_handle_debug(adev->handle, "_DSM function 0 evaluation failed\n"); @@ -919,7 +1057,10 @@ static int lps0_device_attach(struct acpi_device *adev, lps0_device_handle = adev->handle; - lpi_device_get_constraints(); + if (acpi_match_vendor_name()) + lpi_device_get_constraints_amd(); + else + lpi_device_get_constraints(); /* * Use suspend-to-idle by default if the default suspend mode was not @@ -974,9 +1115,12 @@ static int acpi_s2idle_prepare_late(void) if (pm_debug_messages_on) lpi_check_constraints(); - acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_OFF); - acpi_sleep_run_lps0_dsm(ACPI_LPS0_ENTRY); - + if (acpi_match_vendor_name()) { + acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_OFF_AMD); + } else { + acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_OFF); + acpi_sleep_run_lps0_dsm(ACPI_LPS0_ENTRY); + } return 0; } @@ -1051,8 +1195,12 @@ static void acpi_s2idle_restore_early(void) if (!lps0_device_handle || sleep_no_lps0) return; - acpi_sleep_run_lps0_dsm(ACPI_LPS0_EXIT); - acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_ON); + if (acpi_match_vendor_name()) { + acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_ON_AMD); + } else { + acpi_sleep_run_lps0_dsm(ACPI_LPS0_EXIT); + acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_ON); + } } static void acpi_s2idle_restore(void) -- GitLab From c816e1ddf2b60b31d121118488c5a854d9a2fad9 Mon Sep 17 00:00:00 2001 From: Zong Li Date: Wed, 9 Dec 2020 17:49:12 +0800 Subject: [PATCH 0643/1894] clk: sifive: Extract prci core to common base Extract common core of prci driver to an independent file, it could allow other chips to reuse it. Separate SoCs-dependent code 'fu540' from prci core, then we can easily add 'fu740' later. Almost these changes are code movement. The different is adding the private data for each SoC use, so it needs to get match data in probe callback function, then use the data for initialization. Signed-off-by: Zong Li Reviewed-by: Pragnesh Patel Acked-by: Palmer Dabbelt Link: https://lore.kernel.org/r/20201209094916.17383-2-zong.li@sifive.com [sboyd@kernel.org: Include header to silence sparse] Signed-off-by: Stephen Boyd --- drivers/clk/sifive/Makefile | 2 +- drivers/clk/sifive/fu540-prci.c | 593 ++----------------------------- drivers/clk/sifive/fu540-prci.h | 21 ++ drivers/clk/sifive/sifive-prci.c | 395 ++++++++++++++++++++ drivers/clk/sifive/sifive-prci.h | 201 +++++++++++ 5 files changed, 641 insertions(+), 571 deletions(-) create mode 100644 drivers/clk/sifive/fu540-prci.h create mode 100644 drivers/clk/sifive/sifive-prci.c create mode 100644 drivers/clk/sifive/sifive-prci.h diff --git a/drivers/clk/sifive/Makefile b/drivers/clk/sifive/Makefile index 0797f14fef6b6..51b6ebc359e44 100644 --- a/drivers/clk/sifive/Makefile +++ b/drivers/clk/sifive/Makefile @@ -1,2 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_CLK_SIFIVE_FU540_PRCI) += fu540-prci.o +obj-$(CONFIG_CLK_SIFIVE_FU540_PRCI) += sifive-prci.o fu540-prci.o diff --git a/drivers/clk/sifive/fu540-prci.c b/drivers/clk/sifive/fu540-prci.c index a8901f90a61ac..83ced24b0b949 100644 --- a/drivers/clk/sifive/fu540-prci.c +++ b/drivers/clk/sifive/fu540-prci.c @@ -1,17 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2018-2019 SiFive, Inc. - * Wesley Terpstra - * Paul Walmsley - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * Copyright (C) 2018-2019 Wesley Terpstra + * Copyright (C) 2018-2019 Paul Walmsley + * Copyright (C) 2020 Zong Li * * The FU540 PRCI implements clock and reset control for the SiFive * FU540-C000 chip. This driver assumes that it has sole control @@ -24,464 +16,47 @@ * - SiFive FU540-C000 manual v1p0, Chapter 7 "Clocking and Reset" */ -#include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include -#include - -/* - * EXPECTED_CLK_PARENT_COUNT: how many parent clocks this driver expects: - * hfclk and rtcclk - */ -#define EXPECTED_CLK_PARENT_COUNT 2 - -/* - * Register offsets and bitmasks - */ - -/* COREPLLCFG0 */ -#define PRCI_COREPLLCFG0_OFFSET 0x4 -# define PRCI_COREPLLCFG0_DIVR_SHIFT 0 -# define PRCI_COREPLLCFG0_DIVR_MASK (0x3f << PRCI_COREPLLCFG0_DIVR_SHIFT) -# define PRCI_COREPLLCFG0_DIVF_SHIFT 6 -# define PRCI_COREPLLCFG0_DIVF_MASK (0x1ff << PRCI_COREPLLCFG0_DIVF_SHIFT) -# define PRCI_COREPLLCFG0_DIVQ_SHIFT 15 -# define PRCI_COREPLLCFG0_DIVQ_MASK (0x7 << PRCI_COREPLLCFG0_DIVQ_SHIFT) -# define PRCI_COREPLLCFG0_RANGE_SHIFT 18 -# define PRCI_COREPLLCFG0_RANGE_MASK (0x7 << PRCI_COREPLLCFG0_RANGE_SHIFT) -# define PRCI_COREPLLCFG0_BYPASS_SHIFT 24 -# define PRCI_COREPLLCFG0_BYPASS_MASK (0x1 << PRCI_COREPLLCFG0_BYPASS_SHIFT) -# define PRCI_COREPLLCFG0_FSE_SHIFT 25 -# define PRCI_COREPLLCFG0_FSE_MASK (0x1 << PRCI_COREPLLCFG0_FSE_SHIFT) -# define PRCI_COREPLLCFG0_LOCK_SHIFT 31 -# define PRCI_COREPLLCFG0_LOCK_MASK (0x1 << PRCI_COREPLLCFG0_LOCK_SHIFT) -/* DDRPLLCFG0 */ -#define PRCI_DDRPLLCFG0_OFFSET 0xc -# define PRCI_DDRPLLCFG0_DIVR_SHIFT 0 -# define PRCI_DDRPLLCFG0_DIVR_MASK (0x3f << PRCI_DDRPLLCFG0_DIVR_SHIFT) -# define PRCI_DDRPLLCFG0_DIVF_SHIFT 6 -# define PRCI_DDRPLLCFG0_DIVF_MASK (0x1ff << PRCI_DDRPLLCFG0_DIVF_SHIFT) -# define PRCI_DDRPLLCFG0_DIVQ_SHIFT 15 -# define PRCI_DDRPLLCFG0_DIVQ_MASK (0x7 << PRCI_DDRPLLCFG0_DIVQ_SHIFT) -# define PRCI_DDRPLLCFG0_RANGE_SHIFT 18 -# define PRCI_DDRPLLCFG0_RANGE_MASK (0x7 << PRCI_DDRPLLCFG0_RANGE_SHIFT) -# define PRCI_DDRPLLCFG0_BYPASS_SHIFT 24 -# define PRCI_DDRPLLCFG0_BYPASS_MASK (0x1 << PRCI_DDRPLLCFG0_BYPASS_SHIFT) -# define PRCI_DDRPLLCFG0_FSE_SHIFT 25 -# define PRCI_DDRPLLCFG0_FSE_MASK (0x1 << PRCI_DDRPLLCFG0_FSE_SHIFT) -# define PRCI_DDRPLLCFG0_LOCK_SHIFT 31 -# define PRCI_DDRPLLCFG0_LOCK_MASK (0x1 << PRCI_DDRPLLCFG0_LOCK_SHIFT) - -/* DDRPLLCFG1 */ -#define PRCI_DDRPLLCFG1_OFFSET 0x10 -# define PRCI_DDRPLLCFG1_CKE_SHIFT 24 -# define PRCI_DDRPLLCFG1_CKE_MASK (0x1 << PRCI_DDRPLLCFG1_CKE_SHIFT) - -/* GEMGXLPLLCFG0 */ -#define PRCI_GEMGXLPLLCFG0_OFFSET 0x1c -# define PRCI_GEMGXLPLLCFG0_DIVR_SHIFT 0 -# define PRCI_GEMGXLPLLCFG0_DIVR_MASK (0x3f << PRCI_GEMGXLPLLCFG0_DIVR_SHIFT) -# define PRCI_GEMGXLPLLCFG0_DIVF_SHIFT 6 -# define PRCI_GEMGXLPLLCFG0_DIVF_MASK (0x1ff << PRCI_GEMGXLPLLCFG0_DIVF_SHIFT) -# define PRCI_GEMGXLPLLCFG0_DIVQ_SHIFT 15 -# define PRCI_GEMGXLPLLCFG0_DIVQ_MASK (0x7 << PRCI_GEMGXLPLLCFG0_DIVQ_SHIFT) -# define PRCI_GEMGXLPLLCFG0_RANGE_SHIFT 18 -# define PRCI_GEMGXLPLLCFG0_RANGE_MASK (0x7 << PRCI_GEMGXLPLLCFG0_RANGE_SHIFT) -# define PRCI_GEMGXLPLLCFG0_BYPASS_SHIFT 24 -# define PRCI_GEMGXLPLLCFG0_BYPASS_MASK (0x1 << PRCI_GEMGXLPLLCFG0_BYPASS_SHIFT) -# define PRCI_GEMGXLPLLCFG0_FSE_SHIFT 25 -# define PRCI_GEMGXLPLLCFG0_FSE_MASK (0x1 << PRCI_GEMGXLPLLCFG0_FSE_SHIFT) -# define PRCI_GEMGXLPLLCFG0_LOCK_SHIFT 31 -# define PRCI_GEMGXLPLLCFG0_LOCK_MASK (0x1 << PRCI_GEMGXLPLLCFG0_LOCK_SHIFT) - -/* GEMGXLPLLCFG1 */ -#define PRCI_GEMGXLPLLCFG1_OFFSET 0x20 -# define PRCI_GEMGXLPLLCFG1_CKE_SHIFT 24 -# define PRCI_GEMGXLPLLCFG1_CKE_MASK (0x1 << PRCI_GEMGXLPLLCFG1_CKE_SHIFT) - -/* CORECLKSEL */ -#define PRCI_CORECLKSEL_OFFSET 0x24 -# define PRCI_CORECLKSEL_CORECLKSEL_SHIFT 0 -# define PRCI_CORECLKSEL_CORECLKSEL_MASK (0x1 << PRCI_CORECLKSEL_CORECLKSEL_SHIFT) - -/* DEVICESRESETREG */ -#define PRCI_DEVICESRESETREG_OFFSET 0x28 -# define PRCI_DEVICESRESETREG_DDR_CTRL_RST_N_SHIFT 0 -# define PRCI_DEVICESRESETREG_DDR_CTRL_RST_N_MASK (0x1 << PRCI_DEVICESRESETREG_DDR_CTRL_RST_N_SHIFT) -# define PRCI_DEVICESRESETREG_DDR_AXI_RST_N_SHIFT 1 -# define PRCI_DEVICESRESETREG_DDR_AXI_RST_N_MASK (0x1 << PRCI_DEVICESRESETREG_DDR_AXI_RST_N_SHIFT) -# define PRCI_DEVICESRESETREG_DDR_AHB_RST_N_SHIFT 2 -# define PRCI_DEVICESRESETREG_DDR_AHB_RST_N_MASK (0x1 << PRCI_DEVICESRESETREG_DDR_AHB_RST_N_SHIFT) -# define PRCI_DEVICESRESETREG_DDR_PHY_RST_N_SHIFT 3 -# define PRCI_DEVICESRESETREG_DDR_PHY_RST_N_MASK (0x1 << PRCI_DEVICESRESETREG_DDR_PHY_RST_N_SHIFT) -# define PRCI_DEVICESRESETREG_GEMGXL_RST_N_SHIFT 5 -# define PRCI_DEVICESRESETREG_GEMGXL_RST_N_MASK (0x1 << PRCI_DEVICESRESETREG_GEMGXL_RST_N_SHIFT) +#include -/* CLKMUXSTATUSREG */ -#define PRCI_CLKMUXSTATUSREG_OFFSET 0x2c -# define PRCI_CLKMUXSTATUSREG_TLCLKSEL_STATUS_SHIFT 1 -# define PRCI_CLKMUXSTATUSREG_TLCLKSEL_STATUS_MASK (0x1 << PRCI_CLKMUXSTATUSREG_TLCLKSEL_STATUS_SHIFT) +#include "fu540-prci.h" +#include "sifive-prci.h" -/* - * Private structures - */ +/* PRCI integration data for each WRPLL instance */ -/** - * struct __prci_data - per-device-instance data - * @va: base virtual address of the PRCI IP block - * @hw_clks: encapsulates struct clk_hw records - * - * PRCI per-device instance data - */ -struct __prci_data { - void __iomem *va; - struct clk_hw_onecell_data hw_clks; +static struct __prci_wrpll_data __prci_corepll_data = { + .cfg0_offs = PRCI_COREPLLCFG0_OFFSET, + .enable_bypass = sifive_prci_coreclksel_use_hfclk, + .disable_bypass = sifive_prci_coreclksel_use_corepll, }; -/** - * struct __prci_wrpll_data - WRPLL configuration and integration data - * @c: WRPLL current configuration record - * @enable_bypass: fn ptr to code to bypass the WRPLL (if applicable; else NULL) - * @disable_bypass: fn ptr to code to not bypass the WRPLL (or NULL) - * @cfg0_offs: WRPLL CFG0 register offset (in bytes) from the PRCI base address - * - * @enable_bypass and @disable_bypass are used for WRPLL instances - * that contain a separate external glitchless clock mux downstream - * from the PLL. The WRPLL internal bypass mux is not glitchless. - */ -struct __prci_wrpll_data { - struct wrpll_cfg c; - void (*enable_bypass)(struct __prci_data *pd); - void (*disable_bypass)(struct __prci_data *pd); - u8 cfg0_offs; +static struct __prci_wrpll_data __prci_ddrpll_data = { + .cfg0_offs = PRCI_DDRPLLCFG0_OFFSET, }; -/** - * struct __prci_clock - describes a clock device managed by PRCI - * @name: user-readable clock name string - should match the manual - * @parent_name: parent name for this clock - * @ops: struct clk_ops for the Linux clock framework to use for control - * @hw: Linux-private clock data - * @pwd: WRPLL-specific data, associated with this clock (if not NULL) - * @pd: PRCI-specific data associated with this clock (if not NULL) - * - * PRCI clock data. Used by the PRCI driver to register PRCI-provided - * clocks to the Linux clock infrastructure. - */ -struct __prci_clock { - const char *name; - const char *parent_name; - const struct clk_ops *ops; - struct clk_hw hw; - struct __prci_wrpll_data *pwd; - struct __prci_data *pd; +static struct __prci_wrpll_data __prci_gemgxlpll_data = { + .cfg0_offs = PRCI_GEMGXLPLLCFG0_OFFSET, }; -#define clk_hw_to_prci_clock(pwd) container_of(pwd, struct __prci_clock, hw) - -/* - * Private functions - */ - -/** - * __prci_readl() - read from a PRCI register - * @pd: PRCI context - * @offs: register offset to read from (in bytes, from PRCI base address) - * - * Read the register located at offset @offs from the base virtual - * address of the PRCI register target described by @pd, and return - * the value to the caller. - * - * Context: Any context. - * - * Return: the contents of the register described by @pd and @offs. - */ -static u32 __prci_readl(struct __prci_data *pd, u32 offs) -{ - return readl_relaxed(pd->va + offs); -} - -static void __prci_writel(u32 v, u32 offs, struct __prci_data *pd) -{ - writel_relaxed(v, pd->va + offs); -} - -/* WRPLL-related private functions */ - -/** - * __prci_wrpll_unpack() - unpack WRPLL configuration registers into parameters - * @c: ptr to a struct wrpll_cfg record to write config into - * @r: value read from the PRCI PLL configuration register - * - * Given a value @r read from an FU540 PRCI PLL configuration register, - * split it into fields and populate it into the WRPLL configuration record - * pointed to by @c. - * - * The COREPLLCFG0 macros are used below, but the other *PLLCFG0 macros - * have the same register layout. - * - * Context: Any context. - */ -static void __prci_wrpll_unpack(struct wrpll_cfg *c, u32 r) -{ - u32 v; - - v = r & PRCI_COREPLLCFG0_DIVR_MASK; - v >>= PRCI_COREPLLCFG0_DIVR_SHIFT; - c->divr = v; - - v = r & PRCI_COREPLLCFG0_DIVF_MASK; - v >>= PRCI_COREPLLCFG0_DIVF_SHIFT; - c->divf = v; - - v = r & PRCI_COREPLLCFG0_DIVQ_MASK; - v >>= PRCI_COREPLLCFG0_DIVQ_SHIFT; - c->divq = v; - - v = r & PRCI_COREPLLCFG0_RANGE_MASK; - v >>= PRCI_COREPLLCFG0_RANGE_SHIFT; - c->range = v; - - c->flags &= (WRPLL_FLAGS_INT_FEEDBACK_MASK | - WRPLL_FLAGS_EXT_FEEDBACK_MASK); - - /* external feedback mode not supported */ - c->flags |= WRPLL_FLAGS_INT_FEEDBACK_MASK; -} - -/** - * __prci_wrpll_pack() - pack PLL configuration parameters into a register value - * @c: pointer to a struct wrpll_cfg record containing the PLL's cfg - * - * Using a set of WRPLL configuration values pointed to by @c, - * assemble a PRCI PLL configuration register value, and return it to - * the caller. - * - * Context: Any context. Caller must ensure that the contents of the - * record pointed to by @c do not change during the execution - * of this function. - * - * Returns: a value suitable for writing into a PRCI PLL configuration - * register - */ -static u32 __prci_wrpll_pack(const struct wrpll_cfg *c) -{ - u32 r = 0; - - r |= c->divr << PRCI_COREPLLCFG0_DIVR_SHIFT; - r |= c->divf << PRCI_COREPLLCFG0_DIVF_SHIFT; - r |= c->divq << PRCI_COREPLLCFG0_DIVQ_SHIFT; - r |= c->range << PRCI_COREPLLCFG0_RANGE_SHIFT; - - /* external feedback mode not supported */ - r |= PRCI_COREPLLCFG0_FSE_MASK; - - return r; -} - -/** - * __prci_wrpll_read_cfg() - read the WRPLL configuration from the PRCI - * @pd: PRCI context - * @pwd: PRCI WRPLL metadata - * - * Read the current configuration of the PLL identified by @pwd from - * the PRCI identified by @pd, and store it into the local configuration - * cache in @pwd. - * - * Context: Any context. Caller must prevent the records pointed to by - * @pd and @pwd from changing during execution. - */ -static void __prci_wrpll_read_cfg(struct __prci_data *pd, - struct __prci_wrpll_data *pwd) -{ - __prci_wrpll_unpack(&pwd->c, __prci_readl(pd, pwd->cfg0_offs)); -} - -/** - * __prci_wrpll_write_cfg() - write WRPLL configuration into the PRCI - * @pd: PRCI context - * @pwd: PRCI WRPLL metadata - * @c: WRPLL configuration record to write - * - * Write the WRPLL configuration described by @c into the WRPLL - * configuration register identified by @pwd in the PRCI instance - * described by @c. Make a cached copy of the WRPLL's current - * configuration so it can be used by other code. - * - * Context: Any context. Caller must prevent the records pointed to by - * @pd and @pwd from changing during execution. - */ -static void __prci_wrpll_write_cfg(struct __prci_data *pd, - struct __prci_wrpll_data *pwd, - struct wrpll_cfg *c) -{ - __prci_writel(__prci_wrpll_pack(c), pwd->cfg0_offs, pd); - - memcpy(&pwd->c, c, sizeof(*c)); -} - -/* Core clock mux control */ - -/** - * __prci_coreclksel_use_hfclk() - switch the CORECLK mux to output HFCLK - * @pd: struct __prci_data * for the PRCI containing the CORECLK mux reg - * - * Switch the CORECLK mux to the HFCLK input source; return once complete. - * - * Context: Any context. Caller must prevent concurrent changes to the - * PRCI_CORECLKSEL_OFFSET register. - */ -static void __prci_coreclksel_use_hfclk(struct __prci_data *pd) -{ - u32 r; - - r = __prci_readl(pd, PRCI_CORECLKSEL_OFFSET); - r |= PRCI_CORECLKSEL_CORECLKSEL_MASK; - __prci_writel(r, PRCI_CORECLKSEL_OFFSET, pd); - - r = __prci_readl(pd, PRCI_CORECLKSEL_OFFSET); /* barrier */ -} - -/** - * __prci_coreclksel_use_corepll() - switch the CORECLK mux to output COREPLL - * @pd: struct __prci_data * for the PRCI containing the CORECLK mux reg - * - * Switch the CORECLK mux to the PLL output clock; return once complete. - * - * Context: Any context. Caller must prevent concurrent changes to the - * PRCI_CORECLKSEL_OFFSET register. - */ -static void __prci_coreclksel_use_corepll(struct __prci_data *pd) -{ - u32 r; - - r = __prci_readl(pd, PRCI_CORECLKSEL_OFFSET); - r &= ~PRCI_CORECLKSEL_CORECLKSEL_MASK; - __prci_writel(r, PRCI_CORECLKSEL_OFFSET, pd); - - r = __prci_readl(pd, PRCI_CORECLKSEL_OFFSET); /* barrier */ -} - -/* - * Linux clock framework integration - * - * See the Linux clock framework documentation for more information on - * these functions. - */ - -static unsigned long sifive_fu540_prci_wrpll_recalc_rate(struct clk_hw *hw, - unsigned long parent_rate) -{ - struct __prci_clock *pc = clk_hw_to_prci_clock(hw); - struct __prci_wrpll_data *pwd = pc->pwd; - - return wrpll_calc_output_rate(&pwd->c, parent_rate); -} - -static long sifive_fu540_prci_wrpll_round_rate(struct clk_hw *hw, - unsigned long rate, - unsigned long *parent_rate) -{ - struct __prci_clock *pc = clk_hw_to_prci_clock(hw); - struct __prci_wrpll_data *pwd = pc->pwd; - struct wrpll_cfg c; - - memcpy(&c, &pwd->c, sizeof(c)); - - wrpll_configure_for_rate(&c, rate, *parent_rate); - - return wrpll_calc_output_rate(&c, *parent_rate); -} - -static int sifive_fu540_prci_wrpll_set_rate(struct clk_hw *hw, - unsigned long rate, - unsigned long parent_rate) -{ - struct __prci_clock *pc = clk_hw_to_prci_clock(hw); - struct __prci_wrpll_data *pwd = pc->pwd; - struct __prci_data *pd = pc->pd; - int r; - - r = wrpll_configure_for_rate(&pwd->c, rate, parent_rate); - if (r) - return r; - - if (pwd->enable_bypass) - pwd->enable_bypass(pd); - - __prci_wrpll_write_cfg(pd, pwd, &pwd->c); - - udelay(wrpll_calc_max_lock_us(&pwd->c)); - - if (pwd->disable_bypass) - pwd->disable_bypass(pd); - - return 0; -} +/* Linux clock framework integration */ static const struct clk_ops sifive_fu540_prci_wrpll_clk_ops = { - .set_rate = sifive_fu540_prci_wrpll_set_rate, - .round_rate = sifive_fu540_prci_wrpll_round_rate, - .recalc_rate = sifive_fu540_prci_wrpll_recalc_rate, + .set_rate = sifive_prci_wrpll_set_rate, + .round_rate = sifive_prci_wrpll_round_rate, + .recalc_rate = sifive_prci_wrpll_recalc_rate, }; static const struct clk_ops sifive_fu540_prci_wrpll_ro_clk_ops = { - .recalc_rate = sifive_fu540_prci_wrpll_recalc_rate, + .recalc_rate = sifive_prci_wrpll_recalc_rate, }; -/* TLCLKSEL clock integration */ - -static unsigned long sifive_fu540_prci_tlclksel_recalc_rate(struct clk_hw *hw, - unsigned long parent_rate) -{ - struct __prci_clock *pc = clk_hw_to_prci_clock(hw); - struct __prci_data *pd = pc->pd; - u32 v; - u8 div; - - v = __prci_readl(pd, PRCI_CLKMUXSTATUSREG_OFFSET); - v &= PRCI_CLKMUXSTATUSREG_TLCLKSEL_STATUS_MASK; - div = v ? 1 : 2; - - return div_u64(parent_rate, div); -} - static const struct clk_ops sifive_fu540_prci_tlclksel_clk_ops = { - .recalc_rate = sifive_fu540_prci_tlclksel_recalc_rate, -}; - -/* - * PRCI integration data for each WRPLL instance - */ - -static struct __prci_wrpll_data __prci_corepll_data = { - .cfg0_offs = PRCI_COREPLLCFG0_OFFSET, - .enable_bypass = __prci_coreclksel_use_hfclk, - .disable_bypass = __prci_coreclksel_use_corepll, -}; - -static struct __prci_wrpll_data __prci_ddrpll_data = { - .cfg0_offs = PRCI_DDRPLLCFG0_OFFSET, + .recalc_rate = sifive_prci_tlclksel_recalc_rate, }; -static struct __prci_wrpll_data __prci_gemgxlpll_data = { - .cfg0_offs = PRCI_GEMGXLPLLCFG0_OFFSET, -}; - -/* - * List of clock controls provided by the PRCI - */ - -static struct __prci_clock __prci_init_clocks[] = { +/* List of clock controls provided by the PRCI */ +struct __prci_clock __prci_init_clocks_fu540[] = { [PRCI_CLK_COREPLL] = { .name = "corepll", .parent_name = "hfclk", @@ -506,125 +81,3 @@ static struct __prci_clock __prci_init_clocks[] = { .ops = &sifive_fu540_prci_tlclksel_clk_ops, }, }; - -/** - * __prci_register_clocks() - register clock controls in the PRCI with Linux - * @dev: Linux struct device * - * - * Register the list of clock controls described in __prci_init_plls[] with - * the Linux clock framework. - * - * Return: 0 upon success or a negative error code upon failure. - */ -static int __prci_register_clocks(struct device *dev, struct __prci_data *pd) -{ - struct clk_init_data init = { }; - struct __prci_clock *pic; - int parent_count, i, r; - - parent_count = of_clk_get_parent_count(dev->of_node); - if (parent_count != EXPECTED_CLK_PARENT_COUNT) { - dev_err(dev, "expected only two parent clocks, found %d\n", - parent_count); - return -EINVAL; - } - - /* Register PLLs */ - for (i = 0; i < ARRAY_SIZE(__prci_init_clocks); ++i) { - pic = &__prci_init_clocks[i]; - - init.name = pic->name; - init.parent_names = &pic->parent_name; - init.num_parents = 1; - init.ops = pic->ops; - pic->hw.init = &init; - - pic->pd = pd; - - if (pic->pwd) - __prci_wrpll_read_cfg(pd, pic->pwd); - - r = devm_clk_hw_register(dev, &pic->hw); - if (r) { - dev_warn(dev, "Failed to register clock %s: %d\n", - init.name, r); - return r; - } - - r = clk_hw_register_clkdev(&pic->hw, pic->name, dev_name(dev)); - if (r) { - dev_warn(dev, "Failed to register clkdev for %s: %d\n", - init.name, r); - return r; - } - - pd->hw_clks.hws[i] = &pic->hw; - } - - pd->hw_clks.num = i; - - r = devm_of_clk_add_hw_provider(dev, of_clk_hw_onecell_get, - &pd->hw_clks); - if (r) { - dev_err(dev, "could not add hw_provider: %d\n", r); - return r; - } - - return 0; -} - -/* - * Linux device model integration - * - * See the Linux device model documentation for more information about - * these functions. - */ -static int sifive_fu540_prci_probe(struct platform_device *pdev) -{ - struct device *dev = &pdev->dev; - struct resource *res; - struct __prci_data *pd; - int r; - - pd = devm_kzalloc(dev, - struct_size(pd, hw_clks.hws, - ARRAY_SIZE(__prci_init_clocks)), - GFP_KERNEL); - if (!pd) - return -ENOMEM; - - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - pd->va = devm_ioremap_resource(dev, res); - if (IS_ERR(pd->va)) - return PTR_ERR(pd->va); - - r = __prci_register_clocks(dev, pd); - if (r) { - dev_err(dev, "could not register clocks: %d\n", r); - return r; - } - - dev_dbg(dev, "SiFive FU540 PRCI probed\n"); - - return 0; -} - -static const struct of_device_id sifive_fu540_prci_of_match[] = { - { .compatible = "sifive,fu540-c000-prci", }, - {} -}; -MODULE_DEVICE_TABLE(of, sifive_fu540_prci_of_match); - -static struct platform_driver sifive_fu540_prci_driver = { - .driver = { - .name = "sifive-fu540-prci", - .of_match_table = sifive_fu540_prci_of_match, - }, - .probe = sifive_fu540_prci_probe, -}; - -static int __init sifive_fu540_prci_init(void) -{ - return platform_driver_register(&sifive_fu540_prci_driver); -} -core_initcall(sifive_fu540_prci_init); diff --git a/drivers/clk/sifive/fu540-prci.h b/drivers/clk/sifive/fu540-prci.h new file mode 100644 index 0000000000000..c8271efa7bdc7 --- /dev/null +++ b/drivers/clk/sifive/fu540-prci.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 SiFive, Inc. + * Zong Li + */ + +#ifndef __SIFIVE_CLK_FU540_PRCI_H +#define __SIFIVE_CLK_FU540_PRCI_H + +#include "sifive-prci.h" + +#define NUM_CLOCK_FU540 4 + +extern struct __prci_clock __prci_init_clocks_fu540[NUM_CLOCK_FU540]; + +static const struct prci_clk_desc prci_clk_fu540 = { + .clks = __prci_init_clocks_fu540, + .num_clks = ARRAY_SIZE(__prci_init_clocks_fu540), +}; + +#endif /* __SIFIVE_CLK_FU540_PRCI_H */ diff --git a/drivers/clk/sifive/sifive-prci.c b/drivers/clk/sifive/sifive-prci.c new file mode 100644 index 0000000000000..70653d33f33fe --- /dev/null +++ b/drivers/clk/sifive/sifive-prci.c @@ -0,0 +1,395 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 SiFive, Inc. + * Copyright (C) 2020 Zong Li + */ + +#include +#include +#include +#include +#include "sifive-prci.h" +#include "fu540-prci.h" + +/* + * Private functions + */ + +/** + * __prci_readl() - read from a PRCI register + * @pd: PRCI context + * @offs: register offset to read from (in bytes, from PRCI base address) + * + * Read the register located at offset @offs from the base virtual + * address of the PRCI register target described by @pd, and return + * the value to the caller. + * + * Context: Any context. + * + * Return: the contents of the register described by @pd and @offs. + */ +static u32 __prci_readl(struct __prci_data *pd, u32 offs) +{ + return readl_relaxed(pd->va + offs); +} + +static void __prci_writel(u32 v, u32 offs, struct __prci_data *pd) +{ + writel_relaxed(v, pd->va + offs); +} + +/* WRPLL-related private functions */ + +/** + * __prci_wrpll_unpack() - unpack WRPLL configuration registers into parameters + * @c: ptr to a struct wrpll_cfg record to write config into + * @r: value read from the PRCI PLL configuration register + * + * Given a value @r read from an FU740 PRCI PLL configuration register, + * split it into fields and populate it into the WRPLL configuration record + * pointed to by @c. + * + * The COREPLLCFG0 macros are used below, but the other *PLLCFG0 macros + * have the same register layout. + * + * Context: Any context. + */ +static void __prci_wrpll_unpack(struct wrpll_cfg *c, u32 r) +{ + u32 v; + + v = r & PRCI_COREPLLCFG0_DIVR_MASK; + v >>= PRCI_COREPLLCFG0_DIVR_SHIFT; + c->divr = v; + + v = r & PRCI_COREPLLCFG0_DIVF_MASK; + v >>= PRCI_COREPLLCFG0_DIVF_SHIFT; + c->divf = v; + + v = r & PRCI_COREPLLCFG0_DIVQ_MASK; + v >>= PRCI_COREPLLCFG0_DIVQ_SHIFT; + c->divq = v; + + v = r & PRCI_COREPLLCFG0_RANGE_MASK; + v >>= PRCI_COREPLLCFG0_RANGE_SHIFT; + c->range = v; + + c->flags &= + (WRPLL_FLAGS_INT_FEEDBACK_MASK | WRPLL_FLAGS_EXT_FEEDBACK_MASK); + + /* external feedback mode not supported */ + c->flags |= WRPLL_FLAGS_INT_FEEDBACK_MASK; +} + +/** + * __prci_wrpll_pack() - pack PLL configuration parameters into a register value + * @c: pointer to a struct wrpll_cfg record containing the PLL's cfg + * + * Using a set of WRPLL configuration values pointed to by @c, + * assemble a PRCI PLL configuration register value, and return it to + * the caller. + * + * Context: Any context. Caller must ensure that the contents of the + * record pointed to by @c do not change during the execution + * of this function. + * + * Returns: a value suitable for writing into a PRCI PLL configuration + * register + */ +static u32 __prci_wrpll_pack(const struct wrpll_cfg *c) +{ + u32 r = 0; + + r |= c->divr << PRCI_COREPLLCFG0_DIVR_SHIFT; + r |= c->divf << PRCI_COREPLLCFG0_DIVF_SHIFT; + r |= c->divq << PRCI_COREPLLCFG0_DIVQ_SHIFT; + r |= c->range << PRCI_COREPLLCFG0_RANGE_SHIFT; + + /* external feedback mode not supported */ + r |= PRCI_COREPLLCFG0_FSE_MASK; + + return r; +} + +/** + * __prci_wrpll_read_cfg() - read the WRPLL configuration from the PRCI + * @pd: PRCI context + * @pwd: PRCI WRPLL metadata + * + * Read the current configuration of the PLL identified by @pwd from + * the PRCI identified by @pd, and store it into the local configuration + * cache in @pwd. + * + * Context: Any context. Caller must prevent the records pointed to by + * @pd and @pwd from changing during execution. + */ +static void __prci_wrpll_read_cfg(struct __prci_data *pd, + struct __prci_wrpll_data *pwd) +{ + __prci_wrpll_unpack(&pwd->c, __prci_readl(pd, pwd->cfg0_offs)); +} + +/** + * __prci_wrpll_write_cfg() - write WRPLL configuration into the PRCI + * @pd: PRCI context + * @pwd: PRCI WRPLL metadata + * @c: WRPLL configuration record to write + * + * Write the WRPLL configuration described by @c into the WRPLL + * configuration register identified by @pwd in the PRCI instance + * described by @c. Make a cached copy of the WRPLL's current + * configuration so it can be used by other code. + * + * Context: Any context. Caller must prevent the records pointed to by + * @pd and @pwd from changing during execution. + */ +static void __prci_wrpll_write_cfg(struct __prci_data *pd, + struct __prci_wrpll_data *pwd, + struct wrpll_cfg *c) +{ + __prci_writel(__prci_wrpll_pack(c), pwd->cfg0_offs, pd); + + memcpy(&pwd->c, c, sizeof(*c)); +} + +/* + * Linux clock framework integration + * + * See the Linux clock framework documentation for more information on + * these functions. + */ + +unsigned long sifive_prci_wrpll_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct __prci_clock *pc = clk_hw_to_prci_clock(hw); + struct __prci_wrpll_data *pwd = pc->pwd; + + return wrpll_calc_output_rate(&pwd->c, parent_rate); +} + +long sifive_prci_wrpll_round_rate(struct clk_hw *hw, + unsigned long rate, + unsigned long *parent_rate) +{ + struct __prci_clock *pc = clk_hw_to_prci_clock(hw); + struct __prci_wrpll_data *pwd = pc->pwd; + struct wrpll_cfg c; + + memcpy(&c, &pwd->c, sizeof(c)); + + wrpll_configure_for_rate(&c, rate, *parent_rate); + + return wrpll_calc_output_rate(&c, *parent_rate); +} + +int sifive_prci_wrpll_set_rate(struct clk_hw *hw, + unsigned long rate, unsigned long parent_rate) +{ + struct __prci_clock *pc = clk_hw_to_prci_clock(hw); + struct __prci_wrpll_data *pwd = pc->pwd; + struct __prci_data *pd = pc->pd; + int r; + + r = wrpll_configure_for_rate(&pwd->c, rate, parent_rate); + if (r) + return r; + + if (pwd->enable_bypass) + pwd->enable_bypass(pd); + + __prci_wrpll_write_cfg(pd, pwd, &pwd->c); + + udelay(wrpll_calc_max_lock_us(&pwd->c)); + + if (pwd->disable_bypass) + pwd->disable_bypass(pd); + + return 0; +} + +/* TLCLKSEL clock integration */ + +unsigned long sifive_prci_tlclksel_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct __prci_clock *pc = clk_hw_to_prci_clock(hw); + struct __prci_data *pd = pc->pd; + u32 v; + u8 div; + + v = __prci_readl(pd, PRCI_CLKMUXSTATUSREG_OFFSET); + v &= PRCI_CLKMUXSTATUSREG_TLCLKSEL_STATUS_MASK; + div = v ? 1 : 2; + + return div_u64(parent_rate, div); +} + +/* + * Core clock mux control + */ + +/** + * sifive_prci_coreclksel_use_hfclk() - switch the CORECLK mux to output HFCLK + * @pd: struct __prci_data * for the PRCI containing the CORECLK mux reg + * + * Switch the CORECLK mux to the HFCLK input source; return once complete. + * + * Context: Any context. Caller must prevent concurrent changes to the + * PRCI_CORECLKSEL_OFFSET register. + */ +void sifive_prci_coreclksel_use_hfclk(struct __prci_data *pd) +{ + u32 r; + + r = __prci_readl(pd, PRCI_CORECLKSEL_OFFSET); + r |= PRCI_CORECLKSEL_CORECLKSEL_MASK; + __prci_writel(r, PRCI_CORECLKSEL_OFFSET, pd); + + r = __prci_readl(pd, PRCI_CORECLKSEL_OFFSET); /* barrier */ +} + +/** + * sifive_prci_coreclksel_use_corepll() - switch the CORECLK mux to output + * COREPLL + * @pd: struct __prci_data * for the PRCI containing the CORECLK mux reg + * + * Switch the CORECLK mux to the COREPLL output clock; return once complete. + * + * Context: Any context. Caller must prevent concurrent changes to the + * PRCI_CORECLKSEL_OFFSET register. + */ +void sifive_prci_coreclksel_use_corepll(struct __prci_data *pd) +{ + u32 r; + + r = __prci_readl(pd, PRCI_CORECLKSEL_OFFSET); + r &= ~PRCI_CORECLKSEL_CORECLKSEL_MASK; + __prci_writel(r, PRCI_CORECLKSEL_OFFSET, pd); + + r = __prci_readl(pd, PRCI_CORECLKSEL_OFFSET); /* barrier */ +} + +/** + * __prci_register_clocks() - register clock controls in the PRCI + * @dev: Linux struct device + * @pd: The pointer for PRCI per-device instance data + * @desc: The pointer for the information of clocks of each SoCs + * + * Register the list of clock controls described in __prci_init_clocks[] with + * the Linux clock framework. + * + * Return: 0 upon success or a negative error code upon failure. + */ +static int __prci_register_clocks(struct device *dev, struct __prci_data *pd, + const struct prci_clk_desc *desc) +{ + struct clk_init_data init = { }; + struct __prci_clock *pic; + int parent_count, i, r; + + parent_count = of_clk_get_parent_count(dev->of_node); + if (parent_count != EXPECTED_CLK_PARENT_COUNT) { + dev_err(dev, "expected only two parent clocks, found %d\n", + parent_count); + return -EINVAL; + } + + /* Register PLLs */ + for (i = 0; i < desc->num_clks; ++i) { + pic = &(desc->clks[i]); + + init.name = pic->name; + init.parent_names = &pic->parent_name; + init.num_parents = 1; + init.ops = pic->ops; + pic->hw.init = &init; + + pic->pd = pd; + + if (pic->pwd) + __prci_wrpll_read_cfg(pd, pic->pwd); + + r = devm_clk_hw_register(dev, &pic->hw); + if (r) { + dev_warn(dev, "Failed to register clock %s: %d\n", + init.name, r); + return r; + } + + r = clk_hw_register_clkdev(&pic->hw, pic->name, dev_name(dev)); + if (r) { + dev_warn(dev, "Failed to register clkdev for %s: %d\n", + init.name, r); + return r; + } + + pd->hw_clks.hws[i] = &pic->hw; + } + + pd->hw_clks.num = i; + + r = devm_of_clk_add_hw_provider(dev, of_clk_hw_onecell_get, + &pd->hw_clks); + if (r) { + dev_err(dev, "could not add hw_provider: %d\n", r); + return r; + } + + return 0; +} + +/** + * sifive_prci_init() - initialize prci data and check parent count + * @pdev: platform device pointer for the prci + * + * Return: 0 upon success or a negative error code upon failure. + */ +static int sifive_prci_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct resource *res; + struct __prci_data *pd; + const struct prci_clk_desc *desc; + int r; + + desc = of_device_get_match_data(&pdev->dev); + + pd = devm_kzalloc(dev, struct_size(pd, hw_clks.hws, desc->num_clks), GFP_KERNEL); + if (!pd) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + pd->va = devm_ioremap_resource(dev, res); + if (IS_ERR(pd->va)) + return PTR_ERR(pd->va); + + r = __prci_register_clocks(dev, pd, desc); + if (r) { + dev_err(dev, "could not register clocks: %d\n", r); + return r; + } + + dev_dbg(dev, "SiFive PRCI probed\n"); + + return 0; +} + +static const struct of_device_id sifive_prci_of_match[] = { + {.compatible = "sifive,fu540-c000-prci", .data = &prci_clk_fu540}, + {} +}; + +static struct platform_driver sifive_prci_driver = { + .driver = { + .name = "sifive-clk-prci", + .of_match_table = sifive_prci_of_match, + }, + .probe = sifive_prci_probe, +}; + +static int __init sifive_prci_init(void) +{ + return platform_driver_register(&sifive_prci_driver); +} +core_initcall(sifive_prci_init); diff --git a/drivers/clk/sifive/sifive-prci.h b/drivers/clk/sifive/sifive-prci.h new file mode 100644 index 0000000000000..280df63b4b928 --- /dev/null +++ b/drivers/clk/sifive/sifive-prci.h @@ -0,0 +1,201 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2018-2019 SiFive, Inc. + * Wesley Terpstra + * Paul Walmsley + * Zong Li + */ + +#ifndef __SIFIVE_CLK_SIFIVE_PRCI_H +#define __SIFIVE_CLK_SIFIVE_PRCI_H + +#include +#include +#include + +/* + * EXPECTED_CLK_PARENT_COUNT: how many parent clocks this driver expects: + * hfclk and rtcclk + */ +#define EXPECTED_CLK_PARENT_COUNT 2 + +/* + * Register offsets and bitmasks + */ + +/* COREPLLCFG0 */ +#define PRCI_COREPLLCFG0_OFFSET 0x4 +#define PRCI_COREPLLCFG0_DIVR_SHIFT 0 +#define PRCI_COREPLLCFG0_DIVR_MASK (0x3f << PRCI_COREPLLCFG0_DIVR_SHIFT) +#define PRCI_COREPLLCFG0_DIVF_SHIFT 6 +#define PRCI_COREPLLCFG0_DIVF_MASK (0x1ff << PRCI_COREPLLCFG0_DIVF_SHIFT) +#define PRCI_COREPLLCFG0_DIVQ_SHIFT 15 +#define PRCI_COREPLLCFG0_DIVQ_MASK (0x7 << PRCI_COREPLLCFG0_DIVQ_SHIFT) +#define PRCI_COREPLLCFG0_RANGE_SHIFT 18 +#define PRCI_COREPLLCFG0_RANGE_MASK (0x7 << PRCI_COREPLLCFG0_RANGE_SHIFT) +#define PRCI_COREPLLCFG0_BYPASS_SHIFT 24 +#define PRCI_COREPLLCFG0_BYPASS_MASK (0x1 << PRCI_COREPLLCFG0_BYPASS_SHIFT) +#define PRCI_COREPLLCFG0_FSE_SHIFT 25 +#define PRCI_COREPLLCFG0_FSE_MASK (0x1 << PRCI_COREPLLCFG0_FSE_SHIFT) +#define PRCI_COREPLLCFG0_LOCK_SHIFT 31 +#define PRCI_COREPLLCFG0_LOCK_MASK (0x1 << PRCI_COREPLLCFG0_LOCK_SHIFT) + +/* DDRPLLCFG0 */ +#define PRCI_DDRPLLCFG0_OFFSET 0xc +#define PRCI_DDRPLLCFG0_DIVR_SHIFT 0 +#define PRCI_DDRPLLCFG0_DIVR_MASK (0x3f << PRCI_DDRPLLCFG0_DIVR_SHIFT) +#define PRCI_DDRPLLCFG0_DIVF_SHIFT 6 +#define PRCI_DDRPLLCFG0_DIVF_MASK (0x1ff << PRCI_DDRPLLCFG0_DIVF_SHIFT) +#define PRCI_DDRPLLCFG0_DIVQ_SHIFT 15 +#define PRCI_DDRPLLCFG0_DIVQ_MASK (0x7 << PRCI_DDRPLLCFG0_DIVQ_SHIFT) +#define PRCI_DDRPLLCFG0_RANGE_SHIFT 18 +#define PRCI_DDRPLLCFG0_RANGE_MASK (0x7 << PRCI_DDRPLLCFG0_RANGE_SHIFT) +#define PRCI_DDRPLLCFG0_BYPASS_SHIFT 24 +#define PRCI_DDRPLLCFG0_BYPASS_MASK (0x1 << PRCI_DDRPLLCFG0_BYPASS_SHIFT) +#define PRCI_DDRPLLCFG0_FSE_SHIFT 25 +#define PRCI_DDRPLLCFG0_FSE_MASK (0x1 << PRCI_DDRPLLCFG0_FSE_SHIFT) +#define PRCI_DDRPLLCFG0_LOCK_SHIFT 31 +#define PRCI_DDRPLLCFG0_LOCK_MASK (0x1 << PRCI_DDRPLLCFG0_LOCK_SHIFT) + +/* DDRPLLCFG1 */ +#define PRCI_DDRPLLCFG1_OFFSET 0x10 +#define PRCI_DDRPLLCFG1_CKE_SHIFT 24 +#define PRCI_DDRPLLCFG1_CKE_MASK (0x1 << PRCI_DDRPLLCFG1_CKE_SHIFT) + +/* GEMGXLPLLCFG0 */ +#define PRCI_GEMGXLPLLCFG0_OFFSET 0x1c +#define PRCI_GEMGXLPLLCFG0_DIVR_SHIFT 0 +#define PRCI_GEMGXLPLLCFG0_DIVR_MASK (0x3f << PRCI_GEMGXLPLLCFG0_DIVR_SHIFT) +#define PRCI_GEMGXLPLLCFG0_DIVF_SHIFT 6 +#define PRCI_GEMGXLPLLCFG0_DIVF_MASK (0x1ff << PRCI_GEMGXLPLLCFG0_DIVF_SHIFT) +#define PRCI_GEMGXLPLLCFG0_DIVQ_SHIFT 15 +#define PRCI_GEMGXLPLLCFG0_DIVQ_MASK (0x7 << PRCI_GEMGXLPLLCFG0_DIVQ_SHIFT) +#define PRCI_GEMGXLPLLCFG0_RANGE_SHIFT 18 +#define PRCI_GEMGXLPLLCFG0_RANGE_MASK (0x7 << PRCI_GEMGXLPLLCFG0_RANGE_SHIFT) +#define PRCI_GEMGXLPLLCFG0_BYPASS_SHIFT 24 +#define PRCI_GEMGXLPLLCFG0_BYPASS_MASK (0x1 << PRCI_GEMGXLPLLCFG0_BYPASS_SHIFT) +#define PRCI_GEMGXLPLLCFG0_FSE_SHIFT 25 +#define PRCI_GEMGXLPLLCFG0_FSE_MASK (0x1 << PRCI_GEMGXLPLLCFG0_FSE_SHIFT) +#define PRCI_GEMGXLPLLCFG0_LOCK_SHIFT 31 +#define PRCI_GEMGXLPLLCFG0_LOCK_MASK (0x1 << PRCI_GEMGXLPLLCFG0_LOCK_SHIFT) + +/* GEMGXLPLLCFG1 */ +#define PRCI_GEMGXLPLLCFG1_OFFSET 0x20 +#define PRCI_GEMGXLPLLCFG1_CKE_SHIFT 24 +#define PRCI_GEMGXLPLLCFG1_CKE_MASK (0x1 << PRCI_GEMGXLPLLCFG1_CKE_SHIFT) + +/* CORECLKSEL */ +#define PRCI_CORECLKSEL_OFFSET 0x24 +#define PRCI_CORECLKSEL_CORECLKSEL_SHIFT 0 +#define PRCI_CORECLKSEL_CORECLKSEL_MASK \ + (0x1 << PRCI_CORECLKSEL_CORECLKSEL_SHIFT) + +/* DEVICESRESETREG */ +#define PRCI_DEVICESRESETREG_OFFSET 0x28 +#define PRCI_DEVICESRESETREG_DDR_CTRL_RST_N_SHIFT 0 +#define PRCI_DEVICESRESETREG_DDR_CTRL_RST_N_MASK \ + (0x1 << PRCI_DEVICESRESETREG_DDR_CTRL_RST_N_SHIFT) +#define PRCI_DEVICESRESETREG_DDR_AXI_RST_N_SHIFT 1 +#define PRCI_DEVICESRESETREG_DDR_AXI_RST_N_MASK \ + (0x1 << PRCI_DEVICESRESETREG_DDR_AXI_RST_N_SHIFT) +#define PRCI_DEVICESRESETREG_DDR_AHB_RST_N_SHIFT 2 +#define PRCI_DEVICESRESETREG_DDR_AHB_RST_N_MASK \ + (0x1 << PRCI_DEVICESRESETREG_DDR_AHB_RST_N_SHIFT) +#define PRCI_DEVICESRESETREG_DDR_PHY_RST_N_SHIFT 3 +#define PRCI_DEVICESRESETREG_DDR_PHY_RST_N_MASK \ + (0x1 << PRCI_DEVICESRESETREG_DDR_PHY_RST_N_SHIFT) +#define PRCI_DEVICESRESETREG_GEMGXL_RST_N_SHIFT 5 +#define PRCI_DEVICESRESETREG_GEMGXL_RST_N_MASK \ + (0x1 << PRCI_DEVICESRESETREG_GEMGXL_RST_N_SHIFT) +#define PRCI_DEVICESRESETREG_CHIPLINK_RST_N_SHIFT 6 +#define PRCI_DEVICESRESETREG_CHIPLINK_RST_N_MASK \ + (0x1 << PRCI_DEVICESRESETREG_CHIPLINK_RST_N_SHIFT) + +/* CLKMUXSTATUSREG */ +#define PRCI_CLKMUXSTATUSREG_OFFSET 0x2c +#define PRCI_CLKMUXSTATUSREG_TLCLKSEL_STATUS_SHIFT 1 +#define PRCI_CLKMUXSTATUSREG_TLCLKSEL_STATUS_MASK \ + (0x1 << PRCI_CLKMUXSTATUSREG_TLCLKSEL_STATUS_SHIFT) + +/* + * Private structures + */ + +/** + * struct __prci_data - per-device-instance data + * @va: base virtual address of the PRCI IP block + * @hw_clks: encapsulates struct clk_hw records + * + * PRCI per-device instance data + */ +struct __prci_data { + void __iomem *va; + struct clk_hw_onecell_data hw_clks; +}; + +/** + * struct __prci_wrpll_data - WRPLL configuration and integration data + * @c: WRPLL current configuration record + * @enable_bypass: fn ptr to code to bypass the WRPLL (if applicable; else NULL) + * @disable_bypass: fn ptr to code to not bypass the WRPLL (or NULL) + * @cfg0_offs: WRPLL CFG0 register offset (in bytes) from the PRCI base address + * + * @enable_bypass and @disable_bypass are used for WRPLL instances + * that contain a separate external glitchless clock mux downstream + * from the PLL. The WRPLL internal bypass mux is not glitchless. + */ +struct __prci_wrpll_data { + struct wrpll_cfg c; + void (*enable_bypass)(struct __prci_data *pd); + void (*disable_bypass)(struct __prci_data *pd); + u8 cfg0_offs; +}; + +/** + * struct __prci_clock - describes a clock device managed by PRCI + * @name: user-readable clock name string - should match the manual + * @parent_name: parent name for this clock + * @ops: struct clk_ops for the Linux clock framework to use for control + * @hw: Linux-private clock data + * @pwd: WRPLL-specific data, associated with this clock (if not NULL) + * @pd: PRCI-specific data associated with this clock (if not NULL) + * + * PRCI clock data. Used by the PRCI driver to register PRCI-provided + * clocks to the Linux clock infrastructure. + */ +struct __prci_clock { + const char *name; + const char *parent_name; + const struct clk_ops *ops; + struct clk_hw hw; + struct __prci_wrpll_data *pwd; + struct __prci_data *pd; +}; + +#define clk_hw_to_prci_clock(pwd) container_of(pwd, struct __prci_clock, hw) + +/* + * struct prci_clk_desc - describes the information of clocks of each SoCs + * @clks: point to a array of __prci_clock + * @num_clks: the number of element of clks + */ +struct prci_clk_desc { + struct __prci_clock *clks; + size_t num_clks; +}; + +/* Core clock mux control */ +void sifive_prci_coreclksel_use_hfclk(struct __prci_data *pd); +void sifive_prci_coreclksel_use_corepll(struct __prci_data *pd); + +/* Linux clock framework integration */ +long sifive_prci_wrpll_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *parent_rate); +int sifive_prci_wrpll_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate); +unsigned long sifive_prci_wrpll_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate); +unsigned long sifive_prci_tlclksel_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate); + +#endif /* __SIFIVE_CLK_SIFIVE_PRCI_H */ -- GitLab From 28108fc8a056f0fd26be17727eff212fae67a247 Mon Sep 17 00:00:00 2001 From: Zong Li Date: Wed, 9 Dec 2020 17:49:13 +0800 Subject: [PATCH 0644/1894] clk: sifive: Use common name for prci configuration Use generic name CLK_SIFIVE_PRCI instead of CLK_SIFIVE_FU540_PRCI. This patch is prepared for fu740 support. Signed-off-by: Zong Li Reviewed-by: Palmer Dabbelt Acked-by: Palmer Dabbelt Reviewed-by: Pragnesh Patel Link: https://lore.kernel.org/r/20201209094916.17383-3-zong.li@sifive.com Signed-off-by: Stephen Boyd --- arch/riscv/Kconfig.socs | 2 +- drivers/clk/sifive/Kconfig | 6 +++--- drivers/clk/sifive/Makefile | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs index 8a55f6156661d..3284d5c291be5 100644 --- a/arch/riscv/Kconfig.socs +++ b/arch/riscv/Kconfig.socs @@ -5,7 +5,7 @@ config SOC_SIFIVE select SERIAL_SIFIVE if TTY select SERIAL_SIFIVE_CONSOLE if TTY select CLK_SIFIVE - select CLK_SIFIVE_FU540_PRCI + select CLK_SIFIVE_PRCI select SIFIVE_PLIC help This enables support for SiFive SoC platform hardware. diff --git a/drivers/clk/sifive/Kconfig b/drivers/clk/sifive/Kconfig index f3b4eb9cb0f55..ab48cf7e0105c 100644 --- a/drivers/clk/sifive/Kconfig +++ b/drivers/clk/sifive/Kconfig @@ -8,12 +8,12 @@ menuconfig CLK_SIFIVE if CLK_SIFIVE -config CLK_SIFIVE_FU540_PRCI - bool "PRCI driver for SiFive FU540 SoCs" +config CLK_SIFIVE_PRCI + bool "PRCI driver for SiFive SoCs" select CLK_ANALOGBITS_WRPLL_CLN28HPC help Supports the Power Reset Clock interface (PRCI) IP block found in - FU540 SoCs. If this kernel is meant to run on a SiFive FU540 SoC, + FU540 SoCs. If this kernel is meant to run on a SiFive FU540 SoC, enable this driver. endif diff --git a/drivers/clk/sifive/Makefile b/drivers/clk/sifive/Makefile index 51b6ebc359e44..3074cdbc60098 100644 --- a/drivers/clk/sifive/Makefile +++ b/drivers/clk/sifive/Makefile @@ -1,2 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_CLK_SIFIVE_FU540_PRCI) += sifive-prci.o fu540-prci.o +obj-$(CONFIG_CLK_SIFIVE_PRCI) += sifive-prci.o fu540-prci.o -- GitLab From efc91ae43c8d4bbf64e4b9a28113b24a74ffd58d Mon Sep 17 00:00:00 2001 From: Zong Li Date: Wed, 9 Dec 2020 17:49:14 +0800 Subject: [PATCH 0645/1894] clk: sifive: Add a driver for the SiFive FU740 PRCI IP block Add driver code for the SiFive FU740 PRCI IP block. This IP block handles reset and clock control for the SiFive FU740 device and implements SoC-level clock tree controls and dividers. The link of unmatched as follow, and the U740-C000 manual would be present in the same page as soon. https://www.sifive.com/boards/hifive-unmatched This driver contains bug fixes and contributions from Henry Styles Erik Danie Pragnesh Patel Signed-off-by: Zong Li Reviewed-by: Pragnesh Patel Acked-by: Palmer Dabbelt Cc: Henry Styles Cc: Erik Danie Cc: Pragnesh Patel Link: https://lore.kernel.org/r/20201209094916.17383-4-zong.li@sifive.com [sboyd@kernel.org: Include header to silence sparse] Signed-off-by: Stephen Boyd --- drivers/clk/sifive/Kconfig | 4 +- drivers/clk/sifive/Makefile | 2 +- drivers/clk/sifive/fu740-prci.c | 114 +++++++++++++++++ drivers/clk/sifive/fu740-prci.h | 21 +++ drivers/clk/sifive/sifive-prci.c | 120 ++++++++++++++++++ drivers/clk/sifive/sifive-prci.h | 88 +++++++++++++ include/dt-bindings/clock/sifive-fu740-prci.h | 23 ++++ 7 files changed, 369 insertions(+), 3 deletions(-) create mode 100644 drivers/clk/sifive/fu740-prci.c create mode 100644 drivers/clk/sifive/fu740-prci.h create mode 100644 include/dt-bindings/clock/sifive-fu740-prci.h diff --git a/drivers/clk/sifive/Kconfig b/drivers/clk/sifive/Kconfig index ab48cf7e0105c..1c14eb20c0668 100644 --- a/drivers/clk/sifive/Kconfig +++ b/drivers/clk/sifive/Kconfig @@ -13,7 +13,7 @@ config CLK_SIFIVE_PRCI select CLK_ANALOGBITS_WRPLL_CLN28HPC help Supports the Power Reset Clock interface (PRCI) IP block found in - FU540 SoCs. If this kernel is meant to run on a SiFive FU540 SoC, - enable this driver. + FU540/FU740 SoCs. If this kernel is meant to run on a SiFive FU540/ + FU740 SoCs, enable this driver. endif diff --git a/drivers/clk/sifive/Makefile b/drivers/clk/sifive/Makefile index 3074cdbc60098..7b06fc04e6b3e 100644 --- a/drivers/clk/sifive/Makefile +++ b/drivers/clk/sifive/Makefile @@ -1,2 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_CLK_SIFIVE_PRCI) += sifive-prci.o fu540-prci.o +obj-$(CONFIG_CLK_SIFIVE_PRCI) += sifive-prci.o fu540-prci.o fu740-prci.o diff --git a/drivers/clk/sifive/fu740-prci.c b/drivers/clk/sifive/fu740-prci.c new file mode 100644 index 0000000000000..69208dd1a757a --- /dev/null +++ b/drivers/clk/sifive/fu740-prci.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 SiFive, Inc. + * Copyright (C) 2020 Zong Li + */ + +#include + +#include + +#include "fu540-prci.h" +#include "sifive-prci.h" + +/* PRCI integration data for each WRPLL instance */ + +static struct __prci_wrpll_data __prci_corepll_data = { + .cfg0_offs = PRCI_COREPLLCFG0_OFFSET, + .enable_bypass = sifive_prci_coreclksel_use_hfclk, + .disable_bypass = sifive_prci_coreclksel_use_final_corepll, +}; + +static struct __prci_wrpll_data __prci_ddrpll_data = { + .cfg0_offs = PRCI_DDRPLLCFG0_OFFSET, +}; + +static struct __prci_wrpll_data __prci_gemgxlpll_data = { + .cfg0_offs = PRCI_GEMGXLPLLCFG0_OFFSET, +}; + +static struct __prci_wrpll_data __prci_dvfscorepll_data = { + .cfg0_offs = PRCI_DVFSCOREPLLCFG0_OFFSET, + .enable_bypass = sifive_prci_corepllsel_use_corepll, + .disable_bypass = sifive_prci_corepllsel_use_dvfscorepll, +}; + +static struct __prci_wrpll_data __prci_hfpclkpll_data = { + .cfg0_offs = PRCI_HFPCLKPLLCFG0_OFFSET, + .enable_bypass = sifive_prci_hfpclkpllsel_use_hfclk, + .disable_bypass = sifive_prci_hfpclkpllsel_use_hfpclkpll, +}; + +static struct __prci_wrpll_data __prci_cltxpll_data = { + .cfg0_offs = PRCI_CLTXPLLCFG0_OFFSET, +}; + +/* Linux clock framework integration */ + +static const struct clk_ops sifive_fu740_prci_wrpll_clk_ops = { + .set_rate = sifive_prci_wrpll_set_rate, + .round_rate = sifive_prci_wrpll_round_rate, + .recalc_rate = sifive_prci_wrpll_recalc_rate, +}; + +static const struct clk_ops sifive_fu740_prci_wrpll_ro_clk_ops = { + .recalc_rate = sifive_prci_wrpll_recalc_rate, +}; + +static const struct clk_ops sifive_fu740_prci_tlclksel_clk_ops = { + .recalc_rate = sifive_prci_tlclksel_recalc_rate, +}; + +static const struct clk_ops sifive_fu740_prci_hfpclkplldiv_clk_ops = { + .recalc_rate = sifive_prci_hfpclkplldiv_recalc_rate, +}; + +/* List of clock controls provided by the PRCI */ +struct __prci_clock __prci_init_clocks_fu740[] = { + [PRCI_CLK_COREPLL] = { + .name = "corepll", + .parent_name = "hfclk", + .ops = &sifive_fu740_prci_wrpll_clk_ops, + .pwd = &__prci_corepll_data, + }, + [PRCI_CLK_DDRPLL] = { + .name = "ddrpll", + .parent_name = "hfclk", + .ops = &sifive_fu740_prci_wrpll_ro_clk_ops, + .pwd = &__prci_ddrpll_data, + }, + [PRCI_CLK_GEMGXLPLL] = { + .name = "gemgxlpll", + .parent_name = "hfclk", + .ops = &sifive_fu740_prci_wrpll_clk_ops, + .pwd = &__prci_gemgxlpll_data, + }, + [PRCI_CLK_DVFSCOREPLL] = { + .name = "dvfscorepll", + .parent_name = "hfclk", + .ops = &sifive_fu740_prci_wrpll_clk_ops, + .pwd = &__prci_dvfscorepll_data, + }, + [PRCI_CLK_HFPCLKPLL] = { + .name = "hfpclkpll", + .parent_name = "hfclk", + .ops = &sifive_fu740_prci_wrpll_clk_ops, + .pwd = &__prci_hfpclkpll_data, + }, + [PRCI_CLK_CLTXPLL] = { + .name = "cltxpll", + .parent_name = "hfclk", + .ops = &sifive_fu740_prci_wrpll_clk_ops, + .pwd = &__prci_cltxpll_data, + }, + [PRCI_CLK_TLCLK] = { + .name = "tlclk", + .parent_name = "corepll", + .ops = &sifive_fu740_prci_tlclksel_clk_ops, + }, + [PRCI_CLK_PCLK] = { + .name = "pclk", + .parent_name = "hfpclkpll", + .ops = &sifive_fu740_prci_hfpclkplldiv_clk_ops, + }, +}; diff --git a/drivers/clk/sifive/fu740-prci.h b/drivers/clk/sifive/fu740-prci.h new file mode 100644 index 0000000000000..13ef971f77649 --- /dev/null +++ b/drivers/clk/sifive/fu740-prci.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 SiFive, Inc. + * Zong Li + */ + +#ifndef __SIFIVE_CLK_FU740_PRCI_H +#define __SIFIVE_CLK_FU740_PRCI_H + +#include "sifive-prci.h" + +#define NUM_CLOCK_FU740 8 + +extern struct __prci_clock __prci_init_clocks_fu740[NUM_CLOCK_FU740]; + +static const struct prci_clk_desc prci_clk_fu740 = { + .clks = __prci_init_clocks_fu740, + .num_clks = ARRAY_SIZE(__prci_init_clocks_fu740), +}; + +#endif /* __SIFIVE_CLK_FU740_PRCI_H */ diff --git a/drivers/clk/sifive/sifive-prci.c b/drivers/clk/sifive/sifive-prci.c index 70653d33f33fe..cc4b4c6b4437d 100644 --- a/drivers/clk/sifive/sifive-prci.c +++ b/drivers/clk/sifive/sifive-prci.c @@ -10,6 +10,7 @@ #include #include "sifive-prci.h" #include "fu540-prci.h" +#include "fu740-prci.h" /* * Private functions @@ -225,6 +226,18 @@ unsigned long sifive_prci_tlclksel_recalc_rate(struct clk_hw *hw, return div_u64(parent_rate, div); } +/* HFPCLK clock integration */ + +unsigned long sifive_prci_hfpclkplldiv_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct __prci_clock *pc = clk_hw_to_prci_clock(hw); + struct __prci_data *pd = pc->pd; + u32 div = __prci_readl(pd, PRCI_HFPCLKPLLDIV_OFFSET); + + return div_u64(parent_rate, div + 2); +} + /* * Core clock mux control */ @@ -270,6 +283,112 @@ void sifive_prci_coreclksel_use_corepll(struct __prci_data *pd) r = __prci_readl(pd, PRCI_CORECLKSEL_OFFSET); /* barrier */ } +/** + * sifive_prci_coreclksel_use_final_corepll() - switch the CORECLK mux to output + * FINAL_COREPLL + * @pd: struct __prci_data * for the PRCI containing the CORECLK mux reg + * + * Switch the CORECLK mux to the final COREPLL output clock; return once + * complete. + * + * Context: Any context. Caller must prevent concurrent changes to the + * PRCI_CORECLKSEL_OFFSET register. + */ +void sifive_prci_coreclksel_use_final_corepll(struct __prci_data *pd) +{ + u32 r; + + r = __prci_readl(pd, PRCI_CORECLKSEL_OFFSET); + r &= ~PRCI_CORECLKSEL_CORECLKSEL_MASK; + __prci_writel(r, PRCI_CORECLKSEL_OFFSET, pd); + + r = __prci_readl(pd, PRCI_CORECLKSEL_OFFSET); /* barrier */ +} + +/** + * sifive_prci_corepllsel_use_dvfscorepll() - switch the COREPLL mux to + * output DVFS_COREPLL + * @pd: struct __prci_data * for the PRCI containing the COREPLL mux reg + * + * Switch the COREPLL mux to the DVFSCOREPLL output clock; return once complete. + * + * Context: Any context. Caller must prevent concurrent changes to the + * PRCI_COREPLLSEL_OFFSET register. + */ +void sifive_prci_corepllsel_use_dvfscorepll(struct __prci_data *pd) +{ + u32 r; + + r = __prci_readl(pd, PRCI_COREPLLSEL_OFFSET); + r |= PRCI_COREPLLSEL_COREPLLSEL_MASK; + __prci_writel(r, PRCI_COREPLLSEL_OFFSET, pd); + + r = __prci_readl(pd, PRCI_COREPLLSEL_OFFSET); /* barrier */ +} + +/** + * sifive_prci_corepllsel_use_corepll() - switch the COREPLL mux to + * output COREPLL + * @pd: struct __prci_data * for the PRCI containing the COREPLL mux reg + * + * Switch the COREPLL mux to the COREPLL output clock; return once complete. + * + * Context: Any context. Caller must prevent concurrent changes to the + * PRCI_COREPLLSEL_OFFSET register. + */ +void sifive_prci_corepllsel_use_corepll(struct __prci_data *pd) +{ + u32 r; + + r = __prci_readl(pd, PRCI_COREPLLSEL_OFFSET); + r &= ~PRCI_COREPLLSEL_COREPLLSEL_MASK; + __prci_writel(r, PRCI_COREPLLSEL_OFFSET, pd); + + r = __prci_readl(pd, PRCI_COREPLLSEL_OFFSET); /* barrier */ +} + +/** + * sifive_prci_hfpclkpllsel_use_hfclk() - switch the HFPCLKPLL mux to + * output HFCLK + * @pd: struct __prci_data * for the PRCI containing the HFPCLKPLL mux reg + * + * Switch the HFPCLKPLL mux to the HFCLK input source; return once complete. + * + * Context: Any context. Caller must prevent concurrent changes to the + * PRCI_HFPCLKPLLSEL_OFFSET register. + */ +void sifive_prci_hfpclkpllsel_use_hfclk(struct __prci_data *pd) +{ + u32 r; + + r = __prci_readl(pd, PRCI_HFPCLKPLLSEL_OFFSET); + r |= PRCI_HFPCLKPLLSEL_HFPCLKPLLSEL_MASK; + __prci_writel(r, PRCI_HFPCLKPLLSEL_OFFSET, pd); + + r = __prci_readl(pd, PRCI_HFPCLKPLLSEL_OFFSET); /* barrier */ +} + +/** + * sifive_prci_hfpclkpllsel_use_hfpclkpll() - switch the HFPCLKPLL mux to + * output HFPCLKPLL + * @pd: struct __prci_data * for the PRCI containing the HFPCLKPLL mux reg + * + * Switch the HFPCLKPLL mux to the HFPCLKPLL output clock; return once complete. + * + * Context: Any context. Caller must prevent concurrent changes to the + * PRCI_HFPCLKPLLSEL_OFFSET register. + */ +void sifive_prci_hfpclkpllsel_use_hfpclkpll(struct __prci_data *pd) +{ + u32 r; + + r = __prci_readl(pd, PRCI_HFPCLKPLLSEL_OFFSET); + r &= ~PRCI_HFPCLKPLLSEL_HFPCLKPLLSEL_MASK; + __prci_writel(r, PRCI_HFPCLKPLLSEL_OFFSET, pd); + + r = __prci_readl(pd, PRCI_HFPCLKPLLSEL_OFFSET); /* barrier */ +} + /** * __prci_register_clocks() - register clock controls in the PRCI * @dev: Linux struct device @@ -377,6 +496,7 @@ static int sifive_prci_probe(struct platform_device *pdev) static const struct of_device_id sifive_prci_of_match[] = { {.compatible = "sifive,fu540-c000-prci", .data = &prci_clk_fu540}, + {.compatible = "sifive,fu740-c000-prci", .data = &prci_clk_fu740}, {} }; diff --git a/drivers/clk/sifive/sifive-prci.h b/drivers/clk/sifive/sifive-prci.h index 280df63b4b928..7e509dfb72d17 100644 --- a/drivers/clk/sifive/sifive-prci.h +++ b/drivers/clk/sifive/sifive-prci.h @@ -117,6 +117,87 @@ #define PRCI_CLKMUXSTATUSREG_TLCLKSEL_STATUS_MASK \ (0x1 << PRCI_CLKMUXSTATUSREG_TLCLKSEL_STATUS_SHIFT) +/* CLTXPLLCFG0 */ +#define PRCI_CLTXPLLCFG0_OFFSET 0x30 +#define PRCI_CLTXPLLCFG0_DIVR_SHIFT 0 +#define PRCI_CLTXPLLCFG0_DIVR_MASK (0x3f << PRCI_CLTXPLLCFG0_DIVR_SHIFT) +#define PRCI_CLTXPLLCFG0_DIVF_SHIFT 6 +#define PRCI_CLTXPLLCFG0_DIVF_MASK (0x1ff << PRCI_CLTXPLLCFG0_DIVF_SHIFT) +#define PRCI_CLTXPLLCFG0_DIVQ_SHIFT 15 +#define PRCI_CLTXPLLCFG0_DIVQ_MASK (0x7 << PRCI_CLTXPLLCFG0_DIVQ_SHIFT) +#define PRCI_CLTXPLLCFG0_RANGE_SHIFT 18 +#define PRCI_CLTXPLLCFG0_RANGE_MASK (0x7 << PRCI_CLTXPLLCFG0_RANGE_SHIFT) +#define PRCI_CLTXPLLCFG0_BYPASS_SHIFT 24 +#define PRCI_CLTXPLLCFG0_BYPASS_MASK (0x1 << PRCI_CLTXPLLCFG0_BYPASS_SHIFT) +#define PRCI_CLTXPLLCFG0_FSE_SHIFT 25 +#define PRCI_CLTXPLLCFG0_FSE_MASK (0x1 << PRCI_CLTXPLLCFG0_FSE_SHIFT) +#define PRCI_CLTXPLLCFG0_LOCK_SHIFT 31 +#define PRCI_CLTXPLLCFG0_LOCK_MASK (0x1 << PRCI_CLTXPLLCFG0_LOCK_SHIFT) + +/* CLTXPLLCFG1 */ +#define PRCI_CLTXPLLCFG1_OFFSET 0x34 +#define PRCI_CLTXPLLCFG1_CKE_SHIFT 31 +#define PRCI_CLTXPLLCFG1_CKE_MASK (0x1 << PRCI_CLTXPLLCFG1_CKE_SHIFT) + +/* DVFSCOREPLLCFG0 */ +#define PRCI_DVFSCOREPLLCFG0_OFFSET 0x38 + +/* DVFSCOREPLLCFG1 */ +#define PRCI_DVFSCOREPLLCFG1_OFFSET 0x3c +#define PRCI_DVFSCOREPLLCFG1_CKE_SHIFT 31 +#define PRCI_DVFSCOREPLLCFG1_CKE_MASK (0x1 << PRCI_DVFSCOREPLLCFG1_CKE_SHIFT) + +/* COREPLLSEL */ +#define PRCI_COREPLLSEL_OFFSET 0x40 +#define PRCI_COREPLLSEL_COREPLLSEL_SHIFT 0 +#define PRCI_COREPLLSEL_COREPLLSEL_MASK \ + (0x1 << PRCI_COREPLLSEL_COREPLLSEL_SHIFT) + +/* HFPCLKPLLCFG0 */ +#define PRCI_HFPCLKPLLCFG0_OFFSET 0x50 +#define PRCI_HFPCLKPLL_CFG0_DIVR_SHIFT 0 +#define PRCI_HFPCLKPLL_CFG0_DIVR_MASK \ + (0x3f << PRCI_HFPCLKPLLCFG0_DIVR_SHIFT) +#define PRCI_HFPCLKPLL_CFG0_DIVF_SHIFT 6 +#define PRCI_HFPCLKPLL_CFG0_DIVF_MASK \ + (0x1ff << PRCI_HFPCLKPLLCFG0_DIVF_SHIFT) +#define PRCI_HFPCLKPLL_CFG0_DIVQ_SHIFT 15 +#define PRCI_HFPCLKPLL_CFG0_DIVQ_MASK \ + (0x7 << PRCI_HFPCLKPLLCFG0_DIVQ_SHIFT) +#define PRCI_HFPCLKPLL_CFG0_RANGE_SHIFT 18 +#define PRCI_HFPCLKPLL_CFG0_RANGE_MASK \ + (0x7 << PRCI_HFPCLKPLLCFG0_RANGE_SHIFT) +#define PRCI_HFPCLKPLL_CFG0_BYPASS_SHIFT 24 +#define PRCI_HFPCLKPLL_CFG0_BYPASS_MASK \ + (0x1 << PRCI_HFPCLKPLLCFG0_BYPASS_SHIFT) +#define PRCI_HFPCLKPLL_CFG0_FSE_SHIFT 25 +#define PRCI_HFPCLKPLL_CFG0_FSE_MASK \ + (0x1 << PRCI_HFPCLKPLLCFG0_FSE_SHIFT) +#define PRCI_HFPCLKPLL_CFG0_LOCK_SHIFT 31 +#define PRCI_HFPCLKPLL_CFG0_LOCK_MASK \ + (0x1 << PRCI_HFPCLKPLLCFG0_LOCK_SHIFT) + +/* HFPCLKPLLCFG1 */ +#define PRCI_HFPCLKPLLCFG1_OFFSET 0x54 +#define PRCI_HFPCLKPLLCFG1_CKE_SHIFT 31 +#define PRCI_HFPCLKPLLCFG1_CKE_MASK \ + (0x1 << PRCI_HFPCLKPLLCFG1_CKE_SHIFT) + +/* HFPCLKPLLSEL */ +#define PRCI_HFPCLKPLLSEL_OFFSET 0x58 +#define PRCI_HFPCLKPLLSEL_HFPCLKPLLSEL_SHIFT 0 +#define PRCI_HFPCLKPLLSEL_HFPCLKPLLSEL_MASK \ + (0x1 << PRCI_HFPCLKPLLSEL_HFPCLKPLLSEL_SHIFT) + +/* HFPCLKPLLDIV */ +#define PRCI_HFPCLKPLLDIV_OFFSET 0x5c + +/* PRCIPLL */ +#define PRCI_PRCIPLL_OFFSET 0xe0 + +/* PROCMONCFG */ +#define PRCI_PROCMONCFG_OFFSET 0xf0 + /* * Private structures */ @@ -187,6 +268,11 @@ struct prci_clk_desc { /* Core clock mux control */ void sifive_prci_coreclksel_use_hfclk(struct __prci_data *pd); void sifive_prci_coreclksel_use_corepll(struct __prci_data *pd); +void sifive_prci_coreclksel_use_final_corepll(struct __prci_data *pd); +void sifive_prci_corepllsel_use_dvfscorepll(struct __prci_data *pd); +void sifive_prci_corepllsel_use_corepll(struct __prci_data *pd); +void sifive_prci_hfpclkpllsel_use_hfclk(struct __prci_data *pd); +void sifive_prci_hfpclkpllsel_use_hfpclkpll(struct __prci_data *pd); /* Linux clock framework integration */ long sifive_prci_wrpll_round_rate(struct clk_hw *hw, unsigned long rate, @@ -197,5 +283,7 @@ unsigned long sifive_prci_wrpll_recalc_rate(struct clk_hw *hw, unsigned long parent_rate); unsigned long sifive_prci_tlclksel_recalc_rate(struct clk_hw *hw, unsigned long parent_rate); +unsigned long sifive_prci_hfpclkplldiv_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate); #endif /* __SIFIVE_CLK_SIFIVE_PRCI_H */ diff --git a/include/dt-bindings/clock/sifive-fu740-prci.h b/include/dt-bindings/clock/sifive-fu740-prci.h new file mode 100644 index 0000000000000..cd7706ea5677a --- /dev/null +++ b/include/dt-bindings/clock/sifive-fu740-prci.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ +/* + * Copyright (C) 2019 SiFive, Inc. + * Wesley Terpstra + * Paul Walmsley + * Zong Li + */ + +#ifndef __DT_BINDINGS_CLOCK_SIFIVE_FU740_PRCI_H +#define __DT_BINDINGS_CLOCK_SIFIVE_FU740_PRCI_H + +/* Clock indexes for use by Device Tree data and the PRCI driver */ + +#define PRCI_CLK_COREPLL 0 +#define PRCI_CLK_DDRPLL 1 +#define PRCI_CLK_GEMGXLPLL 2 +#define PRCI_CLK_DVFSCOREPLL 3 +#define PRCI_CLK_HFPCLKPLL 4 +#define PRCI_CLK_CLTXPLL 5 +#define PRCI_CLK_TLCLK 6 +#define PRCI_CLK_PCLK 7 + +#endif /* __DT_BINDINGS_CLOCK_SIFIVE_FU740_PRCI_H */ -- GitLab From 263ac3908516abb0392747bbf595af2b13df5fa2 Mon Sep 17 00:00:00 2001 From: Zong Li Date: Wed, 9 Dec 2020 17:49:15 +0800 Subject: [PATCH 0646/1894] clk: sifive: Fix the wrong bit field shift The clk enable bit should be 31 instead of 24. Signed-off-by: Zong Li Reported-by: Pragnesh Patel Link: https://lore.kernel.org/r/20201209094916.17383-5-zong.li@sifive.com Signed-off-by: Stephen Boyd --- drivers/clk/sifive/sifive-prci.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/sifive/sifive-prci.h b/drivers/clk/sifive/sifive-prci.h index 7e509dfb72d17..88493f3b9edf9 100644 --- a/drivers/clk/sifive/sifive-prci.h +++ b/drivers/clk/sifive/sifive-prci.h @@ -59,7 +59,7 @@ /* DDRPLLCFG1 */ #define PRCI_DDRPLLCFG1_OFFSET 0x10 -#define PRCI_DDRPLLCFG1_CKE_SHIFT 24 +#define PRCI_DDRPLLCFG1_CKE_SHIFT 31 #define PRCI_DDRPLLCFG1_CKE_MASK (0x1 << PRCI_DDRPLLCFG1_CKE_SHIFT) /* GEMGXLPLLCFG0 */ @@ -81,7 +81,7 @@ /* GEMGXLPLLCFG1 */ #define PRCI_GEMGXLPLLCFG1_OFFSET 0x20 -#define PRCI_GEMGXLPLLCFG1_CKE_SHIFT 24 +#define PRCI_GEMGXLPLLCFG1_CKE_SHIFT 31 #define PRCI_GEMGXLPLLCFG1_CKE_MASK (0x1 << PRCI_GEMGXLPLLCFG1_CKE_SHIFT) /* CORECLKSEL */ -- GitLab From 732374a0b440d9a79c8412f318a25cd37ba6f4e2 Mon Sep 17 00:00:00 2001 From: Pragnesh Patel Date: Wed, 9 Dec 2020 17:49:16 +0800 Subject: [PATCH 0647/1894] clk: sifive: Add clock enable and disable ops Add new functions "sifive_prci_clock_enable(), sifive_prci_clock_disable() and sifive_clk_is_enabled()" to enable or disable the PRCI clock Signed-off-by: Pragnesh Patel Tested-by: Zong Li Link: https://lore.kernel.org/r/20201209094916.17383-6-zong.li@sifive.com Signed-off-by: Stephen Boyd --- drivers/clk/sifive/fu540-prci.c | 6 +++ drivers/clk/sifive/fu740-prci.c | 9 ++++ drivers/clk/sifive/sifive-prci.c | 77 ++++++++++++++++++++++++++++---- drivers/clk/sifive/sifive-prci.h | 10 +++++ 4 files changed, 93 insertions(+), 9 deletions(-) diff --git a/drivers/clk/sifive/fu540-prci.c b/drivers/clk/sifive/fu540-prci.c index 83ced24b0b949..29bab915003cb 100644 --- a/drivers/clk/sifive/fu540-prci.c +++ b/drivers/clk/sifive/fu540-prci.c @@ -27,16 +27,19 @@ static struct __prci_wrpll_data __prci_corepll_data = { .cfg0_offs = PRCI_COREPLLCFG0_OFFSET, + .cfg1_offs = PRCI_COREPLLCFG1_OFFSET, .enable_bypass = sifive_prci_coreclksel_use_hfclk, .disable_bypass = sifive_prci_coreclksel_use_corepll, }; static struct __prci_wrpll_data __prci_ddrpll_data = { .cfg0_offs = PRCI_DDRPLLCFG0_OFFSET, + .cfg1_offs = PRCI_DDRPLLCFG1_OFFSET, }; static struct __prci_wrpll_data __prci_gemgxlpll_data = { .cfg0_offs = PRCI_GEMGXLPLLCFG0_OFFSET, + .cfg1_offs = PRCI_GEMGXLPLLCFG1_OFFSET, }; /* Linux clock framework integration */ @@ -45,6 +48,9 @@ static const struct clk_ops sifive_fu540_prci_wrpll_clk_ops = { .set_rate = sifive_prci_wrpll_set_rate, .round_rate = sifive_prci_wrpll_round_rate, .recalc_rate = sifive_prci_wrpll_recalc_rate, + .enable = sifive_prci_clock_enable, + .disable = sifive_prci_clock_disable, + .is_enabled = sifive_clk_is_enabled, }; static const struct clk_ops sifive_fu540_prci_wrpll_ro_clk_ops = { diff --git a/drivers/clk/sifive/fu740-prci.c b/drivers/clk/sifive/fu740-prci.c index 69208dd1a757a..764d1097aa51f 100644 --- a/drivers/clk/sifive/fu740-prci.c +++ b/drivers/clk/sifive/fu740-prci.c @@ -15,32 +15,38 @@ static struct __prci_wrpll_data __prci_corepll_data = { .cfg0_offs = PRCI_COREPLLCFG0_OFFSET, + .cfg1_offs = PRCI_COREPLLCFG1_OFFSET, .enable_bypass = sifive_prci_coreclksel_use_hfclk, .disable_bypass = sifive_prci_coreclksel_use_final_corepll, }; static struct __prci_wrpll_data __prci_ddrpll_data = { .cfg0_offs = PRCI_DDRPLLCFG0_OFFSET, + .cfg1_offs = PRCI_DDRPLLCFG1_OFFSET, }; static struct __prci_wrpll_data __prci_gemgxlpll_data = { .cfg0_offs = PRCI_GEMGXLPLLCFG0_OFFSET, + .cfg1_offs = PRCI_GEMGXLPLLCFG1_OFFSET, }; static struct __prci_wrpll_data __prci_dvfscorepll_data = { .cfg0_offs = PRCI_DVFSCOREPLLCFG0_OFFSET, + .cfg1_offs = PRCI_DVFSCOREPLLCFG1_OFFSET, .enable_bypass = sifive_prci_corepllsel_use_corepll, .disable_bypass = sifive_prci_corepllsel_use_dvfscorepll, }; static struct __prci_wrpll_data __prci_hfpclkpll_data = { .cfg0_offs = PRCI_HFPCLKPLLCFG0_OFFSET, + .cfg1_offs = PRCI_HFPCLKPLLCFG1_OFFSET, .enable_bypass = sifive_prci_hfpclkpllsel_use_hfclk, .disable_bypass = sifive_prci_hfpclkpllsel_use_hfpclkpll, }; static struct __prci_wrpll_data __prci_cltxpll_data = { .cfg0_offs = PRCI_CLTXPLLCFG0_OFFSET, + .cfg1_offs = PRCI_CLTXPLLCFG1_OFFSET, }; /* Linux clock framework integration */ @@ -49,6 +55,9 @@ static const struct clk_ops sifive_fu740_prci_wrpll_clk_ops = { .set_rate = sifive_prci_wrpll_set_rate, .round_rate = sifive_prci_wrpll_round_rate, .recalc_rate = sifive_prci_wrpll_recalc_rate, + .enable = sifive_prci_clock_enable, + .disable = sifive_prci_clock_disable, + .is_enabled = sifive_clk_is_enabled, }; static const struct clk_ops sifive_fu740_prci_wrpll_ro_clk_ops = { diff --git a/drivers/clk/sifive/sifive-prci.c b/drivers/clk/sifive/sifive-prci.c index cc4b4c6b4437d..c78b042750e21 100644 --- a/drivers/clk/sifive/sifive-prci.c +++ b/drivers/clk/sifive/sifive-prci.c @@ -113,7 +113,7 @@ static u32 __prci_wrpll_pack(const struct wrpll_cfg *c) } /** - * __prci_wrpll_read_cfg() - read the WRPLL configuration from the PRCI + * __prci_wrpll_read_cfg0() - read the WRPLL configuration from the PRCI * @pd: PRCI context * @pwd: PRCI WRPLL metadata * @@ -124,14 +124,14 @@ static u32 __prci_wrpll_pack(const struct wrpll_cfg *c) * Context: Any context. Caller must prevent the records pointed to by * @pd and @pwd from changing during execution. */ -static void __prci_wrpll_read_cfg(struct __prci_data *pd, - struct __prci_wrpll_data *pwd) +static void __prci_wrpll_read_cfg0(struct __prci_data *pd, + struct __prci_wrpll_data *pwd) { __prci_wrpll_unpack(&pwd->c, __prci_readl(pd, pwd->cfg0_offs)); } /** - * __prci_wrpll_write_cfg() - write WRPLL configuration into the PRCI + * __prci_wrpll_write_cfg0() - write WRPLL configuration into the PRCI * @pd: PRCI context * @pwd: PRCI WRPLL metadata * @c: WRPLL configuration record to write @@ -144,15 +144,29 @@ static void __prci_wrpll_read_cfg(struct __prci_data *pd, * Context: Any context. Caller must prevent the records pointed to by * @pd and @pwd from changing during execution. */ -static void __prci_wrpll_write_cfg(struct __prci_data *pd, - struct __prci_wrpll_data *pwd, - struct wrpll_cfg *c) +static void __prci_wrpll_write_cfg0(struct __prci_data *pd, + struct __prci_wrpll_data *pwd, + struct wrpll_cfg *c) { __prci_writel(__prci_wrpll_pack(c), pwd->cfg0_offs, pd); memcpy(&pwd->c, c, sizeof(*c)); } +/** + * __prci_wrpll_write_cfg1() - write Clock enable/disable configuration + * into the PRCI + * @pd: PRCI context + * @pwd: PRCI WRPLL metadata + * @enable: Clock enable or disable value + */ +static void __prci_wrpll_write_cfg1(struct __prci_data *pd, + struct __prci_wrpll_data *pwd, + u32 enable) +{ + __prci_writel(enable, pwd->cfg1_offs, pd); +} + /* * Linux clock framework integration * @@ -199,16 +213,61 @@ int sifive_prci_wrpll_set_rate(struct clk_hw *hw, if (pwd->enable_bypass) pwd->enable_bypass(pd); - __prci_wrpll_write_cfg(pd, pwd, &pwd->c); + __prci_wrpll_write_cfg0(pd, pwd, &pwd->c); udelay(wrpll_calc_max_lock_us(&pwd->c)); + return 0; +} + +int sifive_clk_is_enabled(struct clk_hw *hw) +{ + struct __prci_clock *pc = clk_hw_to_prci_clock(hw); + struct __prci_wrpll_data *pwd = pc->pwd; + struct __prci_data *pd = pc->pd; + u32 r; + + r = __prci_readl(pd, pwd->cfg1_offs); + + if (r & PRCI_COREPLLCFG1_CKE_MASK) + return 1; + else + return 0; +} + +int sifive_prci_clock_enable(struct clk_hw *hw) +{ + struct __prci_clock *pc = clk_hw_to_prci_clock(hw); + struct __prci_wrpll_data *pwd = pc->pwd; + struct __prci_data *pd = pc->pd; + + if (sifive_clk_is_enabled(hw)) + return 0; + + __prci_wrpll_write_cfg1(pd, pwd, PRCI_COREPLLCFG1_CKE_MASK); + if (pwd->disable_bypass) pwd->disable_bypass(pd); return 0; } +void sifive_prci_clock_disable(struct clk_hw *hw) +{ + struct __prci_clock *pc = clk_hw_to_prci_clock(hw); + struct __prci_wrpll_data *pwd = pc->pwd; + struct __prci_data *pd = pc->pd; + u32 r; + + if (pwd->enable_bypass) + pwd->enable_bypass(pd); + + r = __prci_readl(pd, pwd->cfg1_offs); + r &= ~PRCI_COREPLLCFG1_CKE_MASK; + + __prci_wrpll_write_cfg1(pd, pwd, r); +} + /* TLCLKSEL clock integration */ unsigned long sifive_prci_tlclksel_recalc_rate(struct clk_hw *hw, @@ -427,7 +486,7 @@ static int __prci_register_clocks(struct device *dev, struct __prci_data *pd, pic->pd = pd; if (pic->pwd) - __prci_wrpll_read_cfg(pd, pic->pwd); + __prci_wrpll_read_cfg0(pd, pic->pwd); r = devm_clk_hw_register(dev, &pic->hw); if (r) { diff --git a/drivers/clk/sifive/sifive-prci.h b/drivers/clk/sifive/sifive-prci.h index 88493f3b9edf9..dbdbd17226884 100644 --- a/drivers/clk/sifive/sifive-prci.h +++ b/drivers/clk/sifive/sifive-prci.h @@ -40,6 +40,11 @@ #define PRCI_COREPLLCFG0_LOCK_SHIFT 31 #define PRCI_COREPLLCFG0_LOCK_MASK (0x1 << PRCI_COREPLLCFG0_LOCK_SHIFT) +/* COREPLLCFG1 */ +#define PRCI_COREPLLCFG1_OFFSET 0x8 +#define PRCI_COREPLLCFG1_CKE_SHIFT 31 +#define PRCI_COREPLLCFG1_CKE_MASK (0x1 << PRCI_COREPLLCFG1_CKE_SHIFT) + /* DDRPLLCFG0 */ #define PRCI_DDRPLLCFG0_OFFSET 0xc #define PRCI_DDRPLLCFG0_DIVR_SHIFT 0 @@ -220,6 +225,7 @@ struct __prci_data { * @enable_bypass: fn ptr to code to bypass the WRPLL (if applicable; else NULL) * @disable_bypass: fn ptr to code to not bypass the WRPLL (or NULL) * @cfg0_offs: WRPLL CFG0 register offset (in bytes) from the PRCI base address + * @cfg1_offs: WRPLL CFG1 register offset (in bytes) from the PRCI base address * * @enable_bypass and @disable_bypass are used for WRPLL instances * that contain a separate external glitchless clock mux downstream @@ -230,6 +236,7 @@ struct __prci_wrpll_data { void (*enable_bypass)(struct __prci_data *pd); void (*disable_bypass)(struct __prci_data *pd); u8 cfg0_offs; + u8 cfg1_offs; }; /** @@ -279,6 +286,9 @@ long sifive_prci_wrpll_round_rate(struct clk_hw *hw, unsigned long rate, unsigned long *parent_rate); int sifive_prci_wrpll_set_rate(struct clk_hw *hw, unsigned long rate, unsigned long parent_rate); +int sifive_clk_is_enabled(struct clk_hw *hw); +int sifive_prci_clock_enable(struct clk_hw *hw); +void sifive_prci_clock_disable(struct clk_hw *hw); unsigned long sifive_prci_wrpll_recalc_rate(struct clk_hw *hw, unsigned long parent_rate); unsigned long sifive_prci_tlclksel_recalc_rate(struct clk_hw *hw, -- GitLab From c1048828c3dbd96c7e371fae658e5f40e6a45e99 Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Wed, 16 Dec 2020 16:14:08 -0500 Subject: [PATCH 0648/1894] orangefs: add splice file operations Fix some xfstests regressions that started after 36e2c7421f02, "don't allow splice read/write without explicit ops". Thanks for help from Dave Chinner and Matthew Wilcox. Signed-off-by: Mike Marshall --- fs/orangefs/file.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index af375e049aae7..ec8ae4257975c 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -663,6 +663,8 @@ const struct file_operations orangefs_file_operations = { .unlocked_ioctl = orangefs_ioctl, .mmap = orangefs_file_mmap, .open = generic_file_open, + .splice_read = generic_file_splice_read, + .splice_write = iter_file_splice_write, .flush = orangefs_flush, .release = orangefs_file_release, .fsync = orangefs_fsync, -- GitLab From e6582cb5dab4ae572513412cc10fd0ffe07e0b05 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Mon, 30 Nov 2020 11:19:21 +0100 Subject: [PATCH 0649/1894] blk-mq: Remove 'running from the wrong CPU' warning It's guaranteed that no request is in flight when a hctx is going offline. This warning is only triggered when the wq's CPU is hot plugged and the blk-mq is not synced up yet. As this state is temporary and the request is still processed correctly, better remove the warning as this is the fast path. Suggested-by: Ming Lei Signed-off-by: Daniel Wagner Reviewed-by: Christoph Hellwig Reviewed-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq.c | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index b09ce00cc6af4..a428798e1c5c6 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1495,31 +1495,6 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) { int srcu_idx; - /* - * We should be running this queue from one of the CPUs that - * are mapped to it. - * - * There are at least two related races now between setting - * hctx->next_cpu from blk_mq_hctx_next_cpu() and running - * __blk_mq_run_hw_queue(): - * - * - hctx->next_cpu is found offline in blk_mq_hctx_next_cpu(), - * but later it becomes online, then this warning is harmless - * at all - * - * - hctx->next_cpu is found online in blk_mq_hctx_next_cpu(), - * but later it becomes offline, then the warning can't be - * triggered, and we depend on blk-mq timeout handler to - * handle dispatched requests to this hctx - */ - if (!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask) && - cpu_online(hctx->next_cpu)) { - printk(KERN_WARNING "run queue from wrong CPU %d, hctx %s\n", - raw_smp_processor_id(), - cpumask_empty(hctx->cpumask) ? "inactive": "active"); - dump_stack(); - } - /* * We can't run the queue inline with ints disabled. Ensure that * we catch bad users of this early. -- GitLab From e7508d48565060af5d89f10cb83c9359c8ae1310 Mon Sep 17 00:00:00 2001 From: Md Haris Iqbal Date: Thu, 10 Dec 2020 11:18:20 +0100 Subject: [PATCH 0650/1894] block/rnbd-clt: Get rid of warning regarding size argument in strlcpy The kernel test robot triggerred the following warning, >> drivers/block/rnbd/rnbd-clt.c:1397:42: warning: size argument in 'strlcpy' call appears to be size of the source; expected the size of the destination [-Wstrlcpy-strlcat-size] strlcpy(dev->pathname, pathname, strlen(pathname) + 1); ~~~~~~~^~~~~~~~~~~~~ To get rid of the above warning, use a kstrdup as Bart suggested. Fixes: 64e8a6ece1a5 ("block/rnbd-clt: Dynamically alloc buffer for pathname & blk_symlink_name") Reported-by: kernel test robot Signed-off-by: Md Haris Iqbal Signed-off-by: Jack Wang Signed-off-by: Jens Axboe --- drivers/block/rnbd/rnbd-clt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index a199b190c73d1..d63f0974bd044 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -1388,12 +1388,11 @@ static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess, goto out_queues; } - dev->pathname = kzalloc(strlen(pathname) + 1, GFP_KERNEL); + dev->pathname = kstrdup(pathname, GFP_KERNEL); if (!dev->pathname) { ret = -ENOMEM; goto out_queues; } - strlcpy(dev->pathname, pathname, strlen(pathname) + 1); dev->clt_device_id = ret; dev->sess = sess; -- GitLab From 46067844efdb8275ade705923120fc5391543b53 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Thu, 10 Dec 2020 11:18:21 +0100 Subject: [PATCH 0651/1894] block/rnbd-clt: Fix possible memleak In error case, we do not free the memory for blk_symlink_name. Do it by free the memory in error case, and set to NULL afterwards. Also fix the condition in rnbd_clt_remove_dev_symlink. Fixes: 64e8a6ece1a5 ("block/rnbd-clt: Dynamically alloc buffer for pathname & blk_symlink_name") Signed-off-by: Jack Wang Reviewed-by: Md Haris Iqbal Signed-off-by: Jens Axboe --- drivers/block/rnbd/rnbd-clt-sysfs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/block/rnbd/rnbd-clt-sysfs.c b/drivers/block/rnbd/rnbd-clt-sysfs.c index a7caeedeb1986..d4aa6bfc95557 100644 --- a/drivers/block/rnbd/rnbd-clt-sysfs.c +++ b/drivers/block/rnbd/rnbd-clt-sysfs.c @@ -432,7 +432,7 @@ void rnbd_clt_remove_dev_symlink(struct rnbd_clt_dev *dev) * i.e. rnbd_clt_unmap_dev_store() leading to a sysfs warning because * of sysfs link already was removed already. */ - if (strlen(dev->blk_symlink_name) && try_module_get(THIS_MODULE)) { + if (dev->blk_symlink_name && try_module_get(THIS_MODULE)) { sysfs_remove_link(rnbd_devs_kobj, dev->blk_symlink_name); kfree(dev->blk_symlink_name); module_put(THIS_MODULE); @@ -521,7 +521,8 @@ static int rnbd_clt_add_dev_symlink(struct rnbd_clt_dev *dev) return 0; out_err: - dev->blk_symlink_name[0] = '\0'; + kfree(dev->blk_symlink_name); + dev->blk_symlink_name = NULL ; return ret; } -- GitLab From 87019e7d99d707e60e20ea3245a561419d5de5ce Mon Sep 17 00:00:00 2001 From: Md Haris Iqbal Date: Thu, 10 Dec 2020 11:18:22 +0100 Subject: [PATCH 0652/1894] block/rnbd-srv: Protect dev session sysfs removal Since the removal of the session sysfs can also be called from the function destroy_sess, there is a need to protect the call from the function rnbd_srv_sess_dev_force_close Fixes: 786998050cbc ("block/rnbd-srv: close a mapped device from server side.") Signed-off-by: Md Haris Iqbal Reviewed-by: Guoqing Jiang Signed-off-by: Jack Wang Signed-off-by: Jens Axboe --- drivers/block/rnbd/rnbd-srv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c index d1ee72ed8384f..066411cce5e26 100644 --- a/drivers/block/rnbd/rnbd-srv.c +++ b/drivers/block/rnbd/rnbd-srv.c @@ -338,9 +338,10 @@ static int rnbd_srv_link_ev(struct rtrs_srv *rtrs, void rnbd_srv_sess_dev_force_close(struct rnbd_srv_sess_dev *sess_dev) { + mutex_lock(&sess_dev->sess->lock); rnbd_srv_destroy_dev_session_sysfs(sess_dev); + mutex_unlock(&sess_dev->sess->lock); sess_dev->keep_id = true; - } static int process_msg_close(struct rtrs_srv *rtrs, -- GitLab From 3877ece01e46f01fae0fbc00df93d0e5f23196b0 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Thu, 10 Dec 2020 11:18:23 +0100 Subject: [PATCH 0653/1894] block/rnbd: Fix typos Signed-off-by: Jack Wang Signed-off-by: Jens Axboe --- drivers/block/rnbd/rnbd-clt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index d63f0974bd044..3a2e6e8ed6b16 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -359,7 +359,7 @@ static struct rnbd_iu *rnbd_get_iu(struct rnbd_clt_session *sess, * 2nd reference is dropped after confirmation with the response is * returned. * 1st and 2nd can happen in any order, so the rnbd_iu should be - * released (rtrs_permit returned to ibbtrs) only leased after both + * released (rtrs_permit returned to rtrs) only after both * are finished. */ atomic_set(&iu->refcount, 2); @@ -803,7 +803,7 @@ static struct rnbd_clt_session *alloc_sess(const char *sessname) rnbd_init_cpu_qlists(sess->cpu_queues); /* - * That is simple percpu variable which stores cpu indeces, which are + * That is simple percpu variable which stores cpu indices, which are * incremented on each access. We need that for the sake of fairness * to wake up queues in a round-robin manner. */ @@ -1666,7 +1666,7 @@ static void rnbd_destroy_sessions(void) /* * Here at this point there is no any concurrent access to sessions * list and devices list: - * 1. New session or device can'be be created - session sysfs files + * 1. New session or device can't be created - session sysfs files * are removed. * 2. Device or session can't be removed - module reference is taken * into account in unmap device sysfs callback. -- GitLab From 512c781fd28cb401ee9f2843e32bf4640732c671 Mon Sep 17 00:00:00 2001 From: Gioh Kim Date: Thu, 10 Dec 2020 11:18:24 +0100 Subject: [PATCH 0654/1894] block/rnbd: Set write-back cache and fua same to the target device The rnbd-client always sets the write-back cache and fua attributes of the rnbd device queue regardless of the target device on the server. That generates IO hang issue when the target device does not support both of write-back cacne and fua. This patch adds more fields for the cache policy and fua into the device opening message. The rnbd-server sends the information if the target device supports the write-back cache and fua and rnbd-client recevives it and set the device queue accordingly. Signed-off-by: Gioh Kim [jwang: some minor change, rename a few varables, remove unrelated comments.] Signed-off-by: Jack Wang Signed-off-by: Jens Axboe --- drivers/block/rnbd/rnbd-clt.c | 8 +++++--- drivers/block/rnbd/rnbd-clt.h | 2 ++ drivers/block/rnbd/rnbd-proto.h | 9 ++++++++- drivers/block/rnbd/rnbd-srv.c | 9 +++++++-- 4 files changed, 22 insertions(+), 6 deletions(-) diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index 3a2e6e8ed6b16..b5fffbdeb263d 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -88,6 +88,8 @@ static int rnbd_clt_set_dev_attr(struct rnbd_clt_dev *dev, dev->discard_alignment = le32_to_cpu(rsp->discard_alignment); dev->secure_discard = le16_to_cpu(rsp->secure_discard); dev->rotational = rsp->rotational; + dev->wc = !!(rsp->cache_policy & RNBD_WRITEBACK); + dev->fua = !!(rsp->cache_policy & RNBD_FUA); dev->max_hw_sectors = sess->max_io_size / SECTOR_SIZE; dev->max_segments = BMAX_SEGMENTS; @@ -1305,7 +1307,7 @@ static void setup_request_queue(struct rnbd_clt_dev *dev) blk_queue_max_segments(dev->queue, dev->max_segments); blk_queue_io_opt(dev->queue, dev->sess->max_io_size); blk_queue_virt_boundary(dev->queue, SZ_4K - 1); - blk_queue_write_cache(dev->queue, true, true); + blk_queue_write_cache(dev->queue, dev->wc, dev->fua); dev->queue->queuedata = dev; } @@ -1528,13 +1530,13 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname, } rnbd_clt_info(dev, - "map_device: Device mapped as %s (nsectors: %zu, logical_block_size: %d, physical_block_size: %d, max_write_same_sectors: %d, max_discard_sectors: %d, discard_granularity: %d, discard_alignment: %d, secure_discard: %d, max_segments: %d, max_hw_sectors: %d, rotational: %d)\n", + "map_device: Device mapped as %s (nsectors: %zu, logical_block_size: %d, physical_block_size: %d, max_write_same_sectors: %d, max_discard_sectors: %d, discard_granularity: %d, discard_alignment: %d, secure_discard: %d, max_segments: %d, max_hw_sectors: %d, rotational: %d, wc: %d, fua: %d)\n", dev->gd->disk_name, dev->nsectors, dev->logical_block_size, dev->physical_block_size, dev->max_write_same_sectors, dev->max_discard_sectors, dev->discard_granularity, dev->discard_alignment, dev->secure_discard, dev->max_segments, - dev->max_hw_sectors, dev->rotational); + dev->max_hw_sectors, dev->rotational, dev->wc, dev->fua); mutex_unlock(&dev->lock); diff --git a/drivers/block/rnbd/rnbd-clt.h b/drivers/block/rnbd/rnbd-clt.h index b193d59040503..efd67ae286ca6 100644 --- a/drivers/block/rnbd/rnbd-clt.h +++ b/drivers/block/rnbd/rnbd-clt.h @@ -112,6 +112,8 @@ struct rnbd_clt_dev { enum rnbd_access_mode access_mode; bool read_only; bool rotational; + bool wc; + bool fua; u32 max_hw_sectors; u32 max_write_same_sectors; u32 max_discard_sectors; diff --git a/drivers/block/rnbd/rnbd-proto.h b/drivers/block/rnbd/rnbd-proto.h index ca166241452c2..c1bc5c0fef71d 100644 --- a/drivers/block/rnbd/rnbd-proto.h +++ b/drivers/block/rnbd/rnbd-proto.h @@ -108,6 +108,11 @@ struct rnbd_msg_close { __le32 device_id; }; +enum rnbd_cache_policy { + RNBD_FUA = 1 << 0, + RNBD_WRITEBACK = 1 << 1, +}; + /** * struct rnbd_msg_open_rsp - response message to RNBD_MSG_OPEN * @hdr: message header @@ -124,6 +129,7 @@ struct rnbd_msg_close { * @max_segments: max segments hardware support in one transfer * @secure_discard: supports secure discard * @rotation: is a rotational disc? + * @cache_policy: support write-back caching or FUA? */ struct rnbd_msg_open_rsp { struct rnbd_msg_hdr hdr; @@ -139,7 +145,8 @@ struct rnbd_msg_open_rsp { __le16 max_segments; __le16 secure_discard; u8 rotational; - u8 reserved[11]; + u8 cache_policy; + u8 reserved[10]; }; /** diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c index 066411cce5e26..b8e44331e4944 100644 --- a/drivers/block/rnbd/rnbd-srv.c +++ b/drivers/block/rnbd/rnbd-srv.c @@ -550,6 +550,7 @@ static void rnbd_srv_fill_msg_open_rsp(struct rnbd_msg_open_rsp *rsp, struct rnbd_srv_sess_dev *sess_dev) { struct rnbd_dev *rnbd_dev = sess_dev->rnbd_dev; + struct request_queue *q = bdev_get_queue(rnbd_dev->bdev); rsp->hdr.type = cpu_to_le16(RNBD_MSG_OPEN_RSP); rsp->device_id = @@ -574,8 +575,12 @@ static void rnbd_srv_fill_msg_open_rsp(struct rnbd_msg_open_rsp *rsp, cpu_to_le32(rnbd_dev_get_discard_alignment(rnbd_dev)); rsp->secure_discard = cpu_to_le16(rnbd_dev_get_secure_discard(rnbd_dev)); - rsp->rotational = - !blk_queue_nonrot(bdev_get_queue(rnbd_dev->bdev)); + rsp->rotational = !blk_queue_nonrot(q); + rsp->cache_policy = 0; + if (test_bit(QUEUE_FLAG_WC, &q->queue_flags)) + rsp->cache_policy |= RNBD_WRITEBACK; + if (blk_queue_fua(q)) + rsp->cache_policy |= RNBD_FUA; } static struct rnbd_srv_sess_dev * -- GitLab From 5a1328d0c3a757cdd8c65f4dfe0a02502a5810bc Mon Sep 17 00:00:00 2001 From: Gioh Kim Date: Thu, 10 Dec 2020 11:18:25 +0100 Subject: [PATCH 0655/1894] block/rnbd-clt: Dynamically allocate sglist for rnbd_iu The BMAX_SEGMENT static array for scatterlist is embedded in rnbd_iu structure to avoid memory allocation in hot IO path. In many cases, we do need only several sg entries because many IOs have only several segments. This patch change rnbd_iu to check the number of segments in the request and allocate sglist dynamically. For io path, use sg_alloc_table_chained to allocate sg list faster. First it makes two sg entries after pdu of request. The sg_alloc_table_chained uses the pre-allocated sg entries if the number of segments of the request is less than two. So it reduces the number of memory allocation. Signed-off-by: Gioh Kim Signed-off-by: Jack Wang Signed-off-by: Jens Axboe --- drivers/block/rnbd/rnbd-clt.c | 60 ++++++++++++++++++++--------------- drivers/block/rnbd/rnbd-clt.h | 10 +++++- 2 files changed, 43 insertions(+), 27 deletions(-) diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index b5fffbdeb263d..5941ff7c83a8e 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -384,6 +384,7 @@ static void rnbd_softirq_done_fn(struct request *rq) struct rnbd_iu *iu; iu = blk_mq_rq_to_pdu(rq); + sg_free_table_chained(&iu->sgt, RNBD_INLINE_SG_CNT); rnbd_put_permit(sess, iu->permit); blk_mq_end_request(rq, errno_to_blk_status(iu->errno)); } @@ -477,7 +478,7 @@ static int send_msg_close(struct rnbd_clt_dev *dev, u32 device_id, bool wait) iu->buf = NULL; iu->dev = dev; - sg_mark_end(&iu->sglist[0]); + sg_alloc_table(&iu->sgt, 1, GFP_KERNEL); msg.hdr.type = cpu_to_le16(RNBD_MSG_CLOSE); msg.device_id = cpu_to_le32(device_id); @@ -492,6 +493,7 @@ static int send_msg_close(struct rnbd_clt_dev *dev, u32 device_id, bool wait) err = errno; } + sg_free_table(&iu->sgt); rnbd_put_iu(sess, iu); return err; } @@ -564,7 +566,8 @@ static int send_msg_open(struct rnbd_clt_dev *dev, bool wait) iu->buf = rsp; iu->dev = dev; - sg_init_one(iu->sglist, rsp, sizeof(*rsp)); + sg_alloc_table(&iu->sgt, 1, GFP_KERNEL); + sg_init_one(iu->sgt.sgl, rsp, sizeof(*rsp)); msg.hdr.type = cpu_to_le16(RNBD_MSG_OPEN); msg.access_mode = dev->access_mode; @@ -572,7 +575,7 @@ static int send_msg_open(struct rnbd_clt_dev *dev, bool wait) WARN_ON(!rnbd_clt_get_dev(dev)); err = send_usr_msg(sess->rtrs, READ, iu, - &vec, sizeof(*rsp), iu->sglist, 1, + &vec, sizeof(*rsp), iu->sgt.sgl, 1, msg_open_conf, &errno, wait); if (err) { rnbd_clt_put_dev(dev); @@ -582,6 +585,7 @@ static int send_msg_open(struct rnbd_clt_dev *dev, bool wait) err = errno; } + sg_free_table(&iu->sgt); rnbd_put_iu(sess, iu); return err; } @@ -610,7 +614,8 @@ static int send_msg_sess_info(struct rnbd_clt_session *sess, bool wait) iu->buf = rsp; iu->sess = sess; - sg_init_one(iu->sglist, rsp, sizeof(*rsp)); + sg_alloc_table(&iu->sgt, 1, GFP_KERNEL); + sg_init_one(iu->sgt.sgl, rsp, sizeof(*rsp)); msg.hdr.type = cpu_to_le16(RNBD_MSG_SESS_INFO); msg.ver = RNBD_PROTO_VER_MAJOR; @@ -626,7 +631,7 @@ static int send_msg_sess_info(struct rnbd_clt_session *sess, bool wait) goto put_iu; } err = send_usr_msg(sess->rtrs, READ, iu, - &vec, sizeof(*rsp), iu->sglist, 1, + &vec, sizeof(*rsp), iu->sgt.sgl, 1, msg_sess_info_conf, &errno, wait); if (err) { rnbd_clt_put_sess(sess); @@ -636,7 +641,7 @@ put_iu: } else { err = errno; } - + sg_free_table(&iu->sgt); rnbd_put_iu(sess, iu); return err; } @@ -1016,11 +1021,10 @@ static int rnbd_client_xfer_request(struct rnbd_clt_dev *dev, * See queue limits. */ if (req_op(rq) != REQ_OP_DISCARD) - sg_cnt = blk_rq_map_sg(dev->queue, rq, iu->sglist); + sg_cnt = blk_rq_map_sg(dev->queue, rq, iu->sgt.sgl); if (sg_cnt == 0) - /* Do not forget to mark the end */ - sg_mark_end(&iu->sglist[0]); + sg_mark_end(&iu->sgt.sgl[0]); msg.hdr.type = cpu_to_le16(RNBD_MSG_IO); msg.device_id = cpu_to_le32(dev->device_id); @@ -1029,13 +1033,13 @@ static int rnbd_client_xfer_request(struct rnbd_clt_dev *dev, .iov_base = &msg, .iov_len = sizeof(msg) }; - size = rnbd_clt_get_sg_size(iu->sglist, sg_cnt); + size = rnbd_clt_get_sg_size(iu->sgt.sgl, sg_cnt); req_ops = (struct rtrs_clt_req_ops) { .priv = iu, .conf_fn = msg_io_conf, }; err = rtrs_clt_request(rq_data_dir(rq), &req_ops, rtrs, permit, - &vec, 1, size, iu->sglist, sg_cnt); + &vec, 1, size, iu->sgt.sgl, sg_cnt); if (unlikely(err)) { rnbd_clt_err_rl(dev, "RTRS failed to transfer IO, err: %d\n", err); @@ -1122,6 +1126,7 @@ static blk_status_t rnbd_queue_rq(struct blk_mq_hw_ctx *hctx, struct rnbd_clt_dev *dev = rq->rq_disk->private_data; struct rnbd_iu *iu = blk_mq_rq_to_pdu(rq); int err; + blk_status_t ret = BLK_STS_IOERR; if (unlikely(dev->dev_state != DEV_STATE_MAPPED)) return BLK_STS_IOERR; @@ -1133,32 +1138,35 @@ static blk_status_t rnbd_queue_rq(struct blk_mq_hw_ctx *hctx, return BLK_STS_RESOURCE; } + iu->sgt.sgl = iu->first_sgl; + err = sg_alloc_table_chained(&iu->sgt, + /* Even-if the request has no segment, + * sglist must have one entry at least */ + blk_rq_nr_phys_segments(rq) ? : 1, + iu->sgt.sgl, + RNBD_INLINE_SG_CNT); + if (err) { + rnbd_clt_err_rl(dev, "sg_alloc_table_chained ret=%d\n", err); + rnbd_clt_dev_kick_mq_queue(dev, hctx, 10/*ms*/); + rnbd_put_permit(dev->sess, iu->permit); + return BLK_STS_RESOURCE; + } + blk_mq_start_request(rq); err = rnbd_client_xfer_request(dev, rq, iu); if (likely(err == 0)) return BLK_STS_OK; if (unlikely(err == -EAGAIN || err == -ENOMEM)) { rnbd_clt_dev_kick_mq_queue(dev, hctx, 10/*ms*/); - rnbd_put_permit(dev->sess, iu->permit); - return BLK_STS_RESOURCE; + ret = BLK_STS_RESOURCE; } - + sg_free_table_chained(&iu->sgt, RNBD_INLINE_SG_CNT); rnbd_put_permit(dev->sess, iu->permit); - return BLK_STS_IOERR; -} - -static int rnbd_init_request(struct blk_mq_tag_set *set, struct request *rq, - unsigned int hctx_idx, unsigned int numa_node) -{ - struct rnbd_iu *iu = blk_mq_rq_to_pdu(rq); - - sg_init_table(iu->sglist, BMAX_SEGMENTS); - return 0; + return ret; } static struct blk_mq_ops rnbd_mq_ops = { .queue_rq = rnbd_queue_rq, - .init_request = rnbd_init_request, .complete = rnbd_softirq_done_fn, }; @@ -1172,7 +1180,7 @@ static int setup_mq_tags(struct rnbd_clt_session *sess) tag_set->numa_node = NUMA_NO_NODE; tag_set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_TAG_QUEUE_SHARED; - tag_set->cmd_size = sizeof(struct rnbd_iu); + tag_set->cmd_size = sizeof(struct rnbd_iu) + RNBD_RDMA_SGL_SIZE; tag_set->nr_hw_queues = num_online_cpus(); return blk_mq_alloc_tag_set(tag_set); diff --git a/drivers/block/rnbd/rnbd-clt.h b/drivers/block/rnbd/rnbd-clt.h index efd67ae286ca6..537d499dad3b0 100644 --- a/drivers/block/rnbd/rnbd-clt.h +++ b/drivers/block/rnbd/rnbd-clt.h @@ -44,6 +44,13 @@ struct rnbd_iu_comp { int errno; }; +#ifdef CONFIG_ARCH_NO_SG_CHAIN +#define RNBD_INLINE_SG_CNT 0 +#else +#define RNBD_INLINE_SG_CNT 2 +#endif +#define RNBD_RDMA_SGL_SIZE (sizeof(struct scatterlist) * RNBD_INLINE_SG_CNT) + struct rnbd_iu { union { struct request *rq; /* for block io */ @@ -56,11 +63,12 @@ struct rnbd_iu { /* use to send msg associated with a sess */ struct rnbd_clt_session *sess; }; - struct scatterlist sglist[BMAX_SEGMENTS]; + struct sg_table sgt; struct work_struct work; int errno; struct rnbd_iu_comp comp; atomic_t refcount; + struct scatterlist first_sgl[]; /* must be the last one */ }; struct rnbd_cpu_qlist { -- GitLab From 9aaf9a2aba0c2b5f0fc6dfeb011f0b4c8e224a73 Mon Sep 17 00:00:00 2001 From: Gioh Kim Date: Thu, 10 Dec 2020 11:18:26 +0100 Subject: [PATCH 0656/1894] block/rnbd-clt: Does not request pdu to rtrs-clt Previously the rnbd client requested the rtrs to allocate rnbd_iu just after the rtrs_iu. So the rnbd client passes the size of rnbd_iu for rtrs_clt_open() and rtrs creates an array of rnbd_iu and rtrs_iu. For IO handling, rnbd_iu exists after the request because we pass the size of rnbd_iu when setting the tag-set. Therefore we do not use the rnbd_iu allocated by rtrs for IO handling. We only use the rnbd_iu allocated by rtrs when doing session initialization. Almost all rnbd_iu allocated by rtrs are wasted. By this patch the rnbd client does not request rnbd_iu allocation to rtrs but allocate it for itself when doing session initialization. Also remove unused rtrs_permit_to_pdu from rtrs. Signed-off-by: Gioh Kim Signed-off-by: Jack Wang Signed-off-by: Jens Axboe --- drivers/block/rnbd/rnbd-clt.c | 17 +++++++++++++---- drivers/infiniband/ulp/rtrs/rtrs-clt.c | 6 ------ drivers/infiniband/ulp/rtrs/rtrs.h | 7 ------- 3 files changed, 13 insertions(+), 17 deletions(-) diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index 5941ff7c83a8e..96e3f9fe82418 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -349,12 +349,19 @@ static struct rnbd_iu *rnbd_get_iu(struct rnbd_clt_session *sess, struct rnbd_iu *iu; struct rtrs_permit *permit; + iu = kzalloc(sizeof(*iu), GFP_KERNEL); + if (!iu) { + return NULL; + } + permit = rnbd_get_permit(sess, con_type, wait ? RTRS_PERMIT_WAIT : RTRS_PERMIT_NOWAIT); - if (unlikely(!permit)) + if (unlikely(!permit)) { + kfree(iu); return NULL; - iu = rtrs_permit_to_pdu(permit); + } + iu->permit = permit; /* * 1st reference is dropped after finishing sending a "user" message, @@ -373,8 +380,10 @@ static struct rnbd_iu *rnbd_get_iu(struct rnbd_clt_session *sess, static void rnbd_put_iu(struct rnbd_clt_session *sess, struct rnbd_iu *iu) { - if (atomic_dec_and_test(&iu->refcount)) + if (atomic_dec_and_test(&iu->refcount)) { rnbd_put_permit(sess, iu->permit); + kfree(iu); + } } static void rnbd_softirq_done_fn(struct request *rq) @@ -1218,7 +1227,7 @@ find_and_get_or_create_sess(const char *sessname, */ sess->rtrs = rtrs_clt_open(&rtrs_ops, sessname, paths, path_cnt, port_nr, - sizeof(struct rnbd_iu), + 0, /* Do not use pdu of rtrs */ RECONNECT_DELAY, BMAX_SEGMENTS, BLK_MAX_SEGMENT_SIZE, MAX_RECONNECTS); diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 560865f65dc44..67f86c405a265 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -157,12 +157,6 @@ void rtrs_clt_put_permit(struct rtrs_clt *clt, struct rtrs_permit *permit) } EXPORT_SYMBOL(rtrs_clt_put_permit); -void *rtrs_permit_to_pdu(struct rtrs_permit *permit) -{ - return permit + 1; -} -EXPORT_SYMBOL(rtrs_permit_to_pdu); - /** * rtrs_permit_to_clt_con() - returns RDMA connection pointer by the permit * @sess: client session pointer diff --git a/drivers/infiniband/ulp/rtrs/rtrs.h b/drivers/infiniband/ulp/rtrs/rtrs.h index 9af750f4d7835..8738e90e715a4 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs.h +++ b/drivers/infiniband/ulp/rtrs/rtrs.h @@ -63,13 +63,6 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops, void rtrs_clt_close(struct rtrs_clt *sess); -/** - * rtrs_permit_to_pdu() - converts rtrs_permit to opaque pdu pointer - * @permit: RTRS permit pointer, it associates the memory allocation for future - * RDMA operation. - */ -void *rtrs_permit_to_pdu(struct rtrs_permit *permit); - enum { RTRS_PERMIT_NOWAIT = 0, RTRS_PERMIT_WAIT = 1, -- GitLab From a146468d76e0462393a3e15b77b8b3ede60e2d06 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 14 Dec 2020 20:57:27 -0700 Subject: [PATCH 0657/1894] io_uring: break links on shutdown failure Ensure that the return value of __sys_shutdown_sock() is used to potentially break links to the request, if we fail. Fixes: 36f4fa6886a8 ("io_uring: add support for shutdown(2)") Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 6f9392c35eefd..6a4560c9ed9ac 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -3784,6 +3784,8 @@ static int io_shutdown(struct io_kiocb *req, bool force_nonblock) return -ENOTSOCK; ret = __sys_shutdown_sock(sock, req->shutdown.how); + if (ret < 0) + req_set_fail_links(req); io_req_complete(req, ret); return 0; #else -- GitLab From 4c46764733c85b82c07e9559b39da4d00a7dd659 Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Sat, 5 Dec 2020 19:50:56 +0800 Subject: [PATCH 0658/1894] libnvdimm/label: Return -ENXIO for no slot in __blk_label_update Forget to set error code when nd_label_alloc_slot failed, and we add it to avoid overwritten error code. Fixes: 0ba1c634892b ("libnvdimm: write blk label set") Signed-off-by: Zhang Qilong Link: https://lore.kernel.org/r/20201205115056.2076523-1-zhangqilong3@huawei.com Signed-off-by: Dan Williams --- drivers/nvdimm/label.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c index 6f2be7a34598c..9251441fd8a35 100644 --- a/drivers/nvdimm/label.c +++ b/drivers/nvdimm/label.c @@ -1008,8 +1008,10 @@ static int __blk_label_update(struct nd_region *nd_region, if (is_old_resource(res, old_res_list, old_num_resources)) continue; /* carry-over */ slot = nd_label_alloc_slot(ndd); - if (slot == UINT_MAX) + if (slot == UINT_MAX) { + rc = -ENXIO; goto abort; + } dev_dbg(ndd->dev, "allocated: %d\n", slot); nd_label = to_label(ndd, slot); -- GitLab From 81e7eb5bf08f36d34495a5898f6ef3fec05d9776 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 16 Dec 2020 22:43:44 -0500 Subject: [PATCH 0659/1894] Revert "Revert "scsi: megaraid_sas: Added support for shared host tagset for cpuhotplug"" This reverts commit 1a0e1943d8798cb3241fb5edb9a836af1611b60a. Commit b3c6a5997541 ("block: Fix a lockdep complaint triggered by request queue flushing") has been reverted and commit fb01a2932e81 has been introduced in its place. Consequently, it is now safe to reinstate the megaraid_sas tagset changes that led to boot problems in 5.10. Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_base.c | 39 +++++++++++++++++++++ drivers/scsi/megaraid/megaraid_sas_fusion.c | 29 ++++++++------- 2 files changed, 55 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index e158d3d62056b..41cd66fc7d812 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -113,6 +114,10 @@ unsigned int enable_sdev_max_qd; module_param(enable_sdev_max_qd, int, 0444); MODULE_PARM_DESC(enable_sdev_max_qd, "Enable sdev max qd as can_queue. Default: 0"); +int host_tagset_enable = 1; +module_param(host_tagset_enable, int, 0444); +MODULE_PARM_DESC(host_tagset_enable, "Shared host tagset enable/disable Default: enable(1)"); + MODULE_LICENSE("GPL"); MODULE_VERSION(MEGASAS_VERSION); MODULE_AUTHOR("megaraidlinux.pdl@broadcom.com"); @@ -3119,6 +3124,19 @@ megasas_bios_param(struct scsi_device *sdev, struct block_device *bdev, return 0; } +static int megasas_map_queues(struct Scsi_Host *shost) +{ + struct megasas_instance *instance; + + instance = (struct megasas_instance *)shost->hostdata; + + if (shost->nr_hw_queues == 1) + return 0; + + return blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT], + instance->pdev, instance->low_latency_index_start); +} + static void megasas_aen_polling(struct work_struct *work); /** @@ -3427,6 +3445,7 @@ static struct scsi_host_template megasas_template = { .eh_timed_out = megasas_reset_timer, .shost_attrs = megaraid_host_attrs, .bios_param = megasas_bios_param, + .map_queues = megasas_map_queues, .change_queue_depth = scsi_change_queue_depth, .max_segment_size = 0xffffffff, }; @@ -6808,6 +6827,26 @@ static int megasas_io_attach(struct megasas_instance *instance) host->max_lun = MEGASAS_MAX_LUN; host->max_cmd_len = 16; + /* Use shared host tagset only for fusion adaptors + * if there are managed interrupts (smp affinity enabled case). + * Single msix_vectors in kdump, so shared host tag is also disabled. + */ + + host->host_tagset = 0; + host->nr_hw_queues = 1; + + if ((instance->adapter_type != MFI_SERIES) && + (instance->msix_vectors > instance->low_latency_index_start) && + host_tagset_enable && + instance->smp_affinity_enable) { + host->host_tagset = 1; + host->nr_hw_queues = instance->msix_vectors - + instance->low_latency_index_start; + } + + dev_info(&instance->pdev->dev, + "Max firmware commands: %d shared with nr_hw_queues = %d\n", + instance->max_fw_cmds, host->nr_hw_queues); /* * Notify the mid-layer about the new controller */ diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index b0c01cf0428f2..fd607287608e1 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -359,24 +359,29 @@ megasas_get_msix_index(struct megasas_instance *instance, { int sdev_busy; - /* nr_hw_queue = 1 for MegaRAID */ - struct blk_mq_hw_ctx *hctx = - scmd->device->request_queue->queue_hw_ctx[0]; - - sdev_busy = atomic_read(&hctx->nr_active); + /* TBD - if sml remove device_busy in future, driver + * should track counter in internal structure. + */ + sdev_busy = atomic_read(&scmd->device->device_busy); if (instance->perf_mode == MR_BALANCED_PERF_MODE && - sdev_busy > (data_arms * MR_DEVICE_HIGH_IOPS_DEPTH)) + sdev_busy > (data_arms * MR_DEVICE_HIGH_IOPS_DEPTH)) { cmd->request_desc->SCSIIO.MSIxIndex = mega_mod64((atomic64_add_return(1, &instance->high_iops_outstanding) / MR_HIGH_IOPS_BATCH_COUNT), instance->low_latency_index_start); - else if (instance->msix_load_balance) + } else if (instance->msix_load_balance) { cmd->request_desc->SCSIIO.MSIxIndex = (mega_mod64(atomic64_add_return(1, &instance->total_io_count), instance->msix_vectors)); - else + } else if (instance->host->nr_hw_queues > 1) { + u32 tag = blk_mq_unique_tag(scmd->request); + + cmd->request_desc->SCSIIO.MSIxIndex = blk_mq_unique_tag_to_hwq(tag) + + instance->low_latency_index_start; + } else { cmd->request_desc->SCSIIO.MSIxIndex = instance->reply_map[raw_smp_processor_id()]; + } } /** @@ -956,9 +961,6 @@ megasas_alloc_cmds_fusion(struct megasas_instance *instance) if (megasas_alloc_cmdlist_fusion(instance)) goto fail_exit; - dev_info(&instance->pdev->dev, "Configured max firmware commands: %d\n", - instance->max_fw_cmds); - /* The first 256 bytes (SMID 0) is not used. Don't add to the cmd list */ io_req_base = fusion->io_request_frames + MEGA_MPI2_RAID_DEFAULT_IO_FRAME_SIZE; io_req_base_phys = fusion->io_request_frames_phys + MEGA_MPI2_RAID_DEFAULT_IO_FRAME_SIZE; @@ -1102,8 +1104,9 @@ megasas_ioc_init_fusion(struct megasas_instance *instance) MR_HIGH_IOPS_QUEUE_COUNT) && cur_intr_coalescing) instance->perf_mode = MR_BALANCED_PERF_MODE; - dev_info(&instance->pdev->dev, "Performance mode :%s\n", - MEGASAS_PERF_MODE_2STR(instance->perf_mode)); + dev_info(&instance->pdev->dev, "Performance mode :%s (latency index = %d)\n", + MEGASAS_PERF_MODE_2STR(instance->perf_mode), + instance->low_latency_index_start); instance->fw_sync_cache_support = (scratch_pad_1 & MR_CAN_HANDLE_SYNC_CACHE_OFFSET) ? 1 : 0; -- GitLab From 7948fab26bcc468aa2a76462f441291b5fb0d5c7 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Mon, 7 Dec 2020 11:30:05 +0530 Subject: [PATCH 0660/1894] watchdog: qcom: Avoid context switch in restart handler The use of msleep() in the restart handler will cause scheduler to induce a context switch which is not desirable. This generates below warning on SDX55 when WDT is the only available restart source: [ 39.800188] reboot: Restarting system [ 39.804115] ------------[ cut here ]------------ [ 39.807855] WARNING: CPU: 0 PID: 678 at kernel/rcu/tree_plugin.h:297 rcu_note_context_switch+0x190/0x764 [ 39.812538] Modules linked in: [ 39.821954] CPU: 0 PID: 678 Comm: reboot Not tainted 5.10.0-rc1-00063-g33a9990d1d66-dirty #47 [ 39.824854] Hardware name: Generic DT based system [ 39.833470] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [ 39.838154] [] (show_stack) from [] (dump_stack+0x8c/0xa0) [ 39.846049] [] (dump_stack) from [] (__warn+0xd8/0xf0) [ 39.853058] [] (__warn) from [] (warn_slowpath_fmt+0x64/0xc8) [ 39.859925] [] (warn_slowpath_fmt) from [] (rcu_note_context_switch+0x190/0x764) [ 39.867503] [] (rcu_note_context_switch) from [] (__schedule+0x84/0x640) [ 39.876685] [] (__schedule) from [] (schedule+0x58/0x10c) [ 39.885095] [] (schedule) from [] (schedule_timeout+0x1e8/0x3d4) [ 39.892135] [] (schedule_timeout) from [] (msleep+0x2c/0x38) [ 39.899947] [] (msleep) from [] (qcom_wdt_restart+0xc4/0xcc) [ 39.907319] [] (qcom_wdt_restart) from [] (watchdog_restart_notifier+0x18/0x28) [ 39.914715] [] (watchdog_restart_notifier) from [] (atomic_notifier_call_chain+0x60/0x84) [ 39.923487] [] (atomic_notifier_call_chain) from [] (machine_restart+0x78/0x7c) [ 39.933551] [] (machine_restart) from [] (__do_sys_reboot+0xdc/0x1e0) [ 39.942397] [] (__do_sys_reboot) from [] (ret_fast_syscall+0x0/0x54) [ 39.950721] Exception stack(0xc3e0bfa8 to 0xc3e0bff0) [ 39.958855] bfa0: 0001221c bed2fe24 fee1dead 28121969 01234567 00000000 [ 39.963832] bfc0: 0001221c bed2fe24 00000003 00000058 000225e0 00000000 00000000 00000000 [ 39.971985] bfe0: b6e62560 bed2fc84 00010fd8 b6e62580 [ 39.980124] ---[ end trace 3f578288bad866e4 ]--- Hence, replace msleep() with mdelay() to fix this issue. Fixes: 05e487d905ab ("watchdog: qcom: register a restart notifier") Signed-off-by: Manivannan Sadhasivam Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20201207060005.21293-1-manivannan.sadhasivam@linaro.org Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/qcom-wdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/qcom-wdt.c b/drivers/watchdog/qcom-wdt.c index 07d399c4edc4a..7cf0f2ec649b6 100644 --- a/drivers/watchdog/qcom-wdt.c +++ b/drivers/watchdog/qcom-wdt.c @@ -148,7 +148,7 @@ static int qcom_wdt_restart(struct watchdog_device *wdd, unsigned long action, */ wmb(); - msleep(150); + mdelay(150); return 0; } -- GitLab From 8cbd82d62f45423bc337abfcfd51da83fbe60277 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Fri, 11 Dec 2020 01:19:29 +0000 Subject: [PATCH 0661/1894] dt-bindings: watchdog: sun4i: Add A100 compatible Add a binding for A100's watchdog controller. Signed-off-by: Yangtao Li Acked-by: Rob Herring Signed-off-by: Andre Przywara Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20201211011934.6171-17-andre.przywara@arm.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- .../devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml b/Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml index e8f2263761081..5ac607de8be41 100644 --- a/Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml +++ b/Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml @@ -21,6 +21,9 @@ properties: - items: - const: allwinner,sun50i-a64-wdt - const: allwinner,sun6i-a31-wdt + - items: + - const: allwinner,sun50i-a100-wdt + - const: allwinner,sun6i-a31-wdt - items: - const: allwinner,sun50i-h6-wdt - const: allwinner,sun6i-a31-wdt -- GitLab From 36c47df85ee8e1f8a35366ac11324f8875de00eb Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 3 Dec 2020 23:33:42 +0100 Subject: [PATCH 0662/1894] watchdog: coh901327: add COMMON_CLK dependency clang produces a build failure in configurations without COMMON_CLK when a timeout calculation goes wrong: arm-linux-gnueabi-ld: drivers/watchdog/coh901327_wdt.o: in function `coh901327_enable': coh901327_wdt.c:(.text+0x50): undefined reference to `__bad_udelay' Add a Kconfig dependency to only do build testing when COMMON_CLK is enabled. Fixes: da2a68b3eb47 ("watchdog: Enable COMPILE_TEST where possible") Signed-off-by: Arnd Bergmann Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20201203223358.1269372-1-arnd@kernel.org Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 5d3c3ec918a80..60f2b9a4a5668 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -632,7 +632,7 @@ config SUNXI_WATCHDOG config COH901327_WATCHDOG bool "ST-Ericsson COH 901 327 watchdog" - depends on ARCH_U300 || (ARM && COMPILE_TEST) + depends on ARCH_U300 || (ARM && COMMON_CLK && COMPILE_TEST) default y if MACH_U300 select WATCHDOG_CORE help -- GitLab From e629fffcc333efbda6b7f8cdcf77238533ddf442 Mon Sep 17 00:00:00 2001 From: Johan Jonker Date: Mon, 16 Nov 2020 15:25:39 +0100 Subject: [PATCH 0663/1894] dt-binding: watchdog: add Rockchip compatibles to snps,dw-wdt.yaml The Rockchip watchdog compatibles below are already in use, but somehow never added to a document, so add them to the snps,dw-wdt.yaml file. "rockchip,rk3066-wdt", "snps,dw-wdt" "rockchip,rk3188-wdt", "snps,dw-wdt" "rockchip,rk3288-wdt", "snps,dw-wdt" "rockchip,rk3368-wdt", "snps,dw-wdt" make ARCH=arm dtbs_check DT_SCHEMA_FILES=Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml make ARCH=arm64 dtbs_check DT_SCHEMA_FILES=Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml Signed-off-by: Johan Jonker Reviewed-by: Rob Herring Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20201116142539.12377-1-jbx6244@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- .../devicetree/bindings/watchdog/snps,dw-wdt.yaml | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml b/Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml index d9fc7bb851b1e..f7ee9229c29f9 100644 --- a/Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml +++ b/Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml @@ -14,7 +14,15 @@ maintainers: properties: compatible: - const: snps,dw-wdt + oneOf: + - const: snps,dw-wdt + - items: + - enum: + - rockchip,rk3066-wdt + - rockchip,rk3188-wdt + - rockchip,rk3288-wdt + - rockchip,rk3368-wdt + - const: snps,dw-wdt reg: maxItems: 1 -- GitLab From c21172b3a73e8daf016eec52af229bb7b9c76cc8 Mon Sep 17 00:00:00 2001 From: "Enrico Weigelt, metux IT consult" Date: Tue, 17 Nov 2020 16:22:13 +0100 Subject: [PATCH 0664/1894] watchdog: iTCO_wdt: use dev_*() instead of pr_*() for logging For device log outputs, it's better to have device name / ID prefixed in all messages, so use the proper dev_*() functions here. Explicit message on module load/unload don't seem to be really helpful (we have other means to check which modules have been loaded), instead just add noise to the kernel log. So, removing them. Signed-off-by: Enrico Weigelt, metux IT consult Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20201117152214.32244-2-info@metux.net Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/iTCO_wdt.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/watchdog/iTCO_wdt.c b/drivers/watchdog/iTCO_wdt.c index f2ddc8fc71cd6..bf31d7b67a697 100644 --- a/drivers/watchdog/iTCO_wdt.c +++ b/drivers/watchdog/iTCO_wdt.c @@ -40,8 +40,6 @@ * Includes, defines, variables, module parameters, ... */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - /* Module and version information */ #define DRV_NAME "iTCO_wdt" #define DRV_VERSION "1.11" @@ -279,7 +277,7 @@ static int iTCO_wdt_start(struct watchdog_device *wd_dev) /* disable chipset's NO_REBOOT bit */ if (p->update_no_reboot_bit(p->no_reboot_priv, false)) { spin_unlock(&p->io_lock); - pr_err("failed to reset NO_REBOOT flag, reboot disabled by hardware/BIOS\n"); + dev_err(wd_dev->parent, "failed to reset NO_REBOOT flag, reboot disabled by hardware/BIOS\n"); return -EIO; } @@ -510,7 +508,7 @@ static int iTCO_wdt_probe(struct platform_device *pdev) /* Check chipset's NO_REBOOT bit */ if (p->update_no_reboot_bit(p->no_reboot_priv, false) && iTCO_vendor_check_noreboot_on()) { - pr_info("unable to reset NO_REBOOT flag, device disabled by hardware/BIOS\n"); + dev_info(dev, "unable to reset NO_REBOOT flag, device disabled by hardware/BIOS\n"); return -ENODEV; /* Cannot reset NO_REBOOT bit */ } @@ -530,12 +528,12 @@ static int iTCO_wdt_probe(struct platform_device *pdev) if (!devm_request_region(dev, p->tco_res->start, resource_size(p->tco_res), pdev->name)) { - pr_err("I/O address 0x%04llx already in use, device disabled\n", + dev_err(dev, "I/O address 0x%04llx already in use, device disabled\n", (u64)TCOBASE(p)); return -EBUSY; } - pr_info("Found a %s TCO device (Version=%d, TCOBASE=0x%04llx)\n", + dev_info(dev, "Found a %s TCO device (Version=%d, TCOBASE=0x%04llx)\n", pdata->name, pdata->version, (u64)TCOBASE(p)); /* Clear out the (probably old) status */ @@ -558,7 +556,7 @@ static int iTCO_wdt_probe(struct platform_device *pdev) break; } - p->wddev.info = &ident, + p->wddev.info = &ident, p->wddev.ops = &iTCO_wdt_ops, p->wddev.bootstatus = 0; p->wddev.timeout = WATCHDOG_TIMEOUT; @@ -575,7 +573,7 @@ static int iTCO_wdt_probe(struct platform_device *pdev) if not reset to the default */ if (iTCO_wdt_set_timeout(&p->wddev, heartbeat)) { iTCO_wdt_set_timeout(&p->wddev, WATCHDOG_TIMEOUT); - pr_info("timeout value out of range, using %d\n", + dev_info(dev, "timeout value out of range, using %d\n", WATCHDOG_TIMEOUT); } @@ -583,11 +581,11 @@ static int iTCO_wdt_probe(struct platform_device *pdev) watchdog_stop_on_unregister(&p->wddev); ret = devm_watchdog_register_device(dev, &p->wddev); if (ret != 0) { - pr_err("cannot register watchdog device (err=%d)\n", ret); + dev_err(dev, "cannot register watchdog device (err=%d)\n", ret); return ret; } - pr_info("initialized. heartbeat=%d sec (nowayout=%d)\n", + dev_info(dev, "initialized. heartbeat=%d sec (nowayout=%d)\n", heartbeat, nowayout); return 0; -- GitLab From 292bff9480c8d52fc58028979c4162abd83f1aec Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 14 Dec 2020 23:24:17 +0000 Subject: [PATCH 0665/1894] ath11k: add missing null check on allocated skb Currently the null check on a newly allocated skb is missing and this can lead to a null pointer dereference is the allocation fails. Fix this by adding a null check and returning -ENOMEM. Addresses-Coverity: ("Dereference null return") Fixes: 43ed15e1ee01 ("ath11k: put hw to DBS using WMI_PDEV_SET_HW_MODE_CMDID") Signed-off-by: Colin Ian King Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20201214232417.84556-1-colin.king@canonical.com --- drivers/net/wireless/ath/ath11k/wmi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c index da4b546b62cb5..73869d445c5b3 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.c +++ b/drivers/net/wireless/ath/ath11k/wmi.c @@ -3460,6 +3460,9 @@ int ath11k_wmi_set_hw_mode(struct ath11k_base *ab, len = sizeof(*cmd); skb = ath11k_wmi_alloc_skb(wmi_ab, len); + if (!skb) + return -ENOMEM; + cmd = (struct wmi_pdev_set_hw_mode_cmd_param *)skb->data; cmd->tlv_header = FIELD_PREP(WMI_TLV_TAG, WMI_TAG_PDEV_SET_HW_MODE_CMD) | -- GitLab From 3597010630d0aa96f5778901e691c6068bb86318 Mon Sep 17 00:00:00 2001 From: Carl Huang Date: Fri, 11 Dec 2020 00:56:13 -0500 Subject: [PATCH 0666/1894] ath11k: fix crash caused by NULL rx_channel During connect and disconnect stress test, crashed happened because ar->rx_channel is NULL. Fix it by checking whether ar->rx_channel is NULL. Crash stack is as below: RIP: 0010:ath11k_dp_rx_h_ppdu+0x110/0x230 [ath11k] [ 5028.808963] ath11k_dp_rx_wbm_err+0x14a/0x360 [ath11k] [ 5028.808970] ath11k_dp_rx_process_wbm_err+0x41c/0x520 [ath11k] [ 5028.808978] ath11k_dp_service_srng+0x25e/0x2d0 [ath11k] [ 5028.808982] ath11k_pci_ext_grp_napi_poll+0x23/0x80 [ath11k_pci] [ 5028.808986] net_rx_action+0x27e/0x400 [ 5028.808990] __do_softirq+0xfd/0x2bb [ 5028.808993] irq_exit+0xa6/0xb0 [ 5028.808995] do_IRQ+0x56/0xe0 [ 5028.808997] common_interrupt+0xf/0xf Tested-on: QCA6390 hw2.0 PCI WLAN.HST.1.0.1-01740-QCAHSTSWPLZ_V2_TO_X86-1 Signed-off-by: Carl Huang Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20201211055613.9310-1-cjhuang@codeaurora.org --- drivers/net/wireless/ath/ath11k/dp_rx.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.c b/drivers/net/wireless/ath/ath11k/dp_rx.c index 205c0f1a40e91..920e5026a635f 100644 --- a/drivers/net/wireless/ath/ath11k/dp_rx.c +++ b/drivers/net/wireless/ath/ath11k/dp_rx.c @@ -2294,6 +2294,7 @@ static void ath11k_dp_rx_h_ppdu(struct ath11k *ar, struct hal_rx_desc *rx_desc, { u8 channel_num; u32 center_freq; + struct ieee80211_channel *channel; rx_status->freq = 0; rx_status->rate_idx = 0; @@ -2314,9 +2315,12 @@ static void ath11k_dp_rx_h_ppdu(struct ath11k *ar, struct hal_rx_desc *rx_desc, rx_status->band = NL80211_BAND_5GHZ; } else { spin_lock_bh(&ar->data_lock); - rx_status->band = ar->rx_channel->band; - channel_num = - ieee80211_frequency_to_channel(ar->rx_channel->center_freq); + channel = ar->rx_channel; + if (channel) { + rx_status->band = channel->band; + channel_num = + ieee80211_frequency_to_channel(channel->center_freq); + } spin_unlock_bh(&ar->data_lock); ath11k_dbg_dump(ar->ab, ATH11K_DBG_DATA, NULL, "rx_desc: ", rx_desc, sizeof(struct hal_rx_desc)); -- GitLab From aa44b2f3ecd41f90b7e477158036648a49d21a32 Mon Sep 17 00:00:00 2001 From: Carl Huang Date: Fri, 11 Dec 2020 00:13:58 -0500 Subject: [PATCH 0667/1894] ath11k: start vdev if a bss peer is already created For QCA6390, bss peer must be created before vdev is to start. This change is to start vdev if a bss peer is created. Otherwise, ath11k delays to start vdev. This fixes an issue in a case where HT/VHT/HE settings change between authentication and association, e.g., due to the user space request to disable HT. Tested-on: QCA6390 hw2.0 PCI WLAN.HST.1.0.1-01740-QCAHSTSWPLZ_V2_TO_X86-1 Signed-off-by: Carl Huang Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20201211051358.9191-1-cjhuang@codeaurora.org --- drivers/net/wireless/ath/ath11k/mac.c | 8 ++++++-- drivers/net/wireless/ath/ath11k/peer.c | 17 +++++++++++++++++ drivers/net/wireless/ath/ath11k/peer.h | 2 ++ 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 5c175e3e09b28..c1608f64ea95d 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -3021,6 +3021,7 @@ static int ath11k_mac_station_add(struct ath11k *ar, } if (ab->hw_params.vdev_start_delay && + !arvif->is_started && arvif->vdev_type != WMI_VDEV_TYPE_AP) { ret = ath11k_start_vdev_delay(ar->hw, vif); if (ret) { @@ -5284,7 +5285,8 @@ ath11k_mac_op_assign_vif_chanctx(struct ieee80211_hw *hw, /* for QCA6390 bss peer must be created before vdev_start */ if (ab->hw_params.vdev_start_delay && arvif->vdev_type != WMI_VDEV_TYPE_AP && - arvif->vdev_type != WMI_VDEV_TYPE_MONITOR) { + arvif->vdev_type != WMI_VDEV_TYPE_MONITOR && + !ath11k_peer_find_by_vdev_id(ab, arvif->vdev_id)) { memcpy(&arvif->chanctx, ctx, sizeof(*ctx)); ret = 0; goto out; @@ -5295,7 +5297,9 @@ ath11k_mac_op_assign_vif_chanctx(struct ieee80211_hw *hw, goto out; } - if (ab->hw_params.vdev_start_delay) { + if (ab->hw_params.vdev_start_delay && + (arvif->vdev_type == WMI_VDEV_TYPE_AP || + arvif->vdev_type == WMI_VDEV_TYPE_MONITOR)) { param.vdev_id = arvif->vdev_id; param.peer_type = WMI_PEER_TYPE_DEFAULT; param.peer_addr = ar->mac_addr; diff --git a/drivers/net/wireless/ath/ath11k/peer.c b/drivers/net/wireless/ath/ath11k/peer.c index 1866d82678fa9..b69e7ebfa9303 100644 --- a/drivers/net/wireless/ath/ath11k/peer.c +++ b/drivers/net/wireless/ath/ath11k/peer.c @@ -76,6 +76,23 @@ struct ath11k_peer *ath11k_peer_find_by_id(struct ath11k_base *ab, return NULL; } +struct ath11k_peer *ath11k_peer_find_by_vdev_id(struct ath11k_base *ab, + int vdev_id) +{ + struct ath11k_peer *peer; + + spin_lock_bh(&ab->base_lock); + + list_for_each_entry(peer, &ab->peers, list) { + if (vdev_id == peer->vdev_id) { + spin_unlock_bh(&ab->base_lock); + return peer; + } + } + spin_unlock_bh(&ab->base_lock); + return NULL; +} + void ath11k_peer_unmap_event(struct ath11k_base *ab, u16 peer_id) { struct ath11k_peer *peer; diff --git a/drivers/net/wireless/ath/ath11k/peer.h b/drivers/net/wireless/ath/ath11k/peer.h index bba2e00b6944a..8553ed061aeaa 100644 --- a/drivers/net/wireless/ath/ath11k/peer.h +++ b/drivers/net/wireless/ath/ath11k/peer.h @@ -43,5 +43,7 @@ int ath11k_peer_create(struct ath11k *ar, struct ath11k_vif *arvif, struct ieee80211_sta *sta, struct peer_create_params *param); int ath11k_wait_for_peer_delete_done(struct ath11k *ar, u32 vdev_id, const u8 *addr); +struct ath11k_peer *ath11k_peer_find_by_vdev_id(struct ath11k_base *ab, + int vdev_id); #endif /* _PEER_H_ */ -- GitLab From 9b09456258ea2f35fc8a99c4ac4829dcba0ca4be Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 16 Dec 2020 11:31:19 +0300 Subject: [PATCH 0668/1894] ath11k: Fix error code in ath11k_core_suspend() The "if (!ret)" condition is inverted and it should be "if (ret)". It means that we return success when we had intended to return an error code. This also caused a spurious warning even when the suspend was successful: [ 297.186612] ath11k_pci 0000:06:00.0: failed to suspend hif: 0 Fixes: d1b0c33850d2 ("ath11k: implement suspend for QCA6390 PCI devices") Signed-off-by: Dan Carpenter Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/X9nF17L2/EKOSbn/@mwanda --- drivers/net/wireless/ath/ath11k/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c index b97c38b9a2701..350b7913622cb 100644 --- a/drivers/net/wireless/ath/ath11k/core.c +++ b/drivers/net/wireless/ath/ath11k/core.c @@ -185,7 +185,7 @@ int ath11k_core_suspend(struct ath11k_base *ab) ath11k_hif_ce_irq_disable(ab); ret = ath11k_hif_suspend(ab); - if (!ret) { + if (ret) { ath11k_warn(ab, "failed to suspend hif: %d\n", ret); return ret; } -- GitLab From 30d085039314fcad2c2e33a2dfc8e79765ddf408 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 16 Dec 2020 11:32:12 +0300 Subject: [PATCH 0669/1894] ath11k: Fix ath11k_pci_fix_l1ss() All these conditions are reversed so presumably most of the function is dead code. This caused a spurious warning: [ 95.734922] ath11k_pci 0000:06:00.0: failed to set sysclk: 0 Fixes: 0699940755e9 ("ath11k: pci: fix L1ss clock unstable problem") Signed-off-by: Dan Carpenter Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/X9nGDHiTh+Z+asDy@mwanda --- drivers/net/wireless/ath/ath11k/pci.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/pci.c b/drivers/net/wireless/ath/ath11k/pci.c index 857647aa57c8a..9f9a824a4c2d6 100644 --- a/drivers/net/wireless/ath/ath11k/pci.c +++ b/drivers/net/wireless/ath/ath11k/pci.c @@ -274,7 +274,7 @@ static int ath11k_pci_fix_l1ss(struct ath11k_base *ab) PCIE_QSERDES_COM_SYSCLK_EN_SEL_REG, PCIE_QSERDES_COM_SYSCLK_EN_SEL_VAL, PCIE_QSERDES_COM_SYSCLK_EN_SEL_MSK); - if (!ret) { + if (ret) { ath11k_warn(ab, "failed to set sysclk: %d\n", ret); return ret; } @@ -283,7 +283,7 @@ static int ath11k_pci_fix_l1ss(struct ath11k_base *ab) PCIE_USB3_PCS_MISC_OSC_DTCT_CONFIG1_REG, PCIE_USB3_PCS_MISC_OSC_DTCT_CONFIG1_VAL, PCIE_USB3_PCS_MISC_OSC_DTCT_CONFIG_MSK); - if (!ret) { + if (ret) { ath11k_warn(ab, "failed to set dtct config1 error: %d\n", ret); return ret; } @@ -292,7 +292,7 @@ static int ath11k_pci_fix_l1ss(struct ath11k_base *ab) PCIE_USB3_PCS_MISC_OSC_DTCT_CONFIG2_REG, PCIE_USB3_PCS_MISC_OSC_DTCT_CONFIG2_VAL, PCIE_USB3_PCS_MISC_OSC_DTCT_CONFIG_MSK); - if (!ret) { + if (ret) { ath11k_warn(ab, "failed to set dtct config2: %d\n", ret); return ret; } @@ -301,7 +301,7 @@ static int ath11k_pci_fix_l1ss(struct ath11k_base *ab) PCIE_USB3_PCS_MISC_OSC_DTCT_CONFIG4_REG, PCIE_USB3_PCS_MISC_OSC_DTCT_CONFIG4_VAL, PCIE_USB3_PCS_MISC_OSC_DTCT_CONFIG_MSK); - if (!ret) { + if (ret) { ath11k_warn(ab, "failed to set dtct config4: %d\n", ret); return ret; } -- GitLab From 2aeff9d8c8e69edd268e06a79771d71d5f46b4a3 Mon Sep 17 00:00:00 2001 From: Weiyi Lu Date: Fri, 13 Nov 2020 16:29:52 +0800 Subject: [PATCH 0670/1894] clk: mediatek: Make mtk_clk_register_mux() a static function mtk_clk_register_mux() should be a static function Fixes: a3ae549917f16 ("clk: mediatek: Add new clkmux register API") Signed-off-by: Weiyi Lu Link: https://lore.kernel.org/r/1605256192-31307-1-git-send-email-weiyi.lu@mediatek.com Signed-off-by: Stephen Boyd --- drivers/clk/mediatek/clk-mux.c | 2 +- drivers/clk/mediatek/clk-mux.h | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/clk/mediatek/clk-mux.c b/drivers/clk/mediatek/clk-mux.c index 14e127e9a7404..dcc1352bf13cf 100644 --- a/drivers/clk/mediatek/clk-mux.c +++ b/drivers/clk/mediatek/clk-mux.c @@ -155,7 +155,7 @@ const struct clk_ops mtk_mux_gate_clr_set_upd_ops = { .set_parent = mtk_clk_mux_set_parent_setclr_lock, }; -struct clk *mtk_clk_register_mux(const struct mtk_mux *mux, +static struct clk *mtk_clk_register_mux(const struct mtk_mux *mux, struct regmap *regmap, spinlock_t *lock) { diff --git a/drivers/clk/mediatek/clk-mux.h b/drivers/clk/mediatek/clk-mux.h index f5625f4d9e6c6..8e2f927dd2fff 100644 --- a/drivers/clk/mediatek/clk-mux.h +++ b/drivers/clk/mediatek/clk-mux.h @@ -77,10 +77,6 @@ extern const struct clk_ops mtk_mux_gate_clr_set_upd_ops; _width, _gate, _upd_ofs, _upd, \ CLK_SET_RATE_PARENT) -struct clk *mtk_clk_register_mux(const struct mtk_mux *mux, - struct regmap *regmap, - spinlock_t *lock); - int mtk_clk_register_muxes(const struct mtk_mux *muxes, int num, struct device_node *node, spinlock_t *lock, -- GitLab From 150927c3674d7db4dd51a7269e01423c8c78e53b Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Thu, 17 Dec 2020 16:52:44 +0800 Subject: [PATCH 0671/1894] ALSA: hda/realtek - Supported Dell fixed type headset This platform only supported iphone type headset. It can't support Dell headset mode. Signed-off-by: Kailang Yang Cc: Link: https://lore.kernel.org/r/b97e971978034bc9b772a08ec91265e8@realtek.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 71da03f950cc0..c203eba8d9ff2 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7881,6 +7881,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x09bf, "Dell Precision", ALC233_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0a2e, "Dell", ALC236_FIXUP_DELL_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1028, 0x0a30, "Dell", ALC236_FIXUP_DELL_AIO_HEADSET_MIC), + SND_PCI_QUIRK(0x1028, 0x0a58, "Dell Precision 3650 Tower", ALC255_FIXUP_DELL_HEADSET_MIC), SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), -- GitLab From 74c64efa1557fef731b59eb813f115436d18078e Mon Sep 17 00:00:00 2001 From: Robin Gong Date: Fri, 18 Dec 2020 00:15:47 +0800 Subject: [PATCH 0672/1894] ALSA: core: memalloc: add page alignment for iram Since mmap for userspace is based on page alignment, add page alignment for iram alloc from pool, otherwise, some good data located in the same page of dmab->area maybe touched wrongly by userspace like pulseaudio. Signed-off-by: Robin Gong Cc: Link: https://lore.kernel.org/r/1608221747-3474-1-git-send-email-yibin.gong@nxp.com Signed-off-by: Takashi Iwai --- sound/core/memalloc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c index 0aeeb6244ff6c..0f335162f87c7 100644 --- a/sound/core/memalloc.c +++ b/sound/core/memalloc.c @@ -77,7 +77,8 @@ static void snd_malloc_dev_iram(struct snd_dma_buffer *dmab, size_t size) /* Assign the pool into private_data field */ dmab->private_data = pool; - dmab->area = gen_pool_dma_alloc(pool, size, &dmab->addr); + dmab->area = gen_pool_dma_alloc_align(pool, size, &dmab->addr, + PAGE_SIZE); } /** -- GitLab From 725124d10d00b2f56bb5bd08b431cc74ab3b3ace Mon Sep 17 00:00:00 2001 From: Amadej Kastelic Date: Tue, 15 Dec 2020 19:09:05 +0100 Subject: [PATCH 0673/1894] ALSA: usb-audio: Add VID to support native DSD reproduction on FiiO devices Add VID to support native DSD reproduction on FiiO devices. Tested-by: Amadej Kastelic Signed-off-by: Emilio Moretti Signed-off-by: Amadej Kastelic Cc: Link: https://lore.kernel.org/r/X9j7wdXSr4XyK7Bd@ryzen.localdomain Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 63cdf3c8c2bc3..e4a690bb4c996 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1771,6 +1771,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip, case 0x25ce: /* Mytek devices */ case 0x278b: /* Rotel? */ case 0x292b: /* Gustard/Ess based devices */ + case 0x2972: /* FiiO devices */ case 0x2ab6: /* T+A devices */ case 0x3353: /* Khadas devices */ case 0x3842: /* EVGA */ -- GitLab From bf6d43d7232511d4aadb634ec97fcbe5d49ee120 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 15 Nov 2020 23:34:32 +0300 Subject: [PATCH 0674/1894] clk: Add hardware-enable column to clk summary Add "hardware enable" column to the clk summary in order to show actual hardware enable-state of all clocks. The possible states are "Y/N/?", where question mark means that state is unknown, i.e. clock isn't a mux and clk-driver doesn't support is_enabled() callback for this clock. In conjunction with clk_ignore_unused, this tells us what unused clocks are left enabled after bootloader. This is also a useful aid for debugging interactions with firmware which changes clock states without notifying kernel. Signed-off-by: Dmitry Osipenko Link: https://lore.kernel.org/r/20201115203432.13934-1-digetx@gmail.com Signed-off-by: Stephen Boyd --- drivers/clk/clk.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index f83dac54ed853..c2cbf36186801 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -2931,7 +2931,14 @@ static void clk_summary_show_one(struct seq_file *s, struct clk_core *c, else seq_puts(s, "-----"); - seq_printf(s, " %6d\n", clk_core_get_scaled_duty_cycle(c, 100000)); + seq_printf(s, " %6d", clk_core_get_scaled_duty_cycle(c, 100000)); + + if (c->ops->is_enabled) + seq_printf(s, " %9c\n", clk_core_is_enabled(c) ? 'Y' : 'N'); + else if (!c->ops->enable) + seq_printf(s, " %9c\n", 'Y'); + else + seq_printf(s, " %9c\n", '?'); } static void clk_summary_show_subtree(struct seq_file *s, struct clk_core *c, @@ -2950,9 +2957,9 @@ static int clk_summary_show(struct seq_file *s, void *data) struct clk_core *c; struct hlist_head **lists = (struct hlist_head **)s->private; - seq_puts(s, " enable prepare protect duty\n"); - seq_puts(s, " clock count count count rate accuracy phase cycle\n"); - seq_puts(s, "---------------------------------------------------------------------------------------------\n"); + seq_puts(s, " enable prepare protect duty hardware\n"); + seq_puts(s, " clock count count count rate accuracy phase cycle enable\n"); + seq_puts(s, "-------------------------------------------------------------------------------------------------------\n"); clk_prepare_lock(); -- GitLab From 8c6239f6e95f583bb763d0228e02d4dd0fb3d492 Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Fri, 13 Nov 2020 21:16:23 +0800 Subject: [PATCH 0675/1894] clk: ti: Fix memleak in ti_fapll_synth_setup If clk_register fails, we should goto free branch before function returns to prevent memleak. Fixes: 163152cbbe321 ("clk: ti: Add support for FAPLL on dm816x") Reported-by: Hulk Robot Signed-off-by: Zhang Qilong Link: https://lore.kernel.org/r/20201113131623.2098222-1-zhangqilong3@huawei.com Acked-by: Tony Lindgren Signed-off-by: Stephen Boyd --- drivers/clk/ti/fapll.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/clk/ti/fapll.c b/drivers/clk/ti/fapll.c index 95e36ba64accf..8024c6d2b9e95 100644 --- a/drivers/clk/ti/fapll.c +++ b/drivers/clk/ti/fapll.c @@ -498,6 +498,7 @@ static struct clk * __init ti_fapll_synth_setup(struct fapll_data *fd, { struct clk_init_data *init; struct fapll_synth *synth; + struct clk *clk = ERR_PTR(-ENOMEM); init = kzalloc(sizeof(*init), GFP_KERNEL); if (!init) @@ -520,13 +521,19 @@ static struct clk * __init ti_fapll_synth_setup(struct fapll_data *fd, synth->hw.init = init; synth->clk_pll = pll_clk; - return clk_register(NULL, &synth->hw); + clk = clk_register(NULL, &synth->hw); + if (IS_ERR(clk)) { + pr_err("failed to register clock\n"); + goto free; + } + + return clk; free: kfree(synth); kfree(init); - return ERR_PTR(-ENOMEM); + return clk; } static void __init ti_fapll_setup(struct device_node *node) -- GitLab From 66cc7af38149fefd2d8c9a47a456884bd2105c9d Mon Sep 17 00:00:00 2001 From: David Shah Date: Sun, 6 Dec 2020 12:18:51 +0100 Subject: [PATCH 0676/1894] clk: ti: omap5: Fix reboot DPLL lock failure when using ABE TIMERs Having the ABE DPLL ref and bypass muxes set to different inputs was causing the DPLL not to lock when TIMER8 was used, as it is in the Pyra for the backlight. This patch fixes this by setting abe_dpll_bypass_clk_mux to sys_32k_ck in omap5xxx_dt_clk_init. A similar patch may also be needed for OMAP44xx which has similar code in omap4xxx_dt_clk_init, but I have not added this as I have no hardware to test on. Signed-off-by: David Shah Signed-off-by: H. Nikolaus Schaller Link: https://lore.kernel.org/r/1d3abe2512054866cc2ea7b2592238f4fa06502a.1607253531.git.hns@goldelico.com Acked-by: Tony Lindgren Signed-off-by: Stephen Boyd --- drivers/clk/ti/clk-54xx.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/clk/ti/clk-54xx.c b/drivers/clk/ti/clk-54xx.c index 8694bc9f5fc7f..f0542391ca4bd 100644 --- a/drivers/clk/ti/clk-54xx.c +++ b/drivers/clk/ti/clk-54xx.c @@ -605,7 +605,7 @@ static struct ti_dt_clk omap54xx_clks[] = { int __init omap5xxx_dt_clk_init(void) { int rc; - struct clk *abe_dpll_ref, *abe_dpll, *sys_32k_ck, *usb_dpll; + struct clk *abe_dpll_ref, *abe_dpll, *abe_dpll_byp, *sys_32k_ck, *usb_dpll; ti_dt_clocks_register(omap54xx_clks); @@ -616,6 +616,16 @@ int __init omap5xxx_dt_clk_init(void) abe_dpll_ref = clk_get_sys(NULL, "abe_dpll_clk_mux"); sys_32k_ck = clk_get_sys(NULL, "sys_32k_ck"); rc = clk_set_parent(abe_dpll_ref, sys_32k_ck); + + /* + * This must also be set to sys_32k_ck to match or + * the ABE DPLL will not lock on a warm reboot when + * ABE timers are used. + */ + abe_dpll_byp = clk_get_sys(NULL, "abe_dpll_bypass_clk_mux"); + if (!rc) + rc = clk_set_parent(abe_dpll_byp, sys_32k_ck); + abe_dpll = clk_get_sys(NULL, "dpll_abe_ck"); if (!rc) rc = clk_set_rate(abe_dpll, OMAP5_DPLL_ABE_DEFFREQ); -- GitLab From bd91abb218e0ac4a7402d6c25d383e2a706bb511 Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Tue, 13 Oct 2020 17:34:20 +0300 Subject: [PATCH 0677/1894] dt-bindings: clock: adi,axi-clkgen: convert old binding to yaml format This change converts the old binding for the AXI clkgen driver to a yaml format. As maintainers, added: - Lars-Peter Clausen - as original author of driver & binding - Michael Hennerich - as supporter of Analog Devices drivers Acked-by: Michael Hennerich Acked-by: Lars-Peter Clausen Signed-off-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20201013143421.84188-1-alexandru.ardelean@analog.com Reviewed-by: Rob Herring Signed-off-by: Stephen Boyd --- .../bindings/clock/adi,axi-clkgen.yaml | 53 +++++++++++++++++++ .../devicetree/bindings/clock/axi-clkgen.txt | 25 --------- 2 files changed, 53 insertions(+), 25 deletions(-) create mode 100644 Documentation/devicetree/bindings/clock/adi,axi-clkgen.yaml delete mode 100644 Documentation/devicetree/bindings/clock/axi-clkgen.txt diff --git a/Documentation/devicetree/bindings/clock/adi,axi-clkgen.yaml b/Documentation/devicetree/bindings/clock/adi,axi-clkgen.yaml new file mode 100644 index 0000000000000..0d06387184d68 --- /dev/null +++ b/Documentation/devicetree/bindings/clock/adi,axi-clkgen.yaml @@ -0,0 +1,53 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/adi,axi-clkgen.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Binding for Analog Devices AXI clkgen pcore clock generator + +maintainers: + - Lars-Peter Clausen + - Michael Hennerich + +description: | + The axi_clkgen IP core is a software programmable clock generator, + that can be synthesized on various FPGA platforms. + + Link: https://wiki.analog.com/resources/fpga/docs/axi_clkgen + +properties: + compatible: + enum: + - adi,axi-clkgen-2.00.a + + clocks: + description: + Specifies the reference clock(s) from which the output frequency is + derived. This must either reference one clock if only the first clock + input is connected or two if both clock inputs are connected. + minItems: 1 + maxItems: 2 + + '#clock-cells': + const: 0 + + reg: + maxItems: 1 + +required: + - compatible + - reg + - clocks + - '#clock-cells' + +additionalProperties: false + +examples: + - | + clock-controller@ff000000 { + compatible = "adi,axi-clkgen-2.00.a"; + #clock-cells = <0>; + reg = <0xff000000 0x1000>; + clocks = <&osc 1>; + }; diff --git a/Documentation/devicetree/bindings/clock/axi-clkgen.txt b/Documentation/devicetree/bindings/clock/axi-clkgen.txt deleted file mode 100644 index aca94fe9416f0..0000000000000 --- a/Documentation/devicetree/bindings/clock/axi-clkgen.txt +++ /dev/null @@ -1,25 +0,0 @@ -Binding for the axi-clkgen clock generator - -This binding uses the common clock binding[1]. - -[1] Documentation/devicetree/bindings/clock/clock-bindings.txt - -Required properties: -- compatible : shall be "adi,axi-clkgen-1.00.a" or "adi,axi-clkgen-2.00.a". -- #clock-cells : from common clock binding; Should always be set to 0. -- reg : Address and length of the axi-clkgen register set. -- clocks : Phandle and clock specifier for the parent clock(s). This must - either reference one clock if only the first clock input is connected or two - if both clock inputs are connected. For the later case the clock connected - to the first input must be specified first. - -Optional properties: -- clock-output-names : From common clock binding. - -Example: - clock@ff000000 { - compatible = "adi,axi-clkgen"; - #clock-cells = <0>; - reg = <0xff000000 0x1000>; - clocks = <&osc 1>; - }; -- GitLab From ac1ee86a9cdb002b0c130cfbad668dd992a0596a Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Thu, 3 Dec 2020 09:40:36 +0200 Subject: [PATCH 0678/1894] clk: axi-clkgen: wrap limits in a struct and keep copy on the state object Up until the these limits were global/hard-coded, since they are typically limits of the fabric. However, since this is an FPGA generated clock, this may run on setups where one clock is on a fabric, and another one synthesized on another fabric connected via PCIe (or some other inter-connect, and then these limits need to be adjusted for each instance of the AXI CLKGEN. This change wraps the current constants in 'axi_clkgen_limits' struct and the 'axi_clkgen' instance keeps a copy of these limits, which is initialized at probe from the default limits. The limits are stored on the device-tree OF table, so that we can adjust them via the compatible string. Signed-off-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20201203074037.26940-1-alexandru.ardelean@analog.com Signed-off-by: Stephen Boyd --- drivers/clk/clk-axi-clkgen.c | 48 +++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 17 deletions(-) diff --git a/drivers/clk/clk-axi-clkgen.c b/drivers/clk/clk-axi-clkgen.c index 14d803e6af623..963a62e9c7286 100644 --- a/drivers/clk/clk-axi-clkgen.c +++ b/drivers/clk/clk-axi-clkgen.c @@ -46,9 +46,17 @@ #define MMCM_CLK_DIV_DIVIDE BIT(11) #define MMCM_CLK_DIV_NOCOUNT BIT(12) +struct axi_clkgen_limits { + unsigned int fpfd_min; + unsigned int fpfd_max; + unsigned int fvco_min; + unsigned int fvco_max; +}; + struct axi_clkgen { void __iomem *base; struct clk_hw clk_hw; + struct axi_clkgen_limits limits; }; static uint32_t axi_clkgen_lookup_filter(unsigned int m) @@ -100,12 +108,15 @@ static uint32_t axi_clkgen_lookup_lock(unsigned int m) return 0x1f1f00fa; } -static const unsigned int fpfd_min = 10000; -static const unsigned int fpfd_max = 300000; -static const unsigned int fvco_min = 600000; -static const unsigned int fvco_max = 1200000; +static const struct axi_clkgen_limits axi_clkgen_zynq_default_limits = { + .fpfd_min = 10000, + .fpfd_max = 300000, + .fvco_min = 600000, + .fvco_max = 1200000, +}; -static void axi_clkgen_calc_params(unsigned long fin, unsigned long fout, +static void axi_clkgen_calc_params(const struct axi_clkgen_limits *limits, + unsigned long fin, unsigned long fout, unsigned int *best_d, unsigned int *best_m, unsigned int *best_dout) { unsigned long d, d_min, d_max, _d_min, _d_max; @@ -122,12 +133,12 @@ static void axi_clkgen_calc_params(unsigned long fin, unsigned long fout, *best_m = 0; *best_dout = 0; - d_min = max_t(unsigned long, DIV_ROUND_UP(fin, fpfd_max), 1); - d_max = min_t(unsigned long, fin / fpfd_min, 80); + d_min = max_t(unsigned long, DIV_ROUND_UP(fin, limits->fpfd_max), 1); + d_max = min_t(unsigned long, fin / limits->fpfd_min, 80); again: - fvco_min_fract = fvco_min << fract_shift; - fvco_max_fract = fvco_max << fract_shift; + fvco_min_fract = limits->fvco_min << fract_shift; + fvco_max_fract = limits->fvco_max << fract_shift; m_min = max_t(unsigned long, DIV_ROUND_UP(fvco_min_fract, fin) * d_min, 1); m_max = min_t(unsigned long, fvco_max_fract * d_max / fin, 64 << fract_shift); @@ -319,6 +330,7 @@ static int axi_clkgen_set_rate(struct clk_hw *clk_hw, unsigned long rate, unsigned long parent_rate) { struct axi_clkgen *axi_clkgen = clk_hw_to_axi_clkgen(clk_hw); + const struct axi_clkgen_limits *limits = &axi_clkgen->limits; unsigned int d, m, dout; struct axi_clkgen_div_params params; uint32_t power = 0; @@ -328,7 +340,7 @@ static int axi_clkgen_set_rate(struct clk_hw *clk_hw, if (parent_rate == 0 || rate == 0) return -EINVAL; - axi_clkgen_calc_params(parent_rate, rate, &d, &m, &dout); + axi_clkgen_calc_params(limits, parent_rate, rate, &d, &m, &dout); if (d == 0 || dout == 0 || m == 0) return -EINVAL; @@ -368,10 +380,12 @@ static int axi_clkgen_set_rate(struct clk_hw *clk_hw, static long axi_clkgen_round_rate(struct clk_hw *hw, unsigned long rate, unsigned long *parent_rate) { + struct axi_clkgen *axi_clkgen = clk_hw_to_axi_clkgen(hw); + const struct axi_clkgen_limits *limits = &axi_clkgen->limits; unsigned int d, m, dout; unsigned long long tmp; - axi_clkgen_calc_params(*parent_rate, rate, &d, &m, &dout); + axi_clkgen_calc_params(limits, *parent_rate, rate, &d, &m, &dout); if (d == 0 || dout == 0 || m == 0) return -EINVAL; @@ -485,6 +499,7 @@ static const struct clk_ops axi_clkgen_ops = { static const struct of_device_id axi_clkgen_ids[] = { { .compatible = "adi,axi-clkgen-2.00.a", + .data = &axi_clkgen_zynq_default_limits, }, { }, }; @@ -492,7 +507,7 @@ MODULE_DEVICE_TABLE(of, axi_clkgen_ids); static int axi_clkgen_probe(struct platform_device *pdev) { - const struct of_device_id *id; + const struct axi_clkgen_limits *dflt_limits; struct axi_clkgen *axi_clkgen; struct clk_init_data init; const char *parent_names[2]; @@ -501,11 +516,8 @@ static int axi_clkgen_probe(struct platform_device *pdev) unsigned int i; int ret; - if (!pdev->dev.of_node) - return -ENODEV; - - id = of_match_node(axi_clkgen_ids, pdev->dev.of_node); - if (!id) + dflt_limits = device_get_match_data(&pdev->dev); + if (!dflt_limits) return -ENODEV; axi_clkgen = devm_kzalloc(&pdev->dev, sizeof(*axi_clkgen), GFP_KERNEL); @@ -527,6 +539,8 @@ static int axi_clkgen_probe(struct platform_device *pdev) return -EINVAL; } + memcpy(&axi_clkgen->limits, dflt_limits, sizeof(axi_clkgen->limits)); + clk_name = pdev->dev.of_node->name; of_property_read_string(pdev->dev.of_node, "clock-output-names", &clk_name); -- GitLab From 16214f97f44321a48985ef37f5ca2d5b2479b8f2 Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Thu, 3 Dec 2020 09:40:37 +0200 Subject: [PATCH 0679/1894] clk: axi-clkgen: move the OF table at the bottom of the file The change is mostly cosmetic. No functional changes. Since the driver now uses of_device_get_match_data() to obtain some driver specific info, there is no need to define the OF table before the probe function. Signed-off-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20201203074037.26940-2-alexandru.ardelean@analog.com Signed-off-by: Stephen Boyd --- drivers/clk/clk-axi-clkgen.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/clk/clk-axi-clkgen.c b/drivers/clk/clk-axi-clkgen.c index 963a62e9c7286..ad86e031ba3ee 100644 --- a/drivers/clk/clk-axi-clkgen.c +++ b/drivers/clk/clk-axi-clkgen.c @@ -496,15 +496,6 @@ static const struct clk_ops axi_clkgen_ops = { .get_parent = axi_clkgen_get_parent, }; -static const struct of_device_id axi_clkgen_ids[] = { - { - .compatible = "adi,axi-clkgen-2.00.a", - .data = &axi_clkgen_zynq_default_limits, - }, - { }, -}; -MODULE_DEVICE_TABLE(of, axi_clkgen_ids); - static int axi_clkgen_probe(struct platform_device *pdev) { const struct axi_clkgen_limits *dflt_limits; @@ -568,6 +559,15 @@ static int axi_clkgen_remove(struct platform_device *pdev) return 0; } +static const struct of_device_id axi_clkgen_ids[] = { + { + .compatible = "adi,axi-clkgen-2.00.a", + .data = &axi_clkgen_zynq_default_limits, + }, + { } +}; +MODULE_DEVICE_TABLE(of, axi_clkgen_ids); + static struct platform_driver axi_clkgen_driver = { .driver = { .name = "adi-axi-clkgen", -- GitLab From 03813d9b7d4368d4a8c9bb8f5a2a1e23dac8f1b5 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 7 Dec 2020 11:50:50 +0100 Subject: [PATCH 0680/1894] clk: Trace clk_set_rate() "range" functions The clk_set_rate "range" functions don't have any tracepoints even though it might be useful. Add some. Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20201207105050.2096917-1-maxime@cerno.tech [sboyd@kernel.org: Reword commit text] Signed-off-by: Stephen Boyd --- drivers/clk/clk.c | 6 ++++++ include/trace/events/clk.h | 44 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index f83dac54ed853..b98415d280c87 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -2314,6 +2314,8 @@ int clk_set_rate_range(struct clk *clk, unsigned long min, unsigned long max) if (!clk) return 0; + trace_clk_set_rate_range(clk->core, min, max); + if (min > max) { pr_err("%s: clk %s dev %s con %s: invalid range [%lu, %lu]\n", __func__, clk->core->name, clk->dev_id, clk->con_id, @@ -2381,6 +2383,8 @@ int clk_set_min_rate(struct clk *clk, unsigned long rate) if (!clk) return 0; + trace_clk_set_min_rate(clk->core, rate); + return clk_set_rate_range(clk, rate, clk->max_rate); } EXPORT_SYMBOL_GPL(clk_set_min_rate); @@ -2397,6 +2401,8 @@ int clk_set_max_rate(struct clk *clk, unsigned long rate) if (!clk) return 0; + trace_clk_set_max_rate(clk->core, rate); + return clk_set_rate_range(clk, clk->min_rate, rate); } EXPORT_SYMBOL_GPL(clk_set_max_rate); diff --git a/include/trace/events/clk.h b/include/trace/events/clk.h index cb1aea25c199c..e19edc63ee951 100644 --- a/include/trace/events/clk.h +++ b/include/trace/events/clk.h @@ -118,6 +118,50 @@ DEFINE_EVENT(clk_rate, clk_set_rate_complete, TP_ARGS(core, rate) ); +DEFINE_EVENT(clk_rate, clk_set_min_rate, + + TP_PROTO(struct clk_core *core, unsigned long rate), + + TP_ARGS(core, rate) +); + +DEFINE_EVENT(clk_rate, clk_set_max_rate, + + TP_PROTO(struct clk_core *core, unsigned long rate), + + TP_ARGS(core, rate) +); + +DECLARE_EVENT_CLASS(clk_rate_range, + + TP_PROTO(struct clk_core *core, unsigned long min, unsigned long max), + + TP_ARGS(core, min, max), + + TP_STRUCT__entry( + __string( name, core->name ) + __field(unsigned long, min ) + __field(unsigned long, max ) + ), + + TP_fast_assign( + __assign_str(name, core->name); + __entry->min = min; + __entry->max = max; + ), + + TP_printk("%s min %lu max %lu", __get_str(name), + (unsigned long)__entry->min, + (unsigned long)__entry->max) +); + +DEFINE_EVENT(clk_rate_range, clk_set_rate_range, + + TP_PROTO(struct clk_core *core, unsigned long min, unsigned long max), + + TP_ARGS(core, min, max) +); + DECLARE_EVENT_CLASS(clk_parent, TP_PROTO(struct clk_core *core, struct clk_core *parent), -- GitLab From 315fbe4cef98ee5fb6085bc54c7f25eb06466c70 Mon Sep 17 00:00:00 2001 From: Srinivasa Rao Mandadapu Date: Thu, 17 Dec 2020 13:38:33 +0530 Subject: [PATCH 0681/1894] ASoC: qcom: Fix incorrect volatile registers MI2S and DMA control registers are not volatile, so remove these from volatile registers list. Registers reset state check by reading non volatile registers makes no use, so remove error check from cpu and platform trigger callbacks. Initialized map variable two times in lpass platform trigger API, so remove redundant initialization. Fixes commit b1824968221cc ("ASoC: qcom: Fix enabling BCLK and LRCLK in LPAIF invalid state") Signed-off-by: V Sujith Kumar Reddy Signed-off-by: Srinivasa Rao Mandadapu Link: https://lore.kernel.org/r/1608192514-29695-2-git-send-email-srivasam@codeaurora.org Signed-off-by: Mark Brown --- sound/soc/qcom/lpass-cpu.c | 20 ++------------------ sound/soc/qcom/lpass-platform.c | 15 --------------- 2 files changed, 2 insertions(+), 33 deletions(-) diff --git a/sound/soc/qcom/lpass-cpu.c b/sound/soc/qcom/lpass-cpu.c index af684fd19ab9e..c5e99c2d89c7e 100644 --- a/sound/soc/qcom/lpass-cpu.c +++ b/sound/soc/qcom/lpass-cpu.c @@ -270,18 +270,6 @@ static int lpass_cpu_daiops_trigger(struct snd_pcm_substream *substream, struct lpaif_i2sctl *i2sctl = drvdata->i2sctl; unsigned int id = dai->driver->id; int ret = -EINVAL; - unsigned int val = 0; - - ret = regmap_read(drvdata->lpaif_map, - LPAIF_I2SCTL_REG(drvdata->variant, dai->driver->id), &val); - if (ret) { - dev_err(dai->dev, "error reading from i2sctl reg: %d\n", ret); - return ret; - } - if (val == LPAIF_I2SCTL_RESET_STATE) { - dev_err(dai->dev, "error in i2sctl register state\n"); - return -ENOTRECOVERABLE; - } switch (cmd) { case SNDRV_PCM_TRIGGER_START: @@ -454,20 +442,16 @@ static bool lpass_cpu_regmap_volatile(struct device *dev, unsigned int reg) struct lpass_variant *v = drvdata->variant; int i; - for (i = 0; i < v->i2s_ports; ++i) - if (reg == LPAIF_I2SCTL_REG(v, i)) - return true; for (i = 0; i < v->irq_ports; ++i) if (reg == LPAIF_IRQSTAT_REG(v, i)) return true; for (i = 0; i < v->rdma_channels; ++i) - if (reg == LPAIF_RDMACURR_REG(v, i) || reg == LPAIF_RDMACTL_REG(v, i)) + if (reg == LPAIF_RDMACURR_REG(v, i)) return true; for (i = 0; i < v->wrdma_channels; ++i) - if (reg == LPAIF_WRDMACURR_REG(v, i + v->wrdma_channel_start) || - reg == LPAIF_WRDMACTL_REG(v, i + v->wrdma_channel_start)) + if (reg == LPAIF_WRDMACURR_REG(v, i + v->wrdma_channel_start)) return true; return false; diff --git a/sound/soc/qcom/lpass-platform.c b/sound/soc/qcom/lpass-platform.c index 80b09dede5f9c..232deee6fde56 100644 --- a/sound/soc/qcom/lpass-platform.c +++ b/sound/soc/qcom/lpass-platform.c @@ -452,7 +452,6 @@ static int lpass_platform_pcmops_trigger(struct snd_soc_component *component, unsigned int reg_irqclr = 0, val_irqclr = 0; unsigned int reg_irqen = 0, val_irqen = 0, val_mask = 0; unsigned int dai_id = cpu_dai->driver->id; - unsigned int dma_ctrl_reg = 0; ch = pcm_data->dma_ch; if (dir == SNDRV_PCM_STREAM_PLAYBACK) { @@ -469,17 +468,7 @@ static int lpass_platform_pcmops_trigger(struct snd_soc_component *component, id = pcm_data->dma_ch - v->wrdma_channel_start; map = drvdata->lpaif_map; } - ret = regmap_read(map, LPAIF_DMACTL_REG(v, ch, dir, dai_id), &dma_ctrl_reg); - if (ret) { - dev_err(soc_runtime->dev, "error reading from rdmactl reg: %d\n", ret); - return ret; - } - if (dma_ctrl_reg == LPAIF_DMACTL_RESET_STATE || - dma_ctrl_reg == LPAIF_DMACTL_RESET_STATE + 1) { - dev_err(soc_runtime->dev, "error in rdmactl register state\n"); - return -ENOTRECOVERABLE; - } switch (cmd) { case SNDRV_PCM_TRIGGER_START: case SNDRV_PCM_TRIGGER_RESUME: @@ -500,7 +489,6 @@ static int lpass_platform_pcmops_trigger(struct snd_soc_component *component, "error writing to rdmactl reg: %d\n", ret); return ret; } - map = drvdata->hdmiif_map; reg_irqclr = LPASS_HDMITX_APP_IRQCLEAR_REG(v); val_irqclr = (LPAIF_IRQ_ALL(ch) | LPAIF_IRQ_HDMI_REQ_ON_PRELOAD(ch) | @@ -519,7 +507,6 @@ static int lpass_platform_pcmops_trigger(struct snd_soc_component *component, break; case MI2S_PRIMARY: case MI2S_SECONDARY: - map = drvdata->lpaif_map; reg_irqclr = LPAIF_IRQCLEAR_REG(v, LPAIF_IRQ_PORT_HOST); val_irqclr = LPAIF_IRQ_ALL(ch); @@ -563,7 +550,6 @@ static int lpass_platform_pcmops_trigger(struct snd_soc_component *component, "error writing to rdmactl reg: %d\n", ret); return ret; } - map = drvdata->hdmiif_map; reg_irqen = LPASS_HDMITX_APP_IRQEN_REG(v); val_mask = (LPAIF_IRQ_ALL(ch) | LPAIF_IRQ_HDMI_REQ_ON_PRELOAD(ch) | @@ -573,7 +559,6 @@ static int lpass_platform_pcmops_trigger(struct snd_soc_component *component, break; case MI2S_PRIMARY: case MI2S_SECONDARY: - map = drvdata->lpaif_map; reg_irqen = LPAIF_IRQEN_REG(v, LPAIF_IRQ_PORT_HOST); val_mask = LPAIF_IRQ_ALL(ch); val_irqen = 0; -- GitLab From 8d1bfc04c97407767559f6389a0f0fb060cbe25e Mon Sep 17 00:00:00 2001 From: Srinivasa Rao Mandadapu Date: Thu, 17 Dec 2020 13:38:34 +0530 Subject: [PATCH 0682/1894] ASoC: qcom: Add support for playback recover after resume To support playback continuation after hard suspend(bypass powerd) and resume do regcache sync with component driver pm ops. Signed-off-by: V Sujith Kumar Reddy Signed-off-by: Srinivasa Rao Mandadapu Reviewed-by: Srinivas Kandagatla Tested-by: Steev Klimaszewski Link: https://lore.kernel.org/r/1608192514-29695-3-git-send-email-srivasam@codeaurora.org Signed-off-by: Mark Brown --- sound/soc/qcom/lpass-platform.c | 35 +++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/sound/soc/qcom/lpass-platform.c b/sound/soc/qcom/lpass-platform.c index 232deee6fde56..d1c248590f3ab 100644 --- a/sound/soc/qcom/lpass-platform.c +++ b/sound/soc/qcom/lpass-platform.c @@ -823,6 +823,39 @@ static void lpass_platform_pcm_free(struct snd_soc_component *component, } } +static int lpass_platform_pcmops_suspend(struct snd_soc_component *component) +{ + struct lpass_data *drvdata = snd_soc_component_get_drvdata(component); + struct regmap *map; + unsigned int dai_id = component->id; + + if (dai_id == LPASS_DP_RX) + map = drvdata->hdmiif_map; + else + map = drvdata->lpaif_map; + + regcache_cache_only(map, true); + regcache_mark_dirty(map); + + return 0; +} + +static int lpass_platform_pcmops_resume(struct snd_soc_component *component) +{ + struct lpass_data *drvdata = snd_soc_component_get_drvdata(component); + struct regmap *map; + unsigned int dai_id = component->id; + + if (dai_id == LPASS_DP_RX) + map = drvdata->hdmiif_map; + else + map = drvdata->lpaif_map; + + regcache_cache_only(map, false); + return regcache_sync(map); +} + + static const struct snd_soc_component_driver lpass_component_driver = { .name = DRV_NAME, .open = lpass_platform_pcmops_open, @@ -835,6 +868,8 @@ static const struct snd_soc_component_driver lpass_component_driver = { .mmap = lpass_platform_pcmops_mmap, .pcm_construct = lpass_platform_pcm_new, .pcm_destruct = lpass_platform_pcm_free, + .suspend = lpass_platform_pcmops_suspend, + .resume = lpass_platform_pcmops_resume, }; -- GitLab From 61c7dbec33777ade95d3db58beec8d7f177868c8 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 17 Dec 2020 11:28:49 +0900 Subject: [PATCH 0683/1894] ASoC: rsnd: don't call clk_disable_unprepare() if can't use We need to care clock accessibility, because we might can't use clock for some reasons. It sets clk_rate for each clocks when enabled. This means it doesn't have clk_rate if we can't use. We can avoid to call clk_disable_unprepare() in such case. Link: https://lore.kernel.org/r/CAMuHMdWvB+p=2JqTsO7bR8uJqKqO5A2XgXFXsVAjHk3hcxgcTw@mail.gmail.com Reported-by: Geert Uytterhoeven Signed-off-by: Kuninori Morimoto Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/87eejpgoi9.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- sound/soc/sh/rcar/adg.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/sound/soc/sh/rcar/adg.c b/sound/soc/sh/rcar/adg.c index b9aacf3d3b29c..abdfd9cf91e2a 100644 --- a/sound/soc/sh/rcar/adg.c +++ b/sound/soc/sh/rcar/adg.c @@ -366,25 +366,27 @@ void rsnd_adg_clk_control(struct rsnd_priv *priv, int enable) struct rsnd_adg *adg = rsnd_priv_to_adg(priv); struct device *dev = rsnd_priv_to_dev(priv); struct clk *clk; - int i, ret; + int i; for_each_rsnd_clk(clk, adg, i) { - ret = 0; if (enable) { - ret = clk_prepare_enable(clk); + int ret = clk_prepare_enable(clk); /* * We shouldn't use clk_get_rate() under * atomic context. Let's keep it when * rsnd_adg_clk_enable() was called */ - adg->clk_rate[i] = clk_get_rate(adg->clk[i]); + adg->clk_rate[i] = 0; + if (ret < 0) + dev_warn(dev, "can't use clk %d\n", i); + else + adg->clk_rate[i] = clk_get_rate(clk); } else { - clk_disable_unprepare(clk); + if (adg->clk_rate[i]) + clk_disable_unprepare(clk); + adg->clk_rate[i] = 0; } - - if (ret < 0) - dev_warn(dev, "can't use clk %d\n", i); } } -- GitLab From bb224c3e3e41d940612d4cc9573289cdbd5cb8f5 Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Thu, 17 Dec 2020 11:54:01 +0100 Subject: [PATCH 0684/1894] ASoC: Intel: haswell: Add missing pm_ops haswell machine board is missing pm_ops what prevents it from undergoing suspend-resume procedure successfully. Assign default snd_soc_pm_ops so this is no longer the case. Signed-off-by: Cezary Rojewski Link: https://lore.kernel.org/r/20201217105401.27865-1-cezary.rojewski@intel.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/haswell.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/intel/boards/haswell.c b/sound/soc/intel/boards/haswell.c index c55d1239e705b..c763bfeb1f38f 100644 --- a/sound/soc/intel/boards/haswell.c +++ b/sound/soc/intel/boards/haswell.c @@ -189,6 +189,7 @@ static struct platform_driver haswell_audio = { .probe = haswell_audio_probe, .driver = { .name = "haswell-audio", + .pm = &snd_soc_pm_ops, }, }; -- GitLab From 6108f990c0887d3e8f1db2d13c7012e40a061f28 Mon Sep 17 00:00:00 2001 From: Shuming Fan Date: Thu, 17 Dec 2020 16:56:51 +0800 Subject: [PATCH 0685/1894] ASoC: rt711: mutex between calibration and power state changes To avoid calibration time-out, this patch adds the mutex between calibration and power state changes Signed-off-by: Shuming Fan Link: https://lore.kernel.org/r/20201217085651.24580-1-shumingf@realtek.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt711.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/soc/codecs/rt711.c b/sound/soc/codecs/rt711.c index 5771c02c34596..85f744184a60f 100644 --- a/sound/soc/codecs/rt711.c +++ b/sound/soc/codecs/rt711.c @@ -462,6 +462,8 @@ static int rt711_set_amp_gain_put(struct snd_kcontrol *kcontrol, unsigned int read_ll, read_rl; int i; + mutex_lock(&rt711->calibrate_mutex); + /* Can't use update bit function, so read the original value first */ addr_h = mc->reg; addr_l = mc->rreg; @@ -547,6 +549,8 @@ static int rt711_set_amp_gain_put(struct snd_kcontrol *kcontrol, if (dapm->bias_level <= SND_SOC_BIAS_STANDBY) regmap_write(rt711->regmap, RT711_SET_AUDIO_POWER_STATE, AC_PWRST_D3); + + mutex_unlock(&rt711->calibrate_mutex); return 0; } @@ -859,9 +863,11 @@ static int rt711_set_bias_level(struct snd_soc_component *component, break; case SND_SOC_BIAS_STANDBY: + mutex_lock(&rt711->calibrate_mutex); regmap_write(rt711->regmap, RT711_SET_AUDIO_POWER_STATE, AC_PWRST_D3); + mutex_unlock(&rt711->calibrate_mutex); break; default: -- GitLab From 349dd23931d1943b1083182e35715eba8b150fe1 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Thu, 17 Dec 2020 15:45:56 +0800 Subject: [PATCH 0686/1894] ASoC: max98373: don't access volatile registers in bias level off We will set regcache_cache_only true in suspend. As a result, regmap_read will return error when we try to read volatile registers in suspend. Besides, it doesn't make sense to read feedback data when codec is not active. To make userspace happy, this patch returns a cached value shich should be a valid value. Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20201217074556.32370-1-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/codecs/max98373-i2c.c | 20 +++++++++++++++++++ sound/soc/codecs/max98373-sdw.c | 20 +++++++++++++++++++ sound/soc/codecs/max98373.c | 34 ++++++++++++++++++++++++++++++--- sound/soc/codecs/max98373.h | 8 ++++++++ 4 files changed, 79 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/max98373-i2c.c b/sound/soc/codecs/max98373-i2c.c index 92921e34f9486..85f6865019d4a 100644 --- a/sound/soc/codecs/max98373-i2c.c +++ b/sound/soc/codecs/max98373-i2c.c @@ -19,6 +19,12 @@ #include #include "max98373.h" +static const u32 max98373_i2c_cache_reg[] = { + MAX98373_R2054_MEAS_ADC_PVDD_CH_READBACK, + MAX98373_R2055_MEAS_ADC_THERM_CH_READBACK, + MAX98373_R20B6_BDE_CUR_STATE_READBACK, +}; + static struct reg_default max98373_reg[] = { {MAX98373_R2000_SW_RESET, 0x00}, {MAX98373_R2001_INT_RAW1, 0x00}, @@ -472,6 +478,11 @@ static struct snd_soc_dai_driver max98373_dai[] = { static int max98373_suspend(struct device *dev) { struct max98373_priv *max98373 = dev_get_drvdata(dev); + int i; + + /* cache feedback register values before suspend */ + for (i = 0; i < max98373->cache_num; i++) + regmap_read(max98373->regmap, max98373->cache[i].reg, &max98373->cache[i].val); regcache_cache_only(max98373->regmap, true); regcache_mark_dirty(max98373->regmap); @@ -509,6 +520,7 @@ static int max98373_i2c_probe(struct i2c_client *i2c, { int ret = 0; int reg = 0; + int i; struct max98373_priv *max98373 = NULL; max98373 = devm_kzalloc(&i2c->dev, sizeof(*max98373), GFP_KERNEL); @@ -534,6 +546,14 @@ static int max98373_i2c_probe(struct i2c_client *i2c, return ret; } + max98373->cache_num = ARRAY_SIZE(max98373_i2c_cache_reg); + max98373->cache = devm_kcalloc(&i2c->dev, max98373->cache_num, + sizeof(*max98373->cache), + GFP_KERNEL); + + for (i = 0; i < max98373->cache_num; i++) + max98373->cache[i].reg = max98373_i2c_cache_reg[i]; + /* voltage/current slot & gpio configuration */ max98373_slot_config(&i2c->dev, max98373); diff --git a/sound/soc/codecs/max98373-sdw.c b/sound/soc/codecs/max98373-sdw.c index ec2e79c573577..b8d471d79e939 100644 --- a/sound/soc/codecs/max98373-sdw.c +++ b/sound/soc/codecs/max98373-sdw.c @@ -23,6 +23,12 @@ struct sdw_stream_data { struct sdw_stream_runtime *sdw_stream; }; +static const u32 max98373_sdw_cache_reg[] = { + MAX98373_R2054_MEAS_ADC_PVDD_CH_READBACK, + MAX98373_R2055_MEAS_ADC_THERM_CH_READBACK, + MAX98373_R20B6_BDE_CUR_STATE_READBACK, +}; + static struct reg_default max98373_reg[] = { {MAX98373_R0040_SCP_INIT_STAT_1, 0x00}, {MAX98373_R0041_SCP_INIT_MASK_1, 0x00}, @@ -245,6 +251,11 @@ static const struct regmap_config max98373_sdw_regmap = { static __maybe_unused int max98373_suspend(struct device *dev) { struct max98373_priv *max98373 = dev_get_drvdata(dev); + int i; + + /* cache feedback register values before suspend */ + for (i = 0; i < max98373->cache_num; i++) + regmap_read(max98373->regmap, max98373->cache[i].reg, &max98373->cache[i].val); regcache_cache_only(max98373->regmap, true); @@ -757,6 +768,7 @@ static int max98373_init(struct sdw_slave *slave, struct regmap *regmap) { struct max98373_priv *max98373; int ret; + int i; struct device *dev = &slave->dev; /* Allocate and assign private driver data structure */ @@ -768,6 +780,14 @@ static int max98373_init(struct sdw_slave *slave, struct regmap *regmap) max98373->regmap = regmap; max98373->slave = slave; + max98373->cache_num = ARRAY_SIZE(max98373_sdw_cache_reg); + max98373->cache = devm_kcalloc(dev, max98373->cache_num, + sizeof(*max98373->cache), + GFP_KERNEL); + + for (i = 0; i < max98373->cache_num; i++) + max98373->cache[i].reg = max98373_sdw_cache_reg[i]; + /* Read voltage and slot configuration */ max98373_slot_config(dev, max98373); diff --git a/sound/soc/codecs/max98373.c b/sound/soc/codecs/max98373.c index 929bb1798c43f..31d571d4fac1c 100644 --- a/sound/soc/codecs/max98373.c +++ b/sound/soc/codecs/max98373.c @@ -168,6 +168,31 @@ static SOC_ENUM_SINGLE_DECL(max98373_adc_samplerate_enum, MAX98373_R2051_MEAS_ADC_SAMPLING_RATE, 0, max98373_ADC_samplerate_text); +static int max98373_feedback_get(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *component = snd_kcontrol_chip(kcontrol); + struct soc_mixer_control *mc = + (struct soc_mixer_control *)kcontrol->private_value; + struct max98373_priv *max98373 = snd_soc_component_get_drvdata(component); + int i; + + if (snd_soc_component_get_bias_level(component) == SND_SOC_BIAS_OFF) { + /* + * Register values will be cached before suspend. The cached value + * will be a valid value and userspace will happy with that. + */ + for (i = 0; i < max98373->cache_num; i++) { + if (mc->reg == max98373->cache[i].reg) { + ucontrol->value.integer.value[0] = max98373->cache[i].val; + return 0; + } + } + } + + return snd_soc_put_volsw(kcontrol, ucontrol); +} + static const struct snd_kcontrol_new max98373_snd_controls[] = { SOC_SINGLE("Digital Vol Sel Switch", MAX98373_R203F_AMP_DSP_CFG, MAX98373_AMP_VOL_SEL_SHIFT, 1, 0), @@ -209,8 +234,10 @@ SOC_SINGLE("ADC PVDD FLT Switch", MAX98373_R2052_MEAS_ADC_PVDD_FLT_CFG, MAX98373_FLT_EN_SHIFT, 1, 0), SOC_SINGLE("ADC TEMP FLT Switch", MAX98373_R2053_MEAS_ADC_THERM_FLT_CFG, MAX98373_FLT_EN_SHIFT, 1, 0), -SOC_SINGLE("ADC PVDD", MAX98373_R2054_MEAS_ADC_PVDD_CH_READBACK, 0, 0xFF, 0), -SOC_SINGLE("ADC TEMP", MAX98373_R2055_MEAS_ADC_THERM_CH_READBACK, 0, 0xFF, 0), +SOC_SINGLE_EXT("ADC PVDD", MAX98373_R2054_MEAS_ADC_PVDD_CH_READBACK, 0, 0xFF, 0, + max98373_feedback_get, NULL), +SOC_SINGLE_EXT("ADC TEMP", MAX98373_R2055_MEAS_ADC_THERM_CH_READBACK, 0, 0xFF, 0, + max98373_feedback_get, NULL), SOC_SINGLE("ADC PVDD FLT Coeff", MAX98373_R2052_MEAS_ADC_PVDD_FLT_CFG, 0, 0x3, 0), SOC_SINGLE("ADC TEMP FLT Coeff", MAX98373_R2053_MEAS_ADC_THERM_FLT_CFG, @@ -226,7 +253,8 @@ SOC_SINGLE("BDE LVL1 Thresh", MAX98373_R2097_BDE_L1_THRESH, 0, 0xFF, 0), SOC_SINGLE("BDE LVL2 Thresh", MAX98373_R2098_BDE_L2_THRESH, 0, 0xFF, 0), SOC_SINGLE("BDE LVL3 Thresh", MAX98373_R2099_BDE_L3_THRESH, 0, 0xFF, 0), SOC_SINGLE("BDE LVL4 Thresh", MAX98373_R209A_BDE_L4_THRESH, 0, 0xFF, 0), -SOC_SINGLE("BDE Active Level", MAX98373_R20B6_BDE_CUR_STATE_READBACK, 0, 8, 0), +SOC_SINGLE_EXT("BDE Active Level", MAX98373_R20B6_BDE_CUR_STATE_READBACK, 0, 8, 0, + max98373_feedback_get, NULL), SOC_SINGLE("BDE Clip Mode Switch", MAX98373_R2092_BDE_CLIPPER_MODE, 0, 1, 0), SOC_SINGLE("BDE Thresh Hysteresis", MAX98373_R209B_BDE_THRESH_HYST, 0, 0xFF, 0), SOC_SINGLE("BDE Hold Time", MAX98373_R2090_BDE_LVL_HOLD, 0, 0xFF, 0), diff --git a/sound/soc/codecs/max98373.h b/sound/soc/codecs/max98373.h index 4ab29b9d51c74..71f5a5228f34b 100644 --- a/sound/soc/codecs/max98373.h +++ b/sound/soc/codecs/max98373.h @@ -203,6 +203,11 @@ /* MAX98373_R2000_SW_RESET */ #define MAX98373_SOFT_RESET (0x1 << 0) +struct max98373_cache { + u32 reg; + u32 val; +}; + struct max98373_priv { struct regmap *regmap; int reset_gpio; @@ -212,6 +217,9 @@ struct max98373_priv { bool interleave_mode; unsigned int ch_size; bool tdm_mode; + /* cache for reading a valid fake feedback value */ + struct max98373_cache *cache; + int cache_num; /* variables to support soundwire */ struct sdw_slave *slave; bool hw_init; -- GitLab From 4bc4a912534a72f1c96f483448f0be16e5a48063 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 17 Dec 2020 07:53:33 -0700 Subject: [PATCH 0687/1894] io_uring: hold mmap_sem for mm->locked_vm manipulation The kernel doesn't seem to have clear rules around this, but various spots are using the mmap_sem to serialize access to modifying the locked_vm count. Play it safe and lock the mm for write when accounting or unaccounting locked memory. Signed-off-by: Jens Axboe --- fs/io_uring.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 6a4560c9ed9ac..2d07d35e7262a 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8157,10 +8157,13 @@ static void io_unaccount_mem(struct io_ring_ctx *ctx, unsigned long nr_pages, __io_unaccount_mem(ctx->user, nr_pages); if (ctx->mm_account) { - if (acct == ACCT_LOCKED) + if (acct == ACCT_LOCKED) { + mmap_write_lock(ctx->mm_account); ctx->mm_account->locked_vm -= nr_pages; - else if (acct == ACCT_PINNED) + mmap_write_unlock(ctx->mm_account); + }else if (acct == ACCT_PINNED) { atomic64_sub(nr_pages, &ctx->mm_account->pinned_vm); + } } } @@ -8176,10 +8179,13 @@ static int io_account_mem(struct io_ring_ctx *ctx, unsigned long nr_pages, } if (ctx->mm_account) { - if (acct == ACCT_LOCKED) + if (acct == ACCT_LOCKED) { + mmap_write_lock(ctx->mm_account); ctx->mm_account->locked_vm += nr_pages; - else if (acct == ACCT_PINNED) + mmap_write_unlock(ctx->mm_account); + } else if (acct == ACCT_PINNED) { atomic64_add(nr_pages, &ctx->mm_account->pinned_vm); + } } return 0; -- GitLab From 1aba169e770911fb2afa63eb859883c4de2191e3 Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Thu, 17 Dec 2020 00:58:47 -0800 Subject: [PATCH 0688/1894] nbd: Respect max_part for all partition scans The creation path of the NBD device respects max_part and only scans for partitions if max_part is not 0. However, some other code paths ignore max_part, and unconditionally scan for partitions. Add a check for max_part on each partition scan. Signed-off-by: Josh Triplett Reviewed-by: Josef Bacik Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 92f84ed0ba9eb..6727358e147dd 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -318,7 +318,8 @@ static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize, blk_queue_logical_block_size(nbd->disk->queue, blksize); blk_queue_physical_block_size(nbd->disk->queue, blksize); - set_bit(GD_NEED_PART_SCAN, &nbd->disk->state); + if (max_part) + set_bit(GD_NEED_PART_SCAN, &nbd->disk->state); if (!set_capacity_and_notify(nbd->disk, bytesize >> 9)) kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE); return 0; @@ -1476,9 +1477,11 @@ static int nbd_open(struct block_device *bdev, fmode_t mode) refcount_set(&nbd->config_refs, 1); refcount_inc(&nbd->refs); mutex_unlock(&nbd->config_lock); - set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state); + if (max_part) + set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state); } else if (nbd_disconnected(nbd->config)) { - set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state); + if (max_part) + set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state); } out: mutex_unlock(&nbd_index_mutex); -- GitLab From 76efc1c770968d6c786e5340029f8005ed29b2a5 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Thu, 10 Dec 2020 18:56:44 +0800 Subject: [PATCH 0689/1894] blk-iocost: Add iocg idle state tracepoint It will be helpful to trace the iocg's whole state, including active and idle state. And we can easily expand the original iocost_iocg_activate trace event to support a state trace class, including active and idle state tracing. Acked-by: Tejun Heo Signed-off-by: Baolin Wang Signed-off-by: Jens Axboe --- block/blk-iocost.c | 3 +++ include/trace/events/iocost.h | 16 +++++++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/block/blk-iocost.c b/block/blk-iocost.c index ffa418c0dcb1c..ac6078a349394 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -2185,6 +2185,9 @@ static int ioc_check_iocgs(struct ioc *ioc, struct ioc_now *now) WEIGHT_ONE); } + TRACE_IOCG_PATH(iocg_idle, iocg, now, + atomic64_read(&iocg->active_period), + atomic64_read(&ioc->cur_period), vtime); __propagate_weights(iocg, 0, 0, false, now); list_del_init(&iocg->active_list); } diff --git a/include/trace/events/iocost.h b/include/trace/events/iocost.h index 0b6869980ba22..e282ce02fa2d6 100644 --- a/include/trace/events/iocost.h +++ b/include/trace/events/iocost.h @@ -11,7 +11,7 @@ struct ioc_gq; #include -TRACE_EVENT(iocost_iocg_activate, +DECLARE_EVENT_CLASS(iocost_iocg_state, TP_PROTO(struct ioc_gq *iocg, const char *path, struct ioc_now *now, u64 last_period, u64 cur_period, u64 vtime), @@ -59,6 +59,20 @@ TRACE_EVENT(iocost_iocg_activate, ) ); +DEFINE_EVENT(iocost_iocg_state, iocost_iocg_activate, + TP_PROTO(struct ioc_gq *iocg, const char *path, struct ioc_now *now, + u64 last_period, u64 cur_period, u64 vtime), + + TP_ARGS(iocg, path, now, last_period, cur_period, vtime) +); + +DEFINE_EVENT(iocost_iocg_state, iocost_iocg_idle, + TP_PROTO(struct ioc_gq *iocg, const char *path, struct ioc_now *now, + u64 last_period, u64 cur_period, u64 vtime), + + TP_ARGS(iocg, path, now, last_period, cur_period, vtime) +); + DECLARE_EVENT_CLASS(iocg_inuse_update, TP_PROTO(struct ioc_gq *iocg, const char *path, struct ioc_now *now, -- GitLab From cda286f0715c82f8117e166afd42cca068876dde Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 17 Dec 2020 00:24:35 +0000 Subject: [PATCH 0690/1894] io_uring: cancel reqs shouldn't kill overflow list io_uring_cancel_task_requests() doesn't imply that the ring is going away, it may continue to work well after that. The problem is that it sets ->cq_overflow_flushed effectively disabling the CQ overflow feature Split setting cq_overflow_flushed from flush, and do the first one only on exit. It's ok in terms of cancellations because there is a io_uring->in_idle check in __io_cqring_fill_event(). It also fixes a race with setting ->cq_overflow_flushed in io_uring_cancel_task_requests, whuch's is not atomic and a part of a bitmask with other flags. Though, the only other flag that's not set during init is drain_next, so it's not as bad for sane architectures. Signed-off-by: Pavel Begunkov Fixes: 0f2122045b946 ("io_uring: don't rely on weak ->files references") Signed-off-by: Jens Axboe --- fs/io_uring.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 2d07d35e7262a..0d0217281ccfe 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1733,10 +1733,6 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, spin_lock_irqsave(&ctx->completion_lock, flags); - /* if force is set, the ring is going away. always drop after that */ - if (force) - ctx->cq_overflow_flushed = 1; - cqe = NULL; list_for_each_entry_safe(req, tmp, &ctx->cq_overflow_list, compl.list) { if (!io_match_task(req, tsk, files)) @@ -8655,6 +8651,8 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) { mutex_lock(&ctx->uring_lock); percpu_ref_kill(&ctx->refs); + /* if force is set, the ring is going away. always drop after that */ + ctx->cq_overflow_flushed = 1; if (ctx->rings) io_cqring_overflow_flush(ctx, true, NULL, NULL); mutex_unlock(&ctx->uring_lock); -- GitLab From 9cd2be519d05ee78876d55e8e902b7125f78b74f Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 17 Dec 2020 00:24:36 +0000 Subject: [PATCH 0691/1894] io_uring: remove racy overflow list fast checks list_empty_careful() is not racy only if some conditions are met, i.e. no re-adds after del_init. io_cqring_overflow_flush() does list_move(), so it's actually racy. Remove those checks, we have ->cq_check_overflow for the fast path. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 0d0217281ccfe..fa3cf564ec065 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1724,8 +1724,6 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, LIST_HEAD(list); if (!force) { - if (list_empty_careful(&ctx->cq_overflow_list)) - return true; if ((ctx->cached_cq_tail - READ_ONCE(rings->cq.head) == rings->cq_ring_entries)) return false; @@ -6822,8 +6820,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr) /* if we have a backlog and couldn't flush it all, return BUSY */ if (test_bit(0, &ctx->sq_check_overflow)) { - if (!list_empty(&ctx->cq_overflow_list) && - !io_cqring_overflow_flush(ctx, false, NULL, NULL)) + if (!io_cqring_overflow_flush(ctx, false, NULL, NULL)) return -EBUSY; } -- GitLab From e23de15fdbd3070446b2d212373c0ae556f63d93 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 17 Dec 2020 00:24:37 +0000 Subject: [PATCH 0692/1894] io_uring: consolidate CQ nr events calculation Add a helper which calculates number of events in CQ. Handcoded version of it in io_cqring_overflow_flush() is not the clearest thing, so it makes it slightly more readable. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index fa3cf564ec065..56ce97fc9c024 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1693,6 +1693,11 @@ static inline bool io_should_trigger_evfd(struct io_ring_ctx *ctx) return io_wq_current_is_worker(); } +static inline unsigned __io_cqring_events(struct io_ring_ctx *ctx) +{ + return ctx->cached_cq_tail - READ_ONCE(ctx->rings->cq.head); +} + static void io_cqring_ev_posted(struct io_ring_ctx *ctx) { if (waitqueue_active(&ctx->wait)) @@ -1723,14 +1728,10 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, unsigned long flags; LIST_HEAD(list); - if (!force) { - if ((ctx->cached_cq_tail - READ_ONCE(rings->cq.head) == - rings->cq_ring_entries)) - return false; - } + if (!force && __io_cqring_events(ctx) == rings->cq_ring_entries) + return false; spin_lock_irqsave(&ctx->completion_lock, flags); - cqe = NULL; list_for_each_entry_safe(req, tmp, &ctx->cq_overflow_list, compl.list) { if (!io_match_task(req, tsk, files)) @@ -2314,8 +2315,6 @@ static void io_double_put_req(struct io_kiocb *req) static unsigned io_cqring_events(struct io_ring_ctx *ctx, bool noflush) { - struct io_rings *rings = ctx->rings; - if (test_bit(0, &ctx->cq_check_overflow)) { /* * noflush == true is from the waitqueue handler, just ensure @@ -2330,7 +2329,7 @@ static unsigned io_cqring_events(struct io_ring_ctx *ctx, bool noflush) /* See comment at the top of this file */ smp_rmb(); - return ctx->cached_cq_tail - READ_ONCE(rings->cq.head); + return __io_cqring_events(ctx); } static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx) -- GitLab From 09e88404f46cc32237f596c66f48a826294e08f2 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 17 Dec 2020 00:24:38 +0000 Subject: [PATCH 0693/1894] io_uring: inline io_cqring_mark_overflow() There is only one user of it and the name is misleading, get rid of it by inlining. By the way make overflow_flush's return value deduction simpler. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 56ce97fc9c024..5c9aa8b5e0775 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1708,15 +1708,6 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx) eventfd_signal(ctx->cq_ev_fd, 1); } -static void io_cqring_mark_overflow(struct io_ring_ctx *ctx) -{ - if (list_empty(&ctx->cq_overflow_list)) { - clear_bit(0, &ctx->sq_check_overflow); - clear_bit(0, &ctx->cq_check_overflow); - ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW; - } -} - /* Returns true if there are no backlogged entries after the flush */ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, struct task_struct *tsk, @@ -1726,13 +1717,13 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, struct io_kiocb *req, *tmp; struct io_uring_cqe *cqe; unsigned long flags; + bool all_flushed; LIST_HEAD(list); if (!force && __io_cqring_events(ctx) == rings->cq_ring_entries) return false; spin_lock_irqsave(&ctx->completion_lock, flags); - cqe = NULL; list_for_each_entry_safe(req, tmp, &ctx->cq_overflow_list, compl.list) { if (!io_match_task(req, tsk, files)) continue; @@ -1753,9 +1744,14 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, } } - io_commit_cqring(ctx); - io_cqring_mark_overflow(ctx); + all_flushed = list_empty(&ctx->cq_overflow_list); + if (all_flushed) { + clear_bit(0, &ctx->sq_check_overflow); + clear_bit(0, &ctx->cq_check_overflow); + ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW; + } + io_commit_cqring(ctx); spin_unlock_irqrestore(&ctx->completion_lock, flags); io_cqring_ev_posted(ctx); @@ -1765,7 +1761,7 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, io_put_req(req); } - return cqe != NULL; + return all_flushed; } static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags) -- GitLab From 89448c47b8452b67c146dc6cad6f737e004c5caf Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 17 Dec 2020 00:24:39 +0000 Subject: [PATCH 0694/1894] io_uring: limit {io|sq}poll submit locking scope We don't need to take uring_lock for SQPOLL|IOPOLL to do io_cqring_overflow_flush() when cq_overflow_list is empty, remove it from the hot path. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 5c9aa8b5e0775..8cf6f22afc5e4 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -9154,10 +9154,13 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, */ ret = 0; if (ctx->flags & IORING_SETUP_SQPOLL) { - io_ring_submit_lock(ctx, (ctx->flags & IORING_SETUP_IOPOLL)); - if (!list_empty_careful(&ctx->cq_overflow_list)) + if (!list_empty_careful(&ctx->cq_overflow_list)) { + bool needs_lock = ctx->flags & IORING_SETUP_IOPOLL; + + io_ring_submit_lock(ctx, needs_lock); io_cqring_overflow_flush(ctx, false, NULL, NULL); - io_ring_submit_unlock(ctx, (ctx->flags & IORING_SETUP_IOPOLL)); + io_ring_submit_unlock(ctx, needs_lock); + } if (flags & IORING_ENTER_SQ_WAKEUP) wake_up(&ctx->sq_data->wait); if (flags & IORING_ENTER_SQ_WAIT) -- GitLab From e7f6f893ac39c8715d959ff8d677645ef5e0f8b4 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 8 Dec 2020 10:20:15 +0100 Subject: [PATCH 0695/1894] mt76: mt76u: fix NULL pointer dereference in mt76u_status_worker Fix the following NULL pointer dereference in mt76u_status_worker that can occur if status thread runs before allocating tx queues [ 31.395373] BUG: kernel NULL pointer dereference, address: 000000000000002c [ 31.395769] #PF: supervisor read access in kernel mode [ 31.395985] #PF: error_code(0x0000) - not-present page [ 31.396178] PGD 0 P4D 0 [ 31.396277] Oops: 0000 [#1] SMP [ 31.396430] CPU: 3 PID: 337 Comm: mt76-usb-status Not tainted 5.10.0-rc1-kvm+ #49 [ 31.396703] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.13.0-3.fc33 04/01/2014 [ 31.397048] RIP: 0010:mt76u_status_worker+0x2b/0x190 [ 31.397931] RSP: 0018:ffffc9000076fe98 EFLAGS: 00010282 [ 31.398118] RAX: 0000000000000001 RBX: ffff888111203fe8 RCX: 0000000000000000 [ 31.398400] RDX: 0000000000000001 RSI: 0000000000000246 RDI: ffff888111203fe8 [ 31.398668] RBP: ffff888111201d00 R08: 000000000000038c R09: 000000000000009b [ 31.398952] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 [ 31.399235] R13: 0000000000000000 R14: 0000000000000000 R15: ffff88810c987300 [ 31.399494] FS: 0000000000000000(0000) GS:ffff88817bd80000(0000) knlGS:0000000000000000 [ 31.399767] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 31.399991] CR2: 000000000000002c CR3: 0000000103525000 CR4: 00000000000006a0 [ 31.400236] Call Trace: [ 31.400348] ? schedule+0x3e/0xa0 [ 31.400514] __mt76_worker_fn+0x71/0xa0 [ 31.400634] ? mt76_get_min_avg_rssi+0x110/0x110 [ 31.400827] kthread+0x118/0x130 [ 31.400984] ? __kthread_bind_mask+0x60/0x60 [ 31.401212] ret_from_fork+0x1f/0x30 [ 31.401353] Modules linked in: [ 31.401480] CR2: 000000000000002c [ 31.401627] ---[ end trace 8bf174505cc34851 ]--- [ 31.401798] RIP: 0010:mt76u_status_worker+0x2b/0x190 [ 31.402636] RSP: 0018:ffffc9000076fe98 EFLAGS: 00010282 [ 31.402829] RAX: 0000000000000001 RBX: ffff888111203fe8 RCX: 0000000000000000 [ 31.403118] RDX: 0000000000000001 RSI: 0000000000000246 RDI: ffff888111203fe8 [ 31.403424] RBP: ffff888111201d00 R08: 000000000000038c R09: 000000000000009b [ 31.403689] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 [ 31.403933] R13: 0000000000000000 R14: 0000000000000000 R15: ffff88810c987300 [ 31.404209] FS: 0000000000000000(0000) GS:ffff88817bd80000(0000) knlGS:0000000000000000 [ 31.404482] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 31.404726] CR2: 000000000000002c CR3: 0000000103525000 CR4: 00000000000006a0 [ 31.405294] mt76x0u: probe of 1-1:1.0 failed with error -110 [ 31.406007] usb 1-1: USB disconnect, device number 2 [ 31.840756] usb 1-1: new high-speed USB device number 3 using xhci_hcd [ 32.461295] usb 1-1: reset high-speed USB device number 3 using xhci_hcd [ 32.659932] mt76x0u 1-1:1.0: ASIC revision: 76100002 MAC revision: 76502000 [ 33.197032] mt76x0u 1-1:1.0: EEPROM ver:02 fae:01 Fixes: 9daf27e62852 ("mt76: mt76u: use dedicated thread for status work") Signed-off-by: Lorenzo Bianconi Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/cd44dc407cf3e5f27688105d4a75fb1c68e62b06.1607419147.git.lorenzo@kernel.org --- drivers/net/wireless/mediatek/mt76/usb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/usb.c b/drivers/net/wireless/mediatek/mt76/usb.c index dc850109de22d..8b08cad131c84 100644 --- a/drivers/net/wireless/mediatek/mt76/usb.c +++ b/drivers/net/wireless/mediatek/mt76/usb.c @@ -816,6 +816,8 @@ static void mt76u_status_worker(struct mt76_worker *w) for (i = 0; i < IEEE80211_NUM_ACS; i++) { q = dev->phy.q_tx[i]; + if (!q) + continue; while (q->queued > 0) { if (!q->entry[q->tail].done) -- GitLab From 4dfde294b9792dcf8615b55c58f093d544f472f0 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Mon, 14 Dec 2020 13:31:06 +0800 Subject: [PATCH 0696/1894] rtlwifi: rise completion at the last step of firmware callback request_firmware_nowait() which schedules another work is used to load firmware when USB is probing. If USB is unplugged before running the firmware work, it goes disconnect ops, and then causes use-after-free. Though we wait for completion of firmware work before freeing the hw, firmware callback rises completion too early. So I move it to the last step. usb 5-1: Direct firmware load for rtlwifi/rtl8192cufw.bin failed with error -2 rtlwifi: Loading alternative firmware rtlwifi/rtl8192cufw.bin rtlwifi: Selected firmware is not available ================================================================== BUG: KASAN: use-after-free in rtl_fw_do_work.cold+0x68/0x6a drivers/net/wireless/realtek/rtlwifi/core.c:93 Write of size 4 at addr ffff8881454cff50 by task kworker/0:6/7379 CPU: 0 PID: 7379 Comm: kworker/0:6 Not tainted 5.10.0-rc7-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: events request_firmware_work_func Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x107/0x163 lib/dump_stack.c:118 print_address_description.constprop.0.cold+0xae/0x4c8 mm/kasan/report.c:385 __kasan_report mm/kasan/report.c:545 [inline] kasan_report.cold+0x1f/0x37 mm/kasan/report.c:562 rtl_fw_do_work.cold+0x68/0x6a drivers/net/wireless/realtek/rtlwifi/core.c:93 request_firmware_work_func+0x12c/0x230 drivers/base/firmware_loader/main.c:1079 process_one_work+0x933/0x1520 kernel/workqueue.c:2272 worker_thread+0x64c/0x1120 kernel/workqueue.c:2418 kthread+0x38c/0x460 kernel/kthread.c:292 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:296 The buggy address belongs to the page: page:00000000f54435b3 refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x1454cf flags: 0x200000000000000() raw: 0200000000000000 0000000000000000 ffffea00051533c8 0000000000000000 raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8881454cfe00: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff8881454cfe80: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff >ffff8881454cff00: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ^ ffff8881454cff80: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff8881454d0000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff Reported-by: syzbot+65be4277f3c489293939@syzkaller.appspotmail.com Signed-off-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20201214053106.7748-1-pkshih@realtek.com --- drivers/net/wireless/realtek/rtlwifi/core.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c index a7259dbc953da..965bd95890459 100644 --- a/drivers/net/wireless/realtek/rtlwifi/core.c +++ b/drivers/net/wireless/realtek/rtlwifi/core.c @@ -78,7 +78,6 @@ static void rtl_fw_do_work(const struct firmware *firmware, void *context, rtl_dbg(rtlpriv, COMP_ERR, DBG_LOUD, "Firmware callback routine entered!\n"); - complete(&rtlpriv->firmware_loading_complete); if (!firmware) { if (rtlpriv->cfg->alt_fw_name) { err = request_firmware(&firmware, @@ -91,13 +90,13 @@ static void rtl_fw_do_work(const struct firmware *firmware, void *context, } pr_err("Selected firmware is not available\n"); rtlpriv->max_fw_size = 0; - return; + goto exit; } found_alt: if (firmware->size > rtlpriv->max_fw_size) { pr_err("Firmware is too big!\n"); release_firmware(firmware); - return; + goto exit; } if (!is_wow) { memcpy(rtlpriv->rtlhal.pfirmware, firmware->data, @@ -109,6 +108,9 @@ found_alt: rtlpriv->rtlhal.wowlan_fwsize = firmware->size; } release_firmware(firmware); + +exit: + complete(&rtlpriv->firmware_loading_complete); } void rtl_fw_cb(const struct firmware *firmware, void *context) -- GitLab From 6fc250887cbe14a350d472516f2e0118240c5d68 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 14 Dec 2020 21:25:23 +0100 Subject: [PATCH 0697/1894] ACPI: scan: Evaluate _DEP before adding the device Evaluate _DEP before calling acpi_add_single_object() from acpi_bus_check_add() and do that only for ACPI_BUS_TYPE_DEVICE objects. While at it, rename acpi_device_dep_initialize() to acpi_scan_check_dep(), fix up a memory allocation statement in that function, consistently treat memory allocation failures in there as intermittent errors and make some related janitorial changes in it. This change will help to avoid calling acpi_add_single_object() if there are unmet _DEP dependencies in the future, as that may cause some control methods, potentially depending on the presence of operation regions supplied by other devices, to be evaluated prematurely. Signed-off-by: Rafael J. Wysocki Reviewed-by: Hans de Goede Tested-by: Hans de Goede Reviewed-by: Mika Westerberg --- drivers/acpi/scan.c | 44 +++++++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 37317e0ed65da..e861e1af355c9 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1842,32 +1842,30 @@ static void acpi_scan_init_hotplug(struct acpi_device *adev) } } -static void acpi_device_dep_initialize(struct acpi_device *adev) +static u32 acpi_scan_check_dep(acpi_handle handle) { - struct acpi_dep_data *dep; struct acpi_handle_list dep_devices; acpi_status status; + u32 count; int i; - adev->dep_unmet = 0; - - if (!acpi_has_method(adev->handle, "_DEP")) - return; + if (!acpi_has_method(handle, "_DEP")) + return 0; - status = acpi_evaluate_reference(adev->handle, "_DEP", NULL, - &dep_devices); + status = acpi_evaluate_reference(handle, "_DEP", NULL, &dep_devices); if (ACPI_FAILURE(status)) { - dev_dbg(&adev->dev, "Failed to evaluate _DEP.\n"); - return; + acpi_handle_debug(handle, "Failed to evaluate _DEP.\n"); + return 0; } - for (i = 0; i < dep_devices.count; i++) { + for (count = 0, i = 0; i < dep_devices.count; i++) { struct acpi_device_info *info; - int skip; + struct acpi_dep_data *dep; + bool skip; status = acpi_get_object_info(dep_devices.handles[i], &info); if (ACPI_FAILURE(status)) { - dev_dbg(&adev->dev, "Error reading _DEP device info\n"); + acpi_handle_debug(handle, "Error reading _DEP device info\n"); continue; } @@ -1877,26 +1875,30 @@ static void acpi_device_dep_initialize(struct acpi_device *adev) if (skip) continue; - dep = kzalloc(sizeof(struct acpi_dep_data), GFP_KERNEL); + dep = kzalloc(sizeof(*dep), GFP_KERNEL); if (!dep) - return; + continue; + + count++; dep->supplier = dep_devices.handles[i]; - dep->consumer = adev->handle; - adev->dep_unmet++; + dep->consumer = handle; mutex_lock(&acpi_dep_list_lock); list_add_tail(&dep->node , &acpi_dep_list); mutex_unlock(&acpi_dep_list_lock); } + + return count; } static acpi_status acpi_bus_check_add(acpi_handle handle, u32 lvl_not_used, void *not_used, void **return_value) { struct acpi_device *device = NULL; - int type; + u32 dep_count = 0; unsigned long long sta; + int type; int result; acpi_bus_get_device(handle, &device); @@ -1912,12 +1914,16 @@ static acpi_status acpi_bus_check_add(acpi_handle handle, u32 lvl_not_used, return AE_OK; } + if (type == ACPI_BUS_TYPE_DEVICE) + dep_count = acpi_scan_check_dep(handle); + acpi_add_single_object(&device, handle, type, sta); if (!device) return AE_CTRL_DEPTH; + device->dep_unmet = dep_count; + acpi_scan_init_hotplug(device); - acpi_device_dep_initialize(device); out: if (!*return_value) -- GitLab From 71da201f38dfb0c3a3d33bbe3168ea9112299dde Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 14 Dec 2020 21:27:27 +0100 Subject: [PATCH 0698/1894] ACPI: scan: Defer enumeration of devices with _DEP lists In some cases ACPI control methods used during device enumeration (such as _HID or _STA) may rely on Operation Region handlers supplied by the drivers of other devices [1]: An example of this is the Acer Switch 10E SW3-016 model. The _HID method of the ACPI node for the UART attached Bluetooth, reads GPIOs to detect the installed wifi chip and update the _HID for the Bluetooth's ACPI node accordingly. The current ACPI scan code calls _HID before the GPIO controller's OpRegions are available, leading to the wrong _HID being used and Bluetooth not working. In principle, in those cases there should be a _DEP control method under the device object with OpRegion enumeration dependencies, so deferring the enumeration of devices with _DEP returning a non-empty list of suppliers of OpRegions depended on by the given device (modulo some known exceptions that don't really supply any OpRegions and are listed by _DEP for other reasons irrelevant for Linux) should at least address the first-order dependencies by allowing the OpRegion suppliers to be enumerated before their consumers. Implement the above idea by modifying acpi_bus_scan() to enumerate devices in the given scope of the ACPI namespace in two passes, where the first pass covers the devices without "significant" lists of dependencies coming from _DEP only and the second pass covers all of the devices that were not enumerated in the first pass. Take _DEP into account only for device objects with _HID, mostly in order to avoid deferring the creation of ACPI device objects that represent PCI devices and must be present during the enumeration of the PCI bus (which takes place during the processing of the ACPI device object that represents the host bridge), so that they can be properly associated with the corresponding PCI devices. Link: https://lore.kernel.org/linux-acpi/20201121203040.146252-1-hdegoede@redhat.com/ # [1] Reported-by: Hans de Goede Signed-off-by: Rafael J. Wysocki Reviewed-by: Hans de Goede Tested-by: Hans de Goede Reviewed-by: Mika Westerberg --- drivers/acpi/scan.c | 103 +++++++++++++++++++++++++++++++++----------- 1 file changed, 78 insertions(+), 25 deletions(-) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index e861e1af355c9..00f1b09f15a4a 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1635,8 +1635,6 @@ void acpi_init_device_object(struct acpi_device *device, acpi_handle handle, device_initialize(&device->dev); dev_set_uevent_suppress(&device->dev, true); acpi_init_coherency(device); - /* Assume there are unmet deps until acpi_device_dep_initialize() runs */ - device->dep_unmet = 1; } void acpi_device_add_finalize(struct acpi_device *device) @@ -1849,7 +1847,13 @@ static u32 acpi_scan_check_dep(acpi_handle handle) u32 count; int i; - if (!acpi_has_method(handle, "_DEP")) + /* + * Check for _HID here to avoid deferring the enumeration of: + * 1. PCI devices. + * 2. ACPI nodes describing USB ports. + * Still, checking for _HID catches more then just these cases ... + */ + if (!acpi_has_method(handle, "_DEP") || !acpi_has_method(handle, "_HID")) return 0; status = acpi_evaluate_reference(handle, "_DEP", NULL, &dep_devices); @@ -1892,11 +1896,24 @@ static u32 acpi_scan_check_dep(acpi_handle handle) return count; } -static acpi_status acpi_bus_check_add(acpi_handle handle, u32 lvl_not_used, - void *not_used, void **return_value) +static void acpi_scan_dep_init(struct acpi_device *adev) +{ + struct acpi_dep_data *dep; + + mutex_lock(&acpi_dep_list_lock); + + list_for_each_entry(dep, &acpi_dep_list, node) { + if (dep->consumer == adev->handle) + adev->dep_unmet++; + } + + mutex_unlock(&acpi_dep_list_lock); +} + +static acpi_status acpi_bus_check_add(acpi_handle handle, bool check_dep, + struct acpi_device **adev_p) { struct acpi_device *device = NULL; - u32 dep_count = 0; unsigned long long sta; int type; int result; @@ -1914,24 +1931,40 @@ static acpi_status acpi_bus_check_add(acpi_handle handle, u32 lvl_not_used, return AE_OK; } - if (type == ACPI_BUS_TYPE_DEVICE) - dep_count = acpi_scan_check_dep(handle); + if (type == ACPI_BUS_TYPE_DEVICE && check_dep) { + u32 count = acpi_scan_check_dep(handle); + /* Bail out if the number of recorded dependencies is not 0. */ + if (count > 0) + return AE_CTRL_DEPTH; + } acpi_add_single_object(&device, handle, type, sta); if (!device) return AE_CTRL_DEPTH; - device->dep_unmet = dep_count; - acpi_scan_init_hotplug(device); + if (!check_dep) + acpi_scan_dep_init(device); - out: - if (!*return_value) - *return_value = device; +out: + if (!*adev_p) + *adev_p = device; return AE_OK; } +static acpi_status acpi_bus_check_add_1(acpi_handle handle, u32 lvl_not_used, + void *not_used, void **ret_p) +{ + return acpi_bus_check_add(handle, true, (struct acpi_device **)ret_p); +} + +static acpi_status acpi_bus_check_add_2(acpi_handle handle, u32 lvl_not_used, + void *not_used, void **ret_p) +{ + return acpi_bus_check_add(handle, false, (struct acpi_device **)ret_p); +} + static void acpi_default_enumeration(struct acpi_device *device) { /* @@ -1999,12 +2032,16 @@ static int acpi_scan_attach_handler(struct acpi_device *device) return ret; } -static void acpi_bus_attach(struct acpi_device *device) +static void acpi_bus_attach(struct acpi_device *device, bool first_pass) { struct acpi_device *child; + bool skip = !first_pass && device->flags.visited; acpi_handle ejd; int ret; + if (skip) + goto ok; + if (ACPI_SUCCESS(acpi_bus_get_ejd(device->handle, &ejd))) register_dock_dependent_device(device, ejd); @@ -2051,9 +2088,9 @@ static void acpi_bus_attach(struct acpi_device *device) ok: list_for_each_entry(child, &device->children, node) - acpi_bus_attach(child); + acpi_bus_attach(child, first_pass); - if (device->handler && device->handler->hotplug.notify_online) + if (!skip && device->handler && device->handler->hotplug.notify_online) device->handler->hotplug.notify_online(device); } @@ -2071,7 +2108,8 @@ void acpi_walk_dep_device_list(acpi_handle handle) adev->dep_unmet--; if (!adev->dep_unmet) - acpi_bus_attach(adev); + acpi_bus_attach(adev, true); + list_del(&dep->node); kfree(dep); } @@ -2096,17 +2134,32 @@ EXPORT_SYMBOL_GPL(acpi_walk_dep_device_list); */ int acpi_bus_scan(acpi_handle handle) { - void *device = NULL; + struct acpi_device *device = NULL; + + /* Pass 1: Avoid enumerating devices with missing dependencies. */ - if (ACPI_SUCCESS(acpi_bus_check_add(handle, 0, NULL, &device))) + if (ACPI_SUCCESS(acpi_bus_check_add(handle, true, &device))) acpi_walk_namespace(ACPI_TYPE_ANY, handle, ACPI_UINT32_MAX, - acpi_bus_check_add, NULL, NULL, &device); + acpi_bus_check_add_1, NULL, NULL, + (void **)&device); - if (device) { - acpi_bus_attach(device); - return 0; - } - return -ENODEV; + if (!device) + return -ENODEV; + + acpi_bus_attach(device, true); + + /* Pass 2: Enumerate all of the remaining devices. */ + + device = NULL; + + if (ACPI_SUCCESS(acpi_bus_check_add(handle, false, &device))) + acpi_walk_namespace(ACPI_TYPE_ANY, handle, ACPI_UINT32_MAX, + acpi_bus_check_add_2, NULL, NULL, + (void **)&device); + + acpi_bus_attach(device, false); + + return 0; } EXPORT_SYMBOL(acpi_bus_scan); -- GitLab From 0de7fb7c8687048299305529d17f6a1e98ae658c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 14 Dec 2020 21:32:44 +0100 Subject: [PATCH 0699/1894] ACPI: scan: Avoid unnecessary second pass in acpi_bus_scan() If there are no devices whose enumeration has been deferred after the first pass in acpi_bus_scan(), the second pass is not necssary, so avoid it with the help of a new static variable. Signed-off-by: Rafael J. Wysocki Reviewed-by: Hans de Goede Tested-by: Hans de Goede Reviewed-by: Mika Westerberg --- drivers/acpi/scan.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 00f1b09f15a4a..28713741d6ee3 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1910,6 +1910,8 @@ static void acpi_scan_dep_init(struct acpi_device *adev) mutex_unlock(&acpi_dep_list_lock); } +static bool acpi_bus_scan_second_pass; + static acpi_status acpi_bus_check_add(acpi_handle handle, bool check_dep, struct acpi_device **adev_p) { @@ -1934,8 +1936,10 @@ static acpi_status acpi_bus_check_add(acpi_handle handle, bool check_dep, if (type == ACPI_BUS_TYPE_DEVICE && check_dep) { u32 count = acpi_scan_check_dep(handle); /* Bail out if the number of recorded dependencies is not 0. */ - if (count > 0) + if (count > 0) { + acpi_bus_scan_second_pass = true; return AE_CTRL_DEPTH; + } } acpi_add_single_object(&device, handle, type, sta); @@ -2136,6 +2140,8 @@ int acpi_bus_scan(acpi_handle handle) { struct acpi_device *device = NULL; + acpi_bus_scan_second_pass = false; + /* Pass 1: Avoid enumerating devices with missing dependencies. */ if (ACPI_SUCCESS(acpi_bus_check_add(handle, true, &device))) @@ -2148,6 +2154,9 @@ int acpi_bus_scan(acpi_handle handle) acpi_bus_attach(device, true); + if (!acpi_bus_scan_second_pass) + return 0; + /* Pass 2: Enumerate all of the remaining devices. */ device = NULL; -- GitLab From 9272e97ae9e9b95e0805c690404a0df9fb03055f Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 15 Dec 2020 12:26:02 +0100 Subject: [PATCH 0700/1894] ACPI: scan: Add Intel Baytrail Mailbox Device to acpi_ignore_dep_ids Linux does not have a driver for / does not use the "Intel Baytrail Mailbox Device" (ACIP HID INT33BD). Add it to the acpi_ignore_dep_ids list, so that we do not defer probing ACPI devices which depend on another ACPI device with this HID. Specifically this makes us not defer the probing of the GPO1 ACPI device / GPIO controller on the Acer Switch 10E SW3-016. On this tablet model the _HID method of the ACPI node for the UART attached Bluetooth, reads GPIOs to detect the installed wifi chip and updates the reported _HID for the Bluetooth's ACPI node accordingly. For the Bluetooth's ACPI node to report the correct _HID the GPO1 device must be probed and attached during the first scan pass. Adding the "INT33BD" HID to the acpi_ignore_dep_ids list makes this all work. Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- drivers/acpi/scan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 28713741d6ee3..05814d188c7d2 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -752,6 +752,7 @@ static bool acpi_info_matches_ids(struct acpi_device_info *info, /* List of HIDs for which we ignore matching ACPI devices, when checking _DEP lists. */ static const char * const acpi_ignore_dep_ids[] = { "PNP0D80", /* Windows-compatible System Power Management Controller */ + "INT33BD", /* Intel Baytrail Mailbox Device */ NULL }; -- GitLab From a84dfb3d55934253de6aed38ad75990278a2d21e Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Thu, 17 Dec 2020 16:08:34 +0100 Subject: [PATCH 0701/1894] ASoC: meson: axg-tdmin: fix axg skew offset The signal captured on from tdm decoder of the AXG SoC is incorrect. It appears amplified. The skew offset of the decoder is wrong. Setting the skew offset to 3, like the g12 and sm1 SoCs, solves and gives correct data. Fixes: 13a22e6a98f8 ("ASoC: meson: add tdm input driver") Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20201217150834.3247526-1-jbrunet@baylibre.com Signed-off-by: Mark Brown --- sound/soc/meson/axg-tdmin.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/sound/soc/meson/axg-tdmin.c b/sound/soc/meson/axg-tdmin.c index 88ed95ae886bb..b4faf9d5c1aad 100644 --- a/sound/soc/meson/axg-tdmin.c +++ b/sound/soc/meson/axg-tdmin.c @@ -224,15 +224,6 @@ static const struct axg_tdm_formatter_ops axg_tdmin_ops = { }; static const struct axg_tdm_formatter_driver axg_tdmin_drv = { - .component_drv = &axg_tdmin_component_drv, - .regmap_cfg = &axg_tdmin_regmap_cfg, - .ops = &axg_tdmin_ops, - .quirks = &(const struct axg_tdm_formatter_hw) { - .skew_offset = 2, - }, -}; - -static const struct axg_tdm_formatter_driver g12a_tdmin_drv = { .component_drv = &axg_tdmin_component_drv, .regmap_cfg = &axg_tdmin_regmap_cfg, .ops = &axg_tdmin_ops, @@ -247,10 +238,10 @@ static const struct of_device_id axg_tdmin_of_match[] = { .data = &axg_tdmin_drv, }, { .compatible = "amlogic,g12a-tdmin", - .data = &g12a_tdmin_drv, + .data = &axg_tdmin_drv, }, { .compatible = "amlogic,sm1-tdmin", - .data = &g12a_tdmin_drv, + .data = &axg_tdmin_drv, }, {} }; MODULE_DEVICE_TABLE(of, axg_tdmin_of_match); -- GitLab From 671ee4db952449acde126965bf76817a3159040d Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Thu, 17 Dec 2020 16:08:12 +0100 Subject: [PATCH 0702/1894] ASoC: meson: axg-tdm-interface: fix loopback When the axg-tdm-interface was introduced, the backend DAI was marked as an endpoint when DPCM was walking the DAPM graph to find a its BE. It is no longer the case since this commit 8dd26dff00c0 ("ASoC: dapm: Fix handling of custom_stop_condition on DAPM graph walks") Because of this, when DPCM finds a BE it does everything it needs on the DAIs but it won't power up the widgets between the FE and the BE if there is no actual endpoint after the BE. On meson-axg HWs, the loopback is a special DAI of the tdm-interface BE. It is only linked to the dummy codec since there no actual HW after it. >From the DAPM perspective, the DAI has no endpoint. Because of this, the TDM decoder, which is a widget between the FE and BE is not powered up. >From the user perspective, everything seems fine but no data is produced. Connecting the Loopback DAI to a dummy DAPM endpoint solves the problem. Fixes: 8dd26dff00c0 ("ASoC: dapm: Fix handling of custom_stop_condition on DAPM graph walks") Cc: Charles Keepax Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20201217150812.3247405-1-jbrunet@baylibre.com Signed-off-by: Mark Brown --- sound/soc/meson/axg-tdm-interface.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/sound/soc/meson/axg-tdm-interface.c b/sound/soc/meson/axg-tdm-interface.c index c8664ab80d45a..87cac440b3693 100644 --- a/sound/soc/meson/axg-tdm-interface.c +++ b/sound/soc/meson/axg-tdm-interface.c @@ -467,8 +467,20 @@ static int axg_tdm_iface_set_bias_level(struct snd_soc_component *component, return ret; } +static const struct snd_soc_dapm_widget axg_tdm_iface_dapm_widgets[] = { + SND_SOC_DAPM_SIGGEN("Playback Signal"), +}; + +static const struct snd_soc_dapm_route axg_tdm_iface_dapm_routes[] = { + { "Loopback", NULL, "Playback Signal" }, +}; + static const struct snd_soc_component_driver axg_tdm_iface_component_drv = { - .set_bias_level = axg_tdm_iface_set_bias_level, + .dapm_widgets = axg_tdm_iface_dapm_widgets, + .num_dapm_widgets = ARRAY_SIZE(axg_tdm_iface_dapm_widgets), + .dapm_routes = axg_tdm_iface_dapm_routes, + .num_dapm_routes = ARRAY_SIZE(axg_tdm_iface_dapm_routes), + .set_bias_level = axg_tdm_iface_set_bias_level, }; static const struct of_device_id axg_tdm_iface_of_match[] = { -- GitLab From 09d59c2f3465fb01e65a0c96698697b026ea8e79 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 4 Dec 2020 00:08:36 +0100 Subject: [PATCH 0703/1894] tools build: Add missing libcap to test-all.bin target We're missing -lcap in test-all.bin target, so in case it's the only library missing (if more are missing test-all.bin fails anyway), we will falsely claim that we detected it and fail build, like: $ make ... Auto-detecting system features: ... dwarf: [ on ] ... dwarf_getlocations: [ on ] ... glibc: [ on ] ... libbfd: [ on ] ... libbfd-buildid: [ on ] ... libcap: [ on ] ... libelf: [ on ] ... libnuma: [ on ] ... numa_num_possible_cpus: [ on ] ... libperl: [ on ] ... libpython: [ on ] ... libcrypto: [ on ] ... libunwind: [ on ] ... libdw-dwarf-unwind: [ on ] ... zlib: [ on ] ... lzma: [ on ] ... get_cpuid: [ on ] ... bpf: [ on ] ... libaio: [ on ] ... libzstd: [ on ] ... disassembler-four-args: [ on ] ... CC builtin-ftrace.o In file included from builtin-ftrace.c:29: util/cap.h:11:10: fatal error: sys/capability.h: No such file or directory 11 | #include | ^~~~~~~~~~~~~~~~~~ compilation terminated. Fixes: 74d5f3d06f707eb5 ("tools build: Add capability-related feature detection") Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ian Rogers Cc: Igor Lubashev Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201203230836.3751981-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/feature/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index cdde783f3018b..89ba522e377dc 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -90,7 +90,7 @@ __BUILDXX = $(CXX) $(CXXFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$( ############################### $(OUTPUT)test-all.bin: - $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -I/usr/include/slang -lslang $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma -lzstd + $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -I/usr/include/slang -lslang $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma -lzstd -lcap $(OUTPUT)test-hello.bin: $(BUILD) -- GitLab From 2eb5dd418034ecea2f7031e3d33f2991a878b148 Mon Sep 17 00:00:00 2001 From: Zheng Zengkai Date: Fri, 3 Jul 2020 17:33:44 +0800 Subject: [PATCH 0704/1894] =?UTF-8?q?perf=20record:=20Fix=20memory=20leak?= =?UTF-8?q?=20when=20using=20'--user-regs=3D=3F'=20to=20list=20registers?= When using 'perf record's option '-I' or '--user-regs=' along with argument '?' to list available register names, memory of variable 'os' allocated by strdup() needs to be released before __parse_regs() returns, otherwise memory leak will occur. Fixes: bcc84ec65ad1 ("perf record: Add ability to name registers to record") Signed-off-by: Zheng Zengkai Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Li Bin Cc: Mark Rutland Cc: Namhyung Kim Link: https://lore.kernel.org/r/20200703093344.189450-1-zhengzengkai@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-regs-options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/parse-regs-options.c b/tools/perf/util/parse-regs-options.c index e687497b3aac0..a4a100425b3a2 100644 --- a/tools/perf/util/parse-regs-options.c +++ b/tools/perf/util/parse-regs-options.c @@ -54,7 +54,7 @@ __parse_regs(const struct option *opt, const char *str, int unset, bool intr) #endif fputc('\n', stderr); /* just printing available regs */ - return -1; + goto error; } #ifdef HAVE_PERF_REGS_SUPPORT for (r = sample_reg_masks; r->name; r++) { -- GitLab From bf53fc6b5f415cddc7118091cb8fd6a211b2320d Mon Sep 17 00:00:00 2001 From: Jan Kratochvil Date: Fri, 4 Dec 2020 09:17:02 -0300 Subject: [PATCH 0705/1894] perf unwind: Fix separate debug info files when using elfutils' libdw's unwinder elfutils needs to be provided main binary and separate debug info file respectively. Providing separate debug info file instead of the main binary is not sufficient. One needs to try both supplied filename and its possible cache by its build-id depending on the use case. Signed-off-by: Jan Kratochvil Tested-by: Jiri Olsa Cc: Adrian Hunter Cc: David Ahern Cc: Ian Rogers Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/unwind-libdw.c | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index 7a3dbc259cecc..0ada907c60d49 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -20,10 +20,24 @@ static char *debuginfo_path; +static int __find_debuginfo(Dwfl_Module *mod __maybe_unused, void **userdata, + const char *modname __maybe_unused, Dwarf_Addr base __maybe_unused, + const char *file_name, const char *debuglink_file __maybe_unused, + GElf_Word debuglink_crc __maybe_unused, char **debuginfo_file_name) +{ + const struct dso *dso = *userdata; + + assert(dso); + if (dso->symsrc_filename && strcmp (file_name, dso->symsrc_filename)) + *debuginfo_file_name = strdup(dso->symsrc_filename); + return -1; +} + static const Dwfl_Callbacks offline_callbacks = { - .find_debuginfo = dwfl_standard_find_debuginfo, + .find_debuginfo = __find_debuginfo, .debuginfo_path = &debuginfo_path, .section_address = dwfl_offline_section_address, + // .find_elf is not set as we use dwfl_report_elf() instead. }; static int __report_module(struct addr_location *al, u64 ip, @@ -46,16 +60,24 @@ static int __report_module(struct addr_location *al, u64 ip, mod = dwfl_addrmodule(ui->dwfl, ip); if (mod) { Dwarf_Addr s; + void **userdatap; - dwfl_module_info(mod, NULL, &s, NULL, NULL, NULL, NULL, NULL); + dwfl_module_info(mod, &userdatap, &s, NULL, NULL, NULL, NULL, NULL); + *userdatap = dso; if (s != al->map->start - al->map->pgoff) mod = 0; } if (!mod) - mod = dwfl_report_elf(ui->dwfl, dso->short_name, - (dso->symsrc_filename ? dso->symsrc_filename : dso->long_name), -1, al->map->start - al->map->pgoff, - false); + mod = dwfl_report_elf(ui->dwfl, dso->short_name, dso->long_name, -1, + al->map->start - al->map->pgoff, false); + if (!mod) { + char filename[PATH_MAX]; + + if (dso__build_id_filename(dso, filename, sizeof(filename), false)) + mod = dwfl_report_elf(ui->dwfl, dso->short_name, filename, -1, + al->map->start - al->map->pgoff, false); + } return mod && dwfl_addrmodule(ui->dwfl, ip) == mod ? 0 : -1; } -- GitLab From 47d982202f8cfaac6f208c9109fa15cb6a0181f7 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 30 Nov 2020 09:27:52 -0800 Subject: [PATCH 0706/1894] tools headers UAPI: Update tools's copy of linux/perf_event.h To get the changes in: commit 8d97e71811aa ("perf/core: Add PERF_SAMPLE_DATA_PAGE_SIZE") commit 995f088efebe ("perf/core: Add support for PERF_SAMPLE_CODE_PAGE_SIZE") This silences this perf tools build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/perf_event.h' differs from latest version at 'include/uapi/linux/perf_event.h' diff -u tools/include/uapi/linux/perf_event.h include/uapi/linux/perf_event.h Signed-off-by: Kan Liang Acked-by: Namhyung Kim Cc: Andi Kleen Cc: Jiri Olsa Cc: Mark Rutland Cc: Michael Ellerman Cc: Stephane Eranian Cc: Will Deacon Link: http://lore.kernel.org/lkml/20201130172803.2676-2-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/perf_event.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index b95d3c485d27e..b15e3447cd9fe 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -143,8 +143,10 @@ enum perf_event_sample_format { PERF_SAMPLE_PHYS_ADDR = 1U << 19, PERF_SAMPLE_AUX = 1U << 20, PERF_SAMPLE_CGROUP = 1U << 21, + PERF_SAMPLE_DATA_PAGE_SIZE = 1U << 22, + PERF_SAMPLE_CODE_PAGE_SIZE = 1U << 23, - PERF_SAMPLE_MAX = 1U << 22, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 24, /* non-ABI */ __PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */ }; @@ -896,6 +898,8 @@ enum perf_event_type { * { u64 phys_addr;} && PERF_SAMPLE_PHYS_ADDR * { u64 size; * char data[size]; } && PERF_SAMPLE_AUX + * { u64 data_page_size;} && PERF_SAMPLE_DATA_PAGE_SIZE + * { u64 code_page_size;} && PERF_SAMPLE_CODE_PAGE_SIZE * }; */ PERF_RECORD_SAMPLE = 9, -- GitLab From 542b88fd12769bf5be307b11ca0f94a6140bba82 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 30 Nov 2020 09:27:53 -0800 Subject: [PATCH 0707/1894] perf record: Support new sample type for data page size Support new sample type PERF_SAMPLE_DATA_PAGE_SIZE for page size. Add new option --data-page-size to record sample data page size. Signed-off-by: Kan Liang Acked-by: Namhyung Kim Cc: Andi Kleen Cc: Jiri Olsa Cc: Mark Rutland Cc: Michael Ellerman Cc: Stephane Eranian Cc: Will Deacon Link: http://lore.kernel.org/lkml/20201130172803.2676-3-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 3 +++ tools/perf/builtin-record.c | 2 ++ tools/perf/util/event.h | 1 + tools/perf/util/evsel.c | 9 +++++++++ tools/perf/util/perf_event_attr_fprintf.c | 2 +- tools/perf/util/record.h | 1 + tools/perf/util/synthetic-events.c | 8 ++++++++ 7 files changed, 25 insertions(+), 1 deletion(-) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 768888b9326a7..2d30e525a6003 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -293,6 +293,9 @@ OPTIONS --phys-data:: Record the sample physical addresses. +--data-page-size:: + Record the sampled data address data page size. + -T:: --timestamp:: Record the sample timestamps. Use it with 'perf report -D' to see the diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index d832c108a1ca2..fd39116506123 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -2472,6 +2472,8 @@ static struct option __record_options[] = { OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"), OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr, "Record the sample physical addresses"), + OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size, + "Record the sampled data address data page size"), OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"), OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time, &record.opts.sample_time_set, diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index b828b99176f40..448ac30c2fc45 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -135,6 +135,7 @@ struct perf_sample { u32 raw_size; u64 data_src; u64 phys_addr; + u64 data_page_size; u64 cgroup; u32 flags; u16 insn_len; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 3dd0eae9810db..5e6085c3fc761 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1188,6 +1188,9 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, evsel__set_sample_bit(evsel, CGROUP); } + if (opts->sample_data_page_size) + evsel__set_sample_bit(evsel, DATA_PAGE_SIZE); + if (opts->record_switch_events) attr->context_switch = track; @@ -2355,6 +2358,12 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, array++; } + data->data_page_size = 0; + if (type & PERF_SAMPLE_DATA_PAGE_SIZE) { + data->data_page_size = *array; + array++; + } + if (type & PERF_SAMPLE_AUX) { OVERFLOW_CHECK_u64(array); sz = *array++; diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c index e67a227c0ce7e..fb0bb66844384 100644 --- a/tools/perf/util/perf_event_attr_fprintf.c +++ b/tools/perf/util/perf_event_attr_fprintf.c @@ -35,7 +35,7 @@ static void __p_sample_type(char *buf, size_t size, u64 value) bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER), bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC), bit_name(WEIGHT), bit_name(PHYS_ADDR), bit_name(AUX), - bit_name(CGROUP), + bit_name(CGROUP), bit_name(DATA_PAGE_SIZE), { .name = NULL, } }; #undef bit_name diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h index 266760ac9143f..694b351dcd278 100644 --- a/tools/perf/util/record.h +++ b/tools/perf/util/record.h @@ -22,6 +22,7 @@ struct record_opts { bool raw_samples; bool sample_address; bool sample_phys_addr; + bool sample_data_page_size; bool sample_weight; bool sample_time; bool sample_time_set; diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 297987c6960b5..2947e3f3c6d9d 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -1409,6 +1409,9 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, if (type & PERF_SAMPLE_CGROUP) result += sizeof(u64); + if (type & PERF_SAMPLE_DATA_PAGE_SIZE) + result += sizeof(u64); + if (type & PERF_SAMPLE_AUX) { result += sizeof(u64); result += sample->aux_sample.size; @@ -1588,6 +1591,11 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo array++; } + if (type & PERF_SAMPLE_DATA_PAGE_SIZE) { + *array = sample->data_page_size; + array++; + } + if (type & PERF_SAMPLE_AUX) { sz = sample->aux_sample.size; *array++ = sz; -- GitLab From 456ef4c11c06f0b8c53acaf796d77d2033f079f2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 7 Dec 2020 14:04:05 -0300 Subject: [PATCH 0708/1894] perf evsel: Emit warning about kernel not supporting the data page size sample_type bit Before we had this unhelpful message: $ perf record --data-page-size sleep 1 Error: The sys_perf_event_open() syscall returned with 22 (Invalid argument) for event (cycles:u). /bin/dmesg | grep -i perf may provide additional information. $ Add support to the perf_missing_features variable to remember what caused evsel__open() to fail and then use that information in evsel__open_strerror(). $ perf record --data-page-size sleep 1 Error: Asking for the data page size isn't supported by this kernel. $ Cc: Kan Liang Cc: Namhyung Kim Cc: Andi Kleen Cc: Jiri Olsa Cc: Mark Rutland Cc: Michael Ellerman Cc: Stephane Eranian Cc: Will Deacon Link: http://lore.kernel.org/lkml/20201207170759.GB129853@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 9 ++++++++- tools/perf/util/evsel.h | 1 + 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 5e6085c3fc761..c26ea82220bd8 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1873,7 +1873,12 @@ try_fallback: * Must probe features in the order they were added to the * perf_event_attr interface. */ - if (!perf_missing_features.cgroup && evsel->core.attr.cgroup) { + if (!perf_missing_features.data_page_size && + (evsel->core.attr.sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)) { + perf_missing_features.data_page_size = true; + pr_debug2_peo("Kernel has no PERF_SAMPLE_DATA_PAGE_SIZE support, bailing out\n"); + goto out_close; + } else if (!perf_missing_features.cgroup && evsel->core.attr.cgroup) { perf_missing_features.cgroup = true; pr_debug2_peo("Kernel has no cgroup sampling support, bailing out\n"); goto out_close; @@ -2673,6 +2678,8 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target, "We found oprofile daemon running, please stop it and try again."); break; case EINVAL: + if (evsel->core.attr.sample_type & PERF_SAMPLE_DATA_PAGE_SIZE && perf_missing_features.data_page_size) + return scnprintf(msg, size, "Asking for the data page size isn't supported by this kernel."); if (evsel->core.attr.write_backward && perf_missing_features.write_backward) return scnprintf(msg, size, "Reading from overwrite event is not supported by this kernel."); if (perf_missing_features.clockid) diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 79a860d8e3eef..cd1d8dd431997 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -144,6 +144,7 @@ struct perf_missing_features { bool aux_output; bool branch_hw_idx; bool cgroup; + bool data_page_size; }; extern struct perf_missing_features perf_missing_features; -- GitLab From 4853f1caa43ea41a544c50a7cefc42e147aafeda Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 4 Dec 2020 19:10:07 +0800 Subject: [PATCH 0709/1894] perf jevents: Add support for an extra directory level Currently only upto a level 2 directory is supported, in form vendor/platform. Add support for a further level, to support vendor/platform sub-directories in future, which will be vendor/platform/cpu and vendor/platform/sys. Signed-off-by: John Garry Acked-by: Kajol Jain Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Jiri Olsa Cc: Joakim Zhang Cc: Kan Liang Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linuxarm@huawei.com Link: http://lore.kernel.org/lkml/1607080216-36968-2-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/jevents.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 72cfa3b5046d3..9022216b1253e 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -978,15 +978,20 @@ static int process_one_file(const char *fpath, const struct stat *sb, int level = ftwbuf->level; int err = 0; - if (level == 2 && is_dir) { + if (level >= 2 && is_dir) { + int count = 0; /* * For level 2 directory, bname will include parent name, * like vendor/platform. So search back from platform dir * to find this. + * Something similar for level 3 directory, but we're a PMU + * category folder, like vendor/platform/cpu. */ bname = (char *) fpath + ftwbuf->base - 2; for (;;) { if (*bname == '/') + count++; + if (count == level - 1) break; bname--; } @@ -999,13 +1004,13 @@ static int process_one_file(const char *fpath, const struct stat *sb, level, sb->st_size, bname, fpath); /* base dir or too deep */ - if (level == 0 || level > 3) + if (level == 0 || level > 4) return 0; /* model directory, reset topic */ if ((level == 1 && is_dir && is_leaf_dir(fpath)) || - (level == 2 && is_dir)) { + (level >= 2 && is_dir && is_leaf_dir(fpath))) { if (close_table) print_events_table_suffix(eventsfp); -- GitLab From 4689f56796f87abee190d8a959dd318e006c5b5a Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 4 Dec 2020 19:10:08 +0800 Subject: [PATCH 0710/1894] perf jevents: Add support for system events tables Process the JSONs to find support for "system" events, which are not tied to a specific CPUID. A "COMPAT" property is now used to match against the namespace ID from the kernel PMU driver. The generated pmu-events.c will now have 2 tables: a. CPU events, as before. b. New pmu_sys_event_tables[] table, which will have events matched to specific SoCs. It will look like this: struct pmu_event pme_hisilicon_hip09_sys[] = { { .name = "cycles", .compat = "0x00030736", .event = "event=0", .desc = "Clock cycles", .topic = "smmu v3 pmcg", .long_desc = "Clock cycles", }, { .name = "smmuv3_pmcg.l1_tlb", .compat = "0x00030736", .event = "event=0x8a", .desc = "SMMUv3 PMCG l1_tlb. Unit: smmuv3_pmcg ", .topic = "smmu v3 pmcg", .long_desc = "SMMUv3 PMCG l1_tlb", .pmu = "smmuv3_pmcg", }, ... }; struct pmu_event pme_arm_cortex_a53[] = { { .name = "ext_mem_req", .event = "event=0xc0", .desc = "External memory request", .topic = "memory", }, { .name = "ext_mem_req_nc", .event = "event=0xc1", .desc = "Non-cacheable external memory request", .topic = "memory", }, ... }; struct pmu_event pme_hisilicon_hip09_cpu[] = { { .name = "l2d_cache_refill_wr", .event = "event=0x53", .desc = "L2D cache refill, write", .topic = "core imp def", .long_desc = "Attributable Level 2 data cache refill, write", }, ... }; struct pmu_events_map pmu_events_map[] = { { .cpuid = "0x00000000410fd030", .version = "v1", .type = "core", .table = pme_arm_cortex_a53 }, { .cpuid = "0x00000000480fd010", .version = "v1", .type = "core", .table = pme_hisilicon_hip09_cpu }, { .table = 0 }, }; struct pmu_event pme_hisilicon_hip09_cpu[] = { { .name = "uncore_hisi_l3c.rd_cpipe", .event = "event=0", .desc = "Total read accesses. Unit: hisi_sccl,l3c ", .topic = "uncore l3c", .long_desc = "Total read accesses", .pmu = "hisi_sccl,l3c", }, { .name = "uncore_hisi_l3c.wr_cpipe", .event = "event=0x1", .desc = "Total write accesses. Unit: hisi_sccl,l3c ", .topic = "uncore l3c", .long_desc = "Total write accesses", .pmu = "hisi_sccl,l3c", }, ... }; struct pmu_sys_events pmu_sys_event_tables[] = { { .table = pme_hisilicon_hip09_sys, }, ... }; Committer notes: Added the fix for architectures without PMU events, provided by John after I reported the build failing in such systems. Link: https://lore.kernel.org/lkml/650baaf2-36b6-a9e2-ff49-963ef864c1f3@huawei.com/ Signed-off-by: John Garry Acked-by: Kajol Jain Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Jiri Olsa Cc: Joakim Zhang Cc: Kan Liang Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linuxarm@huawei.com Link: http://lore.kernel.org/lkml/1607080216-36968-3-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/jevents.c | 86 +++++++++++++++++++++++++++++- tools/perf/pmu-events/pmu-events.h | 6 +++ 2 files changed, 91 insertions(+), 1 deletion(-) diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 9022216b1253e..214975c819ffb 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -55,6 +55,7 @@ char *prog; struct json_event { char *name; + char *compat; char *event; char *desc; char *long_desc; @@ -82,6 +83,23 @@ enum aggr_mode_class convert(const char *aggr_mode) typedef int (*func)(void *data, struct json_event *je); +static LIST_HEAD(sys_event_tables); + +struct sys_event_table { + struct list_head list; + char *soc_id; +}; + +static void free_sys_event_tables(void) +{ + struct sys_event_table *et, *next; + + list_for_each_entry_safe(et, next, &sys_event_tables, list) { + free(et->soc_id); + free(et); + } +} + int eprintf(int level, int var, const char *fmt, ...) { @@ -360,6 +378,8 @@ static int print_events_table_entry(void *data, struct json_event *je) if (je->event) fprintf(outfp, "\t.event = \"%s\",\n", je->event); fprintf(outfp, "\t.desc = \"%s\",\n", je->desc); + if (je->compat) + fprintf(outfp, "\t.compat = \"%s\",\n", je->compat); fprintf(outfp, "\t.topic = \"%s\",\n", topic); if (je->long_desc && je->long_desc[0]) fprintf(outfp, "\t.long_desc = \"%s\",\n", je->long_desc); @@ -390,6 +410,7 @@ struct event_struct { struct list_head list; char *name; char *event; + char *compat; char *desc; char *long_desc; char *pmu; @@ -583,6 +604,8 @@ static int json_events(const char *fn, free(code); } else if (json_streq(map, field, "EventName")) { addfield(map, &je.name, "", "", val); + } else if (json_streq(map, field, "Compat")) { + addfield(map, &je.compat, "", "", val); } else if (json_streq(map, field, "BriefDescription")) { addfield(map, &je.desc, "", "", val); fixdesc(je.desc); @@ -683,6 +706,7 @@ free_strings: free(event); free(je.desc); free(je.name); + free(je.compat); free(je.long_desc); free(extra_desc); free(je.pmu); @@ -747,6 +771,15 @@ static char *file_name_to_table_name(char *fname) return tblname; } +static bool is_sys_dir(char *fname) +{ + size_t len = strlen(fname), len2 = strlen("/sys"); + + if (len2 > len) + return false; + return !strcmp(fname+len-len2, "/sys"); +} + static void print_mapping_table_prefix(FILE *outfp) { fprintf(outfp, "struct pmu_events_map pmu_events_map[] = {\n"); @@ -781,6 +814,33 @@ static void print_mapping_test_table(FILE *outfp) fprintf(outfp, "},\n"); } +static void print_system_event_mapping_table_prefix(FILE *outfp) +{ + fprintf(outfp, "\nstruct pmu_sys_events pmu_sys_event_tables[] = {"); +} + +static void print_system_event_mapping_table_suffix(FILE *outfp) +{ + fprintf(outfp, "\n\t{\n\t\t.table = 0\n\t},"); + fprintf(outfp, "\n};\n"); +} + +static int process_system_event_tables(FILE *outfp) +{ + struct sys_event_table *sys_event_table; + + print_system_event_mapping_table_prefix(outfp); + + list_for_each_entry(sys_event_table, &sys_event_tables, list) { + fprintf(outfp, "\n\t{\n\t\t.table = %s,\n\t},", + sys_event_table->soc_id); + } + + print_system_event_mapping_table_suffix(outfp); + + return 0; +} + static int process_mapfile(FILE *outfp, char *fpath) { int n = 16384; @@ -886,6 +946,8 @@ static void create_empty_mapping(const char *output_file) fprintf(outfp, "#include \"pmu-events/pmu-events.h\"\n"); print_mapping_table_prefix(outfp); print_mapping_table_suffix(outfp); + print_system_event_mapping_table_prefix(outfp); + print_system_event_mapping_table_suffix(outfp); fclose(outfp); } @@ -1026,6 +1088,22 @@ static int process_one_file(const char *fpath, const struct stat *sb, return -1; } + if (is_sys_dir(bname)) { + struct sys_event_table *sys_event_table; + + sys_event_table = malloc(sizeof(*sys_event_table)); + if (!sys_event_table) + return -1; + + sys_event_table->soc_id = strdup(tblname); + if (!sys_event_table->soc_id) { + free(sys_event_table); + return -1; + } + list_add_tail(&sys_event_table->list, + &sys_event_tables); + } + print_events_table_prefix(eventsfp, tblname); return 0; } @@ -1185,10 +1263,16 @@ int main(int argc, char *argv[]) } rc = process_mapfile(eventsfp, mapfile); - fclose(eventsfp); if (rc) { pr_info("%s: Error processing mapfile %s\n", prog, mapfile); /* Make build fail */ + ret = 1; + goto err_close_eventsfp; + } + + rc = process_system_event_tables(eventsfp); + fclose(eventsfp); + if (rc) { ret = 1; goto err_out; } diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h index 7da1a3743b775..d1172f6aebf1a 100644 --- a/tools/perf/pmu-events/pmu-events.h +++ b/tools/perf/pmu-events/pmu-events.h @@ -12,6 +12,7 @@ enum aggr_mode_class { */ struct pmu_event { const char *name; + const char *compat; const char *event; const char *desc; const char *topic; @@ -43,10 +44,15 @@ struct pmu_events_map { struct pmu_event *table; }; +struct pmu_sys_events { + struct pmu_event *table; +}; + /* * Global table mapping each known CPU for the architecture to its * table of PMU events. */ extern struct pmu_events_map pmu_events_map[]; +extern struct pmu_sys_events pmu_sys_event_tables[]; #endif -- GitLab From 51d548471510843e56d9f427aa6473ca0981c4a4 Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 4 Dec 2020 19:10:09 +0800 Subject: [PATCH 0711/1894] perf pmu: Add pmu_id() Add a function to read the PMU id sysfs entry. This is only done for uncore PMUs where this would possibly be relevant. Signed-off-by: John Garry Acked-by: Kajol Jain Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Jiri Olsa Cc: Joakim Zhang Cc: Kan Liang Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linuxarm@huawei.com Link: http://lore.kernel.org/lkml/1607080216-36968-4-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 18 ++++++++++++++++++ tools/perf/util/pmu.h | 1 + 2 files changed, 19 insertions(+) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index d41caeb35cf6c..cbeda45ce578f 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -597,6 +597,7 @@ static struct perf_cpu_map *__pmu_cpumask(const char *path) * Uncore PMUs have a "cpumask" file under sysfs. CPU PMUs (e.g. on arm/arm64) * may have a "cpus" file. */ +#define SYS_TEMPLATE_ID "./bus/event_source/devices/%s/identifier" #define CPUS_TEMPLATE_UNCORE "%s/bus/event_source/devices/%s/cpumask" #define CPUS_TEMPLATE_CPU "%s/bus/event_source/devices/%s/cpus" @@ -635,6 +636,21 @@ static bool pmu_is_uncore(const char *name) return file_available(path); } +static char *pmu_id(const char *name) +{ + char path[PATH_MAX], *str; + size_t len; + + snprintf(path, PATH_MAX, SYS_TEMPLATE_ID, name); + + if (sysfs__read_str(path, &str, &len) < 0) + return NULL; + + str[len - 1] = 0; /* remove line feed */ + + return str; +} + /* * PMU CORE devices have different name other than cpu in sysfs on some * platforms. @@ -847,6 +863,8 @@ static struct perf_pmu *pmu_lookup(const char *name) pmu->name = strdup(name); pmu->type = type; pmu->is_uncore = pmu_is_uncore(name); + if (pmu->is_uncore) + pmu->id = pmu_id(name); pmu->max_precise = pmu_max_precise(name); pmu_add_cpu_aliases(&aliases, pmu); diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index a64e9c9ce731a..d4366e8e79df7 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -30,6 +30,7 @@ struct perf_pmu_caps { struct perf_pmu { char *name; + char *id; __u32 type; bool selectable; bool is_uncore; -- GitLab From 4513c719c6f1ccf0c362c8dcef1f9b476f8f5c9c Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 4 Dec 2020 19:10:10 +0800 Subject: [PATCH 0712/1894] perf pmu: Add pmu_add_sys_aliases() Add pmu_add_sys_aliases() to add system PMU events aliases. For adding system PMU events, iterate through all the events for all SoC event tables in pmu_sys_event_tables[]. Matches must satisfy both: - PMU identifier matches event "compat" value - event "Unit" member must match, same as uncore event aliases matched by CPUID Signed-off-by: John Garry Acked-by: Kajol Jain Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Jiri Olsa Cc: Joakim Zhang Cc: Kan Liang Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linuxarm@huawei.com Link: http://lore.kernel.org/lkml/1607080216-36968-5-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 78 +++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/pmu.h | 2 ++ 2 files changed, 80 insertions(+) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index cbeda45ce578f..44ef28302fc77 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -812,6 +812,83 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu) pmu_add_cpu_aliases_map(head, pmu, map); } +void pmu_for_each_sys_event(pmu_sys_event_iter_fn fn, void *data) +{ + int i = 0; + + while (1) { + struct pmu_sys_events *event_table; + int j = 0; + + event_table = &pmu_sys_event_tables[i++]; + + if (!event_table->table) + break; + + while (1) { + struct pmu_event *pe = &event_table->table[j++]; + int ret; + + if (!pe->name && !pe->metric_group && !pe->metric_name) + break; + + ret = fn(pe, data); + if (ret) + break; + } + } +} + +struct pmu_sys_event_iter_data { + struct list_head *head; + struct perf_pmu *pmu; +}; + +static int pmu_add_sys_aliases_iter_fn(struct pmu_event *pe, void *data) +{ + struct pmu_sys_event_iter_data *idata = data; + struct perf_pmu *pmu = idata->pmu; + + if (!pe->name) { + if (pe->metric_group || pe->metric_name) + return 0; + return -EINVAL; + } + + if (!pe->compat || !pe->pmu) + return 0; + + if (!strcmp(pmu->id, pe->compat) && + pmu_uncore_alias_match(pe->pmu, pmu->name)) { + __perf_pmu__new_alias(idata->head, NULL, + (char *)pe->name, + (char *)pe->desc, + (char *)pe->event, + (char *)pe->long_desc, + (char *)pe->topic, + (char *)pe->unit, + (char *)pe->perpkg, + (char *)pe->metric_expr, + (char *)pe->metric_name, + (char *)pe->deprecated); + } + + return 0; +} + +static void pmu_add_sys_aliases(struct list_head *head, struct perf_pmu *pmu) +{ + struct pmu_sys_event_iter_data idata = { + .head = head, + .pmu = pmu, + }; + + if (!pmu->id) + return; + + pmu_for_each_sys_event(pmu_add_sys_aliases_iter_fn, &idata); +} + struct perf_event_attr * __weak perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused) { @@ -867,6 +944,7 @@ static struct perf_pmu *pmu_lookup(const char *name) pmu->id = pmu_id(name); pmu->max_precise = pmu_max_precise(name); pmu_add_cpu_aliases(&aliases, pmu); + pmu_add_sys_aliases(&aliases, pmu); INIT_LIST_HEAD(&pmu->format); INIT_LIST_HEAD(&pmu->aliases); diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index d4366e8e79df7..8164388478c65 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -117,6 +117,8 @@ struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu); bool pmu_uncore_alias_match(const char *pmu_name, const char *name); void perf_pmu_free_alias(struct perf_pmu_alias *alias); +typedef int (*pmu_sys_event_iter_fn)(struct pmu_event *pe, void *data); +void pmu_for_each_sys_event(pmu_sys_event_iter_fn fn, void *data); int perf_pmu__convert_scale(const char *scale, char **end, double *sval); int perf_pmu__caps_parse(struct perf_pmu *pmu); -- GitLab From 6d2783fe365fa5f571cf1416b5f5b1e352447a0e Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 4 Dec 2020 19:10:11 +0800 Subject: [PATCH 0713/1894] perf evlist: Change evlist__splice_list_tail() ordering Function find_evsel_group() expects events to be ordered such that they are grouped after their leader. Modify evlist__splice_list_tail() to guarantee this (ordering). [Should prob also change the function name] Signed-off-by: John Garry Acked-by: Kajol Jain Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Jiri Olsa Cc: Joakim Zhang Cc: Kan Liang Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linuxarm@huawei.com Link: http://lore.kernel.org/lkml/1607080216-36968-6-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 493819173a8e4..89286f148012f 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -179,11 +179,22 @@ void evlist__remove(struct evlist *evlist, struct evsel *evsel) void evlist__splice_list_tail(struct evlist *evlist, struct list_head *list) { - struct evsel *evsel, *temp; + while (!list_empty(list)) { + struct evsel *evsel, *temp, *leader = NULL; - __evlist__for_each_entry_safe(list, temp, evsel) { - list_del_init(&evsel->core.node); - evlist__add(evlist, evsel); + __evlist__for_each_entry_safe(list, temp, evsel) { + list_del_init(&evsel->core.node); + evlist__add(evlist, evsel); + leader = evsel; + break; + } + + __evlist__for_each_entry_safe(list, temp, evsel) { + if (evsel->leader == leader) { + list_del_init(&evsel->core.node); + evlist__add(evlist, evsel); + } + } } } -- GitLab From c2337d67199a1ea1c75083da5d376aced1ab2c40 Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 4 Dec 2020 19:10:12 +0800 Subject: [PATCH 0714/1894] perf metricgroup: Fix metrics using aliases covering multiple PMUs Support for metric expressions using aliases which cover multiple PMUs is broken. Consider the following test metric expression: "MetricExpr": "UNC_CBO_XSNP_RESPONSE.MISS_XCORE * UNC_CBO_XSNP_RESPONSE.MISS_EVICTION" When used on my broadwell, "perf stat" gives: unc_cbo_xsnp_response.miss_eviction -> uncore_cbox_1/umask=0x81,event=0x22/ unc_cbo_xsnp_response.miss_eviction -> uncore_cbox_0/umask=0x81,event=0x22/ unc_cbo_xsnp_response.miss_xcore -> uncore_cbox_1/umask=0x41,event=0x22/ unc_cbo_xsnp_response.miss_xcore -> uncore_cbox_0/umask=0x41,event=0x22/ Control descriptor is not initialized unc_cbo_xsnp_response.miss_eviction: 3645925 1000850523 1000850523 unc_cbo_xsnp_response.miss_xcore: 106850 1000850523 1000850523 Performance counter stats for 'system wide': 3,645,925 unc_cbo_xsnp_response.miss_eviction # 389567086250.00 test_metric_inc 106,850 unc_cbo_xsnp_response.miss_xcore 1.000883096 seconds time elapsed Notice that only the results from one PMU are included. Fix the logic of find_evsel_group() to enable events which apply to multiple PMUs, by checking if the event pmu_name matches that of the metric event. With that, "perf stat" now gives: unc_cbo_xsnp_response.miss_eviction -> uncore_cbox_1/umask=0x81,event=0x22/ unc_cbo_xsnp_response.miss_eviction -> uncore_cbox_0/umask=0x81,event=0x22/ unc_cbo_xsnp_response.miss_xcore -> uncore_cbox_1/umask=0x41,event=0x22/ unc_cbo_xsnp_response.miss_xcore -> uncore_cbox_0/umask=0x41,event=0x22/ Control descriptor is not initialized unc_cbo_xsnp_response.miss_eviction: 4237983 1000904100 1000904100 unc_cbo_xsnp_response.miss_xcore: 218643 1000904100 1000904100 unc_cbo_xsnp_response.miss_eviction: 4254148 1000902629 1000902629 unc_cbo_xsnp_response.miss_xcore: 213352 1000902629 1000902629 Performance counter stats for 'system wide': 4,237,983 unc_cbo_xsnp_response.miss_eviction # 3668558131345.00 test_metric_inc 218,643 unc_cbo_xsnp_response.miss_xcore 4,254,148 unc_cbo_xsnp_response.miss_eviction 213,352 unc_cbo_xsnp_response.miss_xcore 1.000938151 seconds time elapsed Signed-off-by: John Garry Acked-by: Kajol Jain Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Jiri Olsa Cc: Joakim Zhang Cc: Kan Liang Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linuxarm@huawei.com Link: http://lore.kernel.org/lkml/1607080216-36968-7-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 81d201c8b833d..b89160718c04b 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -279,7 +279,9 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist, * when then group is left. */ if (!has_constraint && - ev->leader != metric_events[i]->leader) + ev->leader != metric_events[i]->leader && + !strcmp(ev->leader->pmu_name, + metric_events[i]->leader->pmu_name)) break; if (!strcmp(metric_events[i]->name, ev->name)) { set_bit(ev->idx, evlist_used); -- GitLab From f6fe1e48ae185d028dfcabecb7d79036e2d89d27 Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 4 Dec 2020 19:10:13 +0800 Subject: [PATCH 0715/1894] perf metricgroup: Split up metricgroup__print() To aid supporting system event metric groups, break up the function metricgroup__print() into a part which iterates metrics and a part which actually "prints" the metric. No functional change intended. Signed-off-by: John Garry Acked-by: Kajol Jain Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Jiri Olsa Cc: Joakim Zhang Cc: Kan Liang Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linuxarm@huawei.com Link: http://lore.kernel.org/lkml/1607080216-36968-8-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 124 +++++++++++++++++++--------------- 1 file changed, 70 insertions(+), 54 deletions(-) diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index b89160718c04b..4c6a686b08ebd 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -493,6 +493,72 @@ static void metricgroup__print_strlist(struct strlist *metrics, bool raw) putchar('\n'); } +static int metricgroup__print_pmu_event(struct pmu_event *pe, + bool metricgroups, char *filter, + bool raw, bool details, + struct rblist *groups, + struct strlist *metriclist) +{ + const char *g; + char *omg, *mg; + + g = pe->metric_group; + if (!g && pe->metric_name) { + if (pe->name) + return 0; + g = "No_group"; + } + + if (!g) + return 0; + + mg = strdup(g); + + if (!mg) + return -ENOMEM; + omg = mg; + while ((g = strsep(&mg, ";")) != NULL) { + struct mep *me; + char *s; + + g = skip_spaces(g); + if (*g == 0) + g = "No_group"; + if (filter && !strstr(g, filter)) + continue; + if (raw) + s = (char *)pe->metric_name; + else { + if (asprintf(&s, "%s\n%*s%s]", + pe->metric_name, 8, "[", pe->desc) < 0) + return -1; + if (details) { + if (asprintf(&s, "%s\n%*s%s]", + s, 8, "[", pe->metric_expr) < 0) + return -1; + } + } + + if (!s) + continue; + + if (!metricgroups) { + strlist__add(metriclist, s); + } else { + me = mep_lookup(groups, g); + if (!me) + continue; + strlist__add(me->metrics, s); + } + + if (!raw) + free(s); + } + free(omg); + + return 0; +} + void metricgroup__print(bool metrics, bool metricgroups, char *filter, bool raw, bool details) { @@ -517,66 +583,16 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter, groups.node_cmp = mep_cmp; groups.node_delete = mep_delete; for (i = 0; ; i++) { - const char *g; pe = &map->table[i]; if (!pe->name && !pe->metric_group && !pe->metric_name) break; if (!pe->metric_expr) continue; - g = pe->metric_group; - if (!g && pe->metric_name) { - if (pe->name) - continue; - g = "No_group"; - } - if (g) { - char *omg; - char *mg = strdup(g); - - if (!mg) - return; - omg = mg; - while ((g = strsep(&mg, ";")) != NULL) { - struct mep *me; - char *s; - - g = skip_spaces(g); - if (*g == 0) - g = "No_group"; - if (filter && !strstr(g, filter)) - continue; - if (raw) - s = (char *)pe->metric_name; - else { - if (asprintf(&s, "%s\n%*s%s]", - pe->metric_name, 8, "[", pe->desc) < 0) - return; - - if (details) { - if (asprintf(&s, "%s\n%*s%s]", - s, 8, "[", pe->metric_expr) < 0) - return; - } - } - - if (!s) - continue; - - if (!metricgroups) { - strlist__add(metriclist, s); - } else { - me = mep_lookup(&groups, g); - if (!me) - continue; - strlist__add(me->metrics, s); - } - - if (!raw) - free(s); - } - free(omg); - } + if (metricgroup__print_pmu_event(pe, metricgroups, filter, + raw, details, &groups, + metriclist) < 0) + return; } if (!filter || !rblist__empty(&groups)) { -- GitLab From a36fadb17c27b4b5360db69acc80f5f4ad8dde7e Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 4 Dec 2020 19:10:14 +0800 Subject: [PATCH 0716/1894] perf metricgroup: Support printing metric groups for system PMUs Currently printing metricgroups for core- or uncore-based events matched by CPUID is supported. Extend this for system events. Signed-off-by: John Garry Acked-by: Kajol Jain Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Jiri Olsa Cc: Joakim Zhang Cc: Kan Liang Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linuxarm@huawei.com Link: http://lore.kernel.org/lkml/1607080216-36968-9-git-send-email-john.garry@huawei.com [ Reorder 'struct metricgroup_print_sys_idata' field to avoid alignment holes ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 64 ++++++++++++++++++++++++++++++++--- 1 file changed, 60 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 4c6a686b08ebd..6344e9627ff39 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -559,6 +559,49 @@ static int metricgroup__print_pmu_event(struct pmu_event *pe, return 0; } +struct metricgroup_print_sys_idata { + struct strlist *metriclist; + char *filter; + struct rblist *groups; + bool metricgroups; + bool raw; + bool details; +}; + +typedef int (*metricgroup_sys_event_iter_fn)(struct pmu_event *pe, void *); + +struct metricgroup_iter_data { + metricgroup_sys_event_iter_fn fn; + void *data; +}; + +static int metricgroup__sys_event_iter(struct pmu_event *pe, void *data) +{ + struct metricgroup_iter_data *d = data; + struct perf_pmu *pmu = NULL; + + if (!pe->metric_expr || !pe->compat) + return 0; + + while ((pmu = perf_pmu__scan(pmu))) { + + if (!pmu->id || strcmp(pmu->id, pe->compat)) + continue; + + return d->fn(pe, d->data); + } + + return 0; +} + +static int metricgroup__print_sys_event_iter(struct pmu_event *pe, void *data) +{ + struct metricgroup_print_sys_idata *d = data; + + return metricgroup__print_pmu_event(pe, d->metricgroups, d->filter, d->raw, + d->details, d->groups, d->metriclist); +} + void metricgroup__print(bool metrics, bool metricgroups, char *filter, bool raw, bool details) { @@ -569,9 +612,6 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter, struct rb_node *node, *next; struct strlist *metriclist = NULL; - if (!map) - return; - if (!metricgroups) { metriclist = strlist__new(NULL, NULL); if (!metriclist) @@ -582,7 +622,7 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter, groups.node_new = mep_new; groups.node_cmp = mep_cmp; groups.node_delete = mep_delete; - for (i = 0; ; i++) { + for (i = 0; map; i++) { pe = &map->table[i]; if (!pe->name && !pe->metric_group && !pe->metric_name) @@ -595,6 +635,22 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter, return; } + { + struct metricgroup_iter_data data = { + .fn = metricgroup__print_sys_event_iter, + .data = (void *) &(struct metricgroup_print_sys_idata){ + .metriclist = metriclist, + .metricgroups = metricgroups, + .filter = filter, + .raw = raw, + .details = details, + .groups = &groups, + }, + }; + + pmu_for_each_sys_event(metricgroup__sys_event_iter, &data); + } + if (!filter || !rblist__empty(&groups)) { if (metricgroups && !raw) printf("\nMetric Groups:\n\n"); -- GitLab From be335ec28efa89d6bff8f4c6ce8daba88acf2b1a Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 4 Dec 2020 19:10:15 +0800 Subject: [PATCH 0717/1894] perf metricgroup: Support adding metrics for system PMUs Currently adding metrics for core- or uncore-based events matched by CPUID is supported. Extend this for system events. Signed-off-by: John Garry Acked-by: Kajol Jain Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Jiri Olsa Cc: Joakim Zhang Cc: Kan Liang Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linuxarm@huawei.com Link: http://lore.kernel.org/lkml/1607080216-36968-10-git-send-email-john.garry@huawei.com [ Reorder 'struct metricgroup_add_iter_data' field to avoid alignment holes ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 66 +++++++++++++++++++++++++++++++---- 1 file changed, 60 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 6344e9627ff39..ee94d3e8dd654 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -415,6 +415,12 @@ static bool match_metric(const char *n, const char *list) return false; } +static bool match_pe_metric(struct pmu_event *pe, const char *metric) +{ + return match_metric(pe->metric_group, metric) || + match_metric(pe->metric_name, metric); +} + struct mep { struct rb_node nd; const char *name; @@ -757,6 +763,16 @@ int __weak arch_get_runtimeparam(struct pmu_event *pe __maybe_unused) return 1; } +struct metricgroup_add_iter_data { + struct list_head *metric_list; + const char *metric; + struct metric **m; + struct expr_ids *ids; + int *ret; + bool *has_match; + bool metric_no_group; +}; + static int __add_metric(struct list_head *metric_list, struct pmu_event *pe, bool metric_no_group, @@ -866,10 +882,11 @@ static int __add_metric(struct list_head *metric_list, return 0; } -#define map_for_each_event(__pe, __idx, __map) \ - for (__idx = 0, __pe = &__map->table[__idx]; \ - __pe->name || __pe->metric_group || __pe->metric_name; \ - __pe = &__map->table[++__idx]) +#define map_for_each_event(__pe, __idx, __map) \ + if (__map) \ + for (__idx = 0, __pe = &__map->table[__idx]; \ + __pe->name || __pe->metric_group || __pe->metric_name; \ + __pe = &__map->table[++__idx]) #define map_for_each_metric(__pe, __idx, __map, __metric) \ map_for_each_event(__pe, __idx, __map) \ @@ -1037,6 +1054,29 @@ static int add_metric(struct list_head *metric_list, return ret; } +static int metricgroup__add_metric_sys_event_iter(struct pmu_event *pe, + void *data) +{ + struct metricgroup_add_iter_data *d = data; + int ret; + + if (!match_pe_metric(pe, d->metric)) + return 0; + + ret = add_metric(d->metric_list, pe, d->metric_no_group, d->m, NULL, d->ids); + if (ret) + return ret; + + ret = resolve_metric(d->metric_no_group, + d->metric_list, NULL, d->ids); + if (ret) + return ret; + + *(d->has_match) = true; + + return *d->ret; +} + static int metricgroup__add_metric(const char *metric, bool metric_no_group, struct strbuf *events, struct list_head *metric_list, @@ -1067,6 +1107,22 @@ static int metricgroup__add_metric(const char *metric, bool metric_no_group, goto out; } + { + struct metricgroup_iter_data data = { + .fn = metricgroup__add_metric_sys_event_iter, + .data = (void *) &(struct metricgroup_add_iter_data) { + .metric_list = &list, + .metric = metric, + .metric_no_group = metric_no_group, + .m = &m, + .ids = &ids, + .has_match = &has_match, + .ret = &ret, + }, + }; + + pmu_for_each_sys_event(metricgroup__sys_event_iter, &data); + } /* End of pmu events. */ if (!has_match) { ret = -EINVAL; @@ -1193,8 +1249,6 @@ int metricgroup__parse_groups(const struct option *opt, struct evlist *perf_evlist = *(struct evlist **)opt->value; struct pmu_events_map *map = perf_pmu__find_map(NULL); - if (!map) - return 0; return parse_groups(perf_evlist, str, metric_no_group, metric_no_merge, NULL, metric_events, map); -- GitLab From e15a536521ed7f48fac268152a78e6e2f99102d2 Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Fri, 4 Dec 2020 19:10:16 +0800 Subject: [PATCH 0718/1894] perf vendor events: Add JSON metrics for imx8mm DDR Perf Add JSON metrics for imx8mm DDR Perf. Signed-off-by: Joakim Zhang Acked-by: Kajol Jain Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Jiri Olsa Cc: John Garry Cc: Kan Liang Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linuxarm@huawei.com Signed-off-by: John Garry Link: http://lore.kernel.org/lkml/1607080216-36968-11-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/arm64/freescale/imx8mm/sys/ddrc.json | 39 +++++++++++++++++++ .../arm64/freescale/imx8mm/sys/metrics.json | 18 +++++++++ tools/perf/pmu-events/jevents.c | 2 + 3 files changed, 59 insertions(+) create mode 100644 tools/perf/pmu-events/arch/arm64/freescale/imx8mm/sys/ddrc.json create mode 100644 tools/perf/pmu-events/arch/arm64/freescale/imx8mm/sys/metrics.json diff --git a/tools/perf/pmu-events/arch/arm64/freescale/imx8mm/sys/ddrc.json b/tools/perf/pmu-events/arch/arm64/freescale/imx8mm/sys/ddrc.json new file mode 100644 index 0000000000000..3b1cd708f568c --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/freescale/imx8mm/sys/ddrc.json @@ -0,0 +1,39 @@ +[ + { + "BriefDescription": "ddr cycles event", + "EventCode": "0x00", + "EventName": "imx8mm_ddr.cycles", + "Unit": "imx8_ddr", + "Compat": "i.MX8MM" + }, + { + "BriefDescription": "ddr read-cycles event", + "EventCode": "0x2a", + "EventName": "imx8mm_ddr.read_cycles", + "Unit": "imx8_ddr", + "Compat": "i.MX8MM" + }, + { + "BriefDescription": "ddr write-cycles event", + "EventCode": "0x2b", + "EventName": "imx8mm_ddr.write_cycles", + "Unit": "imx8_ddr", + "Compat": "i.MX8MM" + }, + { + "BriefDescription": "ddr read event", + "EventCode": "0x35", + "EventName": "imx8mm_ddr.read", + "Unit": "imx8_ddr", + "Compat": "i.MX8MM" + }, + { + "BriefDescription": "ddr write event", + "EventCode": "0x38", + "EventName": "imx8mm_ddr.write", + "Unit": "imx8_ddr", + "Compat": "i.MX8MM" + } +] + + diff --git a/tools/perf/pmu-events/arch/arm64/freescale/imx8mm/sys/metrics.json b/tools/perf/pmu-events/arch/arm64/freescale/imx8mm/sys/metrics.json new file mode 100644 index 0000000000000..8e553b67cae63 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/freescale/imx8mm/sys/metrics.json @@ -0,0 +1,18 @@ +[ + { + "BriefDescription": "bytes all masters read from ddr based on read-cycles event", + "MetricName": "imx8mm_ddr_read.all", + "MetricExpr": "imx8mm_ddr.read_cycles * 4 * 4", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MM" + }, + { + "BriefDescription": "bytes all masters write to ddr based on write-cycles event", + "MetricName": "imx8mm_ddr_write.all", + "MetricExpr": "imx8mm_ddr.write_cycles * 4 * 4", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MM" + } +] diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 214975c819ffb..e1f3f5c8c550c 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -281,6 +281,8 @@ static struct map { { "hisi_sccl,ddrc", "hisi_sccl,ddrc" }, { "hisi_sccl,hha", "hisi_sccl,hha" }, { "hisi_sccl,l3c", "hisi_sccl,l3c" }, + /* it's not realistic to keep adding these, we need something more scalable ... */ + { "imx8_ddr", "imx8_ddr" }, { "L3PMC", "amd_l3" }, { "DFPMC", "amd_df" }, {} -- GitLab From 03de8656c7778c5434cc2ca8e6b4699c1176c090 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 10 Dec 2020 15:13:01 +0900 Subject: [PATCH 0719/1894] perf report: Support --header-only for pipe mode The --header-only checks file header and prints the feature data. But as pipe mode doesn't have it in the header it prints almost nothing. Change it to process first few records until it founds HEADER_FEATURE. Before: $ perf record -o- true | perf report -i- --header-only # ======== # captured on : Thu Dec 10 14:34:59 2020 # header version : 1 # data offset : 0 # data size : 0 # feat offset : 0 # ======== # After: $ perf record -o- true | perf report -i- --header-only # ======== # captured on : Thu Dec 10 14:49:11 2020 # header version : 1 # data offset : 0 # data size : 0 # feat offset : 0 # ======== # # hostname : balhae # os release : 5.7.17-1xxx # perf version : 5.10.rc6.gdb0ea13cc741 # arch : x86_64 # nrcpus online : 8 # nrcpus avail : 8 # cpudesc : Intel(R) Core(TM) i7-8665U CPU @ 1.90GHz # cpuid : GenuineIntel,6,142,12 # total memory : 16158916 kB # cmdline : perf record -o- true # event : name = cycles, , id = { 81, 82, 83, 84, 85, 86, 87, 88 }, size = 120, ... # CPU_TOPOLOGY info available, use -I to display # NUMA_TOPOLOGY info available, use -I to display # pmu mappings: intel_pt = 9, intel_bts = 8, software = 1, power = 20, uprobe = 7, ... # time of first sample : 0.000000 # time of last sample : 0.000000 # sample duration : 0.000 ms # MEM_TOPOLOGY info available, use -I to display # cpu pmu capabilities: branches=32, max_precise=3, pmu_name=skylake Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20201210061302.88213-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 5efbd0602f175..2a845d6cac09f 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -226,6 +226,8 @@ static int process_feature_event(struct perf_session *session, pr_err("failed: wrong feature ID: %" PRI_lu64 "\n", event->feat.feat_id); return -1; + } else if (rep->header_only) { + session_done = 1; } /* @@ -1512,6 +1514,13 @@ repeat: perf_session__fprintf_info(session, stdout, report.show_full_info); if (report.header_only) { + if (data.is_pipe) { + /* + * we need to process first few records + * which contains PERF_RECORD_HEADER_FEATURE. + */ + perf_session__process_events(session); + } ret = 0; goto error; } -- GitLab From 96aea4daa6cb893d339d80ce14727e6421991d8b Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 10 Dec 2020 15:13:02 +0900 Subject: [PATCH 0720/1894] perf evlist: Support pipe mode display Likewise, perf evlist command should print event attributes by reading PERF_RECORD_HEADER_ATTR records. Before: $ perf record -o- true | ./perf evlist -i- (prints nothing) After: $ perf record -o- true | ./perf evlist -i- cycles:pppH Committer testing: Verbose mode also works as expected: $ perf record -o- true | perf evlist -i- cycles:uhH $ perf record -o- true | perf evlist -vi- cycles:uhH: size: 120, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ID|PERIOD, read_format: ID, disabled: 1, inherit: 1, exclude_kernel: 1, mmap: 1, comm: 1, freq: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1, ksymbol: 1, bpf_event: 1 $ Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20201210061302.88213-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-evlist.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c index 98e9928012519..4617b32c9c978 100644 --- a/tools/perf/builtin-evlist.c +++ b/tools/perf/builtin-evlist.c @@ -17,6 +17,14 @@ #include "util/data.h" #include "util/debug.h" #include +#include "util/tool.h" + +static int process_header_feature(struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + session_done = 1; + return 0; +} static int __cmd_evlist(const char *file_name, struct perf_attr_details *details) { @@ -27,12 +35,20 @@ static int __cmd_evlist(const char *file_name, struct perf_attr_details *details .mode = PERF_DATA_MODE_READ, .force = details->force, }; + struct perf_tool tool = { + /* only needed for pipe mode */ + .attr = perf_event__process_attr, + .feature = process_header_feature, + }; bool has_tracepoint = false; - session = perf_session__new(&data, 0, NULL); + session = perf_session__new(&data, 0, &tool); if (IS_ERR(session)) return PTR_ERR(session); + if (data.is_pipe) + perf_session__process_events(session); + evlist__for_each_entry(session->evlist, pos) { evsel__fprintf(pos, details, stdout); -- GitLab From 7cfcd1e016cce5a72b4b86a3882eb80565430f82 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 10 Dec 2020 21:43:28 +0100 Subject: [PATCH 0721/1894] perf tools: Add evlist__disable_evsel/evlist__enable_evsel Adding interface to enable/disable single event in the evlist based on its name. It will be used later in new control enable/disable interface. Keeping the evlist::enabled true when one or more events are enabled so the toggle can work properly and toggle evlist to disabled state. Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Acked-by: Alexei Budankov Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Mark Rutland Cc: Michael Petlan Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20201210204330.233864-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 69 ++++++++++++++++++++++++++++++++++++++-- tools/perf/util/evlist.h | 2 ++ 2 files changed, 68 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 89286f148012f..05363a7247c41 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -381,7 +381,30 @@ bool evsel__cpu_iter_skip(struct evsel *ev, int cpu) return true; } -void evlist__disable(struct evlist *evlist) +static int evsel__strcmp(struct evsel *pos, char *evsel_name) +{ + if (!evsel_name) + return 0; + if (evsel__is_dummy_event(pos)) + return 1; + return strcmp(pos->name, evsel_name); +} + +static int evlist__is_enabled(struct evlist *evlist) +{ + struct evsel *pos; + + evlist__for_each_entry(evlist, pos) { + if (!evsel__is_group_leader(pos) || !pos->core.fd) + continue; + /* If at least one event is enabled, evlist is enabled. */ + if (!pos->disabled) + return true; + } + return false; +} + +static void __evlist__disable(struct evlist *evlist, char *evsel_name) { struct evsel *pos; struct affinity affinity; @@ -397,6 +420,8 @@ void evlist__disable(struct evlist *evlist) affinity__set(&affinity, cpu); evlist__for_each_entry(evlist, pos) { + if (evsel__strcmp(pos, evsel_name)) + continue; if (evsel__cpu_iter_skip(pos, cpu)) continue; if (pos->disabled || !evsel__is_group_leader(pos) || !pos->core.fd) @@ -414,15 +439,34 @@ void evlist__disable(struct evlist *evlist) affinity__cleanup(&affinity); evlist__for_each_entry(evlist, pos) { + if (evsel__strcmp(pos, evsel_name)) + continue; if (!evsel__is_group_leader(pos) || !pos->core.fd) continue; pos->disabled = true; } - evlist->enabled = false; + /* + * If we disabled only single event, we need to check + * the enabled state of the evlist manually. + */ + if (evsel_name) + evlist->enabled = evlist__is_enabled(evlist); + else + evlist->enabled = false; +} + +void evlist__disable(struct evlist *evlist) +{ + __evlist__disable(evlist, NULL); +} + +void evlist__disable_evsel(struct evlist *evlist, char *evsel_name) +{ + __evlist__disable(evlist, evsel_name); } -void evlist__enable(struct evlist *evlist) +static void __evlist__enable(struct evlist *evlist, char *evsel_name) { struct evsel *pos; struct affinity affinity; @@ -435,6 +479,8 @@ void evlist__enable(struct evlist *evlist) affinity__set(&affinity, cpu); evlist__for_each_entry(evlist, pos) { + if (evsel__strcmp(pos, evsel_name)) + continue; if (evsel__cpu_iter_skip(pos, cpu)) continue; if (!evsel__is_group_leader(pos) || !pos->core.fd) @@ -444,14 +490,31 @@ void evlist__enable(struct evlist *evlist) } affinity__cleanup(&affinity); evlist__for_each_entry(evlist, pos) { + if (evsel__strcmp(pos, evsel_name)) + continue; if (!evsel__is_group_leader(pos) || !pos->core.fd) continue; pos->disabled = false; } + /* + * Even single event sets the 'enabled' for evlist, + * so the toggle can work properly and toggle to + * 'disabled' state. + */ evlist->enabled = true; } +void evlist__enable(struct evlist *evlist) +{ + __evlist__enable(evlist, NULL); +} + +void evlist__enable_evsel(struct evlist *evlist, char *evsel_name) +{ + __evlist__enable(evlist, evsel_name); +} + void evlist__toggle_enable(struct evlist *evlist) { (evlist->enabled ? evlist__disable : evlist__enable)(evlist); diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 9b0c795736bb5..1aae75895dea0 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -186,6 +186,8 @@ size_t evlist__mmap_size(unsigned long pages); void evlist__disable(struct evlist *evlist); void evlist__enable(struct evlist *evlist); void evlist__toggle_enable(struct evlist *evlist); +void evlist__disable_evsel(struct evlist *evlist, char *evsel_name); +void evlist__enable_evsel(struct evlist *evlist, char *evsel_name); int evlist__enable_event_idx(struct evlist *evlist, struct evsel *evsel, int idx); -- GitLab From 8abceacff87d2fbb8e50e841d410e4808725151b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 12 Dec 2020 11:43:51 +0100 Subject: [PATCH 0722/1894] perf debug: Add debug_set_file function Allow to set debug output file via new debug_set_file function. It's called during perf startup in perf_debug_setup to set stderr file as default and any perf command can set it later to different file. It will be used in perf daemon command to get verbose output into log file. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Alexei Budankov Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201212104358.412065-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/debug.c | 9 ++++++++- tools/perf/util/debug.h | 2 ++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 5cda5565777a0..50fd6a4be4e01 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -30,6 +30,12 @@ bool dump_trace = false, quiet = false; int debug_ordered_events; static int redirect_to_stderr; int debug_data_convert; +static FILE *debug_file; + +void debug_set_file(FILE *file) +{ + debug_file = file; +} int veprintf(int level, int var, const char *fmt, va_list args) { @@ -39,7 +45,7 @@ int veprintf(int level, int var, const char *fmt, va_list args) if (use_browser >= 1 && !redirect_to_stderr) ui_helpline__vshow(fmt, args); else - ret = vfprintf(stderr, fmt, args); + ret = vfprintf(debug_file, fmt, args); } return ret; @@ -227,6 +233,7 @@ DEBUG_WRAPPER(debug, 1); void perf_debug_setup(void) { + debug_set_file(stderr); libapi_set_print(pr_warning_wrapper, pr_warning_wrapper, pr_debug_wrapper); } diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index f1734abd98dd0..43f7122956455 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -5,6 +5,7 @@ #include #include +#include #include extern int verbose; @@ -62,6 +63,7 @@ int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __printf(4, 5) int veprintf(int level, int var, const char *fmt, va_list args); int perf_debug_option(const char *str); +void debug_set_file(FILE *file); void perf_debug_setup(void); int perf_quiet_option(void); -- GitLab From 47dce51acc330eefef5ea876f7707585b402282a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 14 Dec 2020 11:54:48 +0100 Subject: [PATCH 0723/1894] perf tools: Add support to read build id from compressed elf Adding support to decompress file before reading build id. Adding filename__read_build_id and change its current versions to read_build_id. Shutting down stderr output of perf list in the shell test: 82: Check open filename arg using perf trace + vfs_getname : Ok because with decompression code in the place we the filename__read_build_id function is more verbose in case of error and the test did not account for that. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexei Budankov Cc: Andi Kleen Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201214105457.543111-7-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- .../tests/shell/trace+probe_vfs_getname.sh | 2 +- tools/perf/util/symbol-elf.c | 37 ++++++++++++++++++- 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/tools/perf/tests/shell/trace+probe_vfs_getname.sh b/tools/perf/tests/shell/trace+probe_vfs_getname.sh index 11cc2af13f2bb..3d31c1d560d60 100755 --- a/tools/perf/tests/shell/trace+probe_vfs_getname.sh +++ b/tools/perf/tests/shell/trace+probe_vfs_getname.sh @@ -20,7 +20,7 @@ skip_if_no_perf_trace || exit 2 file=$(mktemp /tmp/temporary_file.XXXXX) trace_open_vfs_getname() { - evts=$(echo $(perf list syscalls:sys_enter_open* 2>&1 | egrep 'open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/') | sed 's/ /,/') + evts=$(echo $(perf list syscalls:sys_enter_open* 2>/dev/null | egrep 'open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/') | sed 's/ /,/') perf trace -e $evts touch $file 2>&1 | \ egrep " +[0-9]+\.[0-9]+ +\( +[0-9]+\.[0-9]+ ms\): +touch\/[0-9]+ open(at)?\((dfd: +CWD, +)?filename: +${file}, +flags: CREAT\|NOCTTY\|NONBLOCK\|WRONLY, +mode: +IRUGO\|IWUGO\) += +[0-9]+$" } diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 44dd86a4f25f2..f3577f7d72fee 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -534,7 +534,7 @@ out: #ifdef HAVE_LIBBFD_BUILDID_SUPPORT -int filename__read_build_id(const char *filename, struct build_id *bid) +static int read_build_id(const char *filename, struct build_id *bid) { size_t size = sizeof(bid->data); int err = -1; @@ -563,7 +563,7 @@ out_close: #else // HAVE_LIBBFD_BUILDID_SUPPORT -int filename__read_build_id(const char *filename, struct build_id *bid) +static int read_build_id(const char *filename, struct build_id *bid) { size_t size = sizeof(bid->data); int fd, err = -1; @@ -595,6 +595,39 @@ out: #endif // HAVE_LIBBFD_BUILDID_SUPPORT +int filename__read_build_id(const char *filename, struct build_id *bid) +{ + struct kmod_path m = { .name = NULL, }; + char path[PATH_MAX]; + int err; + + if (!filename) + return -EFAULT; + + err = kmod_path__parse(&m, filename); + if (err) + return -1; + + if (m.comp) { + int error = 0, fd; + + fd = filename__decompress(filename, path, sizeof(path), m.comp, &error); + if (fd < 0) { + pr_debug("Failed to decompress (error %d) %s\n", + error, filename); + return -1; + } + close(fd); + filename = path; + } + + err = read_build_id(filename, bid); + + if (m.comp) + unlink(filename); + return err; +} + int sysfs__read_build_id(const char *filename, struct build_id *bid) { size_t size = sizeof(bid->data); -- GitLab From dc67d1920417140052976f3377fd216b87a50aad Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 16 Dec 2020 12:45:10 -0300 Subject: [PATCH 0724/1894] perf test: Make sample-parsing test aware of PERF_SAMPLE_{CODE,DATA}_PAGE_SIZE To fix this: $ perf test -v 27 27: Sample parsing : --- start --- test child forked, pid 586013 sample format has changed, some new PERF_SAMPLE_ bit was introduced - test needs updating test child finished with -1 ---- end ---- Sample parsing: FAILED! $ This patchset is still not completely merged, so when adding the PERF_SAMPLE_CODE_PAGE_SIZE to 'struct perf_sample' we need to add the bits added in this patch for 'perf_sample.data_page_size'. Fixes: 251cc77b8176de37 ("tools headers UAPI: Update tools's copy of linux/perf_event.h") Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/sample-parsing.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index a0bdaf390ac8e..2393916f6128a 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -154,6 +154,9 @@ static bool samples_same(const struct perf_sample *s1, if (type & PERF_SAMPLE_CGROUP) COMP(cgroup); + if (type & PERF_SAMPLE_DATA_PAGE_SIZE) + COMP(data_page_size); + if (type & PERF_SAMPLE_AUX) { COMP(aux_sample.size); if (memcmp(s1->aux_sample.data, s2->aux_sample.data, @@ -234,6 +237,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) }, .phys_addr = 113, .cgroup = 114, + .data_page_size = 115, .aux_sample = { .size = sizeof(aux_data), .data = (void *)aux_data, @@ -340,7 +344,7 @@ int test__sample_parsing(struct test *test __maybe_unused, int subtest __maybe_u * were added. Please actually update the test rather than just change * the condition below. */ - if (PERF_SAMPLE_MAX > PERF_SAMPLE_CGROUP << 1) { + if (PERF_SAMPLE_MAX > PERF_SAMPLE_CODE_PAGE_SIZE << 1) { pr_debug("sample format has changed, some new PERF_SAMPLE_ bit was introduced - test needs updating\n"); return -1; } -- GitLab From 526671bfc47df175eb87f96067d51b389a8af50d Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Wed, 16 Dec 2020 12:13:17 -0500 Subject: [PATCH 0725/1894] perf config: Fix example command in manpage to conform to syntax specified in the SYNOPSIS section. Committer testing: With the previously documented example: $ perf config --user report sort-order=srcline The config variable does not contain a section name: report $ With the fixed example line: $ perf config --user report.sort-order=srcline $ perf config --user report.sort-order report.sort-order=srcline $ Signed-off-by: Nick Thompson Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/linux-perf-users/20201217142619.GA14524@redhat.com/ Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-config.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 31069d8a53046..5c379adf83043 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -138,7 +138,7 @@ If you want to add or modify several config items, you can do like To modify the sort order of report functionality in user config file(i.e. `~/.perfconfig`), do - % perf config --user report sort-order=srcline + % perf config --user report.sort-order=srcline To change colors of selected line to other foreground and background colors in system config file (i.e. `$(sysconf)/perfconfig`), do -- GitLab From feca8a8342d3f53e394c9fc7d985b98ec0250ce1 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 16 Dec 2020 09:39:11 +0100 Subject: [PATCH 0726/1894] perf tools: Reformat record's control fd man text Adding available control commands in separate paragraph, so it's more readable and easier to add new commands. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Alexei Budankov Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201216083914.47215-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 2d30e525a6003..34cf651ee237a 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -637,11 +637,17 @@ endif::HAVE_LIBPFM[] --control=fifo:ctl-fifo[,ack-fifo]:: --control=fd:ctl-fd[,ack-fd]:: ctl-fifo / ack-fifo are opened and used as ctl-fd / ack-fd as follows. -Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, -'disable': disable events, 'snapshot': AUX area tracing snapshot). Measurements can be -started with events disabled using --delay=-1 option. Optionally send control command -completion ('ack\n') to ack-fd descriptor to synchronize with the controlling process. -Example of bash shell script to enable and disable events during measurements: +Listen on ctl-fd descriptor for command to control measurement. + +Available commands: + 'enable' : enable events + 'disable' : disable events + 'snapshot': AUX area tracing snapshot). + +Measurements can be started with events disabled using --delay=-1 option. Optionally +send control command completion ('ack\n') to ack-fd descriptor to synchronize with the +controlling process. Example of bash shell script to enable and disable events during +measurements: #!/bin/bash -- GitLab From 4262f8c3efa1e79bd5950437a3eea58eeb4c1c70 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 14 Dec 2020 09:59:00 -0300 Subject: [PATCH 0727/1894] tools headers: Syncronize linux/build_bug.h with the kernel sources To pick up the changes in: 14dc3983b5dff513 ("kbuild: avoid static_assert for genksyms") And silence this perf build warning: Warning: Kernel ABI header at 'tools/include/linux/build_bug.h' differs from latest version at 'include/linux/build_bug.h' diff -u tools/include/linux/build_bug.h include/linux/build_bug.h Cc: Adrian Hunter Cc: Arnd Bergmann Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/build_bug.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/include/linux/build_bug.h b/tools/include/linux/build_bug.h index cc7070c7439ba..ce365d2127682 100644 --- a/tools/include/linux/build_bug.h +++ b/tools/include/linux/build_bug.h @@ -79,4 +79,9 @@ #define __static_assert(expr, msg, ...) _Static_assert(expr, msg) #endif // static_assert +#ifdef __GENKSYMS__ +/* genksyms gets confused by _Static_assert */ +#define _Static_assert(expr, ...) +#endif + #endif /* _LINUX_BUILD_BUG_H */ -- GitLab From 1c28a05d1a972594164efc7fcffda416c5d6ab02 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 4 Dec 2020 09:13:16 -0300 Subject: [PATCH 0728/1894] tools headers UAPI: Sync linux/stat.h with the kernel sources To pick the changes in: 72d1249e2ffdbc34 ("uapi: fix statx attribute value overlap for DAX & MOUNT_ROOT") That don't cause any change in tooling, just addresses this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/stat.h' differs from latest version at 'include/uapi/linux/stat.h' diff -u tools/include/uapi/linux/stat.h include/uapi/linux/stat.h Cc: Adrian Hunter Cc: Eric Sandeen Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/stat.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h index 82cc58fe93681..1500a0f58041a 100644 --- a/tools/include/uapi/linux/stat.h +++ b/tools/include/uapi/linux/stat.h @@ -171,9 +171,12 @@ struct statx { * be of use to ordinary userspace programs such as GUIs or ls rather than * specialised tools. * - * Note that the flags marked [I] correspond to generic FS_IOC_FLAGS + * Note that the flags marked [I] correspond to the FS_IOC_SETFLAGS flags * semantically. Where possible, the numerical value is picked to correspond - * also. + * also. Note that the DAX attribute indicates that the file is in the CPU + * direct access state. It does not correspond to the per-inode flag that + * some filesystems support. + * */ #define STATX_ATTR_COMPRESSED 0x00000004 /* [I] File is compressed by the fs */ #define STATX_ATTR_IMMUTABLE 0x00000010 /* [I] File is marked immutable */ @@ -183,7 +186,7 @@ struct statx { #define STATX_ATTR_AUTOMOUNT 0x00001000 /* Dir: Automount trigger */ #define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */ #define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */ -#define STATX_ATTR_DAX 0x00002000 /* [I] File is DAX */ +#define STATX_ATTR_DAX 0x00200000 /* File is currently in DAX state */ #endif /* _UAPI_LINUX_STAT_H */ -- GitLab From cca415537244f6102cbb09b5b90db6ae2c953bdd Mon Sep 17 00:00:00 2001 From: Chunguang Xu Date: Sat, 7 Nov 2020 23:58:18 +0800 Subject: [PATCH 0729/1894] ext4: fix a memory leak of ext4_free_data When freeing metadata, we will create an ext4_free_data and insert it into the pending free list. After the current transaction is committed, the object will be freed. ext4_mb_free_metadata() will check whether the area to be freed overlaps with the pending free list. If true, return directly. At this time, ext4_free_data is leaked. Fortunately, the probability of this problem is small, since it only occurs if the file system is corrupted such that a block is claimed by more one inode and those inodes are deleted within a single jbd2 transaction. Signed-off-by: Chunguang Xu Link: https://lore.kernel.org/r/1604764698-4269-8-git-send-email-brookxu@tencent.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/mballoc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 29dfeb043050c..77815cd110b2e 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -5103,6 +5103,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, ext4_group_first_block_no(sb, group) + EXT4_C2B(sbi, cluster), "Block already on to-be-freed list"); + kmem_cache_free(ext4_free_data_cachep, new_entry); return 0; } } -- GitLab From c9200760da8a728eb9767ca41a956764b28c1310 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 9 Dec 2020 15:59:11 -0500 Subject: [PATCH 0730/1894] ext4: check for invalid block size early when mounting a file system Check for valid block size directly by validating s_log_block_size; we were doing this in two places. First, by calculating blocksize via BLOCK_SIZE << s_log_block_size, and then checking that the blocksize was valid. And then secondly, by checking s_log_block_size directly. The first check is not reliable, and can trigger an UBSAN warning if s_log_block_size on a maliciously corrupted superblock is greater than 22. This is harmless, since the second test will correctly reject the maliciously fuzzed file system, but to make syzbot shut up, and because the two checks are duplicative in any case, delete the blocksize check, and move the s_log_block_size earlier in ext4_fill_super(). Signed-off-by: Theodore Ts'o Reported-by: syzbot+345b75652b1d24227443@syzkaller.appspotmail.com --- fs/ext4/super.c | 40 ++++++++++++++++------------------------ 1 file changed, 16 insertions(+), 24 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index f86220a8df50a..4a16bbf0432ce 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4202,18 +4202,25 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) */ sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT; - blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); - - if (blocksize == PAGE_SIZE) - set_opt(sb, DIOREAD_NOLOCK); - - if (blocksize < EXT4_MIN_BLOCK_SIZE || - blocksize > EXT4_MAX_BLOCK_SIZE) { + if (le32_to_cpu(es->s_log_block_size) > + (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { ext4_msg(sb, KERN_ERR, - "Unsupported filesystem blocksize %d (%d log_block_size)", - blocksize, le32_to_cpu(es->s_log_block_size)); + "Invalid log block size: %u", + le32_to_cpu(es->s_log_block_size)); goto failed_mount; } + if (le32_to_cpu(es->s_log_cluster_size) > + (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { + ext4_msg(sb, KERN_ERR, + "Invalid log cluster size: %u", + le32_to_cpu(es->s_log_cluster_size)); + goto failed_mount; + } + + blocksize = EXT4_MIN_BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); + + if (blocksize == PAGE_SIZE) + set_opt(sb, DIOREAD_NOLOCK); if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE; @@ -4432,21 +4439,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (!ext4_feature_set_ok(sb, (sb_rdonly(sb)))) goto failed_mount; - if (le32_to_cpu(es->s_log_block_size) > - (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { - ext4_msg(sb, KERN_ERR, - "Invalid log block size: %u", - le32_to_cpu(es->s_log_block_size)); - goto failed_mount; - } - if (le32_to_cpu(es->s_log_cluster_size) > - (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { - ext4_msg(sb, KERN_ERR, - "Invalid log cluster size: %u", - le32_to_cpu(es->s_log_cluster_size)); - goto failed_mount; - } - if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (blocksize / 4)) { ext4_msg(sb, KERN_ERR, "Number of reserved GDT blocks insanely large: %d", -- GitLab From bc18546bf68e47996a359d2533168d5770a22024 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 23 Oct 2020 14:22:32 +0300 Subject: [PATCH 0731/1894] ext4: fix an IS_ERR() vs NULL check The ext4_find_extent() function never returns NULL, it returns error pointers. Fixes: 44059e503b03 ("ext4: fast commit recovery path") Signed-off-by: Dan Carpenter Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20201023112232.GB282278@mwanda Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/extents.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 90ba74c146949..3960b7ec3ab77 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -5815,8 +5815,8 @@ int ext4_ext_replay_update_ex(struct inode *inode, ext4_lblk_t start, int ret; path = ext4_find_extent(inode, start, NULL, 0); - if (!path) - return -EINVAL; + if (IS_ERR(path)) + return PTR_ERR(path); ex = path[path->p_depth].p_ext; if (!ex) { ret = -EFSCORRUPTED; -- GitLab From 03505c58b86a5ca9bff2a9d611c2fe95dc14f707 Mon Sep 17 00:00:00 2001 From: Kaixu Xia Date: Thu, 12 Nov 2020 14:56:42 +0800 Subject: [PATCH 0732/1894] ext4: remove the unused EXT4_CURRENT_REV macro There are no callers of the EXT4_CURRENT_REV macro, so remove it. Signed-off-by: Kaixu Xia Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/1605164202-31120-1-git-send-email-kaixuxia@tencent.com Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index cd95965be3d17..264d3a092c7d2 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1868,7 +1868,6 @@ static inline bool ext4_verity_in_progress(struct inode *inode) #define EXT4_GOOD_OLD_REV 0 /* The good old (original) format */ #define EXT4_DYNAMIC_REV 1 /* V2 format w/ dynamic inode sizes */ -#define EXT4_CURRENT_REV EXT4_GOOD_OLD_REV #define EXT4_MAX_SUPP_REV EXT4_DYNAMIC_REV #define EXT4_GOOD_OLD_INODE_SIZE 128 -- GitLab From b1b7dce3f09b460da38946d1845f3076daa36abb Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 19 Nov 2020 15:28:22 -0800 Subject: [PATCH 0733/1894] ext4: add docs about fast commit idempotence Fast commit on-disk format is designed such that the replay of these tags can be idempotent. This patch adds documentation in the code in form of comments and in form kernel docs that describes these characteristics. This patch also adds a TODO item needed to ensure kernel fast commit replay idempotence. Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201119232822.1860882-1-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- Documentation/filesystems/ext4/journal.rst | 50 ++++++++++++++++++ fs/ext4/fast_commit.c | 61 ++++++++++++++++++++++ 2 files changed, 111 insertions(+) diff --git a/Documentation/filesystems/ext4/journal.rst b/Documentation/filesystems/ext4/journal.rst index 849d5b119eb8b..cdbfec473167a 100644 --- a/Documentation/filesystems/ext4/journal.rst +++ b/Documentation/filesystems/ext4/journal.rst @@ -681,3 +681,53 @@ Here is the list of supported tags and their meanings: - Stores the TID of the commit, CRC of the fast commit of which this tag represents the end of +Fast Commit Replay Idempotence +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Fast commits tags are idempotent in nature provided the recovery code follows +certain rules. The guiding principle that the commit path follows while +committing is that it stores the result of a particular operation instead of +storing the procedure. + +Let's consider this rename operation: 'mv /a /b'. Let's assume dirent '/a' +was associated with inode 10. During fast commit, instead of storing this +operation as a procedure "rename a to b", we store the resulting file system +state as a "series" of outcomes: + +- Link dirent b to inode 10 +- Unlink dirent a +- Inode 10 with valid refcount + +Now when recovery code runs, it needs "enforce" this state on the file +system. This is what guarantees idempotence of fast commit replay. + +Let's take an example of a procedure that is not idempotent and see how fast +commits make it idempotent. Consider following sequence of operations: + +1) rm A +2) mv B A +3) read A + +If we store this sequence of operations as is then the replay is not idempotent. +Let's say while in replay, we crash after (2). During the second replay, +file A (which was actually created as a result of "mv B A" operation) would get +deleted. Thus, file named A would be absent when we try to read A. So, this +sequence of operations is not idempotent. However, as mentioned above, instead +of storing the procedure fast commits store the outcome of each procedure. Thus +the fast commit log for above procedure would be as follows: + +(Let's assume dirent A was linked to inode 10 and dirent B was linked to +inode 11 before the replay) + +1) Unlink A +2) Link A to inode 11 +3) Unlink B +4) Inode 11 + +If we crash after (3) we will have file A linked to inode 11. During the second +replay, we will remove file A (inode 11). But we will create it back and make +it point to inode 11. We won't find B, so we'll just skip that step. At this +point, the refcount for inode 11 is not reliable, but that gets fixed by the +replay of last inode 11 tag. Thus, by converting a non-idempotent procedure +into a series of idempotent outcomes, fast commits ensured idempotence during +the replay. diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index f2033e13a273c..b4bc8bf307c92 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -103,8 +103,69 @@ * * Replay code should thus check for all the valid tails in the FC area. * + * Fast Commit Replay Idempotence + * ------------------------------ + * + * Fast commits tags are idempotent in nature provided the recovery code follows + * certain rules. The guiding principle that the commit path follows while + * committing is that it stores the result of a particular operation instead of + * storing the procedure. + * + * Let's consider this rename operation: 'mv /a /b'. Let's assume dirent '/a' + * was associated with inode 10. During fast commit, instead of storing this + * operation as a procedure "rename a to b", we store the resulting file system + * state as a "series" of outcomes: + * + * - Link dirent b to inode 10 + * - Unlink dirent a + * - Inode <10> with valid refcount + * + * Now when recovery code runs, it needs "enforce" this state on the file + * system. This is what guarantees idempotence of fast commit replay. + * + * Let's take an example of a procedure that is not idempotent and see how fast + * commits make it idempotent. Consider following sequence of operations: + * + * rm A; mv B A; read A + * (x) (y) (z) + * + * (x), (y) and (z) are the points at which we can crash. If we store this + * sequence of operations as is then the replay is not idempotent. Let's say + * while in replay, we crash at (z). During the second replay, file A (which was + * actually created as a result of "mv B A" operation) would get deleted. Thus, + * file named A would be absent when we try to read A. So, this sequence of + * operations is not idempotent. However, as mentioned above, instead of storing + * the procedure fast commits store the outcome of each procedure. Thus the fast + * commit log for above procedure would be as follows: + * + * (Let's assume dirent A was linked to inode 10 and dirent B was linked to + * inode 11 before the replay) + * + * [Unlink A] [Link A to inode 11] [Unlink B] [Inode 11] + * (w) (x) (y) (z) + * + * If we crash at (z), we will have file A linked to inode 11. During the second + * replay, we will remove file A (inode 11). But we will create it back and make + * it point to inode 11. We won't find B, so we'll just skip that step. At this + * point, the refcount for inode 11 is not reliable, but that gets fixed by the + * replay of last inode 11 tag. Crashes at points (w), (x) and (y) get handled + * similarly. Thus, by converting a non-idempotent procedure into a series of + * idempotent outcomes, fast commits ensured idempotence during the replay. + * * TODOs * ----- + * + * 0) Fast commit replay path hardening: Fast commit replay code should use + * journal handles to make sure all the updates it does during the replay + * path are atomic. With that if we crash during fast commit replay, after + * trying to do recovery again, we will find a file system where fast commit + * area is invalid (because new full commit would be found). In order to deal + * with that, fast commit replay code should ensure that the "FC_REPLAY" + * superblock state is persisted before starting the replay, so that after + * the crash, fast commit recovery code can look at that flag and perform + * fast commit recovery even if that area is invalidated by later full + * commits. + * * 1) Make fast commit atomic updates more fine grained. Today, a fast commit * eligible update must be protected within ext4_fc_start_update() and * ext4_fc_stop_update(). These routines are called at much higher -- GitLab From 5a150bdec7dc79ad88e61cdf8c13106dd878311e Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 20 Nov 2020 12:28:32 -0600 Subject: [PATCH 0734/1894] ext4: fix fall-through warnings for Clang In preparation to enable -Wimplicit-fallthrough for Clang, fix a warning by explicitly adding a break statement instead of just letting the code fall through to the next case. Link: https://github.com/KSPP/linux/issues/115 Signed-off-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/03497331f088a938d7a728e7a689bd7953139429.1605896059.git.gustavoars@kernel.org Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 4a16bbf0432ce..872d45a131ca1 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4878,6 +4878,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) "requested data journaling mode"); goto failed_mount_wq; } + break; default: break; } -- GitLab From 941ba122ca56756aad82db21d28f283ad33b8dee Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Fri, 20 Nov 2020 12:22:31 -0800 Subject: [PATCH 0735/1894] ext4: make fast_commit.h byte identical with e2fsprogs/fast_commit.h This patch makes fast_commit.h byte by byte identical with e2fsprogs/fast_commit.h. This will help us ensure that there are no on-disk format inconsistencies between e2fsck and kernel ext4. Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201120202232.2240293-1-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/fast_commit.c | 38 --------------------- fs/ext4/fast_commit.h | 78 +++++++++++++++++++++++++++++++++---------- 2 files changed, 61 insertions(+), 55 deletions(-) diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index b4bc8bf307c92..4fcc21c25e793 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -1281,18 +1281,6 @@ static void ext4_fc_cleanup(journal_t *journal, int full) /* Ext4 Replay Path Routines */ -/* Get length of a particular tlv */ -static inline int ext4_fc_tag_len(struct ext4_fc_tl *tl) -{ - return le16_to_cpu(tl->fc_len); -} - -/* Get a pointer to "value" of a tlv */ -static inline u8 *ext4_fc_tag_val(struct ext4_fc_tl *tl) -{ - return (u8 *)tl + sizeof(*tl); -} - /* Helper struct for dentry replay routines */ struct dentry_info_args { int parent_ino, dname_len, ino, inode_len; @@ -1831,32 +1819,6 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl) return 0; } -static inline const char *tag2str(u16 tag) -{ - switch (tag) { - case EXT4_FC_TAG_LINK: - return "TAG_ADD_ENTRY"; - case EXT4_FC_TAG_UNLINK: - return "TAG_DEL_ENTRY"; - case EXT4_FC_TAG_ADD_RANGE: - return "TAG_ADD_RANGE"; - case EXT4_FC_TAG_CREAT: - return "TAG_CREAT_DENTRY"; - case EXT4_FC_TAG_DEL_RANGE: - return "TAG_DEL_RANGE"; - case EXT4_FC_TAG_INODE: - return "TAG_INODE"; - case EXT4_FC_TAG_PAD: - return "TAG_PAD"; - case EXT4_FC_TAG_TAIL: - return "TAG_TAIL"; - case EXT4_FC_TAG_HEAD: - return "TAG_HEAD"; - default: - return "TAG_ERROR"; - } -} - static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb) { struct ext4_fc_replay_state *state; diff --git a/fs/ext4/fast_commit.h b/fs/ext4/fast_commit.h index 3a6e5a1fa1b80..b77f70f55a622 100644 --- a/fs/ext4/fast_commit.h +++ b/fs/ext4/fast_commit.h @@ -3,6 +3,11 @@ #ifndef __FAST_COMMIT_H__ #define __FAST_COMMIT_H__ +/* + * Note this file is present in e2fsprogs/lib/ext2fs/fast_commit.h and + * linux/fs/ext4/fast_commit.h. These file should always be byte identical. + */ + /* Fast commit tags */ #define EXT4_FC_TAG_ADD_RANGE 0x0001 #define EXT4_FC_TAG_DEL_RANGE 0x0002 @@ -50,7 +55,7 @@ struct ext4_fc_del_range { struct ext4_fc_dentry_info { __le32 fc_parent_ino; __le32 fc_ino; - u8 fc_dname[0]; + __u8 fc_dname[0]; }; /* Value structure for EXT4_FC_TAG_INODE and EXT4_FC_TAG_INODE_PARTIAL. */ @@ -65,19 +70,6 @@ struct ext4_fc_tail { __le32 fc_crc; }; -/* - * In memory list of dentry updates that are performed on the file - * system used by fast commit code. - */ -struct ext4_fc_dentry_update { - int fcd_op; /* Type of update create / unlink / link */ - int fcd_parent; /* Parent inode number */ - int fcd_ino; /* Inode number */ - struct qstr fcd_name; /* Dirent name */ - unsigned char fcd_iname[DNAME_INLINE_LEN]; /* Dirent name string */ - struct list_head fcd_list; -}; - /* * Fast commit reason codes */ @@ -107,6 +99,20 @@ enum { EXT4_FC_REASON_MAX }; +#ifdef __KERNEL__ +/* + * In memory list of dentry updates that are performed on the file + * system used by fast commit code. + */ +struct ext4_fc_dentry_update { + int fcd_op; /* Type of update create / unlink / link */ + int fcd_parent; /* Parent inode number */ + int fcd_ino; /* Inode number */ + struct qstr fcd_name; /* Dirent name */ + unsigned char fcd_iname[DNAME_INLINE_LEN]; /* Dirent name string */ + struct list_head fcd_list; +}; + struct ext4_fc_stats { unsigned int fc_ineligible_reason_count[EXT4_FC_REASON_MAX]; unsigned long fc_num_commits; @@ -145,13 +151,51 @@ struct ext4_fc_replay_state { }; #define region_last(__region) (((__region)->lblk) + ((__region)->len) - 1) +#endif #define fc_for_each_tl(__start, __end, __tl) \ - for (tl = (struct ext4_fc_tl *)start; \ - (u8 *)tl < (u8 *)end; \ - tl = (struct ext4_fc_tl *)((u8 *)tl + \ + for (tl = (struct ext4_fc_tl *)(__start); \ + (__u8 *)tl < (__u8 *)(__end); \ + tl = (struct ext4_fc_tl *)((__u8 *)tl + \ sizeof(struct ext4_fc_tl) + \ + le16_to_cpu(tl->fc_len))) +static inline const char *tag2str(__u16 tag) +{ + switch (tag) { + case EXT4_FC_TAG_LINK: + return "ADD_ENTRY"; + case EXT4_FC_TAG_UNLINK: + return "DEL_ENTRY"; + case EXT4_FC_TAG_ADD_RANGE: + return "ADD_RANGE"; + case EXT4_FC_TAG_CREAT: + return "CREAT_DENTRY"; + case EXT4_FC_TAG_DEL_RANGE: + return "DEL_RANGE"; + case EXT4_FC_TAG_INODE: + return "INODE"; + case EXT4_FC_TAG_PAD: + return "PAD"; + case EXT4_FC_TAG_TAIL: + return "TAIL"; + case EXT4_FC_TAG_HEAD: + return "HEAD"; + default: + return "ERROR"; + } +} + +/* Get length of a particular tlv */ +static inline int ext4_fc_tag_len(struct ext4_fc_tl *tl) +{ + return le16_to_cpu(tl->fc_len); +} + +/* Get a pointer to "value" of a tlv */ +static inline __u8 *ext4_fc_tag_val(struct ext4_fc_tl *tl) +{ + return (__u8 *)tl + sizeof(*tl); +} #endif /* __FAST_COMMIT_H__ */ -- GitLab From 9bd23c31f392bda88618008f27fd52ee9e0fac38 Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Fri, 20 Nov 2020 12:22:32 -0800 Subject: [PATCH 0736/1894] jbd2: add a helper to find out number of fast commit blocks Add a helper to read number of fast commit blocks from jbd2 superblock and also rename the JBD2_MIN_FC_BLKS to JBD2_DEFAULT_FAST_COMMIT_BLOCKS since this constant is just the default number of fast commit blocks to use in case number of fast commit blocks isn't set in jbd2 superblock. Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201120202232.2240293-2-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- fs/jbd2/journal.c | 8 ++------ include/linux/jbd2.h | 9 ++++++++- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 188f79d769881..2dc9444428028 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1869,9 +1869,7 @@ static int load_superblock(journal_t *journal) if (jbd2_has_feature_fast_commit(journal)) { journal->j_fc_last = be32_to_cpu(sb->s_maxlen); - num_fc_blocks = be32_to_cpu(sb->s_num_fc_blks); - if (!num_fc_blocks) - num_fc_blocks = JBD2_MIN_FC_BLOCKS; + num_fc_blocks = jbd2_journal_get_num_fc_blks(sb); if (journal->j_last - num_fc_blocks >= JBD2_MIN_JOURNAL_BLOCKS) journal->j_last = journal->j_fc_last - num_fc_blocks; journal->j_fc_first = journal->j_last + 1; @@ -2102,9 +2100,7 @@ jbd2_journal_initialize_fast_commit(journal_t *journal) journal_superblock_t *sb = journal->j_superblock; unsigned long long num_fc_blks; - num_fc_blks = be32_to_cpu(sb->s_num_fc_blks); - if (num_fc_blks == 0) - num_fc_blks = JBD2_MIN_FC_BLOCKS; + num_fc_blks = jbd2_journal_get_num_fc_blks(sb); if (journal->j_last - num_fc_blks < JBD2_MIN_JOURNAL_BLOCKS) return -ENOSPC; diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index d2a4860feb72f..99d3cd051ac33 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -68,7 +68,7 @@ extern void *jbd2_alloc(size_t size, gfp_t flags); extern void jbd2_free(void *ptr, size_t size); #define JBD2_MIN_JOURNAL_BLOCKS 1024 -#define JBD2_MIN_FC_BLOCKS 256 +#define JBD2_DEFAULT_FAST_COMMIT_BLOCKS 256 #ifdef __KERNEL__ @@ -1692,6 +1692,13 @@ static inline int jbd2_journal_has_csum_v2or3(journal_t *journal) return journal->j_chksum_driver != NULL; } +static inline int jbd2_journal_get_num_fc_blks(journal_superblock_t *jsb) +{ + int num_fc_blocks = be32_to_cpu(jsb->s_num_fc_blks); + + return num_fc_blocks ? num_fc_blocks : JBD2_DEFAULT_FAST_COMMIT_BLOCKS; +} + /* * Return number of free blocks in the log. Must be called under j_state_lock. */ -- GitLab From 46e294efc355c48d1dd4d58501aa56dac461792a Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 27 Nov 2020 12:06:49 +0100 Subject: [PATCH 0737/1894] ext4: fix deadlock with fs freezing and EA inodes Xattr code using inodes with large xattr data can end up dropping last inode reference (and thus deleting the inode) from places like ext4_xattr_set_entry(). That function is called with transaction started and so ext4_evict_inode() can deadlock against fs freezing like: CPU1 CPU2 removexattr() freeze_super() vfs_removexattr() ext4_xattr_set() handle = ext4_journal_start() ... ext4_xattr_set_entry() iput(old_ea_inode) ext4_evict_inode(old_ea_inode) sb->s_writers.frozen = SB_FREEZE_FS; sb_wait_write(sb, SB_FREEZE_FS); ext4_freeze() jbd2_journal_lock_updates() -> blocks waiting for all handles to stop sb_start_intwrite() -> blocks as sb is already in SB_FREEZE_FS state Generally it is advisable to delete inodes from a separate transaction as it can consume quite some credits however in this case it would be quite clumsy and furthermore the credits for inode deletion are quite limited and already accounted for. So just tweak ext4_evict_inode() to avoid freeze protection if we have transaction already started and thus it is not really needed anyway. Cc: stable@vger.kernel.org Fixes: dec214d00e0d ("ext4: xattr inode deduplication") Signed-off-by: Jan Kara Reviewed-by: Andreas Dilger Link: https://lore.kernel.org/r/20201127110649.24730-1-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index b147c2e20469b..6b44657fb3e48 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -175,6 +175,7 @@ void ext4_evict_inode(struct inode *inode) */ int extra_credits = 6; struct ext4_xattr_inode_array *ea_inode_array = NULL; + bool freeze_protected = false; trace_ext4_evict_inode(inode); @@ -232,9 +233,14 @@ void ext4_evict_inode(struct inode *inode) /* * Protect us against freezing - iput() caller didn't have to have any - * protection against it + * protection against it. When we are in a running transaction though, + * we are already protected against freezing and we cannot grab further + * protection due to lock ordering constraints. */ - sb_start_intwrite(inode->i_sb); + if (!ext4_journal_current_handle()) { + sb_start_intwrite(inode->i_sb); + freeze_protected = true; + } if (!IS_NOQUOTA(inode)) extra_credits += EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb); @@ -253,7 +259,8 @@ void ext4_evict_inode(struct inode *inode) * cleaned up. */ ext4_orphan_del(NULL, inode); - sb_end_intwrite(inode->i_sb); + if (freeze_protected) + sb_end_intwrite(inode->i_sb); goto no_delete; } @@ -294,7 +301,8 @@ void ext4_evict_inode(struct inode *inode) stop_handle: ext4_journal_stop(handle); ext4_orphan_del(NULL, inode); - sb_end_intwrite(inode->i_sb); + if (freeze_protected) + sb_end_intwrite(inode->i_sb); ext4_xattr_inode_array_free(ea_inode_array); goto no_delete; } @@ -323,7 +331,8 @@ stop_handle: else ext4_free_inode(handle, inode); ext4_journal_stop(handle); - sb_end_intwrite(inode->i_sb); + if (freeze_protected) + sb_end_intwrite(inode->i_sb); ext4_xattr_inode_array_free(ea_inode_array); return; no_delete: -- GitLab From b08070eca9e247f60ab39d79b2c25d274750441f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 27 Nov 2020 12:33:54 +0100 Subject: [PATCH 0738/1894] ext4: don't remount read-only with errors=continue on reboot ext4_handle_error() with errors=continue mount option can accidentally remount the filesystem read-only when the system is rebooting. Fix that. Fixes: 1dc1097ff60e ("ext4: avoid panic during forced reboot") Signed-off-by: Jan Kara Reviewed-by: Andreas Dilger Cc: stable@kernel.org Link: https://lore.kernel.org/r/20201127113405.26867-2-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 872d45a131ca1..3ef84e8ab1ae6 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -666,19 +666,17 @@ static bool system_going_down(void) static void ext4_handle_error(struct super_block *sb) { + journal_t *journal = EXT4_SB(sb)->s_journal; + if (test_opt(sb, WARN_ON_ERROR)) WARN_ON_ONCE(1); - if (sb_rdonly(sb)) + if (sb_rdonly(sb) || test_opt(sb, ERRORS_CONT)) return; - if (!test_opt(sb, ERRORS_CONT)) { - journal_t *journal = EXT4_SB(sb)->s_journal; - - ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED); - if (journal) - jbd2_journal_abort(journal, -EIO); - } + ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED); + if (journal) + jbd2_journal_abort(journal, -EIO); /* * We force ERRORS_RO behavior when system is rebooting. Otherwise we * could panic during 'reboot -f' as the underlying device got already -- GitLab From 81414b4dd48f596bf33e1b32c2e43e2047150ca6 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 27 Nov 2020 12:33:55 +0100 Subject: [PATCH 0739/1894] ext4: remove redundant sb checksum recomputation Superblock is written out either through ext4_commit_super() or through ext4_handle_dirty_super(). In both cases we recompute the checksum so it is not necessary to recompute it after updating superblock free inodes & blocks counters. Signed-off-by: Jan Kara Reviewed-by: Andreas Dilger Link: https://lore.kernel.org/r/20201127113405.26867-3-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 3ef84e8ab1ae6..7bb516c9b2de0 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5010,13 +5010,11 @@ no_journal: block = ext4_count_free_clusters(sb); ext4_free_blocks_count_set(sbi->s_es, EXT4_C2B(sbi, block)); - ext4_superblock_csum_set(sb); err = percpu_counter_init(&sbi->s_freeclusters_counter, block, GFP_KERNEL); if (!err) { unsigned long freei = ext4_count_free_inodes(sb); sbi->s_es->s_free_inodes_count = cpu_to_le32(freei); - ext4_superblock_csum_set(sb); err = percpu_counter_init(&sbi->s_freeinodes_counter, freei, GFP_KERNEL); } -- GitLab From 93c20bc3eafba52c134cf5183f18833b9bd22bf8 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 27 Nov 2020 12:33:56 +0100 Subject: [PATCH 0740/1894] ext4: standardize error message in ext4_protect_reserved_inode() We use __ext4_error() when ext4_protect_reserved_inode() finds filesystem corruption. However EXT4_ERROR_INODE_ERR() is perfectly capable of reporting all the needed information. So just use that. Signed-off-by: Jan Kara Reviewed-by: Andreas Dilger Link: https://lore.kernel.org/r/20201127113405.26867-4-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/block_validity.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index c13422a770e4f..4666b55b736ec 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -176,12 +176,10 @@ static int ext4_protect_reserved_inode(struct super_block *sb, err = add_system_zone(system_blks, map.m_pblk, n, ino); if (err < 0) { if (err == -EFSCORRUPTED) { - __ext4_error(sb, __func__, __LINE__, - -err, map.m_pblk, - "blocks %llu-%llu from inode %u overlap system zone", - map.m_pblk, - map.m_pblk + map.m_len - 1, - ino); + EXT4_ERROR_INODE_ERR(inode, -err, + "blocks %llu-%llu from inode overlap system zone", + map.m_pblk, + map.m_pblk + map.m_len - 1); } break; } -- GitLab From 014c9caa29d3a44e0de695c99ef18bec3e887d52 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 27 Nov 2020 12:33:57 +0100 Subject: [PATCH 0741/1894] ext4: make ext4_abort() use __ext4_error() The only difference between __ext4_abort() and __ext4_error() is that the former one ignores errors=continue mount option. Unify the code to reduce duplication. Signed-off-by: Jan Kara Reviewed-by: Andreas Dilger Link: https://lore.kernel.org/r/20201127113405.26867-5-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 29 +++++++--------- fs/ext4/ext4_jbd2.c | 4 +-- fs/ext4/inode.c | 2 +- fs/ext4/super.c | 84 ++++++++++++--------------------------------- 4 files changed, 37 insertions(+), 82 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 264d3a092c7d2..84c63b9b83106 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2961,9 +2961,9 @@ extern void ext4_mark_group_bitmap_corrupted(struct super_block *sb, ext4_group_t block_group, unsigned int flags); -extern __printf(6, 7) -void __ext4_error(struct super_block *, const char *, unsigned int, int, __u64, - const char *, ...); +extern __printf(7, 8) +void __ext4_error(struct super_block *, const char *, unsigned int, bool, + int, __u64, const char *, ...); extern __printf(6, 7) void __ext4_error_inode(struct inode *, const char *, unsigned int, ext4_fsblk_t, int, const char *, ...); @@ -2972,9 +2972,6 @@ void __ext4_error_file(struct file *, const char *, unsigned int, ext4_fsblk_t, const char *, ...); extern void __ext4_std_error(struct super_block *, const char *, unsigned int, int); -extern __printf(5, 6) -void __ext4_abort(struct super_block *, const char *, unsigned int, int, - const char *, ...); extern __printf(4, 5) void __ext4_warning(struct super_block *, const char *, unsigned int, const char *, ...); @@ -3004,6 +3001,9 @@ void __ext4_grp_locked_error(const char *, unsigned int, #define EXT4_ERROR_FILE(file, block, fmt, a...) \ ext4_error_file((file), __func__, __LINE__, (block), (fmt), ## a) +#define ext4_abort(sb, err, fmt, a...) \ + __ext4_error((sb), __func__, __LINE__, true, (err), 0, (fmt), ## a) + #ifdef CONFIG_PRINTK #define ext4_error_inode(inode, func, line, block, fmt, ...) \ @@ -3014,11 +3014,11 @@ void __ext4_grp_locked_error(const char *, unsigned int, #define ext4_error_file(file, func, line, block, fmt, ...) \ __ext4_error_file(file, func, line, block, fmt, ##__VA_ARGS__) #define ext4_error(sb, fmt, ...) \ - __ext4_error((sb), __func__, __LINE__, 0, 0, (fmt), ##__VA_ARGS__) + __ext4_error((sb), __func__, __LINE__, false, 0, 0, (fmt), \ + ##__VA_ARGS__) #define ext4_error_err(sb, err, fmt, ...) \ - __ext4_error((sb), __func__, __LINE__, (err), 0, (fmt), ##__VA_ARGS__) -#define ext4_abort(sb, err, fmt, ...) \ - __ext4_abort((sb), __func__, __LINE__, (err), (fmt), ##__VA_ARGS__) + __ext4_error((sb), __func__, __LINE__, false, (err), 0, (fmt), \ + ##__VA_ARGS__) #define ext4_warning(sb, fmt, ...) \ __ext4_warning(sb, __func__, __LINE__, fmt, ##__VA_ARGS__) #define ext4_warning_inode(inode, fmt, ...) \ @@ -3051,17 +3051,12 @@ do { \ #define ext4_error(sb, fmt, ...) \ do { \ no_printk(fmt, ##__VA_ARGS__); \ - __ext4_error(sb, "", 0, 0, 0, " "); \ + __ext4_error(sb, "", 0, false, 0, 0, " "); \ } while (0) #define ext4_error_err(sb, err, fmt, ...) \ do { \ no_printk(fmt, ##__VA_ARGS__); \ - __ext4_error(sb, "", 0, err, 0, " "); \ -} while (0) -#define ext4_abort(sb, err, fmt, ...) \ -do { \ - no_printk(fmt, ##__VA_ARGS__); \ - __ext4_abort(sb, "", 0, err, " "); \ + __ext4_error(sb, "", 0, false, err, 0, " "); \ } while (0) #define ext4_warning(sb, fmt, ...) \ do { \ diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 0fd0c42a4f7db..1a0a827a7f345 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -296,8 +296,8 @@ int __ext4_forget(const char *where, unsigned int line, handle_t *handle, if (err) { ext4_journal_abort_handle(where, line, __func__, bh, handle, err); - __ext4_abort(inode->i_sb, where, line, -err, - "error %d when attempting revoke", err); + __ext4_error(inode->i_sb, where, line, true, -err, 0, + "error %d when attempting revoke", err); } BUFFER_TRACE(bh, "exit"); return err; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 6b44657fb3e48..ab786123a022e 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4619,7 +4619,7 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))) { if (flags & EXT4_IGET_HANDLE) return ERR_PTR(-ESTALE); - __ext4_error(sb, function, line, EFSCORRUPTED, 0, + __ext4_error(sb, function, line, false, EFSCORRUPTED, 0, "inode #%lu: comm %s: iget: illegal inode #", ino, current->comm); return ERR_PTR(-EFSCORRUPTED); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 7bb516c9b2de0..930396eb8e6e5 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -662,16 +662,21 @@ static bool system_going_down(void) * We'll just use the jbd2_journal_abort() error code to record an error in * the journal instead. On recovery, the journal will complain about * that error until we've noted it down and cleared it. + * + * If force_ro is set, we unconditionally force the filesystem into an + * ABORT|READONLY state, unless the error response on the fs has been set to + * panic in which case we take the easy way out and panic immediately. This is + * used to deal with unrecoverable failures such as journal IO errors or ENOMEM + * at a critical moment in log management. */ - -static void ext4_handle_error(struct super_block *sb) +static void ext4_handle_error(struct super_block *sb, bool force_ro) { journal_t *journal = EXT4_SB(sb)->s_journal; if (test_opt(sb, WARN_ON_ERROR)) WARN_ON_ONCE(1); - if (sb_rdonly(sb) || test_opt(sb, ERRORS_CONT)) + if (sb_rdonly(sb) || (!force_ro && test_opt(sb, ERRORS_CONT))) return; ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED); @@ -682,18 +687,17 @@ static void ext4_handle_error(struct super_block *sb) * could panic during 'reboot -f' as the underlying device got already * disabled. */ - if (test_opt(sb, ERRORS_RO) || system_going_down()) { - ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); - /* - * Make sure updated value of ->s_mount_flags will be visible - * before ->s_flags update - */ - smp_wmb(); - sb->s_flags |= SB_RDONLY; - } else if (test_opt(sb, ERRORS_PANIC)) { + if (test_opt(sb, ERRORS_PANIC) && !system_going_down()) { panic("EXT4-fs (device %s): panic forced after error\n", sb->s_id); } + ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); + /* + * Make sure updated value of ->s_mount_flags will be visible before + * ->s_flags update + */ + smp_wmb(); + sb->s_flags |= SB_RDONLY; } #define ext4_error_ratelimit(sb) \ @@ -701,7 +705,7 @@ static void ext4_handle_error(struct super_block *sb) "EXT4-fs error") void __ext4_error(struct super_block *sb, const char *function, - unsigned int line, int error, __u64 block, + unsigned int line, bool force_ro, int error, __u64 block, const char *fmt, ...) { struct va_format vaf; @@ -721,7 +725,7 @@ void __ext4_error(struct super_block *sb, const char *function, va_end(args); } save_error_info(sb, error, 0, block, function, line); - ext4_handle_error(sb); + ext4_handle_error(sb, force_ro); } void __ext4_error_inode(struct inode *inode, const char *function, @@ -753,7 +757,7 @@ void __ext4_error_inode(struct inode *inode, const char *function, } save_error_info(inode->i_sb, error, inode->i_ino, block, function, line); - ext4_handle_error(inode->i_sb); + ext4_handle_error(inode->i_sb, false); } void __ext4_error_file(struct file *file, const char *function, @@ -792,7 +796,7 @@ void __ext4_error_file(struct file *file, const char *function, } save_error_info(inode->i_sb, EFSCORRUPTED, inode->i_ino, block, function, line); - ext4_handle_error(inode->i_sb); + ext4_handle_error(inode->i_sb, false); } const char *ext4_decode_error(struct super_block *sb, int errno, @@ -860,51 +864,7 @@ void __ext4_std_error(struct super_block *sb, const char *function, } save_error_info(sb, -errno, 0, 0, function, line); - ext4_handle_error(sb); -} - -/* - * ext4_abort is a much stronger failure handler than ext4_error. The - * abort function may be used to deal with unrecoverable failures such - * as journal IO errors or ENOMEM at a critical moment in log management. - * - * We unconditionally force the filesystem into an ABORT|READONLY state, - * unless the error response on the fs has been set to panic in which - * case we take the easy way out and panic immediately. - */ - -void __ext4_abort(struct super_block *sb, const char *function, - unsigned int line, int error, const char *fmt, ...) -{ - struct va_format vaf; - va_list args; - - if (unlikely(ext4_forced_shutdown(EXT4_SB(sb)))) - return; - - save_error_info(sb, error, 0, 0, function, line); - va_start(args, fmt); - vaf.fmt = fmt; - vaf.va = &args; - printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: %pV\n", - sb->s_id, function, line, &vaf); - va_end(args); - - if (sb_rdonly(sb) == 0) { - ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED); - if (EXT4_SB(sb)->s_journal) - jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); - - ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); - /* - * Make sure updated value of ->s_mount_flags will be visible - * before ->s_flags update - */ - smp_wmb(); - sb->s_flags |= SB_RDONLY; - } - if (test_opt(sb, ERRORS_PANIC) && !system_going_down()) - panic("EXT4-fs panic from previous error\n"); + ext4_handle_error(sb, false); } void __ext4_msg(struct super_block *sb, @@ -1007,7 +967,7 @@ __acquires(bitlock) ext4_unlock_group(sb, grp); ext4_commit_super(sb, 1); - ext4_handle_error(sb); + ext4_handle_error(sb, false); /* * We only get here in the ERRORS_RO case; relocking the group * may be dangerous, but nothing bad will happen since the -- GitLab From 4067662388f97d0f360e568820d9d5bac6a3c9fa Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 27 Nov 2020 12:33:58 +0100 Subject: [PATCH 0742/1894] ext4: move functions in super.c Just move error info related functions in super.c close to ext4_handle_error(). We'll want to combine save_error_info() with ext4_handle_error() and this makes change more obvious and saves a forward declaration as well. No functional change. Signed-off-by: Jan Kara Reviewed-by: Andreas Dilger Link: https://lore.kernel.org/r/20201127113405.26867-6-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 196 ++++++++++++++++++++++++------------------------ 1 file changed, 98 insertions(+), 98 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 930396eb8e6e5..88d19f354dc37 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -423,104 +423,6 @@ static time64_t __ext4_get_tstamp(__le32 *lo, __u8 *hi) #define ext4_get_tstamp(es, tstamp) \ __ext4_get_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi) -static void __save_error_info(struct super_block *sb, int error, - __u32 ino, __u64 block, - const char *func, unsigned int line) -{ - struct ext4_super_block *es = EXT4_SB(sb)->s_es; - int err; - - EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; - if (bdev_read_only(sb->s_bdev)) - return; - es->s_state |= cpu_to_le16(EXT4_ERROR_FS); - ext4_update_tstamp(es, s_last_error_time); - strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func)); - es->s_last_error_line = cpu_to_le32(line); - es->s_last_error_ino = cpu_to_le32(ino); - es->s_last_error_block = cpu_to_le64(block); - switch (error) { - case EIO: - err = EXT4_ERR_EIO; - break; - case ENOMEM: - err = EXT4_ERR_ENOMEM; - break; - case EFSBADCRC: - err = EXT4_ERR_EFSBADCRC; - break; - case 0: - case EFSCORRUPTED: - err = EXT4_ERR_EFSCORRUPTED; - break; - case ENOSPC: - err = EXT4_ERR_ENOSPC; - break; - case ENOKEY: - err = EXT4_ERR_ENOKEY; - break; - case EROFS: - err = EXT4_ERR_EROFS; - break; - case EFBIG: - err = EXT4_ERR_EFBIG; - break; - case EEXIST: - err = EXT4_ERR_EEXIST; - break; - case ERANGE: - err = EXT4_ERR_ERANGE; - break; - case EOVERFLOW: - err = EXT4_ERR_EOVERFLOW; - break; - case EBUSY: - err = EXT4_ERR_EBUSY; - break; - case ENOTDIR: - err = EXT4_ERR_ENOTDIR; - break; - case ENOTEMPTY: - err = EXT4_ERR_ENOTEMPTY; - break; - case ESHUTDOWN: - err = EXT4_ERR_ESHUTDOWN; - break; - case EFAULT: - err = EXT4_ERR_EFAULT; - break; - default: - err = EXT4_ERR_UNKNOWN; - } - es->s_last_error_errcode = err; - if (!es->s_first_error_time) { - es->s_first_error_time = es->s_last_error_time; - es->s_first_error_time_hi = es->s_last_error_time_hi; - strncpy(es->s_first_error_func, func, - sizeof(es->s_first_error_func)); - es->s_first_error_line = cpu_to_le32(line); - es->s_first_error_ino = es->s_last_error_ino; - es->s_first_error_block = es->s_last_error_block; - es->s_first_error_errcode = es->s_last_error_errcode; - } - /* - * Start the daily error reporting function if it hasn't been - * started already - */ - if (!es->s_error_count) - mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ); - le32_add_cpu(&es->s_error_count, 1); -} - -static void save_error_info(struct super_block *sb, int error, - __u32 ino, __u64 block, - const char *func, unsigned int line) -{ - __save_error_info(sb, error, ino, block, func, line); - if (!bdev_read_only(sb->s_bdev)) - ext4_commit_super(sb, 1); -} - /* * The del_gendisk() function uninitializes the disk-specific data * structures, including the bdi structure, without telling anyone @@ -649,6 +551,104 @@ static bool system_going_down(void) || system_state == SYSTEM_RESTART; } +static void __save_error_info(struct super_block *sb, int error, + __u32 ino, __u64 block, + const char *func, unsigned int line) +{ + struct ext4_super_block *es = EXT4_SB(sb)->s_es; + int err; + + EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; + if (bdev_read_only(sb->s_bdev)) + return; + es->s_state |= cpu_to_le16(EXT4_ERROR_FS); + ext4_update_tstamp(es, s_last_error_time); + strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func)); + es->s_last_error_line = cpu_to_le32(line); + es->s_last_error_ino = cpu_to_le32(ino); + es->s_last_error_block = cpu_to_le64(block); + switch (error) { + case EIO: + err = EXT4_ERR_EIO; + break; + case ENOMEM: + err = EXT4_ERR_ENOMEM; + break; + case EFSBADCRC: + err = EXT4_ERR_EFSBADCRC; + break; + case 0: + case EFSCORRUPTED: + err = EXT4_ERR_EFSCORRUPTED; + break; + case ENOSPC: + err = EXT4_ERR_ENOSPC; + break; + case ENOKEY: + err = EXT4_ERR_ENOKEY; + break; + case EROFS: + err = EXT4_ERR_EROFS; + break; + case EFBIG: + err = EXT4_ERR_EFBIG; + break; + case EEXIST: + err = EXT4_ERR_EEXIST; + break; + case ERANGE: + err = EXT4_ERR_ERANGE; + break; + case EOVERFLOW: + err = EXT4_ERR_EOVERFLOW; + break; + case EBUSY: + err = EXT4_ERR_EBUSY; + break; + case ENOTDIR: + err = EXT4_ERR_ENOTDIR; + break; + case ENOTEMPTY: + err = EXT4_ERR_ENOTEMPTY; + break; + case ESHUTDOWN: + err = EXT4_ERR_ESHUTDOWN; + break; + case EFAULT: + err = EXT4_ERR_EFAULT; + break; + default: + err = EXT4_ERR_UNKNOWN; + } + es->s_last_error_errcode = err; + if (!es->s_first_error_time) { + es->s_first_error_time = es->s_last_error_time; + es->s_first_error_time_hi = es->s_last_error_time_hi; + strncpy(es->s_first_error_func, func, + sizeof(es->s_first_error_func)); + es->s_first_error_line = cpu_to_le32(line); + es->s_first_error_ino = es->s_last_error_ino; + es->s_first_error_block = es->s_last_error_block; + es->s_first_error_errcode = es->s_last_error_errcode; + } + /* + * Start the daily error reporting function if it hasn't been + * started already + */ + if (!es->s_error_count) + mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ); + le32_add_cpu(&es->s_error_count, 1); +} + +static void save_error_info(struct super_block *sb, int error, + __u32 ino, __u64 block, + const char *func, unsigned int line) +{ + __save_error_info(sb, error, ino, block, func, line); + if (!bdev_read_only(sb->s_bdev)) + ext4_commit_super(sb, 1); +} + /* Deal with the reporting of failure conditions on a filesystem such as * inconsistencies detected or read IO failures. * -- GitLab From 02a7780e4d2fcf438ac6773bc469e7ada2af56be Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 27 Nov 2020 12:33:59 +0100 Subject: [PATCH 0743/1894] ext4: simplify ext4 error translation We convert errno's to ext4 on-disk format error codes in save_error_info(). Add a function and a bit of macro magic to make this simpler. Signed-off-by: Jan Kara Reviewed-by: Andreas Dilger Link: https://lore.kernel.org/r/20201127113405.26867-7-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 95 +++++++++++++++++++++---------------------------- 1 file changed, 40 insertions(+), 55 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 88d19f354dc37..0e6d9a4bd72fd 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -551,76 +551,61 @@ static bool system_going_down(void) || system_state == SYSTEM_RESTART; } +struct ext4_err_translation { + int code; + int errno; +}; + +#define EXT4_ERR_TRANSLATE(err) { .code = EXT4_ERR_##err, .errno = err } + +static struct ext4_err_translation err_translation[] = { + EXT4_ERR_TRANSLATE(EIO), + EXT4_ERR_TRANSLATE(ENOMEM), + EXT4_ERR_TRANSLATE(EFSBADCRC), + EXT4_ERR_TRANSLATE(EFSCORRUPTED), + EXT4_ERR_TRANSLATE(ENOSPC), + EXT4_ERR_TRANSLATE(ENOKEY), + EXT4_ERR_TRANSLATE(EROFS), + EXT4_ERR_TRANSLATE(EFBIG), + EXT4_ERR_TRANSLATE(EEXIST), + EXT4_ERR_TRANSLATE(ERANGE), + EXT4_ERR_TRANSLATE(EOVERFLOW), + EXT4_ERR_TRANSLATE(EBUSY), + EXT4_ERR_TRANSLATE(ENOTDIR), + EXT4_ERR_TRANSLATE(ENOTEMPTY), + EXT4_ERR_TRANSLATE(ESHUTDOWN), + EXT4_ERR_TRANSLATE(EFAULT), +}; + +static int ext4_errno_to_code(int errno) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(err_translation); i++) + if (err_translation[i].errno == errno) + return err_translation[i].code; + return EXT4_ERR_UNKNOWN; +} + static void __save_error_info(struct super_block *sb, int error, __u32 ino, __u64 block, const char *func, unsigned int line) { struct ext4_super_block *es = EXT4_SB(sb)->s_es; - int err; EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; if (bdev_read_only(sb->s_bdev)) return; + /* We default to EFSCORRUPTED error... */ + if (error == 0) + error = EFSCORRUPTED; es->s_state |= cpu_to_le16(EXT4_ERROR_FS); ext4_update_tstamp(es, s_last_error_time); strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func)); es->s_last_error_line = cpu_to_le32(line); es->s_last_error_ino = cpu_to_le32(ino); es->s_last_error_block = cpu_to_le64(block); - switch (error) { - case EIO: - err = EXT4_ERR_EIO; - break; - case ENOMEM: - err = EXT4_ERR_ENOMEM; - break; - case EFSBADCRC: - err = EXT4_ERR_EFSBADCRC; - break; - case 0: - case EFSCORRUPTED: - err = EXT4_ERR_EFSCORRUPTED; - break; - case ENOSPC: - err = EXT4_ERR_ENOSPC; - break; - case ENOKEY: - err = EXT4_ERR_ENOKEY; - break; - case EROFS: - err = EXT4_ERR_EROFS; - break; - case EFBIG: - err = EXT4_ERR_EFBIG; - break; - case EEXIST: - err = EXT4_ERR_EEXIST; - break; - case ERANGE: - err = EXT4_ERR_ERANGE; - break; - case EOVERFLOW: - err = EXT4_ERR_EOVERFLOW; - break; - case EBUSY: - err = EXT4_ERR_EBUSY; - break; - case ENOTDIR: - err = EXT4_ERR_ENOTDIR; - break; - case ENOTEMPTY: - err = EXT4_ERR_ENOTEMPTY; - break; - case ESHUTDOWN: - err = EXT4_ERR_ESHUTDOWN; - break; - case EFAULT: - err = EXT4_ERR_EFAULT; - break; - default: - err = EXT4_ERR_UNKNOWN; - } - es->s_last_error_errcode = err; + es->s_last_error_errcode = ext4_errno_to_code(error); if (!es->s_first_error_time) { es->s_first_error_time = es->s_last_error_time; es->s_first_error_time_hi = es->s_last_error_time_hi; -- GitLab From 443d6e86f821a165fae3fc3fc13086d27ac140b1 Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Wed, 16 Dec 2020 21:38:02 -0700 Subject: [PATCH 0744/1894] netfilter: x_tables: Update remaining dereference to RCU This fixes the dereference to fetch the RCU pointer when holding the appropriate xtables lock. Reported-by: kernel test robot Fixes: cc00bcaa5899 ("netfilter: x_tables: Switch synchronization to RCU") Signed-off-by: Subash Abhinov Kasiviswanathan Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/arp_tables.c | 2 +- net/ipv4/netfilter/ip_tables.c | 2 +- net/ipv6/netfilter/ip6_tables.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 563b62b76a5f1..c576a63d09db1 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1379,7 +1379,7 @@ static int compat_get_entries(struct net *net, xt_compat_lock(NFPROTO_ARP); t = xt_find_table_lock(net, NFPROTO_ARP, get.name); if (!IS_ERR(t)) { - const struct xt_table_info *private = t->private; + const struct xt_table_info *private = xt_table_get_private_protected(t); struct xt_table_info info; ret = compat_table_info(private, &info); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 6e2851f8d3a3f..e8f6f9d862376 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1589,7 +1589,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr, xt_compat_lock(AF_INET); t = xt_find_table_lock(net, AF_INET, get.name); if (!IS_ERR(t)) { - const struct xt_table_info *private = t->private; + const struct xt_table_info *private = xt_table_get_private_protected(t); struct xt_table_info info; ret = compat_table_info(private, &info); if (!ret && get.size == info.size) diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index c4f532f4d3118..0d453fa9e327b 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1598,7 +1598,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr, xt_compat_lock(AF_INET6); t = xt_find_table_lock(net, AF_INET6, get.name); if (!IS_ERR(t)) { - const struct xt_table_info *private = t->private; + const struct xt_table_info *private = xt_table_get_private_protected(t); struct xt_table_info info; ret = compat_table_info(private, &info); if (!ret && get.size == info.size) -- GitLab From 2b33d6ffa9e38f344418976b06057e2fc2aa9e2a Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Thu, 17 Dec 2020 11:53:40 +0300 Subject: [PATCH 0745/1894] netfilter: ipset: fixes possible oops in mtype_resize currently mtype_resize() can cause oops t = ip_set_alloc(htable_size(htable_bits)); if (!t) { ret = -ENOMEM; goto out; } t->hregion = ip_set_alloc(ahash_sizeof_regions(htable_bits)); Increased htable_bits can force htable_size() to return 0. In own turn ip_set_alloc(0) returns not 0 but ZERO_SIZE_PTR, so follwoing access to t->hregion should trigger an OOPS. Signed-off-by: Vasily Averin Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_hash_gen.h | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 5f1208ad049ee..1e7dddf824aed 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -640,7 +640,7 @@ mtype_resize(struct ip_set *set, bool retried) struct htype *h = set->data; struct htable *t, *orig; u8 htable_bits; - size_t dsize = set->dsize; + size_t hsize, dsize = set->dsize; #ifdef IP_SET_HASH_WITH_NETS u8 flags; struct mtype_elem *tmp; @@ -664,14 +664,12 @@ mtype_resize(struct ip_set *set, bool retried) retry: ret = 0; htable_bits++; - if (!htable_bits) { - /* In case we have plenty of memory :-) */ - pr_warn("Cannot increase the hashsize of set %s further\n", - set->name); - ret = -IPSET_ERR_HASH_FULL; - goto out; - } - t = ip_set_alloc(htable_size(htable_bits)); + if (!htable_bits) + goto hbwarn; + hsize = htable_size(htable_bits); + if (!hsize) + goto hbwarn; + t = ip_set_alloc(hsize); if (!t) { ret = -ENOMEM; goto out; @@ -813,6 +811,12 @@ cleanup: if (ret == -EAGAIN) goto retry; goto out; + +hbwarn: + /* In case we have plenty of memory :-) */ + pr_warn("Cannot increase the hashsize of set %s further\n", set->name); + ret = -IPSET_ERR_HASH_FULL; + goto out; } /* Get the current number of elements and ext_size in the set */ -- GitLab From 5c8193f568ae16f3242abad6518dc2ca6c8eef86 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Thu, 17 Dec 2020 17:53:18 +0300 Subject: [PATCH 0746/1894] netfilter: ipset: fix shift-out-of-bounds in htable_bits() htable_bits() can call jhash_size(32) and trigger shift-out-of-bounds UBSAN: shift-out-of-bounds in net/netfilter/ipset/ip_set_hash_gen.h:151:6 shift exponent 32 is too large for 32-bit type 'unsigned int' CPU: 0 PID: 8498 Comm: syz-executor519 Not tainted 5.10.0-rc7-next-20201208-syzkaller #0 Call Trace: __dump_stack lib/dump_stack.c:79 [inline] dump_stack+0x107/0x163 lib/dump_stack.c:120 ubsan_epilogue+0xb/0x5a lib/ubsan.c:148 __ubsan_handle_shift_out_of_bounds.cold+0xb1/0x181 lib/ubsan.c:395 htable_bits net/netfilter/ipset/ip_set_hash_gen.h:151 [inline] hash_mac_create.cold+0x58/0x9b net/netfilter/ipset/ip_set_hash_gen.h:1524 ip_set_create+0x610/0x1380 net/netfilter/ipset/ip_set_core.c:1115 nfnetlink_rcv_msg+0xecc/0x1180 net/netfilter/nfnetlink.c:252 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2494 nfnetlink_rcv+0x1ac/0x420 net/netfilter/nfnetlink.c:600 netlink_unicast_kernel net/netlink/af_netlink.c:1304 [inline] netlink_unicast+0x533/0x7d0 net/netlink/af_netlink.c:1330 netlink_sendmsg+0x907/0xe40 net/netlink/af_netlink.c:1919 sock_sendmsg_nosec net/socket.c:652 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:672 ____sys_sendmsg+0x6e8/0x810 net/socket.c:2345 ___sys_sendmsg+0xf3/0x170 net/socket.c:2399 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2432 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 This patch replaces htable_bits() by simple fls(hashsize - 1) call: it alone returns valid nbits both for round and non-round hashsizes. It is normal to set any nbits here because it is validated inside following htable_size() call which returns 0 for nbits>31. Fixes: 1feab10d7e6d("netfilter: ipset: Unified hash type generation") Reported-by: syzbot+d66bfadebca46cf61a2b@syzkaller.appspotmail.com Signed-off-by: Vasily Averin Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_hash_gen.h | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 1e7dddf824aed..6186358eac7c5 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -141,20 +141,6 @@ htable_size(u8 hbits) return hsize * sizeof(struct hbucket *) + sizeof(struct htable); } -/* Compute htable_bits from the user input parameter hashsize */ -static u8 -htable_bits(u32 hashsize) -{ - /* Assume that hashsize == 2^htable_bits */ - u8 bits = fls(hashsize - 1); - - if (jhash_size(bits) != hashsize) - /* Round up to the first 2^n value */ - bits = fls(hashsize); - - return bits; -} - #ifdef IP_SET_HASH_WITH_NETS #if IPSET_NET_COUNT > 1 #define __CIDR(cidr, i) (cidr[i]) @@ -1525,7 +1511,11 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, if (!h) return -ENOMEM; - hbits = htable_bits(hashsize); + /* Compute htable_bits from the user input parameter hashsize. + * Assume that hashsize == 2^htable_bits, + * otherwise round up to the first 2^n value. + */ + hbits = fls(hashsize - 1); hsize = htable_size(hbits); if (hsize == 0) { kfree(h); -- GitLab From fef98671194be005853cbbf51b164a3927589b64 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 17 Dec 2020 20:23:17 +0100 Subject: [PATCH 0747/1894] ACPI: PM: s2idle: Move x86-specific code to the x86 directory Some code in drivers/acpi/sleep.c (which is regarded as a generic file) related to suspend-to-idle support has grown direct dependencies on x86, but in fact it has been specific to x86 (which is the only user of it) anyway for a long time. For this reason, move that code to a separate file under acpi/x86/ and make it build and run as before under the right conditions. While at it, rename a vendor checking function in that code and consistently use acpi_handle_debug() for printing debug-related information in it. No expected functional impact. Signed-off-by: Rafael J. Wysocki --- drivers/acpi/Makefile | 1 + drivers/acpi/sleep.c | 453 +------------------------------------ drivers/acpi/sleep.h | 16 ++ drivers/acpi/x86/s2idle.c | 460 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 489 insertions(+), 441 deletions(-) create mode 100644 drivers/acpi/x86/s2idle.c diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index 44e4125063178..076894a3330fd 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -54,6 +54,7 @@ acpi-y += property.o acpi-$(CONFIG_X86) += acpi_cmos_rtc.o acpi-$(CONFIG_X86) += x86/apple.o acpi-$(CONFIG_X86) += x86/utils.o +acpi-$(CONFIG_X86) += x86/s2idle.o acpi-$(CONFIG_DEBUG_FS) += debugfs.o acpi-y += acpi_lpat.o acpi-$(CONFIG_ACPI_LPIT) += acpi_lpit.o diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index b929fd0d2e047..09fd13757b658 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -92,10 +92,6 @@ bool acpi_sleep_state_supported(u8 sleep_state) } #ifdef CONFIG_ACPI_SLEEP -static bool sleep_no_lps0 __read_mostly; -module_param(sleep_no_lps0, bool, 0644); -MODULE_PARM_DESC(sleep_no_lps0, "Do not use the special LPS0 device interface"); - static u32 acpi_target_sleep_state = ACPI_STATE_S0; u32 acpi_target_system_state(void) @@ -165,7 +161,7 @@ static int __init init_nvs_nosave(const struct dmi_system_id *d) return 0; } -static bool acpi_sleep_default_s3; +bool acpi_sleep_default_s3; static int __init init_default_s3(const struct dmi_system_id *d) { @@ -688,409 +684,13 @@ static const struct platform_suspend_ops acpi_suspend_ops_old = { static bool s2idle_wakeup; -/* - * On platforms supporting the Low Power S0 Idle interface there is an ACPI - * device object with the PNP0D80 compatible device ID (System Power Management - * Controller) and a specific _DSM method under it. That method, if present, - * can be used to indicate to the platform that the OS is transitioning into a - * low-power state in which certain types of activity are not desirable or that - * it is leaving such a state, which allows the platform to adjust its operation - * mode accordingly. - */ -static const struct acpi_device_id lps0_device_ids[] = { - {"PNP0D80", }, - {"", }, -}; - -#define ACPI_LPS0_DSM_UUID "c4eb40a0-6cd2-11e2-bcfd-0800200c9a66" - -#define ACPI_LPS0_GET_DEVICE_CONSTRAINTS 1 -#define ACPI_LPS0_SCREEN_OFF 3 -#define ACPI_LPS0_SCREEN_ON 4 -#define ACPI_LPS0_ENTRY 5 -#define ACPI_LPS0_EXIT 6 - -/* AMD */ -#define ACPI_LPS0_DSM_UUID_AMD "e3f32452-febc-43ce-9039-932122d37721" -#define ACPI_LPS0_SCREEN_OFF_AMD 4 -#define ACPI_LPS0_SCREEN_ON_AMD 5 - -static acpi_handle lps0_device_handle; -static guid_t lps0_dsm_guid; -static char lps0_dsm_func_mask; - -/* Device constraint entry structure */ -struct lpi_device_info { - char *name; - int enabled; - union acpi_object *package; -}; - -/* Constraint package structure */ -struct lpi_device_constraint { - int uid; - int min_dstate; - int function_states; -}; - -struct lpi_constraints { - acpi_handle handle; - int min_dstate; -}; - -/* AMD */ -/* Device constraint entry structure */ -struct lpi_device_info_amd { - int revision; - int count; - union acpi_object *package; -}; - -/* Constraint package structure */ -struct lpi_device_constraint_amd { - char *name; - int enabled; - int function_states; - int min_dstate; -}; - -static struct lpi_constraints *lpi_constraints_table; -static int lpi_constraints_table_size; -static int rev_id; - -static void lpi_device_get_constraints_amd(void) -{ - union acpi_object *out_obj; - int i, j, k; - - out_obj = acpi_evaluate_dsm_typed(lps0_device_handle, &lps0_dsm_guid, - 1, ACPI_LPS0_GET_DEVICE_CONSTRAINTS, - NULL, ACPI_TYPE_PACKAGE); - - if (!out_obj) - return; - - acpi_handle_debug(lps0_device_handle, "_DSM function 1 eval %s\n", - out_obj ? "successful" : "failed"); - - for (i = 0; i < out_obj->package.count; i++) { - union acpi_object *package = &out_obj->package.elements[i]; - struct lpi_device_info_amd info = { }; - - if (package->type == ACPI_TYPE_INTEGER) { - switch (i) { - case 0: - info.revision = package->integer.value; - break; - case 1: - info.count = package->integer.value; - break; - } - } else if (package->type == ACPI_TYPE_PACKAGE) { - lpi_constraints_table = kcalloc(package->package.count, - sizeof(*lpi_constraints_table), - GFP_KERNEL); - - if (!lpi_constraints_table) - goto free_acpi_buffer; - - acpi_handle_info(lps0_device_handle, - "LPI: constraints list begin:\n"); - - for (j = 0; j < package->package.count; ++j) { - union acpi_object *info_obj = &package->package.elements[j]; - struct lpi_device_constraint_amd dev_info = {}; - struct lpi_constraints *list; - acpi_status status; - - for (k = 0; k < info_obj->package.count; ++k) { - union acpi_object *obj = &info_obj->package.elements[k]; - union acpi_object *obj_new; - - list = &lpi_constraints_table[lpi_constraints_table_size]; - list->min_dstate = -1; - - obj_new = &obj[k]; - switch (k) { - case 0: - dev_info.enabled = obj->integer.value; - break; - case 1: - dev_info.name = obj->string.pointer; - break; - case 2: - dev_info.function_states = obj->integer.value; - break; - case 3: - dev_info.min_dstate = obj->integer.value; - break; - } - - if (!dev_info.enabled || !dev_info.name || - !dev_info.min_dstate) - continue; - - status = acpi_get_handle(NULL, dev_info.name, - &list->handle); - if (ACPI_FAILURE(status)) - continue; - - acpi_handle_info(lps0_device_handle, - "Name:%s\n", dev_info.name); - - list->min_dstate = dev_info.min_dstate; - - if (list->min_dstate < 0) { - acpi_handle_info(lps0_device_handle, - "Incomplete constraint defined\n"); - continue; - } - } - lpi_constraints_table_size++; - } - } - } - - acpi_handle_info(lps0_device_handle, "LPI: constraints list end\n"); - -free_acpi_buffer: - ACPI_FREE(out_obj); -} - -static void lpi_device_get_constraints(void) -{ - union acpi_object *out_obj; - int i; - - out_obj = acpi_evaluate_dsm_typed(lps0_device_handle, &lps0_dsm_guid, - 1, ACPI_LPS0_GET_DEVICE_CONSTRAINTS, - NULL, ACPI_TYPE_PACKAGE); - - acpi_handle_debug(lps0_device_handle, "_DSM function 1 eval %s\n", - out_obj ? "successful" : "failed"); - - if (!out_obj) - return; - - lpi_constraints_table = kcalloc(out_obj->package.count, - sizeof(*lpi_constraints_table), - GFP_KERNEL); - if (!lpi_constraints_table) - goto free_acpi_buffer; - - acpi_handle_debug(lps0_device_handle, "LPI: constraints list begin:\n"); - - for (i = 0; i < out_obj->package.count; i++) { - struct lpi_constraints *constraint; - acpi_status status; - union acpi_object *package = &out_obj->package.elements[i]; - struct lpi_device_info info = { }; - int package_count = 0, j; - - if (!package) - continue; - - for (j = 0; j < package->package.count; ++j) { - union acpi_object *element = - &(package->package.elements[j]); - - switch (element->type) { - case ACPI_TYPE_INTEGER: - info.enabled = element->integer.value; - break; - case ACPI_TYPE_STRING: - info.name = element->string.pointer; - break; - case ACPI_TYPE_PACKAGE: - package_count = element->package.count; - info.package = element->package.elements; - break; - } - } - - if (!info.enabled || !info.package || !info.name) - continue; - - constraint = &lpi_constraints_table[lpi_constraints_table_size]; - - status = acpi_get_handle(NULL, info.name, &constraint->handle); - if (ACPI_FAILURE(status)) - continue; - - acpi_handle_debug(lps0_device_handle, - "index:%d Name:%s\n", i, info.name); - - constraint->min_dstate = -1; - - for (j = 0; j < package_count; ++j) { - union acpi_object *info_obj = &info.package[j]; - union acpi_object *cnstr_pkg; - union acpi_object *obj; - struct lpi_device_constraint dev_info; - - switch (info_obj->type) { - case ACPI_TYPE_INTEGER: - /* version */ - break; - case ACPI_TYPE_PACKAGE: - if (info_obj->package.count < 2) - break; - - cnstr_pkg = info_obj->package.elements; - obj = &cnstr_pkg[0]; - dev_info.uid = obj->integer.value; - obj = &cnstr_pkg[1]; - dev_info.min_dstate = obj->integer.value; - - acpi_handle_debug(lps0_device_handle, - "uid:%d min_dstate:%s\n", - dev_info.uid, - acpi_power_state_string(dev_info.min_dstate)); - - constraint->min_dstate = dev_info.min_dstate; - break; - } - } - - if (constraint->min_dstate < 0) { - acpi_handle_debug(lps0_device_handle, - "Incomplete constraint defined\n"); - continue; - } - - lpi_constraints_table_size++; - } - - acpi_handle_debug(lps0_device_handle, "LPI: constraints list end\n"); - -free_acpi_buffer: - ACPI_FREE(out_obj); -} - -static void lpi_check_constraints(void) -{ - int i; - - for (i = 0; i < lpi_constraints_table_size; ++i) { - acpi_handle handle = lpi_constraints_table[i].handle; - struct acpi_device *adev; - - if (!handle || acpi_bus_get_device(handle, &adev)) - continue; - - acpi_handle_debug(handle, - "LPI: required min power state:%s current power state:%s\n", - acpi_power_state_string(lpi_constraints_table[i].min_dstate), - acpi_power_state_string(adev->power.state)); - - if (!adev->flags.power_manageable) { - acpi_handle_info(handle, "LPI: Device not power manageable\n"); - lpi_constraints_table[i].handle = NULL; - continue; - } - - if (adev->power.state < lpi_constraints_table[i].min_dstate) - acpi_handle_info(handle, - "LPI: Constraint not met; min power state:%s current power state:%s\n", - acpi_power_state_string(lpi_constraints_table[i].min_dstate), - acpi_power_state_string(adev->power.state)); - } -} - -static void acpi_sleep_run_lps0_dsm(unsigned int func) -{ - union acpi_object *out_obj; - - if (!(lps0_dsm_func_mask & (1 << func))) - return; - - out_obj = acpi_evaluate_dsm(lps0_device_handle, &lps0_dsm_guid, rev_id, func, NULL); - ACPI_FREE(out_obj); - - acpi_handle_debug(lps0_device_handle, "_DSM function %u evaluation %s\n", - func, out_obj ? "successful" : "failed"); -} - -static bool acpi_match_vendor_name(void) -{ -#ifdef CONFIG_X86 - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) - return true; -#endif - return false; -} - -static int lps0_device_attach(struct acpi_device *adev, - const struct acpi_device_id *not_used) -{ - union acpi_object *out_obj; - - if (lps0_device_handle) - return 0; - - if (!(acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0)) - return 0; - - if (acpi_match_vendor_name()) { - guid_parse(ACPI_LPS0_DSM_UUID_AMD, &lps0_dsm_guid); - out_obj = acpi_evaluate_dsm(adev->handle, &lps0_dsm_guid, 0, 0, NULL); - rev_id = 0; - } else { - guid_parse(ACPI_LPS0_DSM_UUID, &lps0_dsm_guid); - out_obj = acpi_evaluate_dsm(adev->handle, &lps0_dsm_guid, 1, 0, NULL); - rev_id = 1; - } - - /* Check if the _DSM is present and as expected. */ - if (!out_obj || out_obj->type != ACPI_TYPE_BUFFER) { - acpi_handle_debug(adev->handle, - "_DSM function 0 evaluation failed\n"); - return 0; - } - - lps0_dsm_func_mask = *(char *)out_obj->buffer.pointer; - - ACPI_FREE(out_obj); - - acpi_handle_debug(adev->handle, "_DSM function mask: 0x%x\n", - lps0_dsm_func_mask); - - lps0_device_handle = adev->handle; - - if (acpi_match_vendor_name()) - lpi_device_get_constraints_amd(); - else - lpi_device_get_constraints(); - - /* - * Use suspend-to-idle by default if the default suspend mode was not - * set from the command line. - */ - if (mem_sleep_default > PM_SUSPEND_MEM && !acpi_sleep_default_s3) - mem_sleep_current = PM_SUSPEND_TO_IDLE; - - /* - * Some LPS0 systems, like ASUS Zenbook UX430UNR/i7-8550U, require the - * EC GPE to be enabled while suspended for certain wakeup devices to - * work, so mark it as wakeup-capable. - */ - acpi_ec_mark_gpe_for_wake(); - - return 0; -} - -static struct acpi_scan_handler lps0_handler = { - .ids = lps0_device_ids, - .attach = lps0_device_attach, -}; - -static int acpi_s2idle_begin(void) +int acpi_s2idle_begin(void) { acpi_scan_lock_acquire(); return 0; } -static int acpi_s2idle_prepare(void) +int acpi_s2idle_prepare(void) { if (acpi_sci_irq_valid()) { enable_irq_wake(acpi_sci_irq); @@ -1107,24 +707,7 @@ static int acpi_s2idle_prepare(void) return 0; } -static int acpi_s2idle_prepare_late(void) -{ - if (!lps0_device_handle || sleep_no_lps0) - return 0; - - if (pm_debug_messages_on) - lpi_check_constraints(); - - if (acpi_match_vendor_name()) { - acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_OFF_AMD); - } else { - acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_OFF); - acpi_sleep_run_lps0_dsm(ACPI_LPS0_ENTRY); - } - return 0; -} - -static bool acpi_s2idle_wake(void) +bool acpi_s2idle_wake(void) { if (!acpi_sci_irq_valid()) return pm_wakeup_pending(); @@ -1190,20 +773,7 @@ static bool acpi_s2idle_wake(void) return false; } -static void acpi_s2idle_restore_early(void) -{ - if (!lps0_device_handle || sleep_no_lps0) - return; - - if (acpi_match_vendor_name()) { - acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_ON_AMD); - } else { - acpi_sleep_run_lps0_dsm(ACPI_LPS0_EXIT); - acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_ON); - } -} - -static void acpi_s2idle_restore(void) +void acpi_s2idle_restore(void) { /* * Drain pending events before restoring the working-state configuration @@ -1225,7 +795,7 @@ static void acpi_s2idle_restore(void) } } -static void acpi_s2idle_end(void) +void acpi_s2idle_end(void) { acpi_scan_lock_release(); } @@ -1233,13 +803,16 @@ static void acpi_s2idle_end(void) static const struct platform_s2idle_ops acpi_s2idle_ops = { .begin = acpi_s2idle_begin, .prepare = acpi_s2idle_prepare, - .prepare_late = acpi_s2idle_prepare_late, .wake = acpi_s2idle_wake, - .restore_early = acpi_s2idle_restore_early, .restore = acpi_s2idle_restore, .end = acpi_s2idle_end, }; +void __weak acpi_s2idle_setup(void) +{ + s2idle_set_ops(&acpi_s2idle_ops); +} + static void acpi_sleep_suspend_setup(void) { int i; @@ -1251,13 +824,11 @@ static void acpi_sleep_suspend_setup(void) suspend_set_ops(old_suspend_ordering ? &acpi_suspend_ops_old : &acpi_suspend_ops); - acpi_scan_add_handler(&lps0_handler); - s2idle_set_ops(&acpi_s2idle_ops); + acpi_s2idle_setup(); } #else /* !CONFIG_SUSPEND */ #define s2idle_wakeup (false) -#define lps0_device_handle (NULL) static inline void acpi_sleep_suspend_setup(void) {} #endif /* !CONFIG_SUSPEND */ diff --git a/drivers/acpi/sleep.h b/drivers/acpi/sleep.h index 3d90480ce1b1e..1856f76ac83f7 100644 --- a/drivers/acpi/sleep.h +++ b/drivers/acpi/sleep.h @@ -15,3 +15,19 @@ static inline acpi_status acpi_set_waking_vector(u32 wakeup_address) return acpi_set_firmware_waking_vector( (acpi_physical_address)wakeup_address, 0); } + +extern int acpi_s2idle_begin(void); +extern int acpi_s2idle_prepare(void); +extern int acpi_s2idle_prepare_late(void); +extern bool acpi_s2idle_wake(void); +extern void acpi_s2idle_restore_early(void); +extern void acpi_s2idle_restore(void); +extern void acpi_s2idle_end(void); + +extern void acpi_s2idle_setup(void); + +#ifdef CONFIG_ACPI_SLEEP +extern bool acpi_sleep_default_s3; +#else +#define acpi_sleep_default_s3 (1) +#endif diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c new file mode 100644 index 0000000000000..25fea34b544c6 --- /dev/null +++ b/drivers/acpi/x86/s2idle.c @@ -0,0 +1,460 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Architecture-specific ACPI-based support for suspend-to-idle. + * + * Author: Rafael J. Wysocki + * Author: Srinivas Pandruvada + * Author: Shyam Sundar S K + * + * On platforms supporting the Low Power S0 Idle interface there is an ACPI + * device object with the PNP0D80 compatible device ID (System Power Management + * Controller) and a specific _DSM method under it. That method, if present, + * can be used to indicate to the platform that the OS is transitioning into a + * low-power state in which certain types of activity are not desirable or that + * it is leaving such a state, which allows the platform to adjust its operation + * mode accordingly. + */ + +#include +#include +#include + +#include "../sleep.h" + +#ifdef CONFIG_SUSPEND + +static bool sleep_no_lps0 __read_mostly; +module_param(sleep_no_lps0, bool, 0644); +MODULE_PARM_DESC(sleep_no_lps0, "Do not use the special LPS0 device interface"); + +static const struct acpi_device_id lps0_device_ids[] = { + {"PNP0D80", }, + {"", }, +}; + +#define ACPI_LPS0_DSM_UUID "c4eb40a0-6cd2-11e2-bcfd-0800200c9a66" + +#define ACPI_LPS0_GET_DEVICE_CONSTRAINTS 1 +#define ACPI_LPS0_SCREEN_OFF 3 +#define ACPI_LPS0_SCREEN_ON 4 +#define ACPI_LPS0_ENTRY 5 +#define ACPI_LPS0_EXIT 6 + +/* AMD */ +#define ACPI_LPS0_DSM_UUID_AMD "e3f32452-febc-43ce-9039-932122d37721" +#define ACPI_LPS0_SCREEN_OFF_AMD 4 +#define ACPI_LPS0_SCREEN_ON_AMD 5 + +static acpi_handle lps0_device_handle; +static guid_t lps0_dsm_guid; +static char lps0_dsm_func_mask; + +/* Device constraint entry structure */ +struct lpi_device_info { + char *name; + int enabled; + union acpi_object *package; +}; + +/* Constraint package structure */ +struct lpi_device_constraint { + int uid; + int min_dstate; + int function_states; +}; + +struct lpi_constraints { + acpi_handle handle; + int min_dstate; +}; + +/* AMD */ +/* Device constraint entry structure */ +struct lpi_device_info_amd { + int revision; + int count; + union acpi_object *package; +}; + +/* Constraint package structure */ +struct lpi_device_constraint_amd { + char *name; + int enabled; + int function_states; + int min_dstate; +}; + +static struct lpi_constraints *lpi_constraints_table; +static int lpi_constraints_table_size; +static int rev_id; + +static void lpi_device_get_constraints_amd(void) +{ + union acpi_object *out_obj; + int i, j, k; + + out_obj = acpi_evaluate_dsm_typed(lps0_device_handle, &lps0_dsm_guid, + 1, ACPI_LPS0_GET_DEVICE_CONSTRAINTS, + NULL, ACPI_TYPE_PACKAGE); + + if (!out_obj) + return; + + acpi_handle_debug(lps0_device_handle, "_DSM function 1 eval %s\n", + out_obj ? "successful" : "failed"); + + for (i = 0; i < out_obj->package.count; i++) { + union acpi_object *package = &out_obj->package.elements[i]; + struct lpi_device_info_amd info = { }; + + if (package->type == ACPI_TYPE_INTEGER) { + switch (i) { + case 0: + info.revision = package->integer.value; + break; + case 1: + info.count = package->integer.value; + break; + } + } else if (package->type == ACPI_TYPE_PACKAGE) { + lpi_constraints_table = kcalloc(package->package.count, + sizeof(*lpi_constraints_table), + GFP_KERNEL); + + if (!lpi_constraints_table) + goto free_acpi_buffer; + + acpi_handle_debug(lps0_device_handle, + "LPI: constraints list begin:\n"); + + for (j = 0; j < package->package.count; ++j) { + union acpi_object *info_obj = &package->package.elements[j]; + struct lpi_device_constraint_amd dev_info = {}; + struct lpi_constraints *list; + acpi_status status; + + for (k = 0; k < info_obj->package.count; ++k) { + union acpi_object *obj = &info_obj->package.elements[k]; + union acpi_object *obj_new; + + list = &lpi_constraints_table[lpi_constraints_table_size]; + list->min_dstate = -1; + + obj_new = &obj[k]; + switch (k) { + case 0: + dev_info.enabled = obj->integer.value; + break; + case 1: + dev_info.name = obj->string.pointer; + break; + case 2: + dev_info.function_states = obj->integer.value; + break; + case 3: + dev_info.min_dstate = obj->integer.value; + break; + } + + if (!dev_info.enabled || !dev_info.name || + !dev_info.min_dstate) + continue; + + status = acpi_get_handle(NULL, dev_info.name, + &list->handle); + if (ACPI_FAILURE(status)) + continue; + + acpi_handle_debug(lps0_device_handle, + "Name:%s\n", dev_info.name); + + list->min_dstate = dev_info.min_dstate; + + if (list->min_dstate < 0) { + acpi_handle_debug(lps0_device_handle, + "Incomplete constraint defined\n"); + continue; + } + } + lpi_constraints_table_size++; + } + } + } + + acpi_handle_debug(lps0_device_handle, "LPI: constraints list end\n"); + +free_acpi_buffer: + ACPI_FREE(out_obj); +} + +static void lpi_device_get_constraints(void) +{ + union acpi_object *out_obj; + int i; + + out_obj = acpi_evaluate_dsm_typed(lps0_device_handle, &lps0_dsm_guid, + 1, ACPI_LPS0_GET_DEVICE_CONSTRAINTS, + NULL, ACPI_TYPE_PACKAGE); + + acpi_handle_debug(lps0_device_handle, "_DSM function 1 eval %s\n", + out_obj ? "successful" : "failed"); + + if (!out_obj) + return; + + lpi_constraints_table = kcalloc(out_obj->package.count, + sizeof(*lpi_constraints_table), + GFP_KERNEL); + if (!lpi_constraints_table) + goto free_acpi_buffer; + + acpi_handle_debug(lps0_device_handle, "LPI: constraints list begin:\n"); + + for (i = 0; i < out_obj->package.count; i++) { + struct lpi_constraints *constraint; + acpi_status status; + union acpi_object *package = &out_obj->package.elements[i]; + struct lpi_device_info info = { }; + int package_count = 0, j; + + if (!package) + continue; + + for (j = 0; j < package->package.count; ++j) { + union acpi_object *element = + &(package->package.elements[j]); + + switch (element->type) { + case ACPI_TYPE_INTEGER: + info.enabled = element->integer.value; + break; + case ACPI_TYPE_STRING: + info.name = element->string.pointer; + break; + case ACPI_TYPE_PACKAGE: + package_count = element->package.count; + info.package = element->package.elements; + break; + } + } + + if (!info.enabled || !info.package || !info.name) + continue; + + constraint = &lpi_constraints_table[lpi_constraints_table_size]; + + status = acpi_get_handle(NULL, info.name, &constraint->handle); + if (ACPI_FAILURE(status)) + continue; + + acpi_handle_debug(lps0_device_handle, + "index:%d Name:%s\n", i, info.name); + + constraint->min_dstate = -1; + + for (j = 0; j < package_count; ++j) { + union acpi_object *info_obj = &info.package[j]; + union acpi_object *cnstr_pkg; + union acpi_object *obj; + struct lpi_device_constraint dev_info; + + switch (info_obj->type) { + case ACPI_TYPE_INTEGER: + /* version */ + break; + case ACPI_TYPE_PACKAGE: + if (info_obj->package.count < 2) + break; + + cnstr_pkg = info_obj->package.elements; + obj = &cnstr_pkg[0]; + dev_info.uid = obj->integer.value; + obj = &cnstr_pkg[1]; + dev_info.min_dstate = obj->integer.value; + + acpi_handle_debug(lps0_device_handle, + "uid:%d min_dstate:%s\n", + dev_info.uid, + acpi_power_state_string(dev_info.min_dstate)); + + constraint->min_dstate = dev_info.min_dstate; + break; + } + } + + if (constraint->min_dstate < 0) { + acpi_handle_debug(lps0_device_handle, + "Incomplete constraint defined\n"); + continue; + } + + lpi_constraints_table_size++; + } + + acpi_handle_debug(lps0_device_handle, "LPI: constraints list end\n"); + +free_acpi_buffer: + ACPI_FREE(out_obj); +} + +static void lpi_check_constraints(void) +{ + int i; + + for (i = 0; i < lpi_constraints_table_size; ++i) { + acpi_handle handle = lpi_constraints_table[i].handle; + struct acpi_device *adev; + + if (!handle || acpi_bus_get_device(handle, &adev)) + continue; + + acpi_handle_debug(handle, + "LPI: required min power state:%s current power state:%s\n", + acpi_power_state_string(lpi_constraints_table[i].min_dstate), + acpi_power_state_string(adev->power.state)); + + if (!adev->flags.power_manageable) { + acpi_handle_info(handle, "LPI: Device not power manageable\n"); + lpi_constraints_table[i].handle = NULL; + continue; + } + + if (adev->power.state < lpi_constraints_table[i].min_dstate) + acpi_handle_info(handle, + "LPI: Constraint not met; min power state:%s current power state:%s\n", + acpi_power_state_string(lpi_constraints_table[i].min_dstate), + acpi_power_state_string(adev->power.state)); + } +} + +static void acpi_sleep_run_lps0_dsm(unsigned int func) +{ + union acpi_object *out_obj; + + if (!(lps0_dsm_func_mask & (1 << func))) + return; + + out_obj = acpi_evaluate_dsm(lps0_device_handle, &lps0_dsm_guid, rev_id, func, NULL); + ACPI_FREE(out_obj); + + acpi_handle_debug(lps0_device_handle, "_DSM function %u evaluation %s\n", + func, out_obj ? "successful" : "failed"); +} + +static bool acpi_s2idle_vendor_amd(void) +{ + return boot_cpu_data.x86_vendor == X86_VENDOR_AMD; +} + +static int lps0_device_attach(struct acpi_device *adev, + const struct acpi_device_id *not_used) +{ + union acpi_object *out_obj; + + if (lps0_device_handle) + return 0; + + if (!(acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0)) + return 0; + + if (acpi_s2idle_vendor_amd()) { + guid_parse(ACPI_LPS0_DSM_UUID_AMD, &lps0_dsm_guid); + out_obj = acpi_evaluate_dsm(adev->handle, &lps0_dsm_guid, 0, 0, NULL); + rev_id = 0; + } else { + guid_parse(ACPI_LPS0_DSM_UUID, &lps0_dsm_guid); + out_obj = acpi_evaluate_dsm(adev->handle, &lps0_dsm_guid, 1, 0, NULL); + rev_id = 1; + } + + /* Check if the _DSM is present and as expected. */ + if (!out_obj || out_obj->type != ACPI_TYPE_BUFFER) { + acpi_handle_debug(adev->handle, + "_DSM function 0 evaluation failed\n"); + return 0; + } + + lps0_dsm_func_mask = *(char *)out_obj->buffer.pointer; + + ACPI_FREE(out_obj); + + acpi_handle_debug(adev->handle, "_DSM function mask: 0x%x\n", + lps0_dsm_func_mask); + + lps0_device_handle = adev->handle; + + if (acpi_s2idle_vendor_amd()) + lpi_device_get_constraints_amd(); + else + lpi_device_get_constraints(); + + /* + * Use suspend-to-idle by default if the default suspend mode was not + * set from the command line. + */ + if (mem_sleep_default > PM_SUSPEND_MEM && !acpi_sleep_default_s3) + mem_sleep_current = PM_SUSPEND_TO_IDLE; + + /* + * Some LPS0 systems, like ASUS Zenbook UX430UNR/i7-8550U, require the + * EC GPE to be enabled while suspended for certain wakeup devices to + * work, so mark it as wakeup-capable. + */ + acpi_ec_mark_gpe_for_wake(); + + return 0; +} + +static struct acpi_scan_handler lps0_handler = { + .ids = lps0_device_ids, + .attach = lps0_device_attach, +}; + +int acpi_s2idle_prepare_late(void) +{ + if (!lps0_device_handle || sleep_no_lps0) + return 0; + + if (pm_debug_messages_on) + lpi_check_constraints(); + + if (acpi_s2idle_vendor_amd()) { + acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_OFF_AMD); + } else { + acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_OFF); + acpi_sleep_run_lps0_dsm(ACPI_LPS0_ENTRY); + } + + return 0; +} + +void acpi_s2idle_restore_early(void) +{ + if (!lps0_device_handle || sleep_no_lps0) + return; + + if (acpi_s2idle_vendor_amd()) { + acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_ON_AMD); + } else { + acpi_sleep_run_lps0_dsm(ACPI_LPS0_EXIT); + acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_ON); + } +} + +static const struct platform_s2idle_ops acpi_s2idle_ops_lps0 = { + .begin = acpi_s2idle_begin, + .prepare = acpi_s2idle_prepare, + .prepare_late = acpi_s2idle_prepare_late, + .wake = acpi_s2idle_wake, + .restore_early = acpi_s2idle_restore_early, + .restore = acpi_s2idle_restore, + .end = acpi_s2idle_end, +}; + +void acpi_s2idle_setup(void) +{ + acpi_scan_add_handler(&lps0_handler); + s2idle_set_ops(&acpi_s2idle_ops_lps0); +} + +#endif /* CONFIG_SUSPEND */ -- GitLab From 658a337a606f48b7ebe451591f7681d383fa115e Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Thu, 17 Dec 2020 16:59:04 +0100 Subject: [PATCH 0748/1894] s390/dasd: fix hanging device offline processing For an LCU update a read unit address configuration IO is required. This is started using sleep_on(), which has early exit paths in case the device is not usable for IO. For example when it is in offline processing. In those cases the LCU update should fail and not be retried. Therefore lcu_update_work checks if EOPNOTSUPP is returned or not. Commit 41995342b40c ("s390/dasd: fix endless loop after read unit address configuration") accidentally removed the EOPNOTSUPP return code from read_unit_address_configuration(), which in turn might lead to an endless loop of the LCU update in offline processing. Fix by returning EOPNOTSUPP again if the device is not able to perform the request. Fixes: 41995342b40c ("s390/dasd: fix endless loop after read unit address configuration") Cc: stable@vger.kernel.org #5.3 Signed-off-by: Stefan Haberland Reviewed-by: Jan Hoeppner Signed-off-by: Jens Axboe --- drivers/s390/block/dasd_alias.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c index 99f86612f7751..31e8b5d48e868 100644 --- a/drivers/s390/block/dasd_alias.c +++ b/drivers/s390/block/dasd_alias.c @@ -462,11 +462,19 @@ static int read_unit_address_configuration(struct dasd_device *device, spin_unlock_irqrestore(&lcu->lock, flags); rc = dasd_sleep_on(cqr); - if (rc && !suborder_not_supported(cqr)) { + if (!rc) + goto out; + + if (suborder_not_supported(cqr)) { + /* suborder not supported or device unusable for IO */ + rc = -EOPNOTSUPP; + } else { + /* IO failed but should be retried */ spin_lock_irqsave(&lcu->lock, flags); lcu->flags |= NEED_UAC_UPDATE; spin_unlock_irqrestore(&lcu->lock, flags); } +out: dasd_sfree_request(cqr, cqr->memdev); return rc; } -- GitLab From a29ea01653493b94ea12bb2b89d1564a265081b6 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Thu, 17 Dec 2020 16:59:05 +0100 Subject: [PATCH 0749/1894] s390/dasd: prevent inconsistent LCU device data Prevent _lcu_update from adding a device to a pavgroup if the LCU still requires an update. The data is not reliable any longer and in parallel devices might have been moved on the lists already. This might lead to list corruptions or invalid PAV grouping. Only add devices to a pavgroup if the LCU is up to date. Additional steps are taken by the scheduled lcu update. Fixes: 8e09f21574ea ("[S390] dasd: add hyper PAV support to DASD device driver, part 1") Cc: stable@vger.kernel.org Signed-off-by: Stefan Haberland Reviewed-by: Jan Hoeppner Signed-off-by: Jens Axboe --- drivers/s390/block/dasd_alias.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c index 31e8b5d48e868..f841518de6c55 100644 --- a/drivers/s390/block/dasd_alias.c +++ b/drivers/s390/block/dasd_alias.c @@ -511,6 +511,14 @@ static int _lcu_update(struct dasd_device *refdev, struct alias_lcu *lcu) return rc; spin_lock_irqsave(&lcu->lock, flags); + /* + * there is another update needed skip the remaining handling + * the data might already be outdated + * but especially do not add the device to an LCU with pending + * update + */ + if (lcu->flags & NEED_UAC_UPDATE) + goto out; lcu->pav = NO_PAV; for (i = 0; i < MAX_DEVICES_PER_LCU; ++i) { switch (lcu->uac->unit[i].ua_type) { @@ -529,6 +537,7 @@ static int _lcu_update(struct dasd_device *refdev, struct alias_lcu *lcu) alias_list) { _add_device_to_lcu(lcu, device, refdev); } +out: spin_unlock_irqrestore(&lcu->lock, flags); return 0; } -- GitLab From 0ede91f83aa335da1c3ec68eb0f9e228f269f6d8 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Thu, 17 Dec 2020 16:59:06 +0100 Subject: [PATCH 0750/1894] s390/dasd: fix list corruption of pavgroup group list dasd_alias_add_device() moves devices to the active_devices list in case of a scheduled LCU update regardless if they have previously been in a pavgroup or not. Example: device A and B are in the same pavgroup. Device A has already been in a pavgroup and the private->pavgroup pointer is set and points to a valid pavgroup. While going through dasd_add_device it is moved from the pavgroup to the active_devices list. In parallel device B might be removed from the same pavgroup in remove_device_from_lcu() which in turn checks if the group is empty and deletes it accordingly because device A has already been removed from there. When now device A enters remove_device_from_lcu() it is tried to remove it from the pavgroup again because the pavgroup pointer is still set and again the empty group will be cleaned up which leads to a list corruption. Fix by setting private->pavgroup to NULL in dasd_add_device. If the device has been the last device on the pavgroup an empty pavgroup remains but this will be cleaned up by the scheduled lcu_update which iterates over all existing pavgroups. Fixes: 8e09f21574ea ("[S390] dasd: add hyper PAV support to DASD device driver, part 1") Cc: stable@vger.kernel.org Signed-off-by: Stefan Haberland Reviewed-by: Jan Hoeppner Signed-off-by: Jens Axboe --- drivers/s390/block/dasd_alias.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c index f841518de6c55..60344e13e87b4 100644 --- a/drivers/s390/block/dasd_alias.c +++ b/drivers/s390/block/dasd_alias.c @@ -642,6 +642,7 @@ int dasd_alias_add_device(struct dasd_device *device) } if (lcu->flags & UPDATE_PENDING) { list_move(&device->alias_list, &lcu->active_devices); + private->pavgroup = NULL; _schedule_lcu_update(lcu, device); } spin_unlock_irqrestore(&lcu->lock, flags); -- GitLab From 53a7f655834c7c335bf683f248208d4fbe4b47bc Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Thu, 17 Dec 2020 16:59:07 +0100 Subject: [PATCH 0751/1894] s390/dasd: fix list corruption of lcu list In dasd_alias_disconnect_device_from_lcu the device is removed from any list on the LCU. Afterwards the LCU is removed from the lcu list if it does not contain devices any longer. The lcu->lock protects the lcu from parallel updates. But to cancel all workers and wait for completion the lcu->lock has to be unlocked. If two devices are removed in parallel and both are removed from the LCU the first device that takes the lcu->lock again will delete the LCU because it is already empty but the second device also tries to free the LCU which leads to a list corruption of the lcu list. Fix by removing the device right before the lcu is checked without unlocking the lcu->lock in between. Fixes: 8e09f21574ea ("[S390] dasd: add hyper PAV support to DASD device driver, part 1") Cc: stable@vger.kernel.org Signed-off-by: Stefan Haberland Reviewed-by: Jan Hoeppner Signed-off-by: Jens Axboe --- drivers/s390/block/dasd_alias.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c index 60344e13e87b4..dc78a523a69f2 100644 --- a/drivers/s390/block/dasd_alias.c +++ b/drivers/s390/block/dasd_alias.c @@ -256,7 +256,6 @@ void dasd_alias_disconnect_device_from_lcu(struct dasd_device *device) return; device->discipline->get_uid(device, &uid); spin_lock_irqsave(&lcu->lock, flags); - list_del_init(&device->alias_list); /* make sure that the workers don't use this device */ if (device == lcu->suc_data.device) { spin_unlock_irqrestore(&lcu->lock, flags); @@ -283,6 +282,7 @@ void dasd_alias_disconnect_device_from_lcu(struct dasd_device *device) spin_lock_irqsave(&aliastree.lock, flags); spin_lock(&lcu->lock); + list_del_init(&device->alias_list); if (list_empty(&lcu->grouplist) && list_empty(&lcu->active_devices) && list_empty(&lcu->inactive_devices)) { -- GitLab From 71425189b2b75336d869cfdedea45c9d319fc9c9 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 4 Dec 2020 20:13:54 +0100 Subject: [PATCH 0752/1894] blk-mq: Don't complete on a remote CPU in force threaded mode With force threaded interrupts enabled, raising softirq from an SMP function call will always result in waking the ksoftirqd thread. This is not optimal given that the thread runs at SCHED_OTHER priority. Completing the request in hard IRQ-context on PREEMPT_RT (which enforces the force threaded mode) is bad because the completion handler may acquire sleeping locks which violate the locking context. Disable request completing on a remote CPU in force threaded mode. Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Christoph Hellwig Reviewed-by: Daniel Wagner Signed-off-by: Jens Axboe --- block/blk-mq.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/block/blk-mq.c b/block/blk-mq.c index a428798e1c5c6..c338c9bc5a2c5 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -650,6 +650,14 @@ static inline bool blk_mq_complete_need_ipi(struct request *rq) if (!IS_ENABLED(CONFIG_SMP) || !test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags)) return false; + /* + * With force threaded interrupts enabled, raising softirq from an SMP + * function call will always result in waking the ksoftirqd thread. + * This is probably worse than completing the request on a different + * cache domain. + */ + if (force_irqthreads) + return false; /* same CPU or cache domain? Complete locally */ if (cpu == rq->mq_ctx->cpu || -- GitLab From 3ac874fa84d1baaf0c0175f2a1499f5d88d528b2 Mon Sep 17 00:00:00 2001 From: Sylwester Dziedziuch Date: Thu, 22 Oct 2020 12:39:36 +0200 Subject: [PATCH 0753/1894] i40e: Fix Error I40E_AQ_RC_EINVAL when removing VFs When removing VFs for PF added to bridge there was an error I40E_AQ_RC_EINVAL. It was caused by not properly resetting and reinitializing PF when adding/removing VFs. Changed how reset is performed when adding/removing VFs to properly reinitialize PFs VSI. Fixes: fc60861e9b00 ("i40e: start up in VEPA mode by default") Signed-off-by: Sylwester Dziedziuch Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e.h | 3 +++ drivers/net/ethernet/intel/i40e/i40e_main.c | 10 ++++++++++ drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 4 ++-- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index d231a2cdd98ff..118473dfdcbd2 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -120,6 +120,7 @@ enum i40e_state_t { __I40E_RESET_INTR_RECEIVED, __I40E_REINIT_REQUESTED, __I40E_PF_RESET_REQUESTED, + __I40E_PF_RESET_AND_REBUILD_REQUESTED, __I40E_CORE_RESET_REQUESTED, __I40E_GLOBAL_RESET_REQUESTED, __I40E_EMP_RESET_INTR_RECEIVED, @@ -146,6 +147,8 @@ enum i40e_state_t { }; #define I40E_PF_RESET_FLAG BIT_ULL(__I40E_PF_RESET_REQUESTED) +#define I40E_PF_RESET_AND_REBUILD_FLAG \ + BIT_ULL(__I40E_PF_RESET_AND_REBUILD_REQUESTED) /* VSI state flags */ enum i40e_vsi_state_t { diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 1337686bd0998..1db482d310c2d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -36,6 +36,8 @@ static int i40e_setup_misc_vector(struct i40e_pf *pf); static void i40e_determine_queue_usage(struct i40e_pf *pf); static int i40e_setup_pf_filter_control(struct i40e_pf *pf); static void i40e_prep_for_reset(struct i40e_pf *pf, bool lock_acquired); +static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit, + bool lock_acquired); static int i40e_reset(struct i40e_pf *pf); static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired); static int i40e_setup_misc_vector_for_recovery_mode(struct i40e_pf *pf); @@ -8536,6 +8538,14 @@ void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags, bool lock_acquired) "FW LLDP is disabled\n" : "FW LLDP is enabled\n"); + } else if (reset_flags & I40E_PF_RESET_AND_REBUILD_FLAG) { + /* Request a PF Reset + * + * Resets PF and reinitializes PFs VSI. + */ + i40e_prep_for_reset(pf, lock_acquired); + i40e_reset_and_rebuild(pf, true, lock_acquired); + } else if (reset_flags & BIT_ULL(__I40E_REINIT_REQUESTED)) { int v; diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 729c4f0d5ac52..21ee56420c3ae 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -1772,7 +1772,7 @@ int i40e_pci_sriov_configure(struct pci_dev *pdev, int num_vfs) if (num_vfs) { if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) { pf->flags |= I40E_FLAG_VEB_MODE_ENABLED; - i40e_do_reset_safe(pf, I40E_PF_RESET_FLAG); + i40e_do_reset_safe(pf, I40E_PF_RESET_AND_REBUILD_FLAG); } ret = i40e_pci_sriov_enable(pdev, num_vfs); goto sriov_configure_out; @@ -1781,7 +1781,7 @@ int i40e_pci_sriov_configure(struct pci_dev *pdev, int num_vfs) if (!pci_vfs_assigned(pf->pdev)) { i40e_free_vfs(pf); pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED; - i40e_do_reset_safe(pf, I40E_PF_RESET_FLAG); + i40e_do_reset_safe(pf, I40E_PF_RESET_AND_REBUILD_FLAG); } else { dev_warn(&pdev->dev, "Unable to free VFs because some are assigned to VMs.\n"); ret = -EINVAL; -- GitLab From 0b9491b621196a5d7f163dde81d98e0687bdba97 Mon Sep 17 00:00:00 2001 From: Zheng Yongjun Date: Wed, 16 Dec 2020 21:27:33 +0800 Subject: [PATCH 0754/1894] watchdog: convert comma to semicolon Replace a comma between expression statements by a semicolon. Signed-off-by: Zheng Yongjun Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20201216132733.15635-1-zhengyongjun3@huawei.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/mpc8xxx_wdt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/watchdog/mpc8xxx_wdt.c b/drivers/watchdog/mpc8xxx_wdt.c index 3fc457bc16db3..2f7ded32e8780 100644 --- a/drivers/watchdog/mpc8xxx_wdt.c +++ b/drivers/watchdog/mpc8xxx_wdt.c @@ -175,8 +175,8 @@ static int mpc8xxx_wdt_probe(struct platform_device *ofdev) spin_lock_init(&ddata->lock); - ddata->wdd.info = &mpc8xxx_wdt_info, - ddata->wdd.ops = &mpc8xxx_wdt_ops, + ddata->wdd.info = &mpc8xxx_wdt_info; + ddata->wdd.ops = &mpc8xxx_wdt_ops; ddata->wdd.timeout = WATCHDOG_TIMEOUT; watchdog_init_timeout(&ddata->wdd, timeout, dev); -- GitLab From 6cc980e3f52e2e8db6d0d3bad076b495bd492658 Mon Sep 17 00:00:00 2001 From: Harish Kasiviswanathan Date: Wed, 16 Dec 2020 17:04:23 -0500 Subject: [PATCH 0755/1894] drm/amdkfd: PCIe atomics required for gfx10 GFX10 CP firmware expects PCIe atomics support. Don't enumerate GFX10 devices on platforms (PCIe v2) that don't support PCIe atomics. Currently, some of the applications like clinfo soft hangs on platforms without PCIe atomics support. Signed-off-by: Harish Kasiviswanathan Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 50922ff2927b4..72c893fff61ae 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -422,7 +422,7 @@ static const struct kfd_device_info navi10_device_info = { .mqd_size_aligned = MQD_SIZE_ALIGNED, .needs_iommu_device = false, .supports_cwsr = true, - .needs_pci_atomics = false, + .needs_pci_atomics = true, .num_sdma_engines = 2, .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 8, @@ -440,7 +440,7 @@ static const struct kfd_device_info navi12_device_info = { .mqd_size_aligned = MQD_SIZE_ALIGNED, .needs_iommu_device = false, .supports_cwsr = true, - .needs_pci_atomics = false, + .needs_pci_atomics = true, .num_sdma_engines = 2, .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 8, @@ -458,7 +458,7 @@ static const struct kfd_device_info navi14_device_info = { .mqd_size_aligned = MQD_SIZE_ALIGNED, .needs_iommu_device = false, .supports_cwsr = true, - .needs_pci_atomics = false, + .needs_pci_atomics = true, .num_sdma_engines = 2, .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 8, @@ -476,7 +476,7 @@ static const struct kfd_device_info sienna_cichlid_device_info = { .mqd_size_aligned = MQD_SIZE_ALIGNED, .needs_iommu_device = false, .supports_cwsr = true, - .needs_pci_atomics = false, + .needs_pci_atomics = true, .num_sdma_engines = 4, .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 8, @@ -494,7 +494,7 @@ static const struct kfd_device_info navy_flounder_device_info = { .mqd_size_aligned = MQD_SIZE_ALIGNED, .needs_iommu_device = false, .supports_cwsr = true, - .needs_pci_atomics = false, + .needs_pci_atomics = true, .num_sdma_engines = 2, .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 8, @@ -530,7 +530,7 @@ static const struct kfd_device_info dimgrey_cavefish_device_info = { .mqd_size_aligned = MQD_SIZE_ALIGNED, .needs_iommu_device = false, .supports_cwsr = true, - .needs_pci_atomics = false, + .needs_pci_atomics = true, .num_sdma_engines = 2, .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 8, -- GitLab From 9e3a6ab74ff80128c337d5f95ce1867a452dc67e Mon Sep 17 00:00:00 2001 From: Xiaomeng Hou Date: Thu, 17 Dec 2020 10:41:10 +0800 Subject: [PATCH 0756/1894] drm/amd/pm: check pmfw version before issuing RlcPowerNotify message Only pmfw version behind v4.63.23.00 could support this message. Signed-off-by: Xiaomeng Hou Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index 9bccf2ad038c8..8cb4fcee9a2c3 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -724,8 +724,13 @@ static int vangogh_set_fine_grain_gfx_freq_parameters(struct smu_context *smu) static int vangogh_system_features_control(struct smu_context *smu, bool en) { - return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_RlcPowerNotify, - en ? RLC_STATUS_NORMAL : RLC_STATUS_OFF, NULL); + struct amdgpu_device *adev = smu->adev; + + if (adev->pm.fw_version >= 0x43f1700) + return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_RlcPowerNotify, + en ? RLC_STATUS_NORMAL : RLC_STATUS_OFF, NULL); + else + return 0; } static const struct pptable_funcs vangogh_ppt_funcs = { -- GitLab From 088fb29b40f2c78bfe01cebce1a1506b6f7e56d1 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 16 Dec 2020 11:36:28 -0500 Subject: [PATCH 0757/1894] drm/amdgpu: fix vbios reservation handling on SR-IOV There is no reserveration so set the size to 0. Fixes a regression on SR-IOV. Reviewed-by: Guchun Chen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index c2ced5be6d7bf..6e679db5e46f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -496,7 +496,8 @@ void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev) break; } - if (!amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_DCE)) { + if (amdgpu_sriov_vf(adev) || + !amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_DCE)) { size = 0; } else { size = amdgpu_gmc_get_vbios_fb_size(adev); -- GitLab From 8bee683384087a6275c9183a483435225f7bb209 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Mon, 14 Dec 2020 09:51:27 +0100 Subject: [PATCH 0758/1894] xsk: Fix memory leak for failed bind MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a possible memory leak when a bind of an AF_XDP socket fails. When the fill and completion rings are created, they are tied to the socket. But when the buffer pool is later created at bind time, the ownership of these two rings are transferred to the buffer pool as they might be shared between sockets (and the buffer pool cannot be created until we know what we are binding to). So, before the buffer pool is created, these two rings are cleaned up with the socket, and after they have been transferred they are cleaned up together with the buffer pool. The problem is that ownership was transferred before it was absolutely certain that the buffer pool could be created and initialized correctly and when one of these errors occurred, the fill and completion rings did neither belong to the socket nor the pool and where therefore leaked. Solve this by moving the ownership transfer to the point where the buffer pool has been completely set up and there is no way it can fail. Fixes: 7361f9c3d719 ("xsk: Move fill and completion rings to buffer pool") Reported-by: syzbot+cfa88ddd0655afa88763@syzkaller.appspotmail.com Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/20201214085127.3960-1-magnus.karlsson@gmail.com --- net/xdp/xsk.c | 4 ++++ net/xdp/xsk_buff_pool.c | 2 -- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index ac4a317038f1b..c6532d77fde76 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -878,6 +878,10 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) } } + /* FQ and CQ are now owned by the buffer pool and cleaned up with it. */ + xs->fq_tmp = NULL; + xs->cq_tmp = NULL; + xs->dev = dev; xs->zc = xs->umem->zc; xs->queue_id = qid; diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 67a4494d63b68..818b750609220 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -75,8 +75,6 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, pool->fq = xs->fq_tmp; pool->cq = xs->cq_tmp; - xs->fq_tmp = NULL; - xs->cq_tmp = NULL; for (i = 0; i < pool->free_heads_cnt; i++) { xskb = &pool->heads[i]; -- GitLab From 8bcbe3132c66c07d03f64d5da80be753359f2e92 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Fri, 20 Nov 2020 17:20:57 +0800 Subject: [PATCH 0759/1894] device-dax: delete a redundancy check in dev_dax_validate_align() After we have done the alignment check for the length of each range, the alignment check for dev_dax_size(dev_dax) is no longer needed, because it get the sum of the length of each range. Signed-off-by: Zhen Lei Link: https://lore.kernel.org/r/20201120092057.2144-1-thunder.leizhen@huawei.com Signed-off-by: Dan Williams --- drivers/dax/bus.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c index 27513d311242e..9761cb40d4bbe 100644 --- a/drivers/dax/bus.c +++ b/drivers/dax/bus.c @@ -1114,16 +1114,9 @@ static ssize_t align_show(struct device *dev, static ssize_t dev_dax_validate_align(struct dev_dax *dev_dax) { - resource_size_t dev_size = dev_dax_size(dev_dax); struct device *dev = &dev_dax->dev; int i; - if (dev_size > 0 && !alloc_is_aligned(dev_dax, dev_size)) { - dev_dbg(dev, "%s: align %u invalid for size %pa\n", - __func__, dev_dax->align, &dev_size); - return -EINVAL; - } - for (i = 0; i < dev_dax->nr_range; i++) { size_t len = range_len(&dev_dax->ranges[i].range); -- GitLab From f1340265726e0edf8a8cef28e665b28ad6302ce9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 2 Dec 2020 18:18:06 -0800 Subject: [PATCH 0760/1894] iavf: fix double-release of rtnl_lock This code does not jump to exit on an error in iavf_lan_add_device(), so the rtnl_unlock() from the normal path will follow. Fixes: b66c7bc1cd4d ("iavf: Refactor init state machine") Signed-off-by: Jakub Kicinski Reviewed-by: Tony Nguyen Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 95543dfd4fe77..0a867d64d4675 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -1834,11 +1834,9 @@ static int iavf_init_get_resources(struct iavf_adapter *adapter) netif_tx_stop_all_queues(netdev); if (CLIENT_ALLOWED(adapter)) { err = iavf_lan_add_device(adapter); - if (err) { - rtnl_unlock(); + if (err) dev_info(&pdev->dev, "Failed to add VF to client API service list: %d\n", err); - } } dev_info(&pdev->dev, "MAC address: %pM\n", adapter->hw.mac.addr); if (netdev->features & NETIF_F_GRO) -- GitLab From 6a29ab57f4e903264ea7b6663352244379c0a8e5 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 17 Dec 2020 14:01:48 +0300 Subject: [PATCH 0761/1894] cifs: Delete a stray unlock in cifs_swn_reconnect() The unlock is done in the caller, this is a stray which leads to a double unlock bug. Fixes: bf80e5d4259a ("cifs: Send witness register and unregister commands to userspace daemon") Signed-off-by: Dan Carpenter Reviewed-by: Samuel Cabrero Signed-off-by: Steve French --- fs/cifs/cifs_swn.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/cifs/cifs_swn.c b/fs/cifs/cifs_swn.c index c594e588a8b57..b2ef082d64380 100644 --- a/fs/cifs/cifs_swn.c +++ b/fs/cifs/cifs_swn.c @@ -285,8 +285,6 @@ static struct cifs_swn_reg *cifs_find_swn_reg(struct cifs_tcon *tcon) continue; } - mutex_unlock(&cifs_swnreg_idr_mutex); - cifs_dbg(FYI, "Existing swn registration for %s:%s found\n", swnreg->net_name, swnreg->share_name); -- GitLab From eedf8e88e5f08d95e1c6a33189bb4cdf2db5b79f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 17 Dec 2020 14:02:29 +0300 Subject: [PATCH 0762/1894] cifs: Unlock on errors in cifs_swn_reconnect() There are three error paths which need to unlock before returning. Fixes: 121d947d4fe1 ("cifs: Handle witness client move notification") Signed-off-by: Dan Carpenter Reviewed-by: Samuel Cabrero Signed-off-by: Steve French --- fs/cifs/cifs_swn.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/cifs/cifs_swn.c b/fs/cifs/cifs_swn.c index b2ef082d64380..91163d3cf8b7d 100644 --- a/fs/cifs/cifs_swn.c +++ b/fs/cifs/cifs_swn.c @@ -480,16 +480,16 @@ static int cifs_swn_store_swn_addr(const struct sockaddr_storage *new, static int cifs_swn_reconnect(struct cifs_tcon *tcon, struct sockaddr_storage *addr) { + int ret = 0; + /* Store the reconnect address */ mutex_lock(&tcon->ses->server->srv_mutex); if (!cifs_sockaddr_equal(&tcon->ses->server->dstaddr, addr)) { - int ret; - ret = cifs_swn_store_swn_addr(addr, &tcon->ses->server->dstaddr, &tcon->ses->server->swn_dstaddr); if (ret < 0) { cifs_dbg(VFS, "%s: failed to store address: %d\n", __func__, ret); - return ret; + goto unlock; } tcon->ses->server->use_swn_dstaddr = true; @@ -500,7 +500,7 @@ static int cifs_swn_reconnect(struct cifs_tcon *tcon, struct sockaddr_storage *a if (ret < 0) { cifs_dbg(VFS, "%s: Failed to unregister for witness notifications: %d\n", __func__, ret); - return ret; + goto unlock; } /* @@ -511,7 +511,7 @@ static int cifs_swn_reconnect(struct cifs_tcon *tcon, struct sockaddr_storage *a if (ret < 0) { cifs_dbg(VFS, "%s: Failed to register for witness notifications: %d\n", __func__, ret); - return ret; + goto unlock; } spin_lock(&GlobalMid_Lock); @@ -519,9 +519,10 @@ static int cifs_swn_reconnect(struct cifs_tcon *tcon, struct sockaddr_storage *a tcon->ses->server->tcpStatus = CifsNeedReconnect; spin_unlock(&GlobalMid_Lock); } +unlock: mutex_unlock(&tcon->ses->server->srv_mutex); - return 0; + return ret; } static int cifs_swn_client_move(struct cifs_swn_reg *swnreg, struct sockaddr_storage *addr) -- GitLab From 0f2c66ae5c8d9c6250d97060902eeeaa8a06446c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 17 Dec 2020 14:03:35 +0300 Subject: [PATCH 0763/1894] cifs: Re-indent cifs_swn_reconnect() This code is slightly nicer if we flip the cifs_sockaddr_equal() around and pull all the code in one tab. Signed-off-by: Dan Carpenter Reviewed-by: Samuel Cabrero Signed-off-by: Steve French --- fs/cifs/cifs_swn.c | 64 ++++++++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 31 deletions(-) diff --git a/fs/cifs/cifs_swn.c b/fs/cifs/cifs_swn.c index 91163d3cf8b7d..d35f599aa00e6 100644 --- a/fs/cifs/cifs_swn.c +++ b/fs/cifs/cifs_swn.c @@ -484,41 +484,43 @@ static int cifs_swn_reconnect(struct cifs_tcon *tcon, struct sockaddr_storage *a /* Store the reconnect address */ mutex_lock(&tcon->ses->server->srv_mutex); - if (!cifs_sockaddr_equal(&tcon->ses->server->dstaddr, addr)) { - ret = cifs_swn_store_swn_addr(addr, &tcon->ses->server->dstaddr, - &tcon->ses->server->swn_dstaddr); - if (ret < 0) { - cifs_dbg(VFS, "%s: failed to store address: %d\n", __func__, ret); - goto unlock; - } - tcon->ses->server->use_swn_dstaddr = true; + if (cifs_sockaddr_equal(&tcon->ses->server->dstaddr, addr)) + goto unlock; - /* - * Unregister to stop receiving notifications for the old IP address. - */ - ret = cifs_swn_unregister(tcon); - if (ret < 0) { - cifs_dbg(VFS, "%s: Failed to unregister for witness notifications: %d\n", - __func__, ret); - goto unlock; - } + ret = cifs_swn_store_swn_addr(addr, &tcon->ses->server->dstaddr, + &tcon->ses->server->swn_dstaddr); + if (ret < 0) { + cifs_dbg(VFS, "%s: failed to store address: %d\n", __func__, ret); + goto unlock; + } + tcon->ses->server->use_swn_dstaddr = true; - /* - * And register to receive notifications for the new IP address now that we have - * stored the new address. - */ - ret = cifs_swn_register(tcon); - if (ret < 0) { - cifs_dbg(VFS, "%s: Failed to register for witness notifications: %d\n", - __func__, ret); - goto unlock; - } + /* + * Unregister to stop receiving notifications for the old IP address. + */ + ret = cifs_swn_unregister(tcon); + if (ret < 0) { + cifs_dbg(VFS, "%s: Failed to unregister for witness notifications: %d\n", + __func__, ret); + goto unlock; + } - spin_lock(&GlobalMid_Lock); - if (tcon->ses->server->tcpStatus != CifsExiting) - tcon->ses->server->tcpStatus = CifsNeedReconnect; - spin_unlock(&GlobalMid_Lock); + /* + * And register to receive notifications for the new IP address now that we have + * stored the new address. + */ + ret = cifs_swn_register(tcon); + if (ret < 0) { + cifs_dbg(VFS, "%s: Failed to register for witness notifications: %d\n", + __func__, ret); + goto unlock; } + + spin_lock(&GlobalMid_Lock); + if (tcon->ses->server->tcpStatus != CifsExiting) + tcon->ses->server->tcpStatus = CifsNeedReconnect; + spin_unlock(&GlobalMid_Lock); + unlock: mutex_unlock(&tcon->ses->server->srv_mutex); -- GitLab From f6f92968e1e5a7a9d211faaebefc26ebe408dad7 Mon Sep 17 00:00:00 2001 From: Carl Huang Date: Thu, 17 Dec 2020 09:04:57 +0200 Subject: [PATCH 0764/1894] ath11k: qmi: try to allocate a big block of DMA memory first Not all firmware versions support allocating DMA memory in smaller blocks so first try to allocate big block of DMA memory for QMI. If the allocation fails, let firmware request multiple blocks of DMA memory with smaller size. This also fixes an unnecessary error message seen during ath11k probe on QCA6390: ath11k_pci 0000:06:00.0: Respond mem req failed, result: 1, err: 0 ath11k_pci 0000:06:00.0: qmi failed to respond fw mem req:-22 Tested-on: QCA6390 hw2.0 PCI WLAN.HST.1.0.1-01740-QCAHSTSWPLZ_V2_TO_X86-1 Signed-off-by: Carl Huang Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/1608127593-15192-1-git-send-email-kvalo@codeaurora.org --- drivers/net/wireless/ath/ath11k/qmi.c | 24 ++++++++++++++++++++++-- drivers/net/wireless/ath/ath11k/qmi.h | 1 + 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/qmi.c b/drivers/net/wireless/ath/ath11k/qmi.c index f0b5c50974f3e..0db623ff4bb9b 100644 --- a/drivers/net/wireless/ath/ath11k/qmi.c +++ b/drivers/net/wireless/ath/ath11k/qmi.c @@ -1660,6 +1660,7 @@ static int ath11k_qmi_respond_fw_mem_request(struct ath11k_base *ab) struct qmi_wlanfw_respond_mem_resp_msg_v01 resp; struct qmi_txn txn = {}; int ret = 0, i; + bool delayed; req = kzalloc(sizeof(*req), GFP_KERNEL); if (!req) @@ -1672,11 +1673,13 @@ static int ath11k_qmi_respond_fw_mem_request(struct ath11k_base *ab) * failure to FW and FW will then request mulitple blocks of small * chunk size memory. */ - if (!ab->bus_params.fixed_mem_region && ab->qmi.mem_seg_count <= 2) { + if (!ab->bus_params.fixed_mem_region && ab->qmi.target_mem_delayed) { + delayed = true; ath11k_dbg(ab, ATH11K_DBG_QMI, "qmi delays mem_request %d\n", ab->qmi.mem_seg_count); memset(req, 0, sizeof(*req)); } else { + delayed = false; req->mem_seg_len = ab->qmi.mem_seg_count; for (i = 0; i < req->mem_seg_len ; i++) { @@ -1708,6 +1711,12 @@ static int ath11k_qmi_respond_fw_mem_request(struct ath11k_base *ab) } if (resp.resp.result != QMI_RESULT_SUCCESS_V01) { + /* the error response is expected when + * target_mem_delayed is true. + */ + if (delayed && resp.resp.error == 0) + goto out; + ath11k_warn(ab, "Respond mem req failed, result: %d, err: %d\n", resp.resp.result, resp.resp.error); ret = -EINVAL; @@ -1742,6 +1751,8 @@ static int ath11k_qmi_alloc_target_mem_chunk(struct ath11k_base *ab) int i; struct target_mem_chunk *chunk; + ab->qmi.target_mem_delayed = false; + for (i = 0; i < ab->qmi.mem_seg_count; i++) { chunk = &ab->qmi.target_mem[i]; chunk->vaddr = dma_alloc_coherent(ab->dev, @@ -1749,6 +1760,15 @@ static int ath11k_qmi_alloc_target_mem_chunk(struct ath11k_base *ab) &chunk->paddr, GFP_KERNEL); if (!chunk->vaddr) { + if (ab->qmi.mem_seg_count <= 2) { + ath11k_dbg(ab, ATH11K_DBG_QMI, + "qmi dma allocation failed (%d B type %u), will try later with small size\n", + chunk->size, + chunk->type); + ath11k_qmi_free_target_mem_chunk(ab); + ab->qmi.target_mem_delayed = true; + return 0; + } ath11k_err(ab, "failed to alloc memory, size: 0x%x, type: %u\n", chunk->size, chunk->type); @@ -2517,7 +2537,7 @@ static void ath11k_qmi_msg_mem_request_cb(struct qmi_handle *qmi_hdl, ret); return; } - } else if (msg->mem_seg_len > 2) { + } else { ret = ath11k_qmi_alloc_target_mem_chunk(ab); if (ret) { ath11k_warn(ab, "qmi failed to alloc target memory: %d\n", diff --git a/drivers/net/wireless/ath/ath11k/qmi.h b/drivers/net/wireless/ath/ath11k/qmi.h index 92925c9eac674..7bad374cc23a6 100644 --- a/drivers/net/wireless/ath/ath11k/qmi.h +++ b/drivers/net/wireless/ath/ath11k/qmi.h @@ -125,6 +125,7 @@ struct ath11k_qmi { struct target_mem_chunk target_mem[ATH11K_QMI_WLANFW_MAX_NUM_MEM_SEG_V01]; u32 mem_seg_count; u32 target_mem_mode; + bool target_mem_delayed; u8 cal_done; struct target_info target; struct m3_mem_region m3_mem; -- GitLab From e9603f4bdcc04417f1c7b3585e63654819dc11f6 Mon Sep 17 00:00:00 2001 From: Carl Huang Date: Thu, 17 Dec 2020 17:22:10 +0200 Subject: [PATCH 0765/1894] ath11k: pci: disable ASPM L0sLs before downloading firmware Sometimes QCA6390 doesn't switch to amss state as device enters L1ss state, so disable L0sL1s during firmware downloading. Driver recovers the ASPM to default value in start callback or powerdown callback. Tested-on: QCA6390 hw2.0 PCI WLAN.HST.1.0.1-01740-QCAHSTSWPLZ_V2_TO_X86-1 Signed-off-by: Carl Huang Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/1608218530-15426-1-git-send-email-kvalo@codeaurora.org --- drivers/net/wireless/ath/ath11k/pci.c | 36 +++++++++++++++++++++++++++ drivers/net/wireless/ath/ath11k/pci.h | 2 ++ 2 files changed, 38 insertions(+) diff --git a/drivers/net/wireless/ath/ath11k/pci.c b/drivers/net/wireless/ath/ath11k/pci.c index 9f9a824a4c2d6..20b415cd96c4a 100644 --- a/drivers/net/wireless/ath/ath11k/pci.c +++ b/drivers/net/wireless/ath/ath11k/pci.c @@ -886,6 +886,32 @@ static void ath11k_pci_free_region(struct ath11k_pci *ab_pci) pci_disable_device(pci_dev); } +static void ath11k_pci_aspm_disable(struct ath11k_pci *ab_pci) +{ + struct ath11k_base *ab = ab_pci->ab; + + pcie_capability_read_word(ab_pci->pdev, PCI_EXP_LNKCTL, + &ab_pci->link_ctl); + + ath11k_dbg(ab, ATH11K_DBG_PCI, "pci link_ctl 0x%04x L0s %d L1 %d\n", + ab_pci->link_ctl, + u16_get_bits(ab_pci->link_ctl, PCI_EXP_LNKCTL_ASPM_L0S), + u16_get_bits(ab_pci->link_ctl, PCI_EXP_LNKCTL_ASPM_L1)); + + /* disable L0s and L1 */ + pcie_capability_write_word(ab_pci->pdev, PCI_EXP_LNKCTL, + ab_pci->link_ctl & ~PCI_EXP_LNKCTL_ASPMC); + + set_bit(ATH11K_PCI_ASPM_RESTORE, &ab_pci->flags); +} + +static void ath11k_pci_aspm_restore(struct ath11k_pci *ab_pci) +{ + if (test_and_clear_bit(ATH11K_PCI_ASPM_RESTORE, &ab_pci->flags)) + pcie_capability_write_word(ab_pci->pdev, PCI_EXP_LNKCTL, + ab_pci->link_ctl); +} + static int ath11k_pci_power_up(struct ath11k_base *ab) { struct ath11k_pci *ab_pci = ath11k_pci_priv(ab); @@ -895,6 +921,11 @@ static int ath11k_pci_power_up(struct ath11k_base *ab) clear_bit(ATH11K_PCI_FLAG_INIT_DONE, &ab_pci->flags); ath11k_pci_sw_reset(ab_pci->ab, true); + /* Disable ASPM during firmware download due to problems switching + * to AMSS state. + */ + ath11k_pci_aspm_disable(ab_pci); + ret = ath11k_mhi_start(ab_pci); if (ret) { ath11k_err(ab, "failed to start mhi: %d\n", ret); @@ -908,6 +939,9 @@ static void ath11k_pci_power_down(struct ath11k_base *ab) { struct ath11k_pci *ab_pci = ath11k_pci_priv(ab); + /* restore aspm in case firmware bootup fails */ + ath11k_pci_aspm_restore(ab_pci); + ath11k_pci_force_wake(ab_pci->ab); ath11k_mhi_stop(ab_pci); clear_bit(ATH11K_PCI_FLAG_INIT_DONE, &ab_pci->flags); @@ -965,6 +999,8 @@ static int ath11k_pci_start(struct ath11k_base *ab) set_bit(ATH11K_PCI_FLAG_INIT_DONE, &ab_pci->flags); + ath11k_pci_aspm_restore(ab_pci); + ath11k_pci_ce_irqs_enable(ab); ath11k_ce_rx_post_buf(ab); diff --git a/drivers/net/wireless/ath/ath11k/pci.h b/drivers/net/wireless/ath/ath11k/pci.h index 0432a702416b4..fe44d0dfce195 100644 --- a/drivers/net/wireless/ath/ath11k/pci.h +++ b/drivers/net/wireless/ath/ath11k/pci.h @@ -63,6 +63,7 @@ struct ath11k_msi_config { enum ath11k_pci_flags { ATH11K_PCI_FLAG_INIT_DONE, ATH11K_PCI_FLAG_IS_MSI_64, + ATH11K_PCI_ASPM_RESTORE, }; struct ath11k_pci { @@ -80,6 +81,7 @@ struct ath11k_pci { /* enum ath11k_pci_flags */ unsigned long flags; + u16 link_ctl; }; static inline struct ath11k_pci *ath11k_pci_priv(struct ath11k_base *ab) -- GitLab From 0bf1bafb17df03fbd0e8b9a086c39e6f24af7193 Mon Sep 17 00:00:00 2001 From: Samuel Cabrero Date: Fri, 18 Dec 2020 10:29:49 +0100 Subject: [PATCH 0766/1894] cifs: Avoid error pointer dereference The patch 7d6535b72042: "cifs: Simplify reconnect code when dfs upcall is enabled" leads to the following static checker warning: fs/cifs/connect.c:160 reconn_set_next_dfs_target() error: 'server->hostname' dereferencing possible ERR_PTR() Avoid dereferencing the error pointer by early returning on error condition. Reported-by: Dan Carpenter Signed-off-by: Samuel Cabrero Signed-off-by: Steve French --- fs/cifs/connect.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 509a41ff56b83..b9df85506938d 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -155,6 +155,7 @@ static void reconn_set_next_dfs_target(struct TCP_Server_Info *server, cifs_dbg(FYI, "%s: failed to extract hostname from target: %ld\n", __func__, PTR_ERR(server->hostname)); + return; } rc = reconn_set_ipaddr_from_hostname(server); -- GitLab From 29c9dece56ca82c510c39a0e9403b80bdb3032d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 17 Dec 2020 17:36:57 +0100 Subject: [PATCH 0767/1894] drm/qxl: don't allocate a dma_address array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit That seems to be unused. Signed-off-by: Christian König Reviewed-by: David Airlie Link: https://patchwork.freedesktop.org/patch/408787/ --- drivers/gpu/drm/qxl/qxl_ttm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c index 128c38c8a837a..7dd0c69baa478 100644 --- a/drivers/gpu/drm/qxl/qxl_ttm.c +++ b/drivers/gpu/drm/qxl/qxl_ttm.c @@ -115,7 +115,7 @@ static struct ttm_tt *qxl_ttm_tt_create(struct ttm_buffer_object *bo, ttm = kzalloc(sizeof(struct ttm_tt), GFP_KERNEL); if (ttm == NULL) return NULL; - if (ttm_dma_tt_init(ttm, bo, page_flags, ttm_cached)) { + if (ttm_tt_init(ttm, bo, page_flags, ttm_cached)) { kfree(ttm); return NULL; } -- GitLab From 3d45f221ce627d13e2e6ef3274f06750c84a6542 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 2 Dec 2020 11:55:58 +0000 Subject: [PATCH 0768/1894] btrfs: fix deadlock when cloning inline extent and low on free metadata space When cloning an inline extent there are cases where we can not just copy the inline extent from the source range to the target range (e.g. when the target range starts at an offset greater than zero). In such cases we copy the inline extent's data into a page of the destination inode and then dirty that page. However, after that we will need to start a transaction for each processed extent and, if we are ever low on available metadata space, we may need to flush existing delalloc for all dirty inodes in an attempt to release metadata space - if that happens we may deadlock: * the async reclaim task queued a delalloc work to flush delalloc for the destination inode of the clone operation; * the task executing that delalloc work gets blocked waiting for the range with the dirty page to be unlocked, which is currently locked by the task doing the clone operation; * the async reclaim task blocks waiting for the delalloc work to complete; * the cloning task is waiting on the waitqueue of its reservation ticket while holding the range with the dirty page locked in the inode's io_tree; * if metadata space is not released by some other task (like delalloc for some other inode completing for example), the clone task waits forever and as a consequence the delalloc work and async reclaim tasks will hang forever as well. Releasing more space on the other hand may require starting a transaction, which will hang as well when trying to reserve metadata space, resulting in a deadlock between all these tasks. When this happens, traces like the following show up in dmesg/syslog: [87452.323003] INFO: task kworker/u16:11:1810830 blocked for more than 120 seconds. [87452.323644] Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 [87452.324248] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [87452.324852] task:kworker/u16:11 state:D stack: 0 pid:1810830 ppid: 2 flags:0x00004000 [87452.325520] Workqueue: btrfs-flush_delalloc btrfs_work_helper [btrfs] [87452.326136] Call Trace: [87452.326737] __schedule+0x5d1/0xcf0 [87452.327390] schedule+0x45/0xe0 [87452.328174] lock_extent_bits+0x1e6/0x2d0 [btrfs] [87452.328894] ? finish_wait+0x90/0x90 [87452.329474] btrfs_invalidatepage+0x32c/0x390 [btrfs] [87452.330133] ? __mod_memcg_state+0x8e/0x160 [87452.330738] __extent_writepage+0x2d4/0x400 [btrfs] [87452.331405] extent_write_cache_pages+0x2b2/0x500 [btrfs] [87452.332007] ? lock_release+0x20e/0x4c0 [87452.332557] ? trace_hardirqs_on+0x1b/0xf0 [87452.333127] extent_writepages+0x43/0x90 [btrfs] [87452.333653] ? lock_acquire+0x1a3/0x490 [87452.334177] do_writepages+0x43/0xe0 [87452.334699] ? __filemap_fdatawrite_range+0xa4/0x100 [87452.335720] __filemap_fdatawrite_range+0xc5/0x100 [87452.336500] btrfs_run_delalloc_work+0x17/0x40 [btrfs] [87452.337216] btrfs_work_helper+0xf1/0x600 [btrfs] [87452.337838] process_one_work+0x24e/0x5e0 [87452.338437] worker_thread+0x50/0x3b0 [87452.339137] ? process_one_work+0x5e0/0x5e0 [87452.339884] kthread+0x153/0x170 [87452.340507] ? kthread_mod_delayed_work+0xc0/0xc0 [87452.341153] ret_from_fork+0x22/0x30 [87452.341806] INFO: task kworker/u16:1:2426217 blocked for more than 120 seconds. [87452.342487] Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 [87452.343274] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [87452.344049] task:kworker/u16:1 state:D stack: 0 pid:2426217 ppid: 2 flags:0x00004000 [87452.344974] Workqueue: events_unbound btrfs_async_reclaim_metadata_space [btrfs] [87452.345655] Call Trace: [87452.346305] __schedule+0x5d1/0xcf0 [87452.346947] ? kvm_clock_read+0x14/0x30 [87452.347676] ? wait_for_completion+0x81/0x110 [87452.348389] schedule+0x45/0xe0 [87452.349077] schedule_timeout+0x30c/0x580 [87452.349718] ? _raw_spin_unlock_irqrestore+0x3c/0x60 [87452.350340] ? lock_acquire+0x1a3/0x490 [87452.351006] ? try_to_wake_up+0x7a/0xa20 [87452.351541] ? lock_release+0x20e/0x4c0 [87452.352040] ? lock_acquired+0x199/0x490 [87452.352517] ? wait_for_completion+0x81/0x110 [87452.353000] wait_for_completion+0xab/0x110 [87452.353490] start_delalloc_inodes+0x2af/0x390 [btrfs] [87452.353973] btrfs_start_delalloc_roots+0x12d/0x250 [btrfs] [87452.354455] flush_space+0x24f/0x660 [btrfs] [87452.355063] btrfs_async_reclaim_metadata_space+0x1bb/0x480 [btrfs] [87452.355565] process_one_work+0x24e/0x5e0 [87452.356024] worker_thread+0x20f/0x3b0 [87452.356487] ? process_one_work+0x5e0/0x5e0 [87452.356973] kthread+0x153/0x170 [87452.357434] ? kthread_mod_delayed_work+0xc0/0xc0 [87452.357880] ret_from_fork+0x22/0x30 (...) < stack traces of several tasks waiting for the locks of the inodes of the clone operation > (...) [92867.444138] RSP: 002b:00007ffc3371bbe8 EFLAGS: 00000246 ORIG_RAX: 0000000000000052 [92867.444624] RAX: ffffffffffffffda RBX: 00007ffc3371bea0 RCX: 00007f61efe73f97 [92867.445116] RDX: 0000000000000000 RSI: 0000560fbd5d7a40 RDI: 0000560fbd5d8960 [92867.445595] RBP: 00007ffc3371beb0 R08: 0000000000000001 R09: 0000000000000003 [92867.446070] R10: 00007ffc3371b996 R11: 0000000000000246 R12: 0000000000000000 [92867.446820] R13: 000000000000001f R14: 00007ffc3371bea0 R15: 00007ffc3371beb0 [92867.447361] task:fsstress state:D stack: 0 pid:2508238 ppid:2508153 flags:0x00004000 [92867.447920] Call Trace: [92867.448435] __schedule+0x5d1/0xcf0 [92867.448934] ? _raw_spin_unlock_irqrestore+0x3c/0x60 [92867.449423] schedule+0x45/0xe0 [92867.449916] __reserve_bytes+0x4a4/0xb10 [btrfs] [92867.450576] ? finish_wait+0x90/0x90 [92867.451202] btrfs_reserve_metadata_bytes+0x29/0x190 [btrfs] [92867.451815] btrfs_block_rsv_add+0x1f/0x50 [btrfs] [92867.452412] start_transaction+0x2d1/0x760 [btrfs] [92867.453216] clone_copy_inline_extent+0x333/0x490 [btrfs] [92867.453848] ? lock_release+0x20e/0x4c0 [92867.454539] ? btrfs_search_slot+0x9a7/0xc30 [btrfs] [92867.455218] btrfs_clone+0x569/0x7e0 [btrfs] [92867.455952] btrfs_clone_files+0xf6/0x150 [btrfs] [92867.456588] btrfs_remap_file_range+0x324/0x3d0 [btrfs] [92867.457213] do_clone_file_range+0xd4/0x1f0 [92867.457828] vfs_clone_file_range+0x4d/0x230 [92867.458355] ? lock_release+0x20e/0x4c0 [92867.458890] ioctl_file_clone+0x8f/0xc0 [92867.459377] do_vfs_ioctl+0x342/0x750 [92867.459913] __x64_sys_ioctl+0x62/0xb0 [92867.460377] do_syscall_64+0x33/0x80 [92867.460842] entry_SYSCALL_64_after_hwframe+0x44/0xa9 (...) < stack traces of more tasks blocked on metadata reservation like the clone task above, because the async reclaim task has deadlocked > (...) Another thing to notice is that the worker task that is deadlocked when trying to flush the destination inode of the clone operation is at btrfs_invalidatepage(). This is simply because the clone operation has a destination offset greater than the i_size and we only update the i_size of the destination file after cloning an extent (just like we do in the buffered write path). Since the async reclaim path uses btrfs_start_delalloc_roots() to trigger the flushing of delalloc for all inodes that have delalloc, add a runtime flag to an inode to signal it should not be flushed, and for inodes with that flag set, start_delalloc_inodes() will simply skip them. When the cloning code needs to dirty a page to copy an inline extent, set that flag on the inode and then clear it when the clone operation finishes. This could be sporadically triggered with test case generic/269 from fstests, which exercises many fsstress processes running in parallel with several dd processes filling up the entire filesystem. CC: stable@vger.kernel.org # 5.9+ Fixes: 05a5a7621ce6 ("Btrfs: implement full reflink support for inline extents") Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/btrfs_inode.h | 9 +++++++++ fs/btrfs/ctree.h | 3 ++- fs/btrfs/dev-replace.c | 2 +- fs/btrfs/inode.c | 15 +++++++++++---- fs/btrfs/ioctl.c | 2 +- fs/btrfs/reflink.c | 15 +++++++++++++++ fs/btrfs/space-info.c | 2 +- 7 files changed, 40 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 555cbcef65857..d9bf53d9ff907 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -42,6 +42,15 @@ enum { * to an inode. */ BTRFS_INODE_NO_XATTRS, + /* + * Set when we are in a context where we need to start a transaction and + * have dirty pages with the respective file range locked. This is to + * ensure that when reserving space for the transaction, if we are low + * on available space and need to flush delalloc, we will not flush + * delalloc for this inode, because that could result in a deadlock (on + * the file range, inode's io_tree). + */ + BTRFS_INODE_NO_DELALLOC_FLUSH, }; /* in memory btrfs inode */ diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 9dde7707873a2..2674f24cf2e00 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3074,7 +3074,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, u32 min_type); int btrfs_start_delalloc_snapshot(struct btrfs_root *root); -int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr); +int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr, + bool in_reclaim_context); int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end, unsigned int extra_bits, struct extent_state **cached_state); diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index a98e33f232d55..324f646d6e5e2 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -715,7 +715,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, * flush all outstanding I/O and inode extent mappings before the * copy operation is declared as being finished */ - ret = btrfs_start_delalloc_roots(fs_info, U64_MAX); + ret = btrfs_start_delalloc_roots(fs_info, U64_MAX, false); if (ret) { mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); return ret; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8e23780acfaeb..070716650df87 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9390,7 +9390,8 @@ static struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode * some fairly slow code that needs optimization. This walks the list * of all the inodes with pending delalloc and forces them to disk. */ -static int start_delalloc_inodes(struct btrfs_root *root, u64 *nr, bool snapshot) +static int start_delalloc_inodes(struct btrfs_root *root, u64 *nr, bool snapshot, + bool in_reclaim_context) { struct btrfs_inode *binode; struct inode *inode; @@ -9411,6 +9412,11 @@ static int start_delalloc_inodes(struct btrfs_root *root, u64 *nr, bool snapshot list_move_tail(&binode->delalloc_inodes, &root->delalloc_inodes); + + if (in_reclaim_context && + test_bit(BTRFS_INODE_NO_DELALLOC_FLUSH, &binode->runtime_flags)) + continue; + inode = igrab(&binode->vfs_inode); if (!inode) { cond_resched_lock(&root->delalloc_lock); @@ -9464,10 +9470,11 @@ int btrfs_start_delalloc_snapshot(struct btrfs_root *root) if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) return -EROFS; - return start_delalloc_inodes(root, &nr, true); + return start_delalloc_inodes(root, &nr, true, false); } -int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr) +int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr, + bool in_reclaim_context) { struct btrfs_root *root; struct list_head splice; @@ -9490,7 +9497,7 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr) &fs_info->delalloc_roots); spin_unlock(&fs_info->delalloc_root_lock); - ret = start_delalloc_inodes(root, &nr, false); + ret = start_delalloc_inodes(root, &nr, false, in_reclaim_context); btrfs_put_root(root); if (ret < 0) goto out; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 703212ff50a56..dde49a791f3e2 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -4951,7 +4951,7 @@ long btrfs_ioctl(struct file *file, unsigned int case BTRFS_IOC_SYNC: { int ret; - ret = btrfs_start_delalloc_roots(fs_info, U64_MAX); + ret = btrfs_start_delalloc_roots(fs_info, U64_MAX, false); if (ret) return ret; ret = btrfs_sync_fs(inode->i_sb, 1); diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c index ab80896315bef..b03e7891394e3 100644 --- a/fs/btrfs/reflink.c +++ b/fs/btrfs/reflink.c @@ -89,6 +89,19 @@ static int copy_inline_to_page(struct btrfs_inode *inode, if (ret) goto out_unlock; + /* + * After dirtying the page our caller will need to start a transaction, + * and if we are low on metadata free space, that can cause flushing of + * delalloc for all inodes in order to get metadata space released. + * However we are holding the range locked for the whole duration of + * the clone/dedupe operation, so we may deadlock if that happens and no + * other task releases enough space. So mark this inode as not being + * possible to flush to avoid such deadlock. We will clear that flag + * when we finish cloning all extents, since a transaction is started + * after finding each extent to clone. + */ + set_bit(BTRFS_INODE_NO_DELALLOC_FLUSH, &inode->runtime_flags); + if (comp_type == BTRFS_COMPRESS_NONE) { char *map; @@ -549,6 +562,8 @@ process_slot: out: btrfs_free_path(path); kvfree(buf); + clear_bit(BTRFS_INODE_NO_DELALLOC_FLUSH, &BTRFS_I(inode)->runtime_flags); + return ret; } diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 64099565ab8f5..67e55c5479b8e 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -532,7 +532,7 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, loops = 0; while ((delalloc_bytes || dio_bytes) && loops < 3) { - btrfs_start_delalloc_roots(fs_info, items); + btrfs_start_delalloc_roots(fs_info, items, true); loops++; if (wait_ordered && !trans) { -- GitLab From 9a664971569daf68254928149f580b4f5856d274 Mon Sep 17 00:00:00 2001 From: ethanwu Date: Tue, 1 Dec 2020 17:25:12 +0800 Subject: [PATCH 0769/1894] btrfs: correctly calculate item size used when item key collision happens Item key collision is allowed for some item types, like dir item and inode refs, but the overall item size is limited by the nodesize. item size(ins_len) passed from btrfs_insert_empty_items to btrfs_search_slot already contains size of btrfs_item. When btrfs_search_slot reaches leaf, we'll see if we need to split leaf. The check incorrectly reports that split leaf is required, because it treats the space required by the newly inserted item as btrfs_item + item data. But in item key collision case, only item data is actually needed, the newly inserted item could merge into the existing one. No new btrfs_item will be inserted. And split_leaf return EOVERFLOW from following code: if (extend && data_size + btrfs_item_size_nr(l, slot) + sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(fs_info)) return -EOVERFLOW; In most cases, when callers receive EOVERFLOW, they either return this error or handle in different ways. For example, in normal dir item creation the userspace will get errno EOVERFLOW; in inode ref case INODE_EXTREF is used instead. However, this is not the case for rename. To avoid the unrecoverable situation in rename, btrfs_check_dir_item_collision is called in early phase of rename. In this function, when item key collision is detected leaf space is checked: data_size = sizeof(*di) + name_len; if (data_size + btrfs_item_size_nr(leaf, slot) + sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(root->fs_info)) the sizeof(struct btrfs_item) + btrfs_item_size_nr(leaf, slot) here refers to existing item size, the condition here correctly calculates the needed size for collision case rather than the wrong case above. The consequence of inconsistent condition check between btrfs_check_dir_item_collision and btrfs_search_slot when item key collision happens is that we might pass check here but fail later at btrfs_search_slot. Rename fails and volume is forced readonly [436149.586170] ------------[ cut here ]------------ [436149.586173] BTRFS: Transaction aborted (error -75) [436149.586196] WARNING: CPU: 0 PID: 16733 at fs/btrfs/inode.c:9870 btrfs_rename2+0x1938/0x1b70 [btrfs] [436149.586227] CPU: 0 PID: 16733 Comm: python Tainted: G D 4.18.0-rc5+ #1 [436149.586228] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 04/05/2016 [436149.586238] RIP: 0010:btrfs_rename2+0x1938/0x1b70 [btrfs] [436149.586254] RSP: 0018:ffffa327043a7ce0 EFLAGS: 00010286 [436149.586255] RAX: 0000000000000000 RBX: ffff8d8a17d13340 RCX: 0000000000000006 [436149.586256] RDX: 0000000000000007 RSI: 0000000000000096 RDI: ffff8d8a7fc164b0 [436149.586257] RBP: ffffa327043a7da0 R08: 0000000000000560 R09: 7265282064657472 [436149.586258] R10: 0000000000000000 R11: 6361736e61725420 R12: ffff8d8a0d4c8b08 [436149.586258] R13: ffff8d8a17d13340 R14: ffff8d8a33e0a540 R15: 00000000000001fe [436149.586260] FS: 00007fa313933740(0000) GS:ffff8d8a7fc00000(0000) knlGS:0000000000000000 [436149.586261] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [436149.586262] CR2: 000055d8d9c9a720 CR3: 000000007aae0003 CR4: 00000000003606f0 [436149.586295] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [436149.586296] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [436149.586296] Call Trace: [436149.586311] vfs_rename+0x383/0x920 [436149.586313] ? vfs_rename+0x383/0x920 [436149.586315] do_renameat2+0x4ca/0x590 [436149.586317] __x64_sys_rename+0x20/0x30 [436149.586324] do_syscall_64+0x5a/0x120 [436149.586330] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [436149.586332] RIP: 0033:0x7fa3133b1d37 [436149.586348] RSP: 002b:00007fffd3e43908 EFLAGS: 00000246 ORIG_RAX: 0000000000000052 [436149.586349] RAX: ffffffffffffffda RBX: 00007fa3133b1d30 RCX: 00007fa3133b1d37 [436149.586350] RDX: 000055d8da06b5e0 RSI: 000055d8da225d60 RDI: 000055d8da2c4da0 [436149.586351] RBP: 000055d8da2252f0 R08: 00007fa313782000 R09: 00000000000177e0 [436149.586351] R10: 000055d8da010680 R11: 0000000000000246 R12: 00007fa313840b00 Thanks to Hans van Kranenburg for information about crc32 hash collision tools, I was able to reproduce the dir item collision with following python script. https://github.com/wutzuchieh/misc_tools/blob/master/crc32_forge.py Run it under a btrfs volume will trigger the abort transaction. It simply creates files and rename them to forged names that leads to hash collision. There are two ways to fix this. One is to simply revert the patch 878f2d2cb355 ("Btrfs: fix max dir item size calculation") to make the condition consistent although that patch is correct about the size. The other way is to handle the leaf space check correctly when collision happens. I prefer the second one since it correct leaf space check in collision case. This fix will not account sizeof(struct btrfs_item) when the item already exists. There are two places where ins_len doesn't contain sizeof(struct btrfs_item), however. 1. extent-tree.c: lookup_inline_extent_backref 2. file-item.c: btrfs_csum_file_blocks to make the logic of btrfs_search_slot more clear, we add a flag search_for_extension in btrfs_path. This flag indicates that ins_len passed to btrfs_search_slot doesn't contain sizeof(struct btrfs_item). When key exists, btrfs_search_slot will use the actual size needed to calculate the required leaf space. CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Filipe Manana Signed-off-by: ethanwu Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 24 ++++++++++++++++++++++-- fs/btrfs/ctree.h | 6 ++++++ fs/btrfs/extent-tree.c | 2 ++ fs/btrfs/file-item.c | 2 ++ 4 files changed, 32 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 07810891e2045..cc89b63d65a4d 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2555,8 +2555,14 @@ out: * @p: Holds all btree nodes along the search path * @root: The root node of the tree * @key: The key we are looking for - * @ins_len: Indicates purpose of search, for inserts it is 1, for - * deletions it's -1. 0 for plain searches + * @ins_len: Indicates purpose of search: + * >0 for inserts it's size of item inserted (*) + * <0 for deletions + * 0 for plain searches, not modifying the tree + * + * (*) If size of item inserted doesn't include + * sizeof(struct btrfs_item), then p->search_for_extension must + * be set. * @cow: boolean should CoW operations be performed. Must always be 1 * when modifying the tree. * @@ -2717,6 +2723,20 @@ cow_done: if (level == 0) { p->slots[level] = slot; + /* + * Item key already exists. In this case, if we are + * allowed to insert the item (for example, in dir_item + * case, item key collision is allowed), it will be + * merged with the original item. Only the item size + * grows, no new btrfs item will be added. If + * search_for_extension is not set, ins_len already + * accounts the size btrfs_item, deduct it here so leaf + * space check will be correct. + */ + if (ret == 0 && ins_len > 0 && !p->search_for_extension) { + ASSERT(ins_len >= sizeof(struct btrfs_item)); + ins_len -= sizeof(struct btrfs_item); + } if (ins_len > 0 && btrfs_leaf_free_space(b) < ins_len) { if (write_lock_level < 1) { diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2674f24cf2e00..3935d297d1981 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -368,6 +368,12 @@ struct btrfs_path { unsigned int search_commit_root:1; unsigned int need_commit_sem:1; unsigned int skip_release_on_error:1; + /* + * Indicate that new item (btrfs_search_slot) is extending already + * existing item and ins_len contains only the data size and not item + * header (ie. sizeof(struct btrfs_item) is not included). + */ + unsigned int search_for_extension:1; }; #define BTRFS_MAX_EXTENT_ITEM_SIZE(r) ((BTRFS_LEAF_DATA_SIZE(r->fs_info) >> 4) - \ sizeof(struct btrfs_item)) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 56ea380f5a178..d79b8369e6aaf 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -844,6 +844,7 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, want = extent_ref_type(parent, owner); if (insert) { extra_size = btrfs_extent_inline_ref_size(want); + path->search_for_extension = 1; path->keep_locks = 1; } else extra_size = -1; @@ -996,6 +997,7 @@ again: out: if (insert) { path->keep_locks = 0; + path->search_for_extension = 0; btrfs_unlock_up_safe(path, 1); } return err; diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 1545c22ef2804..6ccfc019ad909 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -1016,8 +1016,10 @@ again: } btrfs_release_path(path); + path->search_for_extension = 1; ret = btrfs_search_slot(trans, root, &file_key, path, csum_size, 1); + path->search_for_extension = 0; if (ret < 0) goto out; -- GitLab From ae5e070eaca9dbebde3459dd8f4c2756f8c097d0 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 4 Dec 2020 09:24:47 +0800 Subject: [PATCH 0770/1894] btrfs: qgroup: don't try to wait flushing if we're already holding a transaction There is a chance of racing for qgroup flushing which may lead to deadlock: Thread A | Thread B (not holding trans handle) | (holding a trans handle) --------------------------------+-------------------------------- __btrfs_qgroup_reserve_meta() | __btrfs_qgroup_reserve_meta() |- try_flush_qgroup() | |- try_flush_qgroup() |- QGROUP_FLUSHING bit set | | | | |- test_and_set_bit() | | |- wait_event() |- btrfs_join_transaction() | |- btrfs_commit_transaction()| !!! DEAD LOCK !!! Since thread A wants to commit transaction, but thread B is holding a transaction handle, blocking the commit. At the same time, thread B is waiting for thread A to finish its commit. This is just a hot fix, and would lead to more EDQUOT when we're near the qgroup limit. The proper fix would be to make all metadata/data reservations happen without holding a transaction handle. CC: stable@vger.kernel.org # 5.9+ Reviewed-by: Filipe Manana Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/qgroup.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index fe3046007f52a..47f27658eac18 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -3530,16 +3530,6 @@ static int try_flush_qgroup(struct btrfs_root *root) int ret; bool can_commit = true; - /* - * We don't want to run flush again and again, so if there is a running - * one, we won't try to start a new flush, but exit directly. - */ - if (test_and_set_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state)) { - wait_event(root->qgroup_flush_wait, - !test_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state)); - return 0; - } - /* * If current process holds a transaction, we shouldn't flush, as we * assume all space reservation happens before a transaction handle is @@ -3554,6 +3544,26 @@ static int try_flush_qgroup(struct btrfs_root *root) current->journal_info != BTRFS_SEND_TRANS_STUB) can_commit = false; + /* + * We don't want to run flush again and again, so if there is a running + * one, we won't try to start a new flush, but exit directly. + */ + if (test_and_set_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state)) { + /* + * We are already holding a transaction, thus we can block other + * threads from flushing. So exit right now. This increases + * the chance of EDQUOT for heavy load and near limit cases. + * But we can argue that if we're already near limit, EDQUOT is + * unavoidable anyway. + */ + if (!can_commit) + return 0; + + wait_event(root->qgroup_flush_wait, + !test_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state)); + return 0; + } + ret = btrfs_start_delalloc_snapshot(root); if (ret < 0) goto out; -- GitLab From 0b3f407e6728d990ae1630a02c7b952c21c288d3 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 10 Dec 2020 12:09:02 +0000 Subject: [PATCH 0771/1894] btrfs: send: fix wrong file path when there is an inode with a pending rmdir When doing an incremental send, if we have a new inode that happens to have the same number that an old directory inode had in the base snapshot and that old directory has a pending rmdir operation, we end up computing a wrong path for the new inode, causing the receiver to fail. Example reproducer: $ cat test-send-rmdir.sh #!/bin/bash DEV=/dev/sdi MNT=/mnt/sdi mkfs.btrfs -f $DEV >/dev/null mount $DEV $MNT mkdir $MNT/dir touch $MNT/dir/file1 touch $MNT/dir/file2 touch $MNT/dir/file3 # Filesystem looks like: # # . (ino 256) # |----- dir/ (ino 257) # |----- file1 (ino 258) # |----- file2 (ino 259) # |----- file3 (ino 260) # btrfs subvolume snapshot -r $MNT $MNT/snap1 btrfs send -f /tmp/snap1.send $MNT/snap1 # Now remove our directory and all its files. rm -fr $MNT/dir # Unmount the filesystem and mount it again. This is to ensure that # the next inode that is created ends up with the same inode number # that our directory "dir" had, 257, which is the first free "objectid" # available after mounting again the filesystem. umount $MNT mount $DEV $MNT # Now create a new file (it could be a directory as well). touch $MNT/newfile # Filesystem now looks like: # # . (ino 256) # |----- newfile (ino 257) # btrfs subvolume snapshot -r $MNT $MNT/snap2 btrfs send -f /tmp/snap2.send -p $MNT/snap1 $MNT/snap2 # Now unmount the filesystem, create a new one, mount it and try to apply # both send streams to recreate both snapshots. umount $DEV mkfs.btrfs -f $DEV >/dev/null mount $DEV $MNT btrfs receive -f /tmp/snap1.send $MNT btrfs receive -f /tmp/snap2.send $MNT umount $MNT When running the test, the receive operation for the incremental stream fails: $ ./test-send-rmdir.sh Create a readonly snapshot of '/mnt/sdi' in '/mnt/sdi/snap1' At subvol /mnt/sdi/snap1 Create a readonly snapshot of '/mnt/sdi' in '/mnt/sdi/snap2' At subvol /mnt/sdi/snap2 At subvol snap1 At snapshot snap2 ERROR: chown o257-9-0 failed: No such file or directory So fix this by tracking directories that have a pending rmdir by inode number and generation number, instead of only inode number. A test case for fstests follows soon. Reported-by: Massimo B. Tested-by: Massimo B. Link: https://lore.kernel.org/linux-btrfs/6ae34776e85912960a253a8327068a892998e685.camel@gmx.net/ CC: stable@vger.kernel.org # 4.19+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/send.c | 49 +++++++++++++++++++++++++++++++------------------ 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index d719a2755a40d..ae97f4dbaff30 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -236,6 +236,7 @@ struct waiting_dir_move { * after this directory is moved, we can try to rmdir the ino rmdir_ino. */ u64 rmdir_ino; + u64 rmdir_gen; bool orphanized; }; @@ -316,7 +317,7 @@ static int is_waiting_for_move(struct send_ctx *sctx, u64 ino); static struct waiting_dir_move * get_waiting_dir_move(struct send_ctx *sctx, u64 ino); -static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino); +static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino, u64 gen); static int need_send_hole(struct send_ctx *sctx) { @@ -2299,7 +2300,7 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen, fs_path_reset(name); - if (is_waiting_for_rm(sctx, ino)) { + if (is_waiting_for_rm(sctx, ino, gen)) { ret = gen_unique_name(sctx, ino, gen, name); if (ret < 0) goto out; @@ -2858,8 +2859,8 @@ out: return ret; } -static struct orphan_dir_info * -add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) +static struct orphan_dir_info *add_orphan_dir_info(struct send_ctx *sctx, + u64 dir_ino, u64 dir_gen) { struct rb_node **p = &sctx->orphan_dirs.rb_node; struct rb_node *parent = NULL; @@ -2868,20 +2869,23 @@ add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) while (*p) { parent = *p; entry = rb_entry(parent, struct orphan_dir_info, node); - if (dir_ino < entry->ino) { + if (dir_ino < entry->ino) p = &(*p)->rb_left; - } else if (dir_ino > entry->ino) { + else if (dir_ino > entry->ino) p = &(*p)->rb_right; - } else { + else if (dir_gen < entry->gen) + p = &(*p)->rb_left; + else if (dir_gen > entry->gen) + p = &(*p)->rb_right; + else return entry; - } } odi = kmalloc(sizeof(*odi), GFP_KERNEL); if (!odi) return ERR_PTR(-ENOMEM); odi->ino = dir_ino; - odi->gen = 0; + odi->gen = dir_gen; odi->last_dir_index_offset = 0; rb_link_node(&odi->node, parent, p); @@ -2889,8 +2893,8 @@ add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) return odi; } -static struct orphan_dir_info * -get_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) +static struct orphan_dir_info *get_orphan_dir_info(struct send_ctx *sctx, + u64 dir_ino, u64 gen) { struct rb_node *n = sctx->orphan_dirs.rb_node; struct orphan_dir_info *entry; @@ -2901,15 +2905,19 @@ get_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) n = n->rb_left; else if (dir_ino > entry->ino) n = n->rb_right; + else if (gen < entry->gen) + n = n->rb_left; + else if (gen > entry->gen) + n = n->rb_right; else return entry; } return NULL; } -static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino) +static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino, u64 gen) { - struct orphan_dir_info *odi = get_orphan_dir_info(sctx, dir_ino); + struct orphan_dir_info *odi = get_orphan_dir_info(sctx, dir_ino, gen); return odi != NULL; } @@ -2954,7 +2962,7 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen, key.type = BTRFS_DIR_INDEX_KEY; key.offset = 0; - odi = get_orphan_dir_info(sctx, dir); + odi = get_orphan_dir_info(sctx, dir, dir_gen); if (odi) key.offset = odi->last_dir_index_offset; @@ -2985,7 +2993,7 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen, dm = get_waiting_dir_move(sctx, loc.objectid); if (dm) { - odi = add_orphan_dir_info(sctx, dir); + odi = add_orphan_dir_info(sctx, dir, dir_gen); if (IS_ERR(odi)) { ret = PTR_ERR(odi); goto out; @@ -2993,12 +3001,13 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen, odi->gen = dir_gen; odi->last_dir_index_offset = found_key.offset; dm->rmdir_ino = dir; + dm->rmdir_gen = dir_gen; ret = 0; goto out; } if (loc.objectid > send_progress) { - odi = add_orphan_dir_info(sctx, dir); + odi = add_orphan_dir_info(sctx, dir, dir_gen); if (IS_ERR(odi)) { ret = PTR_ERR(odi); goto out; @@ -3038,6 +3047,7 @@ static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino, bool orphanized) return -ENOMEM; dm->ino = ino; dm->rmdir_ino = 0; + dm->rmdir_gen = 0; dm->orphanized = orphanized; while (*p) { @@ -3183,7 +3193,7 @@ static int path_loop(struct send_ctx *sctx, struct fs_path *name, while (ino != BTRFS_FIRST_FREE_OBJECTID) { fs_path_reset(name); - if (is_waiting_for_rm(sctx, ino)) + if (is_waiting_for_rm(sctx, ino, gen)) break; if (is_waiting_for_move(sctx, ino)) { if (*ancestor_ino == 0) @@ -3223,6 +3233,7 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) u64 parent_ino, parent_gen; struct waiting_dir_move *dm = NULL; u64 rmdir_ino = 0; + u64 rmdir_gen; u64 ancestor; bool is_orphan; int ret; @@ -3237,6 +3248,7 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) dm = get_waiting_dir_move(sctx, pm->ino); ASSERT(dm); rmdir_ino = dm->rmdir_ino; + rmdir_gen = dm->rmdir_gen; is_orphan = dm->orphanized; free_waiting_dir_move(sctx, dm); @@ -3273,6 +3285,7 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) dm = get_waiting_dir_move(sctx, pm->ino); ASSERT(dm); dm->rmdir_ino = rmdir_ino; + dm->rmdir_gen = rmdir_gen; } goto out; } @@ -3291,7 +3304,7 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) struct orphan_dir_info *odi; u64 gen; - odi = get_orphan_dir_info(sctx, rmdir_ino); + odi = get_orphan_dir_info(sctx, rmdir_ino, rmdir_gen); if (!odi) { /* already deleted */ goto finish; -- GitLab From 675a4fc8f3149e93f35fb5739fd8d4764206ba0b Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 15 Dec 2020 12:00:26 -0500 Subject: [PATCH 0772/1894] btrfs: tests: initialize test inodes location I noticed that sometimes the module failed to load because the self tests failed like this: BTRFS: selftest: fs/btrfs/tests/inode-tests.c:963 miscount, wanted 1, got 0 This turned out to be because sometimes the btrfs ino would be the btree inode number, and thus we'd skip calling the set extent delalloc bit helper, and thus not adjust ->outstanding_extents. Fix this by making sure we initialize test inodes with a valid inode number so that we don't get random failures during self tests. Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/tests/btrfs-tests.c | 10 ++++++++-- fs/btrfs/tests/inode-tests.c | 9 --------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c index 8ca334d554afb..6bd97bd4cb371 100644 --- a/fs/btrfs/tests/btrfs-tests.c +++ b/fs/btrfs/tests/btrfs-tests.c @@ -55,8 +55,14 @@ struct inode *btrfs_new_test_inode(void) struct inode *inode; inode = new_inode(test_mnt->mnt_sb); - if (inode) - inode_init_owner(inode, NULL, S_IFREG); + if (!inode) + return NULL; + + inode->i_mode = S_IFREG; + BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; + BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID; + BTRFS_I(inode)->location.offset = 0; + inode_init_owner(inode, NULL, S_IFREG); return inode; } diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c index 04022069761de..c9874b12d337c 100644 --- a/fs/btrfs/tests/inode-tests.c +++ b/fs/btrfs/tests/inode-tests.c @@ -232,11 +232,6 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) return ret; } - inode->i_mode = S_IFREG; - BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; - BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID; - BTRFS_I(inode)->location.offset = 0; - fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize); if (!fs_info) { test_std_err(TEST_ALLOC_FS_INFO); @@ -835,10 +830,6 @@ static int test_hole_first(u32 sectorsize, u32 nodesize) return ret; } - BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; - BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID; - BTRFS_I(inode)->location.offset = 0; - fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize); if (!fs_info) { test_std_err(TEST_ALLOC_FS_INFO); -- GitLab From ea9ed87c73e87e044b2c58d658eb4ba5216bc488 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 6 Dec 2020 15:56:20 +0000 Subject: [PATCH 0773/1894] btrfs: fix async discard stall Might happen that bg->discard_eligible_time was changed without rescheduling, so btrfs_discard_workfn() wakes up earlier than that new time, peek_discard_list() returns NULL, and all work halts and goes to sleep without further rescheduling even there are block groups to discard. It happens pretty often, but not so visible from the userspace because after some time it usually will be kicked off anyway by someone else calling btrfs_discard_reschedule_work(). Fix it by continue rescheduling if block group discard lists are not empty. Reviewed-by: Josef Bacik Signed-off-by: Pavel Begunkov Signed-off-by: David Sterba --- fs/btrfs/discard.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/discard.c b/fs/btrfs/discard.c index 1db966bf85b24..36431d7e1334c 100644 --- a/fs/btrfs/discard.c +++ b/fs/btrfs/discard.c @@ -199,16 +199,15 @@ static struct btrfs_block_group *find_next_block_group( static struct btrfs_block_group *peek_discard_list( struct btrfs_discard_ctl *discard_ctl, enum btrfs_discard_state *discard_state, - int *discard_index) + int *discard_index, u64 now) { struct btrfs_block_group *block_group; - const u64 now = ktime_get_ns(); spin_lock(&discard_ctl->lock); again: block_group = find_next_block_group(discard_ctl, now); - if (block_group && now > block_group->discard_eligible_time) { + if (block_group && now >= block_group->discard_eligible_time) { if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED && block_group->used != 0) { if (btrfs_is_block_group_data_only(block_group)) @@ -222,12 +221,11 @@ again: block_group->discard_state = BTRFS_DISCARD_EXTENTS; } discard_ctl->block_group = block_group; + } + if (block_group) { *discard_state = block_group->discard_state; *discard_index = block_group->discard_index; - } else { - block_group = NULL; } - spin_unlock(&discard_ctl->lock); return block_group; @@ -438,13 +436,18 @@ static void btrfs_discard_workfn(struct work_struct *work) int discard_index = 0; u64 trimmed = 0; u64 minlen = 0; + u64 now = ktime_get_ns(); discard_ctl = container_of(work, struct btrfs_discard_ctl, work.work); block_group = peek_discard_list(discard_ctl, &discard_state, - &discard_index); + &discard_index, now); if (!block_group || !btrfs_run_discard_work(discard_ctl)) return; + if (now < block_group->discard_eligible_time) { + btrfs_discard_schedule_work(discard_ctl, false); + return; + } /* Perform discarding */ minlen = discard_minlen[discard_index]; -- GitLab From 1ea2872fc6f2aaee0a4b4f1578b83ffd9f55c6a7 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 6 Dec 2020 15:56:21 +0000 Subject: [PATCH 0774/1894] btrfs: fix racy access to discard_ctl data Because only one discard worker may be running at any given point, it could have been safe to modify ->prev_discard, etc. without synchronization, if not for @override flag in btrfs_discard_schedule_work() and delayed_work_pending() returning false while workfn is running. That may lead to torn reads of u64 for some architectures, but that's not a big problem as only slightly affects the discard rate. Suggested-by: Josef Bacik Reviewed-by: Josef Bacik Signed-off-by: Pavel Begunkov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/discard.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/discard.c b/fs/btrfs/discard.c index 36431d7e1334c..d641f451f8405 100644 --- a/fs/btrfs/discard.c +++ b/fs/btrfs/discard.c @@ -477,13 +477,6 @@ static void btrfs_discard_workfn(struct work_struct *work) discard_ctl->discard_extent_bytes += trimmed; } - /* - * Updated without locks as this is inside the workfn and nothing else - * is reading the values - */ - discard_ctl->prev_discard = trimmed; - discard_ctl->prev_discard_time = ktime_get_ns(); - /* Determine next steps for a block_group */ if (block_group->discard_cursor >= btrfs_block_group_end(block_group)) { if (discard_state == BTRFS_DISCARD_BITMAPS) { @@ -499,7 +492,10 @@ static void btrfs_discard_workfn(struct work_struct *work) } } + now = ktime_get_ns(); spin_lock(&discard_ctl->lock); + discard_ctl->prev_discard = trimmed; + discard_ctl->prev_discard_time = now; discard_ctl->block_group = NULL; spin_unlock(&discard_ctl->lock); -- GitLab From 8fc058597a283e9a37720abb0e8d68e342b9387d Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 6 Dec 2020 15:56:22 +0000 Subject: [PATCH 0775/1894] btrfs: merge critical sections of discard lock in workfn btrfs_discard_workfn() drops discard_ctl->lock just to take it again in a moment in btrfs_discard_schedule_work(). Avoid that and also reuse ktime. Reviewed-by: Josef Bacik Signed-off-by: Pavel Begunkov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/discard.c | 43 +++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/fs/btrfs/discard.c b/fs/btrfs/discard.c index d641f451f8405..2b8383d411449 100644 --- a/fs/btrfs/discard.c +++ b/fs/btrfs/discard.c @@ -328,28 +328,15 @@ void btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl, btrfs_discard_schedule_work(discard_ctl, false); } -/** - * btrfs_discard_schedule_work - responsible for scheduling the discard work - * @discard_ctl: discard control - * @override: override the current timer - * - * Discards are issued by a delayed workqueue item. @override is used to - * update the current delay as the baseline delay interval is reevaluated on - * transaction commit. This is also maxed with any other rate limit. - */ -void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl, - bool override) +static void __btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl, + u64 now, bool override) { struct btrfs_block_group *block_group; - const u64 now = ktime_get_ns(); - - spin_lock(&discard_ctl->lock); if (!btrfs_run_discard_work(discard_ctl)) - goto out; - + return; if (!override && delayed_work_pending(&discard_ctl->work)) - goto out; + return; block_group = find_next_block_group(discard_ctl, now); if (block_group) { @@ -391,7 +378,24 @@ void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl, mod_delayed_work(discard_ctl->discard_workers, &discard_ctl->work, nsecs_to_jiffies(delay)); } -out: +} + +/* + * btrfs_discard_schedule_work - responsible for scheduling the discard work + * @discard_ctl: discard control + * @override: override the current timer + * + * Discards are issued by a delayed workqueue item. @override is used to + * update the current delay as the baseline delay interval is reevaluated on + * transaction commit. This is also maxed with any other rate limit. + */ +void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl, + bool override) +{ + const u64 now = ktime_get_ns(); + + spin_lock(&discard_ctl->lock); + __btrfs_discard_schedule_work(discard_ctl, now, override); spin_unlock(&discard_ctl->lock); } @@ -497,9 +501,8 @@ static void btrfs_discard_workfn(struct work_struct *work) discard_ctl->prev_discard = trimmed; discard_ctl->prev_discard_time = now; discard_ctl->block_group = NULL; + __btrfs_discard_schedule_work(discard_ctl, now, false); spin_unlock(&discard_ctl->lock); - - btrfs_discard_schedule_work(discard_ctl, false); } /** -- GitLab From cb13eea3b49055bd78e6ddf39defd6340f7379fc Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 14 Dec 2020 10:10:45 +0000 Subject: [PATCH 0776/1894] btrfs: fix transaction leak and crash after RO remount caused by qgroup rescan If we remount a filesystem in RO mode while the qgroup rescan worker is running, we can end up having it still running after the remount is done, and at unmount time we may end up with an open transaction that ends up never getting committed. If that happens we end up with several memory leaks and can crash when hardware acceleration is unavailable for crc32c. Possibly it can lead to other nasty surprises too, due to use-after-free issues. The following steps explain how the problem happens. 1) We have a filesystem mounted in RW mode and the qgroup rescan worker is running; 2) We remount the filesystem in RO mode, and never stop/pause the rescan worker, so after the remount the rescan worker is still running. The important detail here is that the rescan task is still running after the remount operation committed any ongoing transaction through its call to btrfs_commit_super(); 3) The rescan is still running, and after the remount completed, the rescan worker started a transaction, after it finished iterating all leaves of the extent tree, to update the qgroup status item in the quotas tree. It does not commit the transaction, it only releases its handle on the transaction; 4) A filesystem unmount operation starts shortly after; 5) The unmount task, at close_ctree(), stops the transaction kthread, which had not had a chance to commit the open transaction since it was sleeping and the commit interval (default of 30 seconds) has not yet elapsed since the last time it committed a transaction; 6) So after stopping the transaction kthread we still have the transaction used to update the qgroup status item open. At close_ctree(), when the filesystem is in RO mode and no transaction abort happened (or the filesystem is in error mode), we do not expect to have any transaction open, so we do not call btrfs_commit_super(); 7) We then proceed to destroy the work queues, free the roots and block groups, etc. After that we drop the last reference on the btree inode by calling iput() on it. Since there are dirty pages for the btree inode, corresponding to the COWed extent buffer for the quotas btree, btree_write_cache_pages() is invoked to flush those dirty pages. This results in creating a bio and submitting it, which makes us end up at btrfs_submit_metadata_bio(); 8) At btrfs_submit_metadata_bio() we end up at the if-then-else branch that calls btrfs_wq_submit_bio(), because check_async_write() returned a value of 1. This value of 1 is because we did not have hardware acceleration available for crc32c, so BTRFS_FS_CSUM_IMPL_FAST was not set in fs_info->flags; 9) Then at btrfs_wq_submit_bio() we call btrfs_queue_work() against the workqueue at fs_info->workers, which was already freed before by the call to btrfs_stop_all_workers() at close_ctree(). This results in an invalid memory access due to a use-after-free, leading to a crash. When this happens, before the crash there are several warnings triggered, since we have reserved metadata space in a block group, the delayed refs reservation, etc: ------------[ cut here ]------------ WARNING: CPU: 4 PID: 1729896 at fs/btrfs/block-group.c:125 btrfs_put_block_group+0x63/0xa0 [btrfs] Modules linked in: btrfs dm_snapshot dm_thin_pool (...) CPU: 4 PID: 1729896 Comm: umount Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:btrfs_put_block_group+0x63/0xa0 [btrfs] Code: f0 01 00 00 48 39 c2 75 (...) RSP: 0018:ffffb270826bbdd8 EFLAGS: 00010206 RAX: 0000000000000001 RBX: ffff947ed73e4000 RCX: ffff947ebc8b29c8 RDX: 0000000000000001 RSI: ffffffffc0b150a0 RDI: ffff947ebc8b2800 RBP: ffff947ebc8b2800 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000001 R12: ffff947ed73e4110 R13: ffff947ed73e4160 R14: ffff947ebc8b2988 R15: dead000000000100 FS: 00007f15edfea840(0000) GS:ffff9481ad600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f37e2893320 CR3: 0000000138f68001 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: btrfs_free_block_groups+0x17f/0x2f0 [btrfs] close_ctree+0x2ba/0x2fa [btrfs] generic_shutdown_super+0x6c/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f15ee221ee7 Code: ff 0b 00 f7 d8 64 89 01 48 (...) RSP: 002b:00007ffe9470f0f8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 RAX: 0000000000000000 RBX: 00007f15ee347264 RCX: 00007f15ee221ee7 RDX: ffffffffffffff78 RSI: 0000000000000000 RDI: 000056169701d000 RBP: 0000561697018a30 R08: 0000000000000000 R09: 00007f15ee2e2be0 R10: 000056169701efe0 R11: 0000000000000246 R12: 0000000000000000 R13: 000056169701d000 R14: 0000561697018b40 R15: 0000561697018c60 irq event stamp: 0 hardirqs last enabled at (0): [<0000000000000000>] 0x0 hardirqs last disabled at (0): [] copy_process+0x8a0/0x1d70 softirqs last enabled at (0): [] copy_process+0x8a0/0x1d70 softirqs last disabled at (0): [<0000000000000000>] 0x0 ---[ end trace dd74718fef1ed5c6 ]--- ------------[ cut here ]------------ WARNING: CPU: 2 PID: 1729896 at fs/btrfs/block-rsv.c:459 btrfs_release_global_block_rsv+0x70/0xc0 [btrfs] Modules linked in: btrfs dm_snapshot dm_thin_pool (...) CPU: 2 PID: 1729896 Comm: umount Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:btrfs_release_global_block_rsv+0x70/0xc0 [btrfs] Code: 48 83 bb b0 03 00 00 00 (...) RSP: 0018:ffffb270826bbdd8 EFLAGS: 00010206 RAX: 000000000033c000 RBX: ffff947ed73e4000 RCX: 0000000000000000 RDX: 0000000000000001 RSI: ffffffffc0b0d8c1 RDI: 00000000ffffffff RBP: ffff947ebc8b7000 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000001 R12: ffff947ed73e4110 R13: ffff947ed73e5278 R14: dead000000000122 R15: dead000000000100 FS: 00007f15edfea840(0000) GS:ffff9481aca00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000561a79f76e20 CR3: 0000000138f68006 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: btrfs_free_block_groups+0x24c/0x2f0 [btrfs] close_ctree+0x2ba/0x2fa [btrfs] generic_shutdown_super+0x6c/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f15ee221ee7 Code: ff 0b 00 f7 d8 64 89 01 (...) RSP: 002b:00007ffe9470f0f8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 RAX: 0000000000000000 RBX: 00007f15ee347264 RCX: 00007f15ee221ee7 RDX: ffffffffffffff78 RSI: 0000000000000000 RDI: 000056169701d000 RBP: 0000561697018a30 R08: 0000000000000000 R09: 00007f15ee2e2be0 R10: 000056169701efe0 R11: 0000000000000246 R12: 0000000000000000 R13: 000056169701d000 R14: 0000561697018b40 R15: 0000561697018c60 irq event stamp: 0 hardirqs last enabled at (0): [<0000000000000000>] 0x0 hardirqs last disabled at (0): [] copy_process+0x8a0/0x1d70 softirqs last enabled at (0): [] copy_process+0x8a0/0x1d70 softirqs last disabled at (0): [<0000000000000000>] 0x0 ---[ end trace dd74718fef1ed5c7 ]--- ------------[ cut here ]------------ WARNING: CPU: 2 PID: 1729896 at fs/btrfs/block-group.c:3377 btrfs_free_block_groups+0x25d/0x2f0 [btrfs] Modules linked in: btrfs dm_snapshot dm_thin_pool (...) CPU: 5 PID: 1729896 Comm: umount Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:btrfs_free_block_groups+0x25d/0x2f0 [btrfs] Code: ad de 49 be 22 01 00 (...) RSP: 0018:ffffb270826bbde8 EFLAGS: 00010206 RAX: ffff947ebeae1d08 RBX: ffff947ed73e4000 RCX: 0000000000000000 RDX: 0000000000000001 RSI: ffff947e9d823ae8 RDI: 0000000000000246 RBP: ffff947ebeae1d08 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000001 R12: ffff947ebeae1c00 R13: ffff947ed73e5278 R14: dead000000000122 R15: dead000000000100 FS: 00007f15edfea840(0000) GS:ffff9481ad200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f1475d98ea8 CR3: 0000000138f68005 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: close_ctree+0x2ba/0x2fa [btrfs] generic_shutdown_super+0x6c/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f15ee221ee7 Code: ff 0b 00 f7 d8 64 89 (...) RSP: 002b:00007ffe9470f0f8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 RAX: 0000000000000000 RBX: 00007f15ee347264 RCX: 00007f15ee221ee7 RDX: ffffffffffffff78 RSI: 0000000000000000 RDI: 000056169701d000 RBP: 0000561697018a30 R08: 0000000000000000 R09: 00007f15ee2e2be0 R10: 000056169701efe0 R11: 0000000000000246 R12: 0000000000000000 R13: 000056169701d000 R14: 0000561697018b40 R15: 0000561697018c60 irq event stamp: 0 hardirqs last enabled at (0): [<0000000000000000>] 0x0 hardirqs last disabled at (0): [] copy_process+0x8a0/0x1d70 softirqs last enabled at (0): [] copy_process+0x8a0/0x1d70 softirqs last disabled at (0): [<0000000000000000>] 0x0 ---[ end trace dd74718fef1ed5c8 ]--- BTRFS info (device sdc): space_info 4 has 268238848 free, is not full BTRFS info (device sdc): space_info total=268435456, used=114688, pinned=0, reserved=16384, may_use=0, readonly=65536 BTRFS info (device sdc): global_block_rsv: size 0 reserved 0 BTRFS info (device sdc): trans_block_rsv: size 0 reserved 0 BTRFS info (device sdc): chunk_block_rsv: size 0 reserved 0 BTRFS info (device sdc): delayed_block_rsv: size 0 reserved 0 BTRFS info (device sdc): delayed_refs_rsv: size 524288 reserved 0 And the crash, which only happens when we do not have crc32c hardware acceleration, produces the following trace immediately after those warnings: stack segment: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC PTI CPU: 2 PID: 1749129 Comm: umount Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:btrfs_queue_work+0x36/0x190 [btrfs] Code: 54 55 53 48 89 f3 (...) RSP: 0018:ffffb27082443ae8 EFLAGS: 00010282 RAX: 0000000000000004 RBX: ffff94810ee9ad90 RCX: 0000000000000000 RDX: 0000000000000001 RSI: ffff94810ee9ad90 RDI: ffff947ed8ee75a0 RBP: a56b6b6b6b6b6b6b R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000007 R11: 0000000000000001 R12: ffff947fa9b435a8 R13: ffff94810ee9ad90 R14: 0000000000000000 R15: ffff947e93dc0000 FS: 00007f3cfe974840(0000) GS:ffff9481ac600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f1b42995a70 CR3: 0000000127638003 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: btrfs_wq_submit_bio+0xb3/0xd0 [btrfs] btrfs_submit_metadata_bio+0x44/0xc0 [btrfs] submit_one_bio+0x61/0x70 [btrfs] btree_write_cache_pages+0x414/0x450 [btrfs] ? kobject_put+0x9a/0x1d0 ? trace_hardirqs_on+0x1b/0xf0 ? _raw_spin_unlock_irqrestore+0x3c/0x60 ? free_debug_processing+0x1e1/0x2b0 do_writepages+0x43/0xe0 ? lock_acquired+0x199/0x490 __writeback_single_inode+0x59/0x650 writeback_single_inode+0xaf/0x120 write_inode_now+0x94/0xd0 iput+0x187/0x2b0 close_ctree+0x2c6/0x2fa [btrfs] generic_shutdown_super+0x6c/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f3cfebabee7 Code: ff 0b 00 f7 d8 64 89 01 (...) RSP: 002b:00007ffc9c9a05f8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 RAX: 0000000000000000 RBX: 00007f3cfecd1264 RCX: 00007f3cfebabee7 RDX: ffffffffffffff78 RSI: 0000000000000000 RDI: 0000562b6b478000 RBP: 0000562b6b473a30 R08: 0000000000000000 R09: 00007f3cfec6cbe0 R10: 0000562b6b479fe0 R11: 0000000000000246 R12: 0000000000000000 R13: 0000562b6b478000 R14: 0000562b6b473b40 R15: 0000562b6b473c60 Modules linked in: btrfs dm_snapshot dm_thin_pool (...) ---[ end trace dd74718fef1ed5cc ]--- Finally when we remove the btrfs module (rmmod btrfs), there are several warnings about objects that were allocated from our slabs but were never freed, consequence of the transaction that was never committed and got leaked: ============================================================================= BUG btrfs_delayed_ref_head (Tainted: G B W ): Objects remaining in btrfs_delayed_ref_head on __kmem_cache_shutdown() ----------------------------------------------------------------------------- INFO: Slab 0x0000000094c2ae56 objects=24 used=2 fp=0x000000002bfa2521 flags=0x17fffc000010200 CPU: 5 PID: 1729921 Comm: rmmod Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0x8d/0xb5 slab_err+0xb7/0xdc ? lock_acquired+0x199/0x490 __kmem_cache_shutdown+0x1ac/0x3c0 ? lock_release+0x20e/0x4c0 kmem_cache_destroy+0x55/0x120 btrfs_delayed_ref_exit+0x11/0x35 [btrfs] exit_btrfs_fs+0xa/0x59 [btrfs] __x64_sys_delete_module+0x194/0x260 ? fpregs_assert_state_consistent+0x1e/0x40 ? exit_to_user_mode_prepare+0x55/0x1c0 ? trace_hardirqs_on+0x1b/0xf0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f693e305897 Code: 73 01 c3 48 8b 0d f9 f5 (...) RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897 RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8 RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000 R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740 R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760 INFO: Object 0x0000000050cbdd61 @offset=12104 INFO: Allocated in btrfs_add_delayed_tree_ref+0xbb/0x480 [btrfs] age=1894 cpu=6 pid=1729873 __slab_alloc.isra.0+0x109/0x1c0 kmem_cache_alloc+0x7bb/0x830 btrfs_add_delayed_tree_ref+0xbb/0x480 [btrfs] btrfs_free_tree_block+0x128/0x360 [btrfs] __btrfs_cow_block+0x489/0x5f0 [btrfs] btrfs_cow_block+0xf7/0x220 [btrfs] btrfs_search_slot+0x62a/0xc40 [btrfs] btrfs_del_orphan_item+0x65/0xd0 [btrfs] btrfs_find_orphan_roots+0x1bf/0x200 [btrfs] open_ctree+0x125a/0x18a0 [btrfs] btrfs_mount_root.cold+0x13/0xed [btrfs] legacy_get_tree+0x30/0x60 vfs_get_tree+0x28/0xe0 fc_mount+0xe/0x40 vfs_kern_mount.part.0+0x71/0x90 btrfs_mount+0x13b/0x3e0 [btrfs] INFO: Freed in __btrfs_run_delayed_refs+0x1117/0x1290 [btrfs] age=4292 cpu=2 pid=1729526 kmem_cache_free+0x34c/0x3c0 __btrfs_run_delayed_refs+0x1117/0x1290 [btrfs] btrfs_run_delayed_refs+0x81/0x210 [btrfs] commit_cowonly_roots+0xfb/0x300 [btrfs] btrfs_commit_transaction+0x367/0xc40 [btrfs] sync_filesystem+0x74/0x90 generic_shutdown_super+0x22/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 INFO: Object 0x0000000086e9b0ff @offset=12776 INFO: Allocated in btrfs_add_delayed_tree_ref+0xbb/0x480 [btrfs] age=1900 cpu=6 pid=1729873 __slab_alloc.isra.0+0x109/0x1c0 kmem_cache_alloc+0x7bb/0x830 btrfs_add_delayed_tree_ref+0xbb/0x480 [btrfs] btrfs_alloc_tree_block+0x2bf/0x360 [btrfs] alloc_tree_block_no_bg_flush+0x4f/0x60 [btrfs] __btrfs_cow_block+0x12d/0x5f0 [btrfs] btrfs_cow_block+0xf7/0x220 [btrfs] btrfs_search_slot+0x62a/0xc40 [btrfs] btrfs_del_orphan_item+0x65/0xd0 [btrfs] btrfs_find_orphan_roots+0x1bf/0x200 [btrfs] open_ctree+0x125a/0x18a0 [btrfs] btrfs_mount_root.cold+0x13/0xed [btrfs] legacy_get_tree+0x30/0x60 vfs_get_tree+0x28/0xe0 fc_mount+0xe/0x40 vfs_kern_mount.part.0+0x71/0x90 INFO: Freed in __btrfs_run_delayed_refs+0x1117/0x1290 [btrfs] age=3141 cpu=6 pid=1729803 kmem_cache_free+0x34c/0x3c0 __btrfs_run_delayed_refs+0x1117/0x1290 [btrfs] btrfs_run_delayed_refs+0x81/0x210 [btrfs] btrfs_write_dirty_block_groups+0x17d/0x3d0 [btrfs] commit_cowonly_roots+0x248/0x300 [btrfs] btrfs_commit_transaction+0x367/0xc40 [btrfs] close_ctree+0x113/0x2fa [btrfs] generic_shutdown_super+0x6c/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 kmem_cache_destroy btrfs_delayed_ref_head: Slab cache still has objects CPU: 5 PID: 1729921 Comm: rmmod Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0x8d/0xb5 kmem_cache_destroy+0x119/0x120 btrfs_delayed_ref_exit+0x11/0x35 [btrfs] exit_btrfs_fs+0xa/0x59 [btrfs] __x64_sys_delete_module+0x194/0x260 ? fpregs_assert_state_consistent+0x1e/0x40 ? exit_to_user_mode_prepare+0x55/0x1c0 ? trace_hardirqs_on+0x1b/0xf0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f693e305897 Code: 73 01 c3 48 8b 0d f9 f5 0b (...) RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897 RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8 RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000 R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740 R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760 ============================================================================= BUG btrfs_delayed_tree_ref (Tainted: G B W ): Objects remaining in btrfs_delayed_tree_ref on __kmem_cache_shutdown() ----------------------------------------------------------------------------- INFO: Slab 0x0000000011f78dc0 objects=37 used=2 fp=0x0000000032d55d91 flags=0x17fffc000010200 CPU: 3 PID: 1729921 Comm: rmmod Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0x8d/0xb5 slab_err+0xb7/0xdc ? lock_acquired+0x199/0x490 __kmem_cache_shutdown+0x1ac/0x3c0 ? lock_release+0x20e/0x4c0 kmem_cache_destroy+0x55/0x120 btrfs_delayed_ref_exit+0x1d/0x35 [btrfs] exit_btrfs_fs+0xa/0x59 [btrfs] __x64_sys_delete_module+0x194/0x260 ? fpregs_assert_state_consistent+0x1e/0x40 ? exit_to_user_mode_prepare+0x55/0x1c0 ? trace_hardirqs_on+0x1b/0xf0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f693e305897 Code: 73 01 c3 48 8b 0d f9 f5 (...) RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897 RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8 RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000 R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740 R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760 INFO: Object 0x000000001a340018 @offset=4408 INFO: Allocated in btrfs_add_delayed_tree_ref+0x9e/0x480 [btrfs] age=1917 cpu=6 pid=1729873 __slab_alloc.isra.0+0x109/0x1c0 kmem_cache_alloc+0x7bb/0x830 btrfs_add_delayed_tree_ref+0x9e/0x480 [btrfs] btrfs_free_tree_block+0x128/0x360 [btrfs] __btrfs_cow_block+0x489/0x5f0 [btrfs] btrfs_cow_block+0xf7/0x220 [btrfs] btrfs_search_slot+0x62a/0xc40 [btrfs] btrfs_del_orphan_item+0x65/0xd0 [btrfs] btrfs_find_orphan_roots+0x1bf/0x200 [btrfs] open_ctree+0x125a/0x18a0 [btrfs] btrfs_mount_root.cold+0x13/0xed [btrfs] legacy_get_tree+0x30/0x60 vfs_get_tree+0x28/0xe0 fc_mount+0xe/0x40 vfs_kern_mount.part.0+0x71/0x90 btrfs_mount+0x13b/0x3e0 [btrfs] INFO: Freed in __btrfs_run_delayed_refs+0x63d/0x1290 [btrfs] age=4167 cpu=4 pid=1729795 kmem_cache_free+0x34c/0x3c0 __btrfs_run_delayed_refs+0x63d/0x1290 [btrfs] btrfs_run_delayed_refs+0x81/0x210 [btrfs] btrfs_commit_transaction+0x60/0xc40 [btrfs] create_subvol+0x56a/0x990 [btrfs] btrfs_mksubvol+0x3fb/0x4a0 [btrfs] __btrfs_ioctl_snap_create+0x119/0x1a0 [btrfs] btrfs_ioctl_snap_create+0x58/0x80 [btrfs] btrfs_ioctl+0x1a92/0x36f0 [btrfs] __x64_sys_ioctl+0x83/0xb0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 INFO: Object 0x000000002b46292a @offset=13648 INFO: Allocated in btrfs_add_delayed_tree_ref+0x9e/0x480 [btrfs] age=1923 cpu=6 pid=1729873 __slab_alloc.isra.0+0x109/0x1c0 kmem_cache_alloc+0x7bb/0x830 btrfs_add_delayed_tree_ref+0x9e/0x480 [btrfs] btrfs_alloc_tree_block+0x2bf/0x360 [btrfs] alloc_tree_block_no_bg_flush+0x4f/0x60 [btrfs] __btrfs_cow_block+0x12d/0x5f0 [btrfs] btrfs_cow_block+0xf7/0x220 [btrfs] btrfs_search_slot+0x62a/0xc40 [btrfs] btrfs_del_orphan_item+0x65/0xd0 [btrfs] btrfs_find_orphan_roots+0x1bf/0x200 [btrfs] open_ctree+0x125a/0x18a0 [btrfs] btrfs_mount_root.cold+0x13/0xed [btrfs] legacy_get_tree+0x30/0x60 vfs_get_tree+0x28/0xe0 fc_mount+0xe/0x40 vfs_kern_mount.part.0+0x71/0x90 INFO: Freed in __btrfs_run_delayed_refs+0x63d/0x1290 [btrfs] age=3164 cpu=6 pid=1729803 kmem_cache_free+0x34c/0x3c0 __btrfs_run_delayed_refs+0x63d/0x1290 [btrfs] btrfs_run_delayed_refs+0x81/0x210 [btrfs] commit_cowonly_roots+0xfb/0x300 [btrfs] btrfs_commit_transaction+0x367/0xc40 [btrfs] close_ctree+0x113/0x2fa [btrfs] generic_shutdown_super+0x6c/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 kmem_cache_destroy btrfs_delayed_tree_ref: Slab cache still has objects CPU: 5 PID: 1729921 Comm: rmmod Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0x8d/0xb5 kmem_cache_destroy+0x119/0x120 btrfs_delayed_ref_exit+0x1d/0x35 [btrfs] exit_btrfs_fs+0xa/0x59 [btrfs] __x64_sys_delete_module+0x194/0x260 ? fpregs_assert_state_consistent+0x1e/0x40 ? exit_to_user_mode_prepare+0x55/0x1c0 ? trace_hardirqs_on+0x1b/0xf0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f693e305897 Code: 73 01 c3 48 8b 0d f9 f5 (...) RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897 RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8 RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000 R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740 R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760 ============================================================================= BUG btrfs_delayed_extent_op (Tainted: G B W ): Objects remaining in btrfs_delayed_extent_op on __kmem_cache_shutdown() ----------------------------------------------------------------------------- INFO: Slab 0x00000000f145ce2f objects=22 used=1 fp=0x00000000af0f92cf flags=0x17fffc000010200 CPU: 5 PID: 1729921 Comm: rmmod Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0x8d/0xb5 slab_err+0xb7/0xdc ? lock_acquired+0x199/0x490 __kmem_cache_shutdown+0x1ac/0x3c0 ? __mutex_unlock_slowpath+0x45/0x2a0 kmem_cache_destroy+0x55/0x120 exit_btrfs_fs+0xa/0x59 [btrfs] __x64_sys_delete_module+0x194/0x260 ? fpregs_assert_state_consistent+0x1e/0x40 ? exit_to_user_mode_prepare+0x55/0x1c0 ? trace_hardirqs_on+0x1b/0xf0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f693e305897 Code: 73 01 c3 48 8b 0d f9 f5 (...) RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897 RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8 RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000 R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740 R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760 INFO: Object 0x000000004cf95ea8 @offset=6264 INFO: Allocated in btrfs_alloc_tree_block+0x1e0/0x360 [btrfs] age=1931 cpu=6 pid=1729873 __slab_alloc.isra.0+0x109/0x1c0 kmem_cache_alloc+0x7bb/0x830 btrfs_alloc_tree_block+0x1e0/0x360 [btrfs] alloc_tree_block_no_bg_flush+0x4f/0x60 [btrfs] __btrfs_cow_block+0x12d/0x5f0 [btrfs] btrfs_cow_block+0xf7/0x220 [btrfs] btrfs_search_slot+0x62a/0xc40 [btrfs] btrfs_del_orphan_item+0x65/0xd0 [btrfs] btrfs_find_orphan_roots+0x1bf/0x200 [btrfs] open_ctree+0x125a/0x18a0 [btrfs] btrfs_mount_root.cold+0x13/0xed [btrfs] legacy_get_tree+0x30/0x60 vfs_get_tree+0x28/0xe0 fc_mount+0xe/0x40 vfs_kern_mount.part.0+0x71/0x90 btrfs_mount+0x13b/0x3e0 [btrfs] INFO: Freed in __btrfs_run_delayed_refs+0xabd/0x1290 [btrfs] age=3173 cpu=6 pid=1729803 kmem_cache_free+0x34c/0x3c0 __btrfs_run_delayed_refs+0xabd/0x1290 [btrfs] btrfs_run_delayed_refs+0x81/0x210 [btrfs] commit_cowonly_roots+0xfb/0x300 [btrfs] btrfs_commit_transaction+0x367/0xc40 [btrfs] close_ctree+0x113/0x2fa [btrfs] generic_shutdown_super+0x6c/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 kmem_cache_destroy btrfs_delayed_extent_op: Slab cache still has objects CPU: 3 PID: 1729921 Comm: rmmod Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0x8d/0xb5 kmem_cache_destroy+0x119/0x120 exit_btrfs_fs+0xa/0x59 [btrfs] __x64_sys_delete_module+0x194/0x260 ? fpregs_assert_state_consistent+0x1e/0x40 ? exit_to_user_mode_prepare+0x55/0x1c0 ? trace_hardirqs_on+0x1b/0xf0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f693e305897 Code: 73 01 c3 48 8b 0d f9 (...) RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897 RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8 RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000 R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740 R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760 BTRFS: state leak: start 30408704 end 30425087 state 1 in tree 1 refs 1 Fix this issue by having the remount path stop the qgroup rescan worker when we are remounting RO and teach the rescan worker to stop when a remount is in progress. If later a remount in RW mode happens, we are already resuming the qgroup rescan worker through the call to btrfs_qgroup_rescan_resume(), so we do not need to worry about that. Tested-by: Fabian Vogt Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/qgroup.c | 13 ++++++++++--- fs/btrfs/super.c | 8 ++++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 47f27658eac18..808370ada8889 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -3190,6 +3190,12 @@ out: return ret; } +static bool rescan_should_stop(struct btrfs_fs_info *fs_info) +{ + return btrfs_fs_closing(fs_info) || + test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state); +} + static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) { struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info, @@ -3198,6 +3204,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) struct btrfs_trans_handle *trans = NULL; int err = -ENOMEM; int ret = 0; + bool stopped = false; path = btrfs_alloc_path(); if (!path) @@ -3210,7 +3217,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) path->skip_locking = 1; err = 0; - while (!err && !btrfs_fs_closing(fs_info)) { + while (!err && !(stopped = rescan_should_stop(fs_info))) { trans = btrfs_start_transaction(fs_info->fs_root, 0); if (IS_ERR(trans)) { err = PTR_ERR(trans); @@ -3253,7 +3260,7 @@ out: } mutex_lock(&fs_info->qgroup_rescan_lock); - if (!btrfs_fs_closing(fs_info)) + if (!stopped) fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; if (trans) { ret = update_qgroup_status_item(trans); @@ -3272,7 +3279,7 @@ out: btrfs_end_transaction(trans); - if (btrfs_fs_closing(fs_info)) { + if (stopped) { btrfs_info(fs_info, "qgroup scan paused"); } else if (err >= 0) { btrfs_info(fs_info, "qgroup scan completed%s", diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 022f208100892..b24fa62375e06 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1968,6 +1968,14 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) btrfs_scrub_cancel(fs_info); btrfs_pause_balance(fs_info); + /* + * Pause the qgroup rescan worker if it is running. We don't want + * it to be still running after we are in RO mode, as after that, + * by the time we unmount, it might have left a transaction open, + * so we would leak the transaction and/or crash. + */ + btrfs_qgroup_wait_for_completion(fs_info, false); + ret = btrfs_commit_super(fs_info); if (ret) goto restore; -- GitLab From 638331fa56caeaa8b4d31cc1dfbe0ce989bcff67 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 14 Dec 2020 10:10:46 +0000 Subject: [PATCH 0777/1894] btrfs: fix transaction leak and crash after cleaning up orphans on RO mount When we delete a root (subvolume or snapshot), at the very end of the operation, we attempt to remove the root's orphan item from the root tree, at btrfs_drop_snapshot(), by calling btrfs_del_orphan_item(). We ignore any error from btrfs_del_orphan_item() since it is not a serious problem and the next time the filesystem is mounted we remove such stray orphan items at btrfs_find_orphan_roots(). However if the filesystem is mounted RO and we have stray orphan items for any previously deleted root, we can end up leaking a transaction and other data structures when unmounting the filesystem, as well as crashing if we do not have hardware acceleration for crc32c available. The steps that lead to the transaction leak are the following: 1) The filesystem is mounted in RW mode; 2) A subvolume is deleted; 3) When the cleaner kthread runs btrfs_drop_snapshot() to delete the root, it gets a failure at btrfs_del_orphan_item(), which is ignored, due to an ENOMEM when allocating a path for example. So the orphan item for the root remains in the root tree; 4) The filesystem is unmounted; 5) The filesystem is mounted RO (-o ro). During the mount path we call btrfs_find_orphan_roots(), which iterates the root tree searching for orphan items. It finds the orphan item for our deleted root, and since it can not find the root, it starts a transaction to delete the orphan item (by calling btrfs_del_orphan_item()); 6) The RO mount completes; 7) Before the transaction kthread commits the transaction created for deleting the orphan item (i.e. less than 30 seconds elapsed since the mount, the default commit interval), a filesystem unmount operation is started; 8) At close_ctree(), we stop the transaction kthread, but we still have a transaction open with at least one dirty extent buffer, a leaf for the tree root which was COWed when deleting the orphan item; 9) We then proceed to destroy the work queues, free the roots and block groups, etc. After that we drop the last reference on the btree inode by calling iput() on it. Since there are dirty pages for the btree inode, corresponding to the COWed extent buffer, btree_write_cache_pages() is invoked to flush those dirty pages. This results in creating a bio and submitting it, which makes us end up at btrfs_submit_metadata_bio(); 10) At btrfs_submit_metadata_bio() we end up at the if-then-else branch that calls btrfs_wq_submit_bio(), because check_async_write() returned a value of 1. This value of 1 is because we did not have hardware acceleration available for crc32c, so BTRFS_FS_CSUM_IMPL_FAST was not set in fs_info->flags; 11) Then at btrfs_wq_submit_bio() we call btrfs_queue_work() against the workqueue at fs_info->workers, which was already freed before by the call to btrfs_stop_all_workers() at close_ctree(). This results in an invalid memory access due to a use-after-free, leading to a crash. When this happens, before the crash there are several warnings triggered, since we have reserved metadata space in a block group, the delayed refs reservation, etc: ------------[ cut here ]------------ WARNING: CPU: 4 PID: 1729896 at fs/btrfs/block-group.c:125 btrfs_put_block_group+0x63/0xa0 [btrfs] Modules linked in: btrfs dm_snapshot dm_thin_pool (...) CPU: 4 PID: 1729896 Comm: umount Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:btrfs_put_block_group+0x63/0xa0 [btrfs] Code: f0 01 00 00 48 39 c2 75 (...) RSP: 0018:ffffb270826bbdd8 EFLAGS: 00010206 RAX: 0000000000000001 RBX: ffff947ed73e4000 RCX: ffff947ebc8b29c8 RDX: 0000000000000001 RSI: ffffffffc0b150a0 RDI: ffff947ebc8b2800 RBP: ffff947ebc8b2800 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000001 R12: ffff947ed73e4110 R13: ffff947ed73e4160 R14: ffff947ebc8b2988 R15: dead000000000100 FS: 00007f15edfea840(0000) GS:ffff9481ad600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f37e2893320 CR3: 0000000138f68001 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: btrfs_free_block_groups+0x17f/0x2f0 [btrfs] close_ctree+0x2ba/0x2fa [btrfs] generic_shutdown_super+0x6c/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f15ee221ee7 Code: ff 0b 00 f7 d8 64 89 01 48 (...) RSP: 002b:00007ffe9470f0f8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 RAX: 0000000000000000 RBX: 00007f15ee347264 RCX: 00007f15ee221ee7 RDX: ffffffffffffff78 RSI: 0000000000000000 RDI: 000056169701d000 RBP: 0000561697018a30 R08: 0000000000000000 R09: 00007f15ee2e2be0 R10: 000056169701efe0 R11: 0000000000000246 R12: 0000000000000000 R13: 000056169701d000 R14: 0000561697018b40 R15: 0000561697018c60 irq event stamp: 0 hardirqs last enabled at (0): [<0000000000000000>] 0x0 hardirqs last disabled at (0): [] copy_process+0x8a0/0x1d70 softirqs last enabled at (0): [] copy_process+0x8a0/0x1d70 softirqs last disabled at (0): [<0000000000000000>] 0x0 ---[ end trace dd74718fef1ed5c6 ]--- ------------[ cut here ]------------ WARNING: CPU: 2 PID: 1729896 at fs/btrfs/block-rsv.c:459 btrfs_release_global_block_rsv+0x70/0xc0 [btrfs] Modules linked in: btrfs dm_snapshot dm_thin_pool (...) CPU: 2 PID: 1729896 Comm: umount Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:btrfs_release_global_block_rsv+0x70/0xc0 [btrfs] Code: 48 83 bb b0 03 00 00 00 (...) RSP: 0018:ffffb270826bbdd8 EFLAGS: 00010206 RAX: 000000000033c000 RBX: ffff947ed73e4000 RCX: 0000000000000000 RDX: 0000000000000001 RSI: ffffffffc0b0d8c1 RDI: 00000000ffffffff RBP: ffff947ebc8b7000 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000001 R12: ffff947ed73e4110 R13: ffff947ed73e5278 R14: dead000000000122 R15: dead000000000100 FS: 00007f15edfea840(0000) GS:ffff9481aca00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000561a79f76e20 CR3: 0000000138f68006 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: btrfs_free_block_groups+0x24c/0x2f0 [btrfs] close_ctree+0x2ba/0x2fa [btrfs] generic_shutdown_super+0x6c/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f15ee221ee7 Code: ff 0b 00 f7 d8 64 89 01 (...) RSP: 002b:00007ffe9470f0f8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 RAX: 0000000000000000 RBX: 00007f15ee347264 RCX: 00007f15ee221ee7 RDX: ffffffffffffff78 RSI: 0000000000000000 RDI: 000056169701d000 RBP: 0000561697018a30 R08: 0000000000000000 R09: 00007f15ee2e2be0 R10: 000056169701efe0 R11: 0000000000000246 R12: 0000000000000000 R13: 000056169701d000 R14: 0000561697018b40 R15: 0000561697018c60 irq event stamp: 0 hardirqs last enabled at (0): [<0000000000000000>] 0x0 hardirqs last disabled at (0): [] copy_process+0x8a0/0x1d70 softirqs last enabled at (0): [] copy_process+0x8a0/0x1d70 softirqs last disabled at (0): [<0000000000000000>] 0x0 ---[ end trace dd74718fef1ed5c7 ]--- ------------[ cut here ]------------ WARNING: CPU: 2 PID: 1729896 at fs/btrfs/block-group.c:3377 btrfs_free_block_groups+0x25d/0x2f0 [btrfs] Modules linked in: btrfs dm_snapshot dm_thin_pool (...) CPU: 5 PID: 1729896 Comm: umount Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:btrfs_free_block_groups+0x25d/0x2f0 [btrfs] Code: ad de 49 be 22 01 00 (...) RSP: 0018:ffffb270826bbde8 EFLAGS: 00010206 RAX: ffff947ebeae1d08 RBX: ffff947ed73e4000 RCX: 0000000000000000 RDX: 0000000000000001 RSI: ffff947e9d823ae8 RDI: 0000000000000246 RBP: ffff947ebeae1d08 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000001 R12: ffff947ebeae1c00 R13: ffff947ed73e5278 R14: dead000000000122 R15: dead000000000100 FS: 00007f15edfea840(0000) GS:ffff9481ad200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f1475d98ea8 CR3: 0000000138f68005 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: close_ctree+0x2ba/0x2fa [btrfs] generic_shutdown_super+0x6c/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f15ee221ee7 Code: ff 0b 00 f7 d8 64 89 (...) RSP: 002b:00007ffe9470f0f8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 RAX: 0000000000000000 RBX: 00007f15ee347264 RCX: 00007f15ee221ee7 RDX: ffffffffffffff78 RSI: 0000000000000000 RDI: 000056169701d000 RBP: 0000561697018a30 R08: 0000000000000000 R09: 00007f15ee2e2be0 R10: 000056169701efe0 R11: 0000000000000246 R12: 0000000000000000 R13: 000056169701d000 R14: 0000561697018b40 R15: 0000561697018c60 irq event stamp: 0 hardirqs last enabled at (0): [<0000000000000000>] 0x0 hardirqs last disabled at (0): [] copy_process+0x8a0/0x1d70 softirqs last enabled at (0): [] copy_process+0x8a0/0x1d70 softirqs last disabled at (0): [<0000000000000000>] 0x0 ---[ end trace dd74718fef1ed5c8 ]--- BTRFS info (device sdc): space_info 4 has 268238848 free, is not full BTRFS info (device sdc): space_info total=268435456, used=114688, pinned=0, reserved=16384, may_use=0, readonly=65536 BTRFS info (device sdc): global_block_rsv: size 0 reserved 0 BTRFS info (device sdc): trans_block_rsv: size 0 reserved 0 BTRFS info (device sdc): chunk_block_rsv: size 0 reserved 0 BTRFS info (device sdc): delayed_block_rsv: size 0 reserved 0 BTRFS info (device sdc): delayed_refs_rsv: size 524288 reserved 0 And the crash, which only happens when we do not have crc32c hardware acceleration, produces the following trace immediately after those warnings: stack segment: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC PTI CPU: 2 PID: 1749129 Comm: umount Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:btrfs_queue_work+0x36/0x190 [btrfs] Code: 54 55 53 48 89 f3 (...) RSP: 0018:ffffb27082443ae8 EFLAGS: 00010282 RAX: 0000000000000004 RBX: ffff94810ee9ad90 RCX: 0000000000000000 RDX: 0000000000000001 RSI: ffff94810ee9ad90 RDI: ffff947ed8ee75a0 RBP: a56b6b6b6b6b6b6b R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000007 R11: 0000000000000001 R12: ffff947fa9b435a8 R13: ffff94810ee9ad90 R14: 0000000000000000 R15: ffff947e93dc0000 FS: 00007f3cfe974840(0000) GS:ffff9481ac600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f1b42995a70 CR3: 0000000127638003 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: btrfs_wq_submit_bio+0xb3/0xd0 [btrfs] btrfs_submit_metadata_bio+0x44/0xc0 [btrfs] submit_one_bio+0x61/0x70 [btrfs] btree_write_cache_pages+0x414/0x450 [btrfs] ? kobject_put+0x9a/0x1d0 ? trace_hardirqs_on+0x1b/0xf0 ? _raw_spin_unlock_irqrestore+0x3c/0x60 ? free_debug_processing+0x1e1/0x2b0 do_writepages+0x43/0xe0 ? lock_acquired+0x199/0x490 __writeback_single_inode+0x59/0x650 writeback_single_inode+0xaf/0x120 write_inode_now+0x94/0xd0 iput+0x187/0x2b0 close_ctree+0x2c6/0x2fa [btrfs] generic_shutdown_super+0x6c/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f3cfebabee7 Code: ff 0b 00 f7 d8 64 89 01 (...) RSP: 002b:00007ffc9c9a05f8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 RAX: 0000000000000000 RBX: 00007f3cfecd1264 RCX: 00007f3cfebabee7 RDX: ffffffffffffff78 RSI: 0000000000000000 RDI: 0000562b6b478000 RBP: 0000562b6b473a30 R08: 0000000000000000 R09: 00007f3cfec6cbe0 R10: 0000562b6b479fe0 R11: 0000000000000246 R12: 0000000000000000 R13: 0000562b6b478000 R14: 0000562b6b473b40 R15: 0000562b6b473c60 Modules linked in: btrfs dm_snapshot dm_thin_pool (...) ---[ end trace dd74718fef1ed5cc ]--- Finally when we remove the btrfs module (rmmod btrfs), there are several warnings about objects that were allocated from our slabs but were never freed, consequence of the transaction that was never committed and got leaked: ============================================================================= BUG btrfs_delayed_ref_head (Tainted: G B W ): Objects remaining in btrfs_delayed_ref_head on __kmem_cache_shutdown() ----------------------------------------------------------------------------- INFO: Slab 0x0000000094c2ae56 objects=24 used=2 fp=0x000000002bfa2521 flags=0x17fffc000010200 CPU: 5 PID: 1729921 Comm: rmmod Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0x8d/0xb5 slab_err+0xb7/0xdc ? lock_acquired+0x199/0x490 __kmem_cache_shutdown+0x1ac/0x3c0 ? lock_release+0x20e/0x4c0 kmem_cache_destroy+0x55/0x120 btrfs_delayed_ref_exit+0x11/0x35 [btrfs] exit_btrfs_fs+0xa/0x59 [btrfs] __x64_sys_delete_module+0x194/0x260 ? fpregs_assert_state_consistent+0x1e/0x40 ? exit_to_user_mode_prepare+0x55/0x1c0 ? trace_hardirqs_on+0x1b/0xf0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f693e305897 Code: 73 01 c3 48 8b 0d f9 f5 (...) RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897 RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8 RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000 R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740 R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760 INFO: Object 0x0000000050cbdd61 @offset=12104 INFO: Allocated in btrfs_add_delayed_tree_ref+0xbb/0x480 [btrfs] age=1894 cpu=6 pid=1729873 __slab_alloc.isra.0+0x109/0x1c0 kmem_cache_alloc+0x7bb/0x830 btrfs_add_delayed_tree_ref+0xbb/0x480 [btrfs] btrfs_free_tree_block+0x128/0x360 [btrfs] __btrfs_cow_block+0x489/0x5f0 [btrfs] btrfs_cow_block+0xf7/0x220 [btrfs] btrfs_search_slot+0x62a/0xc40 [btrfs] btrfs_del_orphan_item+0x65/0xd0 [btrfs] btrfs_find_orphan_roots+0x1bf/0x200 [btrfs] open_ctree+0x125a/0x18a0 [btrfs] btrfs_mount_root.cold+0x13/0xed [btrfs] legacy_get_tree+0x30/0x60 vfs_get_tree+0x28/0xe0 fc_mount+0xe/0x40 vfs_kern_mount.part.0+0x71/0x90 btrfs_mount+0x13b/0x3e0 [btrfs] INFO: Freed in __btrfs_run_delayed_refs+0x1117/0x1290 [btrfs] age=4292 cpu=2 pid=1729526 kmem_cache_free+0x34c/0x3c0 __btrfs_run_delayed_refs+0x1117/0x1290 [btrfs] btrfs_run_delayed_refs+0x81/0x210 [btrfs] commit_cowonly_roots+0xfb/0x300 [btrfs] btrfs_commit_transaction+0x367/0xc40 [btrfs] sync_filesystem+0x74/0x90 generic_shutdown_super+0x22/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 INFO: Object 0x0000000086e9b0ff @offset=12776 INFO: Allocated in btrfs_add_delayed_tree_ref+0xbb/0x480 [btrfs] age=1900 cpu=6 pid=1729873 __slab_alloc.isra.0+0x109/0x1c0 kmem_cache_alloc+0x7bb/0x830 btrfs_add_delayed_tree_ref+0xbb/0x480 [btrfs] btrfs_alloc_tree_block+0x2bf/0x360 [btrfs] alloc_tree_block_no_bg_flush+0x4f/0x60 [btrfs] __btrfs_cow_block+0x12d/0x5f0 [btrfs] btrfs_cow_block+0xf7/0x220 [btrfs] btrfs_search_slot+0x62a/0xc40 [btrfs] btrfs_del_orphan_item+0x65/0xd0 [btrfs] btrfs_find_orphan_roots+0x1bf/0x200 [btrfs] open_ctree+0x125a/0x18a0 [btrfs] btrfs_mount_root.cold+0x13/0xed [btrfs] legacy_get_tree+0x30/0x60 vfs_get_tree+0x28/0xe0 fc_mount+0xe/0x40 vfs_kern_mount.part.0+0x71/0x90 INFO: Freed in __btrfs_run_delayed_refs+0x1117/0x1290 [btrfs] age=3141 cpu=6 pid=1729803 kmem_cache_free+0x34c/0x3c0 __btrfs_run_delayed_refs+0x1117/0x1290 [btrfs] btrfs_run_delayed_refs+0x81/0x210 [btrfs] btrfs_write_dirty_block_groups+0x17d/0x3d0 [btrfs] commit_cowonly_roots+0x248/0x300 [btrfs] btrfs_commit_transaction+0x367/0xc40 [btrfs] close_ctree+0x113/0x2fa [btrfs] generic_shutdown_super+0x6c/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 kmem_cache_destroy btrfs_delayed_ref_head: Slab cache still has objects CPU: 5 PID: 1729921 Comm: rmmod Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0x8d/0xb5 kmem_cache_destroy+0x119/0x120 btrfs_delayed_ref_exit+0x11/0x35 [btrfs] exit_btrfs_fs+0xa/0x59 [btrfs] __x64_sys_delete_module+0x194/0x260 ? fpregs_assert_state_consistent+0x1e/0x40 ? exit_to_user_mode_prepare+0x55/0x1c0 ? trace_hardirqs_on+0x1b/0xf0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f693e305897 Code: 73 01 c3 48 8b 0d f9 f5 0b (...) RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897 RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8 RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000 R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740 R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760 ============================================================================= BUG btrfs_delayed_tree_ref (Tainted: G B W ): Objects remaining in btrfs_delayed_tree_ref on __kmem_cache_shutdown() ----------------------------------------------------------------------------- INFO: Slab 0x0000000011f78dc0 objects=37 used=2 fp=0x0000000032d55d91 flags=0x17fffc000010200 CPU: 3 PID: 1729921 Comm: rmmod Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0x8d/0xb5 slab_err+0xb7/0xdc ? lock_acquired+0x199/0x490 __kmem_cache_shutdown+0x1ac/0x3c0 ? lock_release+0x20e/0x4c0 kmem_cache_destroy+0x55/0x120 btrfs_delayed_ref_exit+0x1d/0x35 [btrfs] exit_btrfs_fs+0xa/0x59 [btrfs] __x64_sys_delete_module+0x194/0x260 ? fpregs_assert_state_consistent+0x1e/0x40 ? exit_to_user_mode_prepare+0x55/0x1c0 ? trace_hardirqs_on+0x1b/0xf0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f693e305897 Code: 73 01 c3 48 8b 0d f9 f5 (...) RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897 RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8 RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000 R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740 R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760 INFO: Object 0x000000001a340018 @offset=4408 INFO: Allocated in btrfs_add_delayed_tree_ref+0x9e/0x480 [btrfs] age=1917 cpu=6 pid=1729873 __slab_alloc.isra.0+0x109/0x1c0 kmem_cache_alloc+0x7bb/0x830 btrfs_add_delayed_tree_ref+0x9e/0x480 [btrfs] btrfs_free_tree_block+0x128/0x360 [btrfs] __btrfs_cow_block+0x489/0x5f0 [btrfs] btrfs_cow_block+0xf7/0x220 [btrfs] btrfs_search_slot+0x62a/0xc40 [btrfs] btrfs_del_orphan_item+0x65/0xd0 [btrfs] btrfs_find_orphan_roots+0x1bf/0x200 [btrfs] open_ctree+0x125a/0x18a0 [btrfs] btrfs_mount_root.cold+0x13/0xed [btrfs] legacy_get_tree+0x30/0x60 vfs_get_tree+0x28/0xe0 fc_mount+0xe/0x40 vfs_kern_mount.part.0+0x71/0x90 btrfs_mount+0x13b/0x3e0 [btrfs] INFO: Freed in __btrfs_run_delayed_refs+0x63d/0x1290 [btrfs] age=4167 cpu=4 pid=1729795 kmem_cache_free+0x34c/0x3c0 __btrfs_run_delayed_refs+0x63d/0x1290 [btrfs] btrfs_run_delayed_refs+0x81/0x210 [btrfs] btrfs_commit_transaction+0x60/0xc40 [btrfs] create_subvol+0x56a/0x990 [btrfs] btrfs_mksubvol+0x3fb/0x4a0 [btrfs] __btrfs_ioctl_snap_create+0x119/0x1a0 [btrfs] btrfs_ioctl_snap_create+0x58/0x80 [btrfs] btrfs_ioctl+0x1a92/0x36f0 [btrfs] __x64_sys_ioctl+0x83/0xb0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 INFO: Object 0x000000002b46292a @offset=13648 INFO: Allocated in btrfs_add_delayed_tree_ref+0x9e/0x480 [btrfs] age=1923 cpu=6 pid=1729873 __slab_alloc.isra.0+0x109/0x1c0 kmem_cache_alloc+0x7bb/0x830 btrfs_add_delayed_tree_ref+0x9e/0x480 [btrfs] btrfs_alloc_tree_block+0x2bf/0x360 [btrfs] alloc_tree_block_no_bg_flush+0x4f/0x60 [btrfs] __btrfs_cow_block+0x12d/0x5f0 [btrfs] btrfs_cow_block+0xf7/0x220 [btrfs] btrfs_search_slot+0x62a/0xc40 [btrfs] btrfs_del_orphan_item+0x65/0xd0 [btrfs] btrfs_find_orphan_roots+0x1bf/0x200 [btrfs] open_ctree+0x125a/0x18a0 [btrfs] btrfs_mount_root.cold+0x13/0xed [btrfs] legacy_get_tree+0x30/0x60 vfs_get_tree+0x28/0xe0 fc_mount+0xe/0x40 vfs_kern_mount.part.0+0x71/0x90 INFO: Freed in __btrfs_run_delayed_refs+0x63d/0x1290 [btrfs] age=3164 cpu=6 pid=1729803 kmem_cache_free+0x34c/0x3c0 __btrfs_run_delayed_refs+0x63d/0x1290 [btrfs] btrfs_run_delayed_refs+0x81/0x210 [btrfs] commit_cowonly_roots+0xfb/0x300 [btrfs] btrfs_commit_transaction+0x367/0xc40 [btrfs] close_ctree+0x113/0x2fa [btrfs] generic_shutdown_super+0x6c/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 kmem_cache_destroy btrfs_delayed_tree_ref: Slab cache still has objects CPU: 5 PID: 1729921 Comm: rmmod Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0x8d/0xb5 kmem_cache_destroy+0x119/0x120 btrfs_delayed_ref_exit+0x1d/0x35 [btrfs] exit_btrfs_fs+0xa/0x59 [btrfs] __x64_sys_delete_module+0x194/0x260 ? fpregs_assert_state_consistent+0x1e/0x40 ? exit_to_user_mode_prepare+0x55/0x1c0 ? trace_hardirqs_on+0x1b/0xf0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f693e305897 Code: 73 01 c3 48 8b 0d f9 f5 (...) RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897 RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8 RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000 R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740 R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760 ============================================================================= BUG btrfs_delayed_extent_op (Tainted: G B W ): Objects remaining in btrfs_delayed_extent_op on __kmem_cache_shutdown() ----------------------------------------------------------------------------- INFO: Slab 0x00000000f145ce2f objects=22 used=1 fp=0x00000000af0f92cf flags=0x17fffc000010200 CPU: 5 PID: 1729921 Comm: rmmod Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0x8d/0xb5 slab_err+0xb7/0xdc ? lock_acquired+0x199/0x490 __kmem_cache_shutdown+0x1ac/0x3c0 ? __mutex_unlock_slowpath+0x45/0x2a0 kmem_cache_destroy+0x55/0x120 exit_btrfs_fs+0xa/0x59 [btrfs] __x64_sys_delete_module+0x194/0x260 ? fpregs_assert_state_consistent+0x1e/0x40 ? exit_to_user_mode_prepare+0x55/0x1c0 ? trace_hardirqs_on+0x1b/0xf0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f693e305897 Code: 73 01 c3 48 8b 0d f9 f5 (...) RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897 RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8 RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000 R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740 R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760 INFO: Object 0x000000004cf95ea8 @offset=6264 INFO: Allocated in btrfs_alloc_tree_block+0x1e0/0x360 [btrfs] age=1931 cpu=6 pid=1729873 __slab_alloc.isra.0+0x109/0x1c0 kmem_cache_alloc+0x7bb/0x830 btrfs_alloc_tree_block+0x1e0/0x360 [btrfs] alloc_tree_block_no_bg_flush+0x4f/0x60 [btrfs] __btrfs_cow_block+0x12d/0x5f0 [btrfs] btrfs_cow_block+0xf7/0x220 [btrfs] btrfs_search_slot+0x62a/0xc40 [btrfs] btrfs_del_orphan_item+0x65/0xd0 [btrfs] btrfs_find_orphan_roots+0x1bf/0x200 [btrfs] open_ctree+0x125a/0x18a0 [btrfs] btrfs_mount_root.cold+0x13/0xed [btrfs] legacy_get_tree+0x30/0x60 vfs_get_tree+0x28/0xe0 fc_mount+0xe/0x40 vfs_kern_mount.part.0+0x71/0x90 btrfs_mount+0x13b/0x3e0 [btrfs] INFO: Freed in __btrfs_run_delayed_refs+0xabd/0x1290 [btrfs] age=3173 cpu=6 pid=1729803 kmem_cache_free+0x34c/0x3c0 __btrfs_run_delayed_refs+0xabd/0x1290 [btrfs] btrfs_run_delayed_refs+0x81/0x210 [btrfs] commit_cowonly_roots+0xfb/0x300 [btrfs] btrfs_commit_transaction+0x367/0xc40 [btrfs] close_ctree+0x113/0x2fa [btrfs] generic_shutdown_super+0x6c/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 kmem_cache_destroy btrfs_delayed_extent_op: Slab cache still has objects CPU: 3 PID: 1729921 Comm: rmmod Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0x8d/0xb5 kmem_cache_destroy+0x119/0x120 exit_btrfs_fs+0xa/0x59 [btrfs] __x64_sys_delete_module+0x194/0x260 ? fpregs_assert_state_consistent+0x1e/0x40 ? exit_to_user_mode_prepare+0x55/0x1c0 ? trace_hardirqs_on+0x1b/0xf0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f693e305897 Code: 73 01 c3 48 8b 0d f9 (...) RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897 RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8 RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000 R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740 R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760 BTRFS: state leak: start 30408704 end 30425087 state 1 in tree 1 refs 1 So fix this by calling btrfs_find_orphan_roots() in the mount path only if we are mounting the filesystem in RW mode. It's pointless to have it called for RO mounts anyway, since despite adding any deleted roots to the list of dead roots, we will never have the roots deleted until the filesystem is remounted in RW mode, as the cleaner kthread does nothing when we are mounted in RO - btrfs_need_cleaner_sleep() always returns true and the cleaner spends all time sleeping, never cleaning dead roots. This is accomplished by moving the call to btrfs_find_orphan_roots() from open_ctree() to btrfs_start_pre_rw_mount(), which also guarantees that if later the filesystem is remounted RW, we populate the list of dead roots and have the cleaner task delete the dead roots. Tested-by: Fabian Vogt Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 765deefda92b1..e941cbae39919 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2969,6 +2969,7 @@ int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info) } } + ret = btrfs_find_orphan_roots(fs_info); out: return ret; } @@ -3383,10 +3384,6 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device } } - ret = btrfs_find_orphan_roots(fs_info); - if (ret) - goto fail_qgroup; - fs_info->fs_root = btrfs_get_fs_root(fs_info, BTRFS_FS_TREE_OBJECTID, true); if (IS_ERR(fs_info->fs_root)) { err = PTR_ERR(fs_info->fs_root); -- GitLab From a0a1db70df5f48576fea6d08f0a69c05f3ab4cf4 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 14 Dec 2020 10:10:47 +0000 Subject: [PATCH 0778/1894] btrfs: fix race between RO remount and the cleaner task When we are remounting a filesystem in RO mode we can race with the cleaner task and result in leaking a transaction if the filesystem is unmounted shortly after, before the transaction kthread had a chance to commit that transaction. That also results in a crash during unmount, due to a use-after-free, if hardware acceleration is not available for crc32c. The following sequence of steps explains how the race happens. 1) The filesystem is mounted in RW mode and the cleaner task is running. This means that currently BTRFS_FS_CLEANER_RUNNING is set at fs_info->flags; 2) The cleaner task is currently running delayed iputs for example; 3) A filesystem RO remount operation starts; 4) The RO remount task calls btrfs_commit_super(), which commits any currently open transaction, and it finishes; 5) At this point the cleaner task is still running and it creates a new transaction by doing one of the following things: * When running the delayed iput() for an inode with a 0 link count, in which case at btrfs_evict_inode() we start a transaction through the call to evict_refill_and_join(), use it and then release its handle through btrfs_end_transaction(); * When deleting a dead root through btrfs_clean_one_deleted_snapshot(), a transaction is started at btrfs_drop_snapshot() and then its handle is released through a call to btrfs_end_transaction_throttle(); * When the remount task was still running, and before the remount task called btrfs_delete_unused_bgs(), the cleaner task also called btrfs_delete_unused_bgs() and it picked and removed one block group from the list of unused block groups. Before the cleaner task started a transaction, through btrfs_start_trans_remove_block_group() at btrfs_delete_unused_bgs(), the remount task had already called btrfs_commit_super(); 6) So at this point the filesystem is in RO mode and we have an open transaction that was started by the cleaner task; 7) Shortly after a filesystem unmount operation starts. At close_ctree() we stop the transaction kthread before it had a chance to commit the transaction, since less than 30 seconds (the default commit interval) have elapsed since the last transaction was committed; 8) We end up calling iput() against the btree inode at close_ctree() while there is an open transaction, and since that transaction was used to update btrees by the cleaner, we have dirty pages in the btree inode due to COW operations on metadata extents, and therefore writeback is triggered for the btree inode. So btree_write_cache_pages() is invoked to flush those dirty pages during the final iput() on the btree inode. This results in creating a bio and submitting it, which makes us end up at btrfs_submit_metadata_bio(); 9) At btrfs_submit_metadata_bio() we end up at the if-then-else branch that calls btrfs_wq_submit_bio(), because check_async_write() returned a value of 1. This value of 1 is because we did not have hardware acceleration available for crc32c, so BTRFS_FS_CSUM_IMPL_FAST was not set in fs_info->flags; 10) Then at btrfs_wq_submit_bio() we call btrfs_queue_work() against the workqueue at fs_info->workers, which was already freed before by the call to btrfs_stop_all_workers() at close_ctree(). This results in an invalid memory access due to a use-after-free, leading to a crash. When this happens, before the crash there are several warnings triggered, since we have reserved metadata space in a block group, the delayed refs reservation, etc: ------------[ cut here ]------------ WARNING: CPU: 4 PID: 1729896 at fs/btrfs/block-group.c:125 btrfs_put_block_group+0x63/0xa0 [btrfs] Modules linked in: btrfs dm_snapshot dm_thin_pool (...) CPU: 4 PID: 1729896 Comm: umount Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:btrfs_put_block_group+0x63/0xa0 [btrfs] Code: f0 01 00 00 48 39 c2 75 (...) RSP: 0018:ffffb270826bbdd8 EFLAGS: 00010206 RAX: 0000000000000001 RBX: ffff947ed73e4000 RCX: ffff947ebc8b29c8 RDX: 0000000000000001 RSI: ffffffffc0b150a0 RDI: ffff947ebc8b2800 RBP: ffff947ebc8b2800 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000001 R12: ffff947ed73e4110 R13: ffff947ed73e4160 R14: ffff947ebc8b2988 R15: dead000000000100 FS: 00007f15edfea840(0000) GS:ffff9481ad600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f37e2893320 CR3: 0000000138f68001 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: btrfs_free_block_groups+0x17f/0x2f0 [btrfs] close_ctree+0x2ba/0x2fa [btrfs] generic_shutdown_super+0x6c/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f15ee221ee7 Code: ff 0b 00 f7 d8 64 89 01 48 (...) RSP: 002b:00007ffe9470f0f8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 RAX: 0000000000000000 RBX: 00007f15ee347264 RCX: 00007f15ee221ee7 RDX: ffffffffffffff78 RSI: 0000000000000000 RDI: 000056169701d000 RBP: 0000561697018a30 R08: 0000000000000000 R09: 00007f15ee2e2be0 R10: 000056169701efe0 R11: 0000000000000246 R12: 0000000000000000 R13: 000056169701d000 R14: 0000561697018b40 R15: 0000561697018c60 irq event stamp: 0 hardirqs last enabled at (0): [<0000000000000000>] 0x0 hardirqs last disabled at (0): [] copy_process+0x8a0/0x1d70 softirqs last enabled at (0): [] copy_process+0x8a0/0x1d70 softirqs last disabled at (0): [<0000000000000000>] 0x0 ---[ end trace dd74718fef1ed5c6 ]--- ------------[ cut here ]------------ WARNING: CPU: 2 PID: 1729896 at fs/btrfs/block-rsv.c:459 btrfs_release_global_block_rsv+0x70/0xc0 [btrfs] Modules linked in: btrfs dm_snapshot dm_thin_pool (...) CPU: 2 PID: 1729896 Comm: umount Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:btrfs_release_global_block_rsv+0x70/0xc0 [btrfs] Code: 48 83 bb b0 03 00 00 00 (...) RSP: 0018:ffffb270826bbdd8 EFLAGS: 00010206 RAX: 000000000033c000 RBX: ffff947ed73e4000 RCX: 0000000000000000 RDX: 0000000000000001 RSI: ffffffffc0b0d8c1 RDI: 00000000ffffffff RBP: ffff947ebc8b7000 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000001 R12: ffff947ed73e4110 R13: ffff947ed73e5278 R14: dead000000000122 R15: dead000000000100 FS: 00007f15edfea840(0000) GS:ffff9481aca00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000561a79f76e20 CR3: 0000000138f68006 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: btrfs_free_block_groups+0x24c/0x2f0 [btrfs] close_ctree+0x2ba/0x2fa [btrfs] generic_shutdown_super+0x6c/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f15ee221ee7 Code: ff 0b 00 f7 d8 64 89 01 (...) RSP: 002b:00007ffe9470f0f8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 RAX: 0000000000000000 RBX: 00007f15ee347264 RCX: 00007f15ee221ee7 RDX: ffffffffffffff78 RSI: 0000000000000000 RDI: 000056169701d000 RBP: 0000561697018a30 R08: 0000000000000000 R09: 00007f15ee2e2be0 R10: 000056169701efe0 R11: 0000000000000246 R12: 0000000000000000 R13: 000056169701d000 R14: 0000561697018b40 R15: 0000561697018c60 irq event stamp: 0 hardirqs last enabled at (0): [<0000000000000000>] 0x0 hardirqs last disabled at (0): [] copy_process+0x8a0/0x1d70 softirqs last enabled at (0): [] copy_process+0x8a0/0x1d70 softirqs last disabled at (0): [<0000000000000000>] 0x0 ---[ end trace dd74718fef1ed5c7 ]--- ------------[ cut here ]------------ WARNING: CPU: 2 PID: 1729896 at fs/btrfs/block-group.c:3377 btrfs_free_block_groups+0x25d/0x2f0 [btrfs] Modules linked in: btrfs dm_snapshot dm_thin_pool (...) CPU: 5 PID: 1729896 Comm: umount Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:btrfs_free_block_groups+0x25d/0x2f0 [btrfs] Code: ad de 49 be 22 01 00 (...) RSP: 0018:ffffb270826bbde8 EFLAGS: 00010206 RAX: ffff947ebeae1d08 RBX: ffff947ed73e4000 RCX: 0000000000000000 RDX: 0000000000000001 RSI: ffff947e9d823ae8 RDI: 0000000000000246 RBP: ffff947ebeae1d08 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000001 R12: ffff947ebeae1c00 R13: ffff947ed73e5278 R14: dead000000000122 R15: dead000000000100 FS: 00007f15edfea840(0000) GS:ffff9481ad200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f1475d98ea8 CR3: 0000000138f68005 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: close_ctree+0x2ba/0x2fa [btrfs] generic_shutdown_super+0x6c/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f15ee221ee7 Code: ff 0b 00 f7 d8 64 89 (...) RSP: 002b:00007ffe9470f0f8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 RAX: 0000000000000000 RBX: 00007f15ee347264 RCX: 00007f15ee221ee7 RDX: ffffffffffffff78 RSI: 0000000000000000 RDI: 000056169701d000 RBP: 0000561697018a30 R08: 0000000000000000 R09: 00007f15ee2e2be0 R10: 000056169701efe0 R11: 0000000000000246 R12: 0000000000000000 R13: 000056169701d000 R14: 0000561697018b40 R15: 0000561697018c60 irq event stamp: 0 hardirqs last enabled at (0): [<0000000000000000>] 0x0 hardirqs last disabled at (0): [] copy_process+0x8a0/0x1d70 softirqs last enabled at (0): [] copy_process+0x8a0/0x1d70 softirqs last disabled at (0): [<0000000000000000>] 0x0 ---[ end trace dd74718fef1ed5c8 ]--- BTRFS info (device sdc): space_info 4 has 268238848 free, is not full BTRFS info (device sdc): space_info total=268435456, used=114688, pinned=0, reserved=16384, may_use=0, readonly=65536 BTRFS info (device sdc): global_block_rsv: size 0 reserved 0 BTRFS info (device sdc): trans_block_rsv: size 0 reserved 0 BTRFS info (device sdc): chunk_block_rsv: size 0 reserved 0 BTRFS info (device sdc): delayed_block_rsv: size 0 reserved 0 BTRFS info (device sdc): delayed_refs_rsv: size 524288 reserved 0 And the crash, which only happens when we do not have crc32c hardware acceleration, produces the following trace immediately after those warnings: stack segment: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC PTI CPU: 2 PID: 1749129 Comm: umount Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:btrfs_queue_work+0x36/0x190 [btrfs] Code: 54 55 53 48 89 f3 (...) RSP: 0018:ffffb27082443ae8 EFLAGS: 00010282 RAX: 0000000000000004 RBX: ffff94810ee9ad90 RCX: 0000000000000000 RDX: 0000000000000001 RSI: ffff94810ee9ad90 RDI: ffff947ed8ee75a0 RBP: a56b6b6b6b6b6b6b R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000007 R11: 0000000000000001 R12: ffff947fa9b435a8 R13: ffff94810ee9ad90 R14: 0000000000000000 R15: ffff947e93dc0000 FS: 00007f3cfe974840(0000) GS:ffff9481ac600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f1b42995a70 CR3: 0000000127638003 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: btrfs_wq_submit_bio+0xb3/0xd0 [btrfs] btrfs_submit_metadata_bio+0x44/0xc0 [btrfs] submit_one_bio+0x61/0x70 [btrfs] btree_write_cache_pages+0x414/0x450 [btrfs] ? kobject_put+0x9a/0x1d0 ? trace_hardirqs_on+0x1b/0xf0 ? _raw_spin_unlock_irqrestore+0x3c/0x60 ? free_debug_processing+0x1e1/0x2b0 do_writepages+0x43/0xe0 ? lock_acquired+0x199/0x490 __writeback_single_inode+0x59/0x650 writeback_single_inode+0xaf/0x120 write_inode_now+0x94/0xd0 iput+0x187/0x2b0 close_ctree+0x2c6/0x2fa [btrfs] generic_shutdown_super+0x6c/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f3cfebabee7 Code: ff 0b 00 f7 d8 64 89 01 (...) RSP: 002b:00007ffc9c9a05f8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 RAX: 0000000000000000 RBX: 00007f3cfecd1264 RCX: 00007f3cfebabee7 RDX: ffffffffffffff78 RSI: 0000000000000000 RDI: 0000562b6b478000 RBP: 0000562b6b473a30 R08: 0000000000000000 R09: 00007f3cfec6cbe0 R10: 0000562b6b479fe0 R11: 0000000000000246 R12: 0000000000000000 R13: 0000562b6b478000 R14: 0000562b6b473b40 R15: 0000562b6b473c60 Modules linked in: btrfs dm_snapshot dm_thin_pool (...) ---[ end trace dd74718fef1ed5cc ]--- Finally when we remove the btrfs module (rmmod btrfs), there are several warnings about objects that were allocated from our slabs but were never freed, consequence of the transaction that was never committed and got leaked: ============================================================================= BUG btrfs_delayed_ref_head (Tainted: G B W ): Objects remaining in btrfs_delayed_ref_head on __kmem_cache_shutdown() ----------------------------------------------------------------------------- INFO: Slab 0x0000000094c2ae56 objects=24 used=2 fp=0x000000002bfa2521 flags=0x17fffc000010200 CPU: 5 PID: 1729921 Comm: rmmod Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0x8d/0xb5 slab_err+0xb7/0xdc ? lock_acquired+0x199/0x490 __kmem_cache_shutdown+0x1ac/0x3c0 ? lock_release+0x20e/0x4c0 kmem_cache_destroy+0x55/0x120 btrfs_delayed_ref_exit+0x11/0x35 [btrfs] exit_btrfs_fs+0xa/0x59 [btrfs] __x64_sys_delete_module+0x194/0x260 ? fpregs_assert_state_consistent+0x1e/0x40 ? exit_to_user_mode_prepare+0x55/0x1c0 ? trace_hardirqs_on+0x1b/0xf0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f693e305897 Code: 73 01 c3 48 8b 0d f9 f5 (...) RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897 RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8 RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000 R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740 R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760 INFO: Object 0x0000000050cbdd61 @offset=12104 INFO: Allocated in btrfs_add_delayed_tree_ref+0xbb/0x480 [btrfs] age=1894 cpu=6 pid=1729873 __slab_alloc.isra.0+0x109/0x1c0 kmem_cache_alloc+0x7bb/0x830 btrfs_add_delayed_tree_ref+0xbb/0x480 [btrfs] btrfs_free_tree_block+0x128/0x360 [btrfs] __btrfs_cow_block+0x489/0x5f0 [btrfs] btrfs_cow_block+0xf7/0x220 [btrfs] btrfs_search_slot+0x62a/0xc40 [btrfs] btrfs_del_orphan_item+0x65/0xd0 [btrfs] btrfs_find_orphan_roots+0x1bf/0x200 [btrfs] open_ctree+0x125a/0x18a0 [btrfs] btrfs_mount_root.cold+0x13/0xed [btrfs] legacy_get_tree+0x30/0x60 vfs_get_tree+0x28/0xe0 fc_mount+0xe/0x40 vfs_kern_mount.part.0+0x71/0x90 btrfs_mount+0x13b/0x3e0 [btrfs] INFO: Freed in __btrfs_run_delayed_refs+0x1117/0x1290 [btrfs] age=4292 cpu=2 pid=1729526 kmem_cache_free+0x34c/0x3c0 __btrfs_run_delayed_refs+0x1117/0x1290 [btrfs] btrfs_run_delayed_refs+0x81/0x210 [btrfs] commit_cowonly_roots+0xfb/0x300 [btrfs] btrfs_commit_transaction+0x367/0xc40 [btrfs] sync_filesystem+0x74/0x90 generic_shutdown_super+0x22/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 INFO: Object 0x0000000086e9b0ff @offset=12776 INFO: Allocated in btrfs_add_delayed_tree_ref+0xbb/0x480 [btrfs] age=1900 cpu=6 pid=1729873 __slab_alloc.isra.0+0x109/0x1c0 kmem_cache_alloc+0x7bb/0x830 btrfs_add_delayed_tree_ref+0xbb/0x480 [btrfs] btrfs_alloc_tree_block+0x2bf/0x360 [btrfs] alloc_tree_block_no_bg_flush+0x4f/0x60 [btrfs] __btrfs_cow_block+0x12d/0x5f0 [btrfs] btrfs_cow_block+0xf7/0x220 [btrfs] btrfs_search_slot+0x62a/0xc40 [btrfs] btrfs_del_orphan_item+0x65/0xd0 [btrfs] btrfs_find_orphan_roots+0x1bf/0x200 [btrfs] open_ctree+0x125a/0x18a0 [btrfs] btrfs_mount_root.cold+0x13/0xed [btrfs] legacy_get_tree+0x30/0x60 vfs_get_tree+0x28/0xe0 fc_mount+0xe/0x40 vfs_kern_mount.part.0+0x71/0x90 INFO: Freed in __btrfs_run_delayed_refs+0x1117/0x1290 [btrfs] age=3141 cpu=6 pid=1729803 kmem_cache_free+0x34c/0x3c0 __btrfs_run_delayed_refs+0x1117/0x1290 [btrfs] btrfs_run_delayed_refs+0x81/0x210 [btrfs] btrfs_write_dirty_block_groups+0x17d/0x3d0 [btrfs] commit_cowonly_roots+0x248/0x300 [btrfs] btrfs_commit_transaction+0x367/0xc40 [btrfs] close_ctree+0x113/0x2fa [btrfs] generic_shutdown_super+0x6c/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 kmem_cache_destroy btrfs_delayed_ref_head: Slab cache still has objects CPU: 5 PID: 1729921 Comm: rmmod Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0x8d/0xb5 kmem_cache_destroy+0x119/0x120 btrfs_delayed_ref_exit+0x11/0x35 [btrfs] exit_btrfs_fs+0xa/0x59 [btrfs] __x64_sys_delete_module+0x194/0x260 ? fpregs_assert_state_consistent+0x1e/0x40 ? exit_to_user_mode_prepare+0x55/0x1c0 ? trace_hardirqs_on+0x1b/0xf0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f693e305897 Code: 73 01 c3 48 8b 0d f9 f5 0b (...) RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897 RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8 RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000 R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740 R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760 ============================================================================= BUG btrfs_delayed_tree_ref (Tainted: G B W ): Objects remaining in btrfs_delayed_tree_ref on __kmem_cache_shutdown() ----------------------------------------------------------------------------- INFO: Slab 0x0000000011f78dc0 objects=37 used=2 fp=0x0000000032d55d91 flags=0x17fffc000010200 CPU: 3 PID: 1729921 Comm: rmmod Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0x8d/0xb5 slab_err+0xb7/0xdc ? lock_acquired+0x199/0x490 __kmem_cache_shutdown+0x1ac/0x3c0 ? lock_release+0x20e/0x4c0 kmem_cache_destroy+0x55/0x120 btrfs_delayed_ref_exit+0x1d/0x35 [btrfs] exit_btrfs_fs+0xa/0x59 [btrfs] __x64_sys_delete_module+0x194/0x260 ? fpregs_assert_state_consistent+0x1e/0x40 ? exit_to_user_mode_prepare+0x55/0x1c0 ? trace_hardirqs_on+0x1b/0xf0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f693e305897 Code: 73 01 c3 48 8b 0d f9 f5 (...) RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897 RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8 RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000 R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740 R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760 INFO: Object 0x000000001a340018 @offset=4408 INFO: Allocated in btrfs_add_delayed_tree_ref+0x9e/0x480 [btrfs] age=1917 cpu=6 pid=1729873 __slab_alloc.isra.0+0x109/0x1c0 kmem_cache_alloc+0x7bb/0x830 btrfs_add_delayed_tree_ref+0x9e/0x480 [btrfs] btrfs_free_tree_block+0x128/0x360 [btrfs] __btrfs_cow_block+0x489/0x5f0 [btrfs] btrfs_cow_block+0xf7/0x220 [btrfs] btrfs_search_slot+0x62a/0xc40 [btrfs] btrfs_del_orphan_item+0x65/0xd0 [btrfs] btrfs_find_orphan_roots+0x1bf/0x200 [btrfs] open_ctree+0x125a/0x18a0 [btrfs] btrfs_mount_root.cold+0x13/0xed [btrfs] legacy_get_tree+0x30/0x60 vfs_get_tree+0x28/0xe0 fc_mount+0xe/0x40 vfs_kern_mount.part.0+0x71/0x90 btrfs_mount+0x13b/0x3e0 [btrfs] INFO: Freed in __btrfs_run_delayed_refs+0x63d/0x1290 [btrfs] age=4167 cpu=4 pid=1729795 kmem_cache_free+0x34c/0x3c0 __btrfs_run_delayed_refs+0x63d/0x1290 [btrfs] btrfs_run_delayed_refs+0x81/0x210 [btrfs] btrfs_commit_transaction+0x60/0xc40 [btrfs] create_subvol+0x56a/0x990 [btrfs] btrfs_mksubvol+0x3fb/0x4a0 [btrfs] __btrfs_ioctl_snap_create+0x119/0x1a0 [btrfs] btrfs_ioctl_snap_create+0x58/0x80 [btrfs] btrfs_ioctl+0x1a92/0x36f0 [btrfs] __x64_sys_ioctl+0x83/0xb0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 INFO: Object 0x000000002b46292a @offset=13648 INFO: Allocated in btrfs_add_delayed_tree_ref+0x9e/0x480 [btrfs] age=1923 cpu=6 pid=1729873 __slab_alloc.isra.0+0x109/0x1c0 kmem_cache_alloc+0x7bb/0x830 btrfs_add_delayed_tree_ref+0x9e/0x480 [btrfs] btrfs_alloc_tree_block+0x2bf/0x360 [btrfs] alloc_tree_block_no_bg_flush+0x4f/0x60 [btrfs] __btrfs_cow_block+0x12d/0x5f0 [btrfs] btrfs_cow_block+0xf7/0x220 [btrfs] btrfs_search_slot+0x62a/0xc40 [btrfs] btrfs_del_orphan_item+0x65/0xd0 [btrfs] btrfs_find_orphan_roots+0x1bf/0x200 [btrfs] open_ctree+0x125a/0x18a0 [btrfs] btrfs_mount_root.cold+0x13/0xed [btrfs] legacy_get_tree+0x30/0x60 vfs_get_tree+0x28/0xe0 fc_mount+0xe/0x40 vfs_kern_mount.part.0+0x71/0x90 INFO: Freed in __btrfs_run_delayed_refs+0x63d/0x1290 [btrfs] age=3164 cpu=6 pid=1729803 kmem_cache_free+0x34c/0x3c0 __btrfs_run_delayed_refs+0x63d/0x1290 [btrfs] btrfs_run_delayed_refs+0x81/0x210 [btrfs] commit_cowonly_roots+0xfb/0x300 [btrfs] btrfs_commit_transaction+0x367/0xc40 [btrfs] close_ctree+0x113/0x2fa [btrfs] generic_shutdown_super+0x6c/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 kmem_cache_destroy btrfs_delayed_tree_ref: Slab cache still has objects CPU: 5 PID: 1729921 Comm: rmmod Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0x8d/0xb5 kmem_cache_destroy+0x119/0x120 btrfs_delayed_ref_exit+0x1d/0x35 [btrfs] exit_btrfs_fs+0xa/0x59 [btrfs] __x64_sys_delete_module+0x194/0x260 ? fpregs_assert_state_consistent+0x1e/0x40 ? exit_to_user_mode_prepare+0x55/0x1c0 ? trace_hardirqs_on+0x1b/0xf0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f693e305897 Code: 73 01 c3 48 8b 0d f9 f5 (...) RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897 RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8 RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000 R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740 R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760 ============================================================================= BUG btrfs_delayed_extent_op (Tainted: G B W ): Objects remaining in btrfs_delayed_extent_op on __kmem_cache_shutdown() ----------------------------------------------------------------------------- INFO: Slab 0x00000000f145ce2f objects=22 used=1 fp=0x00000000af0f92cf flags=0x17fffc000010200 CPU: 5 PID: 1729921 Comm: rmmod Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0x8d/0xb5 slab_err+0xb7/0xdc ? lock_acquired+0x199/0x490 __kmem_cache_shutdown+0x1ac/0x3c0 ? __mutex_unlock_slowpath+0x45/0x2a0 kmem_cache_destroy+0x55/0x120 exit_btrfs_fs+0xa/0x59 [btrfs] __x64_sys_delete_module+0x194/0x260 ? fpregs_assert_state_consistent+0x1e/0x40 ? exit_to_user_mode_prepare+0x55/0x1c0 ? trace_hardirqs_on+0x1b/0xf0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f693e305897 Code: 73 01 c3 48 8b 0d f9 f5 (...) RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897 RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8 RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000 R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740 R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760 INFO: Object 0x000000004cf95ea8 @offset=6264 INFO: Allocated in btrfs_alloc_tree_block+0x1e0/0x360 [btrfs] age=1931 cpu=6 pid=1729873 __slab_alloc.isra.0+0x109/0x1c0 kmem_cache_alloc+0x7bb/0x830 btrfs_alloc_tree_block+0x1e0/0x360 [btrfs] alloc_tree_block_no_bg_flush+0x4f/0x60 [btrfs] __btrfs_cow_block+0x12d/0x5f0 [btrfs] btrfs_cow_block+0xf7/0x220 [btrfs] btrfs_search_slot+0x62a/0xc40 [btrfs] btrfs_del_orphan_item+0x65/0xd0 [btrfs] btrfs_find_orphan_roots+0x1bf/0x200 [btrfs] open_ctree+0x125a/0x18a0 [btrfs] btrfs_mount_root.cold+0x13/0xed [btrfs] legacy_get_tree+0x30/0x60 vfs_get_tree+0x28/0xe0 fc_mount+0xe/0x40 vfs_kern_mount.part.0+0x71/0x90 btrfs_mount+0x13b/0x3e0 [btrfs] INFO: Freed in __btrfs_run_delayed_refs+0xabd/0x1290 [btrfs] age=3173 cpu=6 pid=1729803 kmem_cache_free+0x34c/0x3c0 __btrfs_run_delayed_refs+0xabd/0x1290 [btrfs] btrfs_run_delayed_refs+0x81/0x210 [btrfs] commit_cowonly_roots+0xfb/0x300 [btrfs] btrfs_commit_transaction+0x367/0xc40 [btrfs] close_ctree+0x113/0x2fa [btrfs] generic_shutdown_super+0x6c/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 kmem_cache_destroy btrfs_delayed_extent_op: Slab cache still has objects CPU: 3 PID: 1729921 Comm: rmmod Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0x8d/0xb5 kmem_cache_destroy+0x119/0x120 exit_btrfs_fs+0xa/0x59 [btrfs] __x64_sys_delete_module+0x194/0x260 ? fpregs_assert_state_consistent+0x1e/0x40 ? exit_to_user_mode_prepare+0x55/0x1c0 ? trace_hardirqs_on+0x1b/0xf0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f693e305897 Code: 73 01 c3 48 8b 0d f9 (...) RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897 RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8 RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000 R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740 R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760 BTRFS: state leak: start 30408704 end 30425087 state 1 in tree 1 refs 1 So fix this by making the remount path to wait for the cleaner task before calling btrfs_commit_super(). The remount path now waits for the bit BTRFS_FS_CLEANER_RUNNING to be cleared from fs_info->flags before calling btrfs_commit_super() and this ensures the cleaner can not start a transaction after that, because it sleeps when the filesystem is in RO mode and we have already flagged the filesystem as RO before waiting for BTRFS_FS_CLEANER_RUNNING to be cleared. This also introduces a new flag BTRFS_FS_STATE_RO to be used for fs_info->fs_state when the filesystem is in RO mode. This is because we were doing the RO check using the flags of the superblock and setting the RO mode simply by ORing into the superblock's flags - those operations are not atomic and could result in the cleaner not seeing the update from the remount task after it clears BTRFS_FS_CLEANER_RUNNING. Tested-by: Fabian Vogt Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 20 +++++++++++++++++++- fs/btrfs/disk-io.c | 5 ++++- fs/btrfs/super.c | 22 +++++++++++++++++++--- fs/btrfs/volumes.c | 4 ++-- 4 files changed, 44 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 3935d297d1981..0225c5208f44c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -132,6 +132,8 @@ enum { * defrag */ BTRFS_FS_STATE_REMOUNTING, + /* Filesystem in RO mode */ + BTRFS_FS_STATE_RO, /* Track if a transaction abort has been reported on this filesystem */ BTRFS_FS_STATE_TRANS_ABORTED, /* @@ -2892,10 +2894,26 @@ static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info) * If we remount the fs to be R/O or umount the fs, the cleaner needn't do * anything except sleeping. This function is used to check the status of * the fs. + * We check for BTRFS_FS_STATE_RO to avoid races with a concurrent remount, + * since setting and checking for SB_RDONLY in the superblock's flags is not + * atomic. */ static inline int btrfs_need_cleaner_sleep(struct btrfs_fs_info *fs_info) { - return fs_info->sb->s_flags & SB_RDONLY || btrfs_fs_closing(fs_info); + return test_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state) || + btrfs_fs_closing(fs_info); +} + +static inline void btrfs_set_sb_rdonly(struct super_block *sb) +{ + sb->s_flags |= SB_RDONLY; + set_bit(BTRFS_FS_STATE_RO, &btrfs_sb(sb)->fs_state); +} + +static inline void btrfs_clear_sb_rdonly(struct super_block *sb) +{ + sb->s_flags &= ~SB_RDONLY; + clear_bit(BTRFS_FS_STATE_RO, &btrfs_sb(sb)->fs_state); } /* tree mod log functions from ctree.c */ diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e941cbae39919..e7bcbd0b93ef8 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1729,7 +1729,7 @@ static int cleaner_kthread(void *arg) */ btrfs_delete_unused_bgs(fs_info); sleep: - clear_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags); + clear_and_wake_up_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags); if (kthread_should_park()) kthread_parkme(); if (kthread_should_stop()) @@ -2830,6 +2830,9 @@ static int init_mount_fs_info(struct btrfs_fs_info *fs_info, struct super_block return -ENOMEM; btrfs_init_delayed_root(fs_info->delayed_root); + if (sb_rdonly(sb)) + set_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state); + return btrfs_alloc_stripe_hash_table(fs_info); } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index b24fa62375e06..38740cc2919fd 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -175,7 +175,7 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function btrfs_discard_stop(fs_info); /* btrfs handle error by forcing the filesystem readonly */ - sb->s_flags |= SB_RDONLY; + btrfs_set_sb_rdonly(sb); btrfs_info(fs_info, "forced readonly"); /* * Note that a running device replace operation is not canceled here @@ -1953,7 +1953,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) /* avoid complains from lockdep et al. */ up(&fs_info->uuid_tree_rescan_sem); - sb->s_flags |= SB_RDONLY; + btrfs_set_sb_rdonly(sb); /* * Setting SB_RDONLY will put the cleaner thread to @@ -1964,6 +1964,20 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) */ btrfs_delete_unused_bgs(fs_info); + /* + * The cleaner task could be already running before we set the + * flag BTRFS_FS_STATE_RO (and SB_RDONLY in the superblock). + * We must make sure that after we finish the remount, i.e. after + * we call btrfs_commit_super(), the cleaner can no longer start + * a transaction - either because it was dropping a dead root, + * running delayed iputs or deleting an unused block group (the + * cleaner picked a block group from the list of unused block + * groups before we were able to in the previous call to + * btrfs_delete_unused_bgs()). + */ + wait_on_bit(&fs_info->flags, BTRFS_FS_CLEANER_RUNNING, + TASK_UNINTERRUPTIBLE); + btrfs_dev_replace_suspend_for_unmount(fs_info); btrfs_scrub_cancel(fs_info); btrfs_pause_balance(fs_info); @@ -2014,7 +2028,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) if (ret) goto restore; - sb->s_flags &= ~SB_RDONLY; + btrfs_clear_sb_rdonly(sb); set_bit(BTRFS_FS_OPEN, &fs_info->flags); } @@ -2036,6 +2050,8 @@ restore: /* We've hit an error - don't reset SB_RDONLY */ if (sb_rdonly(sb)) old_flags |= SB_RDONLY; + if (!(old_flags & SB_RDONLY)) + clear_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state); sb->s_flags = old_flags; fs_info->mount_opt = old_opts; fs_info->compress_type = old_compress_type; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 7930e1c78c45b..2c0aa03b64376 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2593,7 +2593,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE); if (seeding_dev) { - sb->s_flags &= ~SB_RDONLY; + btrfs_clear_sb_rdonly(sb); ret = btrfs_prepare_sprout(fs_info); if (ret) { btrfs_abort_transaction(trans, ret); @@ -2729,7 +2729,7 @@ error_sysfs: mutex_unlock(&fs_info->fs_devices->device_list_mutex); error_trans: if (seeding_dev) - sb->s_flags |= SB_RDONLY; + btrfs_set_sb_rdonly(sb); if (trans) btrfs_end_transaction(trans); error_free_zone: -- GitLab From 0a31daa4b602ff6861fdf182236d64b2a353bace Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 14 Dec 2020 10:10:48 +0000 Subject: [PATCH 0779/1894] btrfs: add assertion for empty list of transactions at late stage of umount Add an assertion to close_ctree(), after destroying all the work queues, to verify we do not have any transaction still open or committing at that at that point. If we have any, it means something is seriously wrong and that can cause memory leaks and use-after-free problems. This is motivated by the previous patches that fixed bugs where we ended up leaking an open transaction after unmounting the filesystem. Tested-by: Fabian Vogt Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e7bcbd0b93ef8..1dfd4b2d0e1e8 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4181,6 +4181,9 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info) invalidate_inode_pages2(fs_info->btree_inode->i_mapping); btrfs_stop_all_workers(fs_info); + /* We shouldn't have any transaction open at this point */ + ASSERT(list_empty(&fs_info->trans_list)); + clear_bit(BTRFS_FS_OPEN, &fs_info->flags); free_root_pointers(fs_info, true); btrfs_free_fs_roots(fs_info); -- GitLab From a8cc263eb58ca133617662a5a5e07131d0ebf299 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 14 Dec 2020 10:10:49 +0000 Subject: [PATCH 0780/1894] btrfs: run delayed iputs when remounting RO to avoid leaking them When remounting RO, after setting the superblock with the RO flag, the cleaner task will start sleeping and do nothing, since the call to btrfs_need_cleaner_sleep() keeps returning 'true'. However, when the cleaner task goes to sleep, the list of delayed iputs may not be empty. As long as we are in RO mode, the cleaner task will keep sleeping and never run the delayed iputs. This means that if a filesystem unmount is started, we get into close_ctree() with a non-empty list of delayed iputs, and because the filesystem is in RO mode and is not in an error state (or a transaction aborted), btrfs_error_commit_super() and btrfs_commit_super(), which run the delayed iputs, are never called, and later we fail the assertion that checks if the delayed iputs list is empty: assertion failed: list_empty(&fs_info->delayed_iputs), in fs/btrfs/disk-io.c:4049 ------------[ cut here ]------------ kernel BUG at fs/btrfs/ctree.h:3153! invalid opcode: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC PTI CPU: 1 PID: 3780621 Comm: umount Tainted: G L 5.6.0-rc2-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-0-ga698c8995f-prebuilt.qemu.org 04/01/2014 RIP: 0010:assertfail.constprop.0+0x18/0x26 [btrfs] Code: 8b 7b 58 48 85 ff 74 (...) RSP: 0018:ffffb748c89bbdf8 EFLAGS: 00010246 RAX: 0000000000000051 RBX: ffff9608f2584000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffffffff91998988 RDI: 00000000ffffffff RBP: ffff9608f25870d8 R08: 0000000000000000 R09: 0000000000000001 R10: 0000000000000000 R11: 0000000000000000 R12: ffffffffc0cbc500 R13: ffffffff92411750 R14: 0000000000000000 R15: ffff9608f2aab250 FS: 00007fcbfaa66c80(0000) GS:ffff960936c80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fffc2c2dd38 CR3: 0000000235e54002 CR4: 00000000003606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: close_ctree+0x1a2/0x2e6 [btrfs] generic_shutdown_super+0x6c/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x93/0xc0 exit_to_usermode_loop+0xf9/0x100 do_syscall_64+0x20d/0x260 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x7fcbfaca6307 Code: eb 0b 00 f7 d8 64 89 (...) RSP: 002b:00007fffc2c2ed68 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 RAX: 0000000000000000 RBX: 0000558203b559b0 RCX: 00007fcbfaca6307 RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000558203b55bc0 RBP: 0000000000000000 R08: 0000000000000001 R09: 00007fffc2c2dad0 R10: 0000558203b55bf0 R11: 0000000000000246 R12: 0000558203b55bc0 R13: 00007fcbfadcc204 R14: 0000558203b55aa8 R15: 0000000000000000 Modules linked in: btrfs dm_flakey dm_log_writes (...) ---[ end trace d44d303790049ef6 ]--- So fix this by making the remount RO path run any remaining delayed iputs after waiting for the cleaner to become inactive. Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/super.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 38740cc2919fd..12d7d3be7cd45 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1978,6 +1978,16 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) wait_on_bit(&fs_info->flags, BTRFS_FS_CLEANER_RUNNING, TASK_UNINTERRUPTIBLE); + /* + * We've set the superblock to RO mode, so we might have made + * the cleaner task sleep without running all pending delayed + * iputs. Go through all the delayed iputs here, so that if an + * unmount happens without remounting RW we don't end up at + * finishing close_ctree() with a non-empty list of delayed + * iputs. + */ + btrfs_run_delayed_iputs(fs_info); + btrfs_dev_replace_suspend_for_unmount(fs_info); btrfs_scrub_cancel(fs_info); btrfs_pause_balance(fs_info); -- GitLab From f09ced4053bc0a2094a12b60b646114c966ef4c6 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Fri, 18 Dec 2020 14:45:24 +0100 Subject: [PATCH 0781/1894] xsk: Fix race in SKB mode transmit with shared cq MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a race when multiple sockets are simultaneously calling sendto() when the completion ring is shared in the SKB case. This is the case when you share the same netdev and queue id through the XDP_SHARED_UMEM bind flag. The problem is that multiple processes can be in xsk_generic_xmit() and call the backpressure mechanism in xskq_prod_reserve(xs->pool->cq). As this is a shared resource in this specific scenario, a race might occur since the rings are single-producer single-consumer. Fix this by moving the tx_completion_lock from the socket to the pool as the pool is shared between the sockets that share the completion ring. (The pool is not shared when this is not the case.) And then protect the accesses to xskq_prod_reserve() with this lock. The tx_completion_lock is renamed cq_lock to better reflect that it protects accesses to the potentially shared completion ring. Fixes: 35fcde7f8deb ("xsk: support for Tx") Reported-by: Xuan Zhuo Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/20201218134525.13119-2-magnus.karlsson@gmail.com --- include/net/xdp_sock.h | 4 ---- include/net/xsk_buff_pool.h | 5 +++++ net/xdp/xsk.c | 9 ++++++--- net/xdp/xsk_buff_pool.c | 1 + 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 4f4e93bf814c3..cc17bc9575482 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -58,10 +58,6 @@ struct xdp_sock { struct xsk_queue *tx ____cacheline_aligned_in_smp; struct list_head tx_list; - /* Mutual exclusion of NAPI TX thread and sendmsg error paths - * in the SKB destructor callback. - */ - spinlock_t tx_completion_lock; /* Protects generic receive. */ spinlock_t rx_lock; diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index 01755b838c745..eaa8386dbc630 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -73,6 +73,11 @@ struct xsk_buff_pool { bool dma_need_sync; bool unaligned; void *addrs; + /* Mutual exclusion of the completion ring in the SKB mode. Two cases to protect: + * NAPI TX thread and sendmsg error paths in the SKB destructor callback and when + * sockets share a single cq when the same netdev and queue id is shared. + */ + spinlock_t cq_lock; struct xdp_buff_xsk *free_heads[]; }; diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index c6532d77fde76..d531f9cd0de6a 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -423,9 +423,9 @@ static void xsk_destruct_skb(struct sk_buff *skb) struct xdp_sock *xs = xdp_sk(skb->sk); unsigned long flags; - spin_lock_irqsave(&xs->tx_completion_lock, flags); + spin_lock_irqsave(&xs->pool->cq_lock, flags); xskq_prod_submit_addr(xs->pool->cq, addr); - spin_unlock_irqrestore(&xs->tx_completion_lock, flags); + spin_unlock_irqrestore(&xs->pool->cq_lock, flags); sock_wfree(skb); } @@ -437,6 +437,7 @@ static int xsk_generic_xmit(struct sock *sk) bool sent_frame = false; struct xdp_desc desc; struct sk_buff *skb; + unsigned long flags; int err = 0; mutex_lock(&xs->mutex); @@ -468,10 +469,13 @@ static int xsk_generic_xmit(struct sock *sk) * if there is space in it. This avoids having to implement * any buffering in the Tx path. */ + spin_lock_irqsave(&xs->pool->cq_lock, flags); if (unlikely(err) || xskq_prod_reserve(xs->pool->cq)) { + spin_unlock_irqrestore(&xs->pool->cq_lock, flags); kfree_skb(skb); goto out; } + spin_unlock_irqrestore(&xs->pool->cq_lock, flags); skb->dev = xs->dev; skb->priority = sk->sk_priority; @@ -1303,7 +1307,6 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol, xs->state = XSK_READY; mutex_init(&xs->mutex); spin_lock_init(&xs->rx_lock); - spin_lock_init(&xs->tx_completion_lock); INIT_LIST_HEAD(&xs->map_list); spin_lock_init(&xs->map_list_lock); diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 818b750609220..20598eea658c4 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -71,6 +71,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, INIT_LIST_HEAD(&pool->free_list); INIT_LIST_HEAD(&pool->xsk_tx_list); spin_lock_init(&pool->xsk_tx_list_lock); + spin_lock_init(&pool->cq_lock); refcount_set(&pool->users, 1); pool->fq = xs->fq_tmp; -- GitLab From b1b95cb5c0a9694d47d5f845ba97e226cfda957d Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Fri, 18 Dec 2020 14:45:25 +0100 Subject: [PATCH 0782/1894] xsk: Rollback reservation at NETDEV_TX_BUSY MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rollback the reservation in the completion ring when we get a NETDEV_TX_BUSY. When this error is received from the driver, we are supposed to let the user application retry the transmit again. And in order to do this, we need to roll back the failed send so it can be retried. Unfortunately, we did not cancel the reservation we had made in the completion ring. By not doing this, we actually make the completion ring one entry smaller per NETDEV_TX_BUSY error we get, and after enough of these errors the completion ring will be of size zero and transmit will stop working. Fix this by cancelling the reservation when we get a NETDEV_TX_BUSY error. Fixes: 642e450b6b59 ("xsk: Do not discard packet when NETDEV_TX_BUSY") Reported-by: Xuan Zhuo Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/20201218134525.13119-3-magnus.karlsson@gmail.com --- net/xdp/xsk.c | 3 +++ net/xdp/xsk_queue.h | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index d531f9cd0de6a..8037b04a9edd1 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -487,6 +487,9 @@ static int xsk_generic_xmit(struct sock *sk) if (err == NETDEV_TX_BUSY) { /* Tell user-space to retry the send */ skb->destructor = sock_wfree; + spin_lock_irqsave(&xs->pool->cq_lock, flags); + xskq_prod_cancel(xs->pool->cq); + spin_unlock_irqrestore(&xs->pool->cq_lock, flags); /* Free skb without triggering the perf drop trace */ consume_skb(skb); err = -EAGAIN; diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index 4a9663aa7afe6..2823b7c3302d0 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -334,6 +334,11 @@ static inline bool xskq_prod_is_full(struct xsk_queue *q) return xskq_prod_nb_free(q, 1) ? false : true; } +static inline void xskq_prod_cancel(struct xsk_queue *q) +{ + q->cached_prod--; +} + static inline int xskq_prod_reserve(struct xsk_queue *q) { if (xskq_prod_is_full(q)) -- GitLab From e79bb299ccad6983876686a4d8c87c92ebbe5657 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 14 Dec 2020 22:35:39 +0000 Subject: [PATCH 0783/1894] selftests/bpf: Fix spelling mistake "tranmission" -> "transmission" There are two spelling mistakes in output messages. Fix these. Signed-off-by: Colin Ian King Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20201214223539.83168-1-colin.king@canonical.com --- tools/testing/selftests/bpf/xdpxceiver.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c index 014dedaa4dd28..1e722ee76b1fc 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.c +++ b/tools/testing/selftests/bpf/xdpxceiver.c @@ -715,7 +715,7 @@ static void worker_pkt_dump(void) int payload = *((uint32_t *)(pkt_buf[iter]->payload + PKT_HDR_SIZE)); if (payload == EOT) { - ksft_print_msg("End-of-tranmission frame received\n"); + ksft_print_msg("End-of-transmission frame received\n"); fprintf(stdout, "---------------------------------------\n"); break; } @@ -747,7 +747,7 @@ static void worker_pkt_validate(void) } if (payloadseqnum == EOT) { - ksft_print_msg("End-of-tranmission frame received: PASS\n"); + ksft_print_msg("End-of-transmission frame received: PASS\n"); sigvar = 1; break; } -- GitLab From 0020ef04e48571a88d4f482ad08f71052c5c5a08 Mon Sep 17 00:00:00 2001 From: Xiaoguang Wang Date: Fri, 18 Dec 2020 15:26:48 +0800 Subject: [PATCH 0784/1894] io_uring: fix io_wqe->work_list corruption For the first time a req punted to io-wq, we'll initialize io_wq_work's list to be NULL, then insert req to io_wqe->work_list. If this req is not inserted into tail of io_wqe->work_list, this req's io_wq_work list will point to another req's io_wq_work. For splitted bio case, this req maybe inserted to io_wqe->work_list repeatedly, once we insert it to tail of io_wqe->work_list for the second time, now io_wq_work->list->next will be invalid pointer, which then result in many strang error, panic, kernel soft-lockup, rcu stall, etc. In my vm, kernel doest not have commit cc29e1bf0d63f7 ("block: disable iopoll for split bio"), below fio job can reproduce this bug steadily: [global] name=iouring-sqpoll-iopoll-1 ioengine=io_uring iodepth=128 numjobs=1 thread rw=randread direct=1 registerfiles=1 hipri=1 bs=4m size=100M runtime=120 time_based group_reporting randrepeat=0 [device] directory=/home/feiman.wxg/mntpoint/ # an ext4 mount point If we have commit cc29e1bf0d63f7 ("block: disable iopoll for split bio"), there will no splitted bio case for polled io, but I think we still to need to fix this list corruption, it also should maybe go to stable branchs. To fix this corruption, if a req is inserted into tail of io_wqe->work_list, initialize req->io_wq_work->list->next to bu NULL. Cc: stable@vger.kernel.org Signed-off-by: Xiaoguang Wang Reviewed-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io-wq.h | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/io-wq.h b/fs/io-wq.h index 069496c6d4f94..75113bcd5889f 100644 --- a/fs/io-wq.h +++ b/fs/io-wq.h @@ -59,6 +59,7 @@ static inline void wq_list_add_tail(struct io_wq_work_node *node, list->last->next = node; list->last = node; } + node->next = NULL; } static inline void wq_list_cut(struct io_wq_work_list *list, -- GitLab From dfea9fce29fda6f2f91161677e0e0d9b671bc099 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 18 Dec 2020 13:12:21 +0000 Subject: [PATCH 0785/1894] io_uring: close a small race gap for files cancel The purpose of io_uring_cancel_files() is to wait for all requests matching ->files to go/be cancelled. We should first drop files of a request in io_req_drop_files() and only then make it undiscoverable for io_uring_cancel_files. First drop, then delete from list. It's ok to leave req->id->files dangling, because it's not dereferenced by cancellation code, only compared against. It would potentially go to sleep and be awaken by following in io_req_drop_files() wake_up(). Fixes: 0f2122045b946 ("io_uring: don't rely on weak ->files references") Cc: # 5.5+ Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 8cf6f22afc5e4..b74957856e681 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -6098,15 +6098,15 @@ static void io_req_drop_files(struct io_kiocb *req) struct io_uring_task *tctx = req->task->io_uring; unsigned long flags; + put_files_struct(req->work.identity->files); + put_nsproxy(req->work.identity->nsproxy); spin_lock_irqsave(&ctx->inflight_lock, flags); list_del(&req->inflight_entry); - if (atomic_read(&tctx->in_idle)) - wake_up(&tctx->wait); spin_unlock_irqrestore(&ctx->inflight_lock, flags); req->flags &= ~REQ_F_INFLIGHT; - put_files_struct(req->work.identity->files); - put_nsproxy(req->work.identity->nsproxy); req->work.flags &= ~IO_WQ_WORK_FILES; + if (atomic_read(&tctx->in_idle)) + wake_up(&tctx->wait); } static void __io_clean_op(struct io_kiocb *req) -- GitLab From d467d80dc399ba77875d647f2f37b7d1a70d94c2 Mon Sep 17 00:00:00 2001 From: Tian Tao Date: Wed, 16 Dec 2020 10:47:15 +0800 Subject: [PATCH 0786/1894] bpf: Remove unused including Remove including that don't need it. Signed-off-by: Tian Tao Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/1608086835-54523-1-git-send-email-tiantao6@hisilicon.com --- kernel/bpf/syscall.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 287be337d5f63..bb2700ec5bf3b 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include -- GitLab From 55d2eba8e7cd439c11cdb204898c2d384227629b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 16 Dec 2020 12:21:36 +0100 Subject: [PATCH 0787/1894] jump_label: Fix usage in module __init When the static_key is part of the module, and the module calls static_key_inc/enable() from it's __init section *AND* has a static_branch_*() user in that very same __init section, things go wobbly. If the static_key lives outside the module, jump_label_add_module() would append this module's sites to the key and jump_label_update() would take the static_key_linked() branch and all would be fine. If all the sites are outside of __init, then everything will be fine too. However, when all is aligned just as described above, jump_label_update() calls __jump_label_update(.init = false) and we'll not update sites in __init text. Fixes: 19483677684b ("jump_label: Annotate entries that operate on __init code earlier") Reported-by: Dexuan Cui Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Tested-by: Jessica Yu Link: https://lkml.kernel.org/r/20201216135435.GV3092@hirez.programming.kicks-ass.net --- kernel/jump_label.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 015ef903ce8cc..c6a39d662935e 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -793,6 +793,7 @@ int jump_label_text_reserved(void *start, void *end) static void jump_label_update(struct static_key *key) { struct jump_entry *stop = __stop___jump_table; + bool init = system_state < SYSTEM_RUNNING; struct jump_entry *entry; #ifdef CONFIG_MODULES struct module *mod; @@ -804,15 +805,16 @@ static void jump_label_update(struct static_key *key) preempt_disable(); mod = __module_address((unsigned long)key); - if (mod) + if (mod) { stop = mod->jump_entries + mod->num_jump_entries; + init = mod->state == MODULE_STATE_COMING; + } preempt_enable(); #endif entry = static_key_entries(key); /* if there are no users, entry can be NULL */ if (entry) - __jump_label_update(key, entry, stop, - system_state < SYSTEM_RUNNING); + __jump_label_update(key, entry, stop, init); } #ifdef CONFIG_STATIC_KEYS_SELFTEST -- GitLab From 441fa3409769180df2fd12fcada35441435a120c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 16 Dec 2020 14:19:22 +0100 Subject: [PATCH 0788/1894] jump_label/static_call: Add MAINTAINERS These files don't appear to have a MAINTAINERS entry and as such patches miss being seen by people who know this code. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Steven Rostedt (VMware) Acked-by: Ard Biesheuvel Acked-by: Josh Poimboeuf Acked-by: Jason Baron Link: https://lkml.kernel.org/r/20201216133014.GT3092@hirez.programming.kicks-ass.net --- MAINTAINERS | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 281de213ef478..be02614ad28d6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16675,6 +16675,22 @@ M: Ion Badulescu S: Odd Fixes F: drivers/net/ethernet/adaptec/starfire* +STATIC BRANCH/CALL +M: Peter Zijlstra +M: Josh Poimboeuf +M: Jason Baron +R: Steven Rostedt +R: Ard Biesheuvel +S: Supported +F: arch/*/include/asm/jump_label*.h +F: arch/*/include/asm/static_call*.h +F: arch/*/kernel/jump_label.c +F: arch/*/kernel/static_call.c +F: include/linux/jump_label*.h +F: include/linux/static_call*.h +F: kernel/jump_label.c +F: kernel/static_call.c + STEC S1220 SKD DRIVER M: Damien Le Moal L: linux-block@vger.kernel.org -- GitLab From 91ea62d58bd661827c328a2c6c02a87fa4aae88b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 18 Dec 2020 16:39:14 +0100 Subject: [PATCH 0789/1894] softirq: Avoid bad tracing / lockdep interaction Similar to commit: 1a63dcd8765b ("softirq: Reorder trace_softirqs_on to prevent lockdep splat") __local_bh_enable_ip() can also call into tracing with inconsistent state. Unlike that commit we don't need to bother about the tracepoint because 'cnt-1' never matches preempt_count() (by construction). Reported-by: Heiko Carstens Signed-off-by: Peter Zijlstra (Intel) Tested-by: Heiko Carstens Link: https://lkml.kernel.org/r/20201218154519.GW3092@hirez.programming.kicks-ass.net --- kernel/softirq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/softirq.c b/kernel/softirq.c index 09229ad822096..0f1d3a32d53b3 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -185,7 +185,7 @@ void __local_bh_enable_ip(unsigned long ip, unsigned int cnt) * Keep preemption disabled until we are done with * softirq processing: */ - preempt_count_sub(cnt - 1); + __preempt_count_sub(cnt - 1); if (unlikely(!in_interrupt() && local_softirq_pending())) { /* -- GitLab From f2283366c2919fda71e6eb725c3e5c1bd47bae1a Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Fri, 18 Dec 2020 16:34:00 +0100 Subject: [PATCH 0790/1894] ALSA: pcm: Remove snd_pcm_lib_preallocate_dma_free() Since commit d4cfb30fce03 ("ALSA: pcm: Set per-card upper limit of PCM buffer allocations") snd_pcm_lib_preallocate_dma_free() is a single line function that has one caller, which is another single line function. Clean this up a bit and remove snd_pcm_lib_preallocate_dma_free() and directly call do_free_pages() from snd_pcm_lib_preallocate_free(). This is a bit less boilerplate. Signed-off-by: Lars-Peter Clausen Link: https://lore.kernel.org/r/20201218153400.18394-1-lars@metafoo.de Signed-off-by: Takashi Iwai --- sound/core/pcm_memory.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/sound/core/pcm_memory.c b/sound/core/pcm_memory.c index 4f03ba8ed0ae5..ee6e9c5eec45d 100644 --- a/sound/core/pcm_memory.c +++ b/sound/core/pcm_memory.c @@ -89,14 +89,6 @@ static int preallocate_pcm_pages(struct snd_pcm_substream *substream, size_t siz return 0; } -/* - * release the preallocated buffer if not yet done. - */ -static void snd_pcm_lib_preallocate_dma_free(struct snd_pcm_substream *substream) -{ - do_free_pages(substream->pcm->card, &substream->dma_buffer); -} - /** * snd_pcm_lib_preallocate_free - release the preallocated buffer of the specified substream. * @substream: the pcm substream instance @@ -105,7 +97,7 @@ static void snd_pcm_lib_preallocate_dma_free(struct snd_pcm_substream *substream */ void snd_pcm_lib_preallocate_free(struct snd_pcm_substream *substream) { - snd_pcm_lib_preallocate_dma_free(substream); + do_free_pages(substream->pcm->card, &substream->dma_buffer); } /** -- GitLab From 9df28edce7c6ab38050235f6f8b43dd7ccd01b6d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 18 Dec 2020 15:58:58 +0100 Subject: [PATCH 0791/1894] ALSA: usb-audio: Disable sample read check if firmware doesn't give back Some buggy firmware don't give the current sample rate but leaves zero. Handle this case more gracefully without warning but just skip the current rate verification from the next time. Cc: Link: https://lore.kernel.org/r/20201218145858.2357-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/clock.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/usb/clock.c b/sound/usb/clock.c index e940dcee792b1..31051f2be46da 100644 --- a/sound/usb/clock.c +++ b/sound/usb/clock.c @@ -534,6 +534,12 @@ static int set_sample_rate_v1(struct snd_usb_audio *chip, } crate = data[0] | (data[1] << 8) | (data[2] << 16); + if (!crate) { + dev_info(&dev->dev, "failed to read current rate; disabling the check\n"); + chip->sample_rate_read_error = 3; /* three strikes, see above */ + return 0; + } + if (crate != rate) { dev_warn(&dev->dev, "current rate %d is different from the runtime rate %d\n", crate, rate); // runtime->rate = crate; -- GitLab From 5c1733e33c888a3cb7f576564d8ad543d5ad4a9e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 18 Dec 2020 15:56:24 +0100 Subject: [PATCH 0792/1894] ALSA: memalloc: Align buffer allocations in page size Currently the standard memory allocator (snd_dma_malloc_pages*()) passes the byte size to allocate as is. Most of the backends allocates real pages, hence the actual allocations are aligned in page size. However, the genalloc doesn't seem assuring the size alignment, hence it may result in the access outside the buffer when the whole memory pages are exposed via mmap. For avoiding such inconsistencies, this patch makes the allocation size always to be aligned in page size. Note that, after this change, snd_dma_buffer.bytes field contains the aligned size, not the originally requested size. This value is also used for releasing the pages in return. Reviewed-by: Lars-Peter Clausen Link: https://lore.kernel.org/r/20201218145625.2045-2-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/memalloc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c index 0f335162f87c7..966bef5acc750 100644 --- a/sound/core/memalloc.c +++ b/sound/core/memalloc.c @@ -133,6 +133,7 @@ int snd_dma_alloc_pages(int type, struct device *device, size_t size, if (WARN_ON(!dmab)) return -ENXIO; + size = PAGE_ALIGN(size); dmab->dev.type = type; dmab->dev.dev = device; dmab->bytes = 0; -- GitLab From 618de0f4ef11acd8cf26902e65493d46cc20cc89 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 18 Dec 2020 15:56:25 +0100 Subject: [PATCH 0793/1894] ALSA: pcm: Clear the full allocated memory at hw_params The PCM hw_params core function tries to clear up the PCM buffer before actually using for avoiding the information leak from the previous usages or the usage before a new allocation. It performs the memset() with runtime->dma_bytes, but this might still leave some remaining bytes untouched; namely, the PCM buffer size is aligned in page size for mmap, hence runtime->dma_bytes doesn't necessarily cover all PCM buffer pages, and the remaining bytes are exposed via mmap. This patch changes the memory clearance to cover the all buffer pages if the stream is supposed to be mmap-ready (that guarantees that the buffer size is aligned in page size). Reviewed-by: Lars-Peter Clausen Link: https://lore.kernel.org/r/20201218145625.2045-3-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/pcm_native.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 47b155a49226f..9f3f8e953ff04 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -755,8 +755,13 @@ static int snd_pcm_hw_params(struct snd_pcm_substream *substream, runtime->boundary *= 2; /* clear the buffer for avoiding possible kernel info leaks */ - if (runtime->dma_area && !substream->ops->copy_user) - memset(runtime->dma_area, 0, runtime->dma_bytes); + if (runtime->dma_area && !substream->ops->copy_user) { + size_t size = runtime->dma_bytes; + + if (runtime->info & SNDRV_PCM_INFO_MMAP) + size = PAGE_ALIGN(size); + memset(runtime->dma_area, 0, size); + } snd_pcm_timer_resolution_change(substream); snd_pcm_set_state(substream, SNDRV_PCM_STATE_SETUP); -- GitLab From 11cb881bf075cea41092a20236ba708b18e1dbb2 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 18 Dec 2020 17:17:30 +0100 Subject: [PATCH 0794/1894] ALSA: pcm: oss: Fix a few more UBSAN fixes There are a few places that call round{up|down}_pow_of_two() with the value zero, and this causes undefined behavior warnings. Avoid calling those macros if such a nonsense value is passed; it's a minor optimization as well, as we handle it as either an error or a value to be skipped, instead. Reported-by: syzbot+33ef0b6639a8d2d42b4c@syzkaller.appspotmail.com Cc: Link: https://lore.kernel.org/r/20201218161730.26596-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/oss/pcm_oss.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index de1917484647e..142fc751a8477 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -693,6 +693,8 @@ static int snd_pcm_oss_period_size(struct snd_pcm_substream *substream, oss_buffer_size = snd_pcm_plug_client_size(substream, snd_pcm_hw_param_value_max(slave_params, SNDRV_PCM_HW_PARAM_BUFFER_SIZE, NULL)) * oss_frame_size; + if (!oss_buffer_size) + return -EINVAL; oss_buffer_size = rounddown_pow_of_two(oss_buffer_size); if (atomic_read(&substream->mmap_count)) { if (oss_buffer_size > runtime->oss.mmap_bytes) @@ -728,17 +730,21 @@ static int snd_pcm_oss_period_size(struct snd_pcm_substream *substream, min_period_size = snd_pcm_plug_client_size(substream, snd_pcm_hw_param_value_min(slave_params, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, NULL)); - min_period_size *= oss_frame_size; - min_period_size = roundup_pow_of_two(min_period_size); - if (oss_period_size < min_period_size) - oss_period_size = min_period_size; + if (min_period_size) { + min_period_size *= oss_frame_size; + min_period_size = roundup_pow_of_two(min_period_size); + if (oss_period_size < min_period_size) + oss_period_size = min_period_size; + } max_period_size = snd_pcm_plug_client_size(substream, snd_pcm_hw_param_value_max(slave_params, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, NULL)); - max_period_size *= oss_frame_size; - max_period_size = rounddown_pow_of_two(max_period_size); - if (oss_period_size > max_period_size) - oss_period_size = max_period_size; + if (max_period_size) { + max_period_size *= oss_frame_size; + max_period_size = rounddown_pow_of_two(max_period_size); + if (oss_period_size > max_period_size) + oss_period_size = max_period_size; + } oss_periods = oss_buffer_size / oss_period_size; -- GitLab From 4b501262826f5b20f54433c586b111dd190bea25 Mon Sep 17 00:00:00 2001 From: James Qian Wang Date: Thu, 19 Nov 2020 09:39:48 +0800 Subject: [PATCH 0795/1894] drm/komeda: Correct the sequence of hw_done() and flip_done() Komeda HW has no special, program the update to HW is done first, then flip happens. So correct the sequence to hw_done() first then flip_done(). Reported-by: Daniel Vetter Signed-off-by: James Qian Wang Acked-by: Daniel Vetter Signed-off-by: Liviu Dudau Link: https://patchwork.freedesktop.org/patch/msgid/20201119013948.2866343-1-james.qian.wang@arm.com --- drivers/gpu/drm/arm/display/komeda/komeda_kms.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c index 6b99df6963842..034ee08482e0f 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c @@ -81,10 +81,10 @@ static void komeda_kms_commit_tail(struct drm_atomic_state *old_state) drm_atomic_helper_commit_modeset_enables(dev, old_state); - drm_atomic_helper_wait_for_flip_done(dev, old_state); - drm_atomic_helper_commit_hw_done(old_state); + drm_atomic_helper_wait_for_flip_done(dev, old_state); + drm_atomic_helper_cleanup_planes(dev, old_state); } -- GitLab From 8e8fbfc682481b7f814985341020129161afd9de Mon Sep 17 00:00:00 2001 From: Carsten Haitzler Date: Fri, 27 Nov 2020 11:00:27 +0000 Subject: [PATCH 0796/1894] drm/komeda: Remove useless variable assignment ret is not actually read after this (only written in one case then returned), so this assign line is useless. This removes that assignment. Signed-off-by: Carsten Haitzler Reviewed-by: Steven Price Acked-by: Liviu Dudau Signed-off-by: Liviu Dudau Link: https://patchwork.freedesktop.org/patch/msgid/20201127110027.133569-1-carsten.haitzler@foss.arm.com --- drivers/gpu/drm/arm/display/komeda/komeda_dev.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_dev.c b/drivers/gpu/drm/arm/display/komeda/komeda_dev.c index 1f8195bad536a..ca891ae14d368 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_dev.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_dev.c @@ -152,7 +152,6 @@ static int komeda_parse_dt(struct device *dev, struct komeda_dev *mdev) ret = of_reserved_mem_device_init(dev); if (ret && ret != -ENODEV) return ret; - ret = 0; for_each_available_child_of_node(np, child) { if (of_node_name_eq(child, "pipeline")) { -- GitLab From a24cf238c771a1d3f0dc68b9f2b62c6d23359026 Mon Sep 17 00:00:00 2001 From: Carsten Haitzler Date: Fri, 27 Nov 2020 11:00:54 +0000 Subject: [PATCH 0797/1894] drm/komeda: Handle NULL pointer access code path in error case komeda_component_get_old_state() technically can return a NULL pointer. komeda_compiz_set_input() even warns when this happens, but then proceeeds to use that NULL pointer to compare memory content there agains the new state to see if it changed. In this case, it's better to assume that the input changed as there is no old state to compare against and thus assume the changes happen anyway. Signed-off-by: Carsten Haitzler Reviewed-by: Steven Price Acked-by: Liviu Dudau [Applied small spelling fixes and fix suggested by Steven Price] Signed-off-by: Liviu Dudau Link: https://patchwork.freedesktop.org/patch/msgid/20201127110054.133686-1-carsten.haitzler@foss.arm.com --- drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c index 8f32ae7c25d06..5c085116de3f8 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c @@ -704,10 +704,10 @@ komeda_compiz_set_input(struct komeda_compiz *compiz, cin->layer_alpha = dflow->layer_alpha; old_st = komeda_component_get_old_state(&compiz->base, drm_st); - WARN_ON(!old_st); /* compare with old to check if this input has been changed */ - if (memcmp(&(to_compiz_st(old_st)->cins[idx]), cin, sizeof(*cin))) + if (WARN_ON(!old_st) || + memcmp(&(to_compiz_st(old_st)->cins[idx]), cin, sizeof(*cin))) c_st->changed_active_inputs |= BIT(idx); komeda_component_add_input(c_st, &dflow->input, idx); -- GitLab From be3e477effba636ad25dcd244db264c6cd5c1f36 Mon Sep 17 00:00:00 2001 From: Carsten Haitzler Date: Fri, 18 Dec 2020 15:08:12 +0000 Subject: [PATCH 0798/1894] drm/komeda: Fix bit check to import to value of proper type KASAN found this problem. find_first_bit() expects to look at a pointer pointing to a long, but we look at a u32 - this is going to be an issue with endianness but, KSAN already flags this as out-of-bounds stack reads. This fixes it by just importing inot a local long. Signed-off-by: Carsten Haitzler Acked-by: Liviu Dudau Signed-off-by: Liviu Dudau Link: https://patchwork.freedesktop.org/patch/msgid/20201218150812.68195-1-carsten.haitzler@foss.arm.com --- drivers/gpu/drm/arm/display/komeda/komeda_pipeline.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.c b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.c index 452e505a1fd39..719a79728e24f 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.c @@ -137,9 +137,10 @@ komeda_pipeline_get_first_component(struct komeda_pipeline *pipe, u32 comp_mask) { struct komeda_component *c = NULL; + unsigned long comp_mask_local = (unsigned long)comp_mask; int id; - id = find_first_bit((unsigned long *)&comp_mask, 32); + id = find_first_bit(&comp_mask_local, 32); if (id < 32) c = komeda_pipeline_get_component(pipe, id); -- GitLab From 72d78717c6d06adf65d2e3dccc96d9e9dc978593 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 11 Dec 2020 12:26:14 -0500 Subject: [PATCH 0799/1894] nfsd: Fixes for nfsd4_encode_read_plus_data() Ensure that we encode the data payload + padding, and that we truncate the preallocated buffer to the actual read size. Fixes: 528b84934eb9 ("NFSD: Add READ_PLUS data support") Signed-off-by: Trond Myklebust Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 45ee6b12ce5b7..5dacc673ef173 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -4756,6 +4756,7 @@ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp, resp->rqstp->rq_vec, read->rd_vlen, maxcount, eof); if (nfserr) return nfserr; + xdr_truncate_encode(xdr, starting_len + 16 + xdr_align_size(*maxcount)); tmp = htonl(NFS4_CONTENT_DATA); write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4); @@ -4763,6 +4764,10 @@ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp, write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp64, 8); tmp = htonl(*maxcount); write_bytes_to_xdr_buf(xdr->buf, starting_len + 12, &tmp, 4); + + tmp = xdr_zero; + write_bytes_to_xdr_buf(xdr->buf, starting_len + 16 + *maxcount, &tmp, + xdr_pad_size(*maxcount)); return nfs_ok; } -- GitLab From b68f0cbd3f95f2df81e525c310a41fc73c2ed0d3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 11 Dec 2020 12:26:15 -0500 Subject: [PATCH 0800/1894] nfsd: Don't set eof on a truncated READ_PLUS If the READ_PLUS operation was truncated due to an error, then ensure we clear the 'eof' flag. Fixes: 9f0b5792f07d ("NFSD: Encode a full READ_PLUS reply") Signed-off-by: Trond Myklebust Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 5dacc673ef173..20178f60f6121 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -4860,14 +4860,15 @@ out: if (nfserr && segments == 0) xdr_truncate_encode(xdr, starting_len); else { - tmp = htonl(eof); - write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4); - tmp = htonl(segments); - write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4); if (nfserr) { xdr_truncate_encode(xdr, last_segment); nfserr = nfs_ok; + eof = 0; } + tmp = htonl(eof); + write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4); + tmp = htonl(segments); + write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4); } return nfserr; -- GitLab From d6c9e4368cc6a61bf25c9c72437ced509c854563 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 18 Dec 2020 12:28:23 -0500 Subject: [PATCH 0801/1894] NFSD: Fix sparse warning in nfssvc.c fs/nfsd/nfssvc.c:36:6: warning: symbol 'inter_copy_offload_enable' was not declared. Should it be static? The parameter was added by commit ce0887ac96d3 ("NFSD add nfs4 inter ssc to nfsd4_copy"). Relocate it into the source file that uses it, and make it static. This approach is similar to the nfs4_disable_idmapping, cltrack_prog, and cltrack_legacy_disable module parameters. Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 5 +++++ fs/nfsd/nfssvc.c | 6 ------ fs/nfsd/xdr4.h | 1 - 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 4727b7f03c5bb..8d6d2678abade 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -50,6 +50,11 @@ #include "pnfs.h" #include "trace.h" +static bool inter_copy_offload_enable; +module_param(inter_copy_offload_enable, bool, 0644); +MODULE_PARM_DESC(inter_copy_offload_enable, + "Enable inter server to server copy offload. Default: false"); + #ifdef CONFIG_NFSD_V4_SECURITY_LABEL #include diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 00384c332f9bb..f9c9f4c63cc77 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -33,12 +33,6 @@ #define NFSDDBG_FACILITY NFSDDBG_SVC -bool inter_copy_offload_enable; -EXPORT_SYMBOL_GPL(inter_copy_offload_enable); -module_param(inter_copy_offload_enable, bool, 0644); -MODULE_PARM_DESC(inter_copy_offload_enable, - "Enable inter server to server copy offload. Default: false"); - extern struct svc_program nfsd_program; static int nfsd(void *vrqstp); #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index a60ff5ce1a375..c300885ae75dd 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -568,7 +568,6 @@ struct nfsd4_copy { struct nfs_fh c_fh; nfs4_stateid stateid; }; -extern bool inter_copy_offload_enable; struct nfsd4_seek { /* request */ -- GitLab From 4a85a6a3320b4a622315d2e0ea91a1d2b013bce4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 18 Dec 2020 12:28:41 -0500 Subject: [PATCH 0802/1894] SUNRPC: Handle TCP socket sends with kernel_sendpage() again Daire Byrne reports a ~50% aggregrate throughput regression on his Linux NFS server after commit da1661b93bf4 ("SUNRPC: Teach server to use xprt_sock_sendmsg for socket sends"), which replaced kernel_send_page() calls in NFSD's socket send path with calls to sock_sendmsg() using iov_iter. Investigation showed that tcp_sendmsg() was not using zero-copy to send the xdr_buf's bvec pages, but instead was relying on memcpy. This means copying every byte of a large NFS READ payload. It looks like TLS sockets do indeed support a ->sendpage method, so it's really not necessary to use xprt_sock_sendmsg() to support TLS fully on the server. A mechanical reversion of da1661b93bf4 is not possible at this point, but we can re-implement the server's TCP socket sendmsg path using kernel_sendpage(). Reported-by: Daire Byrne BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=209439 Signed-off-by: Chuck Lever --- net/sunrpc/svcsock.c | 86 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 85 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index b248f2349437d..c9766d07eb81a 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1062,6 +1062,90 @@ err_noclose: return 0; /* record not complete */ } +static int svc_tcp_send_kvec(struct socket *sock, const struct kvec *vec, + int flags) +{ + return kernel_sendpage(sock, virt_to_page(vec->iov_base), + offset_in_page(vec->iov_base), + vec->iov_len, flags); +} + +/* + * kernel_sendpage() is used exclusively to reduce the number of + * copy operations in this path. Therefore the caller must ensure + * that the pages backing @xdr are unchanging. + * + * In addition, the logic assumes that * .bv_len is never larger + * than PAGE_SIZE. + */ +static int svc_tcp_sendmsg(struct socket *sock, struct msghdr *msg, + struct xdr_buf *xdr, rpc_fraghdr marker, + unsigned int *sentp) +{ + const struct kvec *head = xdr->head; + const struct kvec *tail = xdr->tail; + struct kvec rm = { + .iov_base = &marker, + .iov_len = sizeof(marker), + }; + int flags, ret; + + *sentp = 0; + xdr_alloc_bvec(xdr, GFP_KERNEL); + + msg->msg_flags = MSG_MORE; + ret = kernel_sendmsg(sock, msg, &rm, 1, rm.iov_len); + if (ret < 0) + return ret; + *sentp += ret; + if (ret != rm.iov_len) + return -EAGAIN; + + flags = head->iov_len < xdr->len ? MSG_MORE | MSG_SENDPAGE_NOTLAST : 0; + ret = svc_tcp_send_kvec(sock, head, flags); + if (ret < 0) + return ret; + *sentp += ret; + if (ret != head->iov_len) + goto out; + + if (xdr->page_len) { + unsigned int offset, len, remaining; + struct bio_vec *bvec; + + bvec = xdr->bvec; + offset = xdr->page_base; + remaining = xdr->page_len; + flags = MSG_MORE | MSG_SENDPAGE_NOTLAST; + while (remaining > 0) { + if (remaining <= PAGE_SIZE && tail->iov_len == 0) + flags = 0; + len = min(remaining, bvec->bv_len); + ret = kernel_sendpage(sock, bvec->bv_page, + bvec->bv_offset + offset, + len, flags); + if (ret < 0) + return ret; + *sentp += ret; + if (ret != len) + goto out; + remaining -= len; + offset = 0; + bvec++; + } + } + + if (tail->iov_len) { + ret = svc_tcp_send_kvec(sock, tail, 0); + if (ret < 0) + return ret; + *sentp += ret; + } + +out: + return 0; +} + /** * svc_tcp_sendto - Send out a reply on a TCP socket * @rqstp: completed svc_rqst @@ -1089,7 +1173,7 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp) mutex_lock(&xprt->xpt_mutex); if (svc_xprt_is_dead(xprt)) goto out_notconn; - err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, marker, &sent); + err = svc_tcp_sendmsg(svsk->sk_sock, &msg, xdr, marker, &sent); xdr_free_bvec(xdr); trace_svcsock_tcp_send(xprt, err < 0 ? err : sent); if (err < 0 || sent != (xdr->len + sizeof(marker))) -- GitLab From 7b723008f9c95624c848fad661c01b06e47b20da Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 18 Dec 2020 12:28:58 -0500 Subject: [PATCH 0803/1894] NFSD: Restore NFSv4 decoding's SAVEMEM functionality While converting the NFSv4 decoder to use xdr_stream-based XDR processing, I removed the old SAVEMEM() macro. This macro wrapped a bit of logic that avoided a memory allocation by recognizing when the decoded item resides in a linear section of the Receive buffer. In that case, it returned a pointer into that buffer instead of allocating a bounce buffer. The bounce buffer is necessary only when xdr_inline_decode() has placed the decoded item in the xdr_stream's scratch buffer, which disappears the next time xdr_inline_decode() is called with that xdr_stream. That happens only if the data item crosses a page boundary in the receive buffer, an exceedingly rare occurrence. Allocating a bounce buffer every time results in a minor performance regression that was introduced by the recent NFSv4 decoder overhaul. Let's restore the previous behavior. On average, it saves about 1.5 kmalloc() calls per COMPOUND. Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 42 ++++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 20178f60f6121..eaaa1605b5b5f 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -147,6 +147,25 @@ svcxdr_dupstr(struct nfsd4_compoundargs *argp, void *buf, u32 len) return p; } +static void * +svcxdr_savemem(struct nfsd4_compoundargs *argp, __be32 *p, u32 len) +{ + __be32 *tmp; + + /* + * The location of the decoded data item is stable, + * so @p is OK to use. This is the common case. + */ + if (p != argp->xdr->scratch.iov_base) + return p; + + tmp = svcxdr_tmpalloc(argp, len); + if (!tmp) + return NULL; + memcpy(tmp, p, len); + return tmp; +} + /* * NFSv4 basic data type decoders */ @@ -183,11 +202,10 @@ nfsd4_decode_opaque(struct nfsd4_compoundargs *argp, struct xdr_netobj *o) p = xdr_inline_decode(argp->xdr, len); if (!p) return nfserr_bad_xdr; - o->data = svcxdr_tmpalloc(argp, len); + o->data = svcxdr_savemem(argp, p, len); if (!o->data) return nfserr_jukebox; o->len = len; - memcpy(o->data, p, len); return nfs_ok; } @@ -205,10 +223,9 @@ nfsd4_decode_component4(struct nfsd4_compoundargs *argp, char **namp, u32 *lenp) status = check_filename((char *)p, *lenp); if (status) return status; - *namp = svcxdr_tmpalloc(argp, *lenp); + *namp = svcxdr_savemem(argp, p, *lenp); if (!*namp) return nfserr_jukebox; - memcpy(*namp, p, *lenp); return nfs_ok; } @@ -1200,10 +1217,9 @@ nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh) p = xdr_inline_decode(argp->xdr, putfh->pf_fhlen); if (!p) return nfserr_bad_xdr; - putfh->pf_fhval = svcxdr_tmpalloc(argp, putfh->pf_fhlen); + putfh->pf_fhval = svcxdr_savemem(argp, p, putfh->pf_fhlen); if (!putfh->pf_fhval) return nfserr_jukebox; - memcpy(putfh->pf_fhval, p, putfh->pf_fhlen); return nfs_ok; } @@ -1318,24 +1334,20 @@ nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclient p = xdr_inline_decode(argp->xdr, setclientid->se_callback_netid_len); if (!p) return nfserr_bad_xdr; - setclientid->se_callback_netid_val = svcxdr_tmpalloc(argp, + setclientid->se_callback_netid_val = svcxdr_savemem(argp, p, setclientid->se_callback_netid_len); if (!setclientid->se_callback_netid_val) return nfserr_jukebox; - memcpy(setclientid->se_callback_netid_val, p, - setclientid->se_callback_netid_len); if (xdr_stream_decode_u32(argp->xdr, &setclientid->se_callback_addr_len) < 0) return nfserr_bad_xdr; p = xdr_inline_decode(argp->xdr, setclientid->se_callback_addr_len); if (!p) return nfserr_bad_xdr; - setclientid->se_callback_addr_val = svcxdr_tmpalloc(argp, + setclientid->se_callback_addr_val = svcxdr_savemem(argp, p, setclientid->se_callback_addr_len); if (!setclientid->se_callback_addr_val) return nfserr_jukebox; - memcpy(setclientid->se_callback_addr_val, p, - setclientid->se_callback_addr_len); if (xdr_stream_decode_u32(argp->xdr, &setclientid->se_callback_ident) < 0) return nfserr_bad_xdr; @@ -1375,10 +1387,9 @@ nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify p = xdr_inline_decode(argp->xdr, verify->ve_attrlen); if (!p) return nfserr_bad_xdr; - verify->ve_attrval = svcxdr_tmpalloc(argp, verify->ve_attrlen); + verify->ve_attrval = svcxdr_savemem(argp, p, verify->ve_attrlen); if (!verify->ve_attrval) return nfserr_jukebox; - memcpy(verify->ve_attrval, p, verify->ve_attrlen); return nfs_ok; } @@ -2333,10 +2344,9 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) p = xdr_inline_decode(argp->xdr, argp->taglen); if (!p) return 0; - argp->tag = svcxdr_tmpalloc(argp, argp->taglen); + argp->tag = svcxdr_savemem(argp, p, argp->taglen); if (!argp->tag) return 0; - memcpy(argp->tag, p, argp->taglen); max_reply += xdr_align_size(argp->taglen); } -- GitLab From 4aa1464acbe3697710279a4bd65cb4801ed30425 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 17 Dec 2020 14:29:11 -0800 Subject: [PATCH 0804/1894] spi: spi-geni-qcom: Fix geni_spi_isr() NULL dereference in timeout case In commit 7ba9bdcb91f6 ("spi: spi-geni-qcom: Don't keep a local state variable") we changed handle_fifo_timeout() so that we set "mas->cur_xfer" to NULL to make absolutely sure that we don't mess with the buffers from the previous transfer in the timeout case. Unfortunately, this caused the IRQ handler to dereference NULL in some cases. One case: CPU0 CPU1 ---- ---- setup_fifo_xfer() geni_se_setup_m_cmd() ... handle_fifo_timeout() spin_lock_irq(mas->lock) mas->cur_xfer = NULL geni_se_cancel_m_cmd() spin_unlock_irq(mas->lock) geni_spi_isr() spin_lock(mas->lock) if (m_irq & M_RX_FIFO_WATERMARK_EN) geni_spi_handle_rx() mas->cur_xfer NULL dereference! tl;dr: Seriously delayed interrupts for RX/TX can lead to timeout handling setting mas->cur_xfer to NULL. Let's check for the NULL transfer in the TX and RX cases and reset the watermark or clear out the fifo respectively to put the hardware back into a sane state. NOTE: things still could get confused if we get timeouts all the way through handle_fifo_timeout() and then start a new transfer because interrupts from the old transfer / cancel / abort could still be pending. A future patch will help this corner case. Fixes: 561de45f72bd ("spi: spi-geni-qcom: Add SPI driver support for GENI based QUP") Signed-off-by: Douglas Anderson Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/20201217142842.v3.1.I99ee04f0cb823415df59bd4f550d6ff5756e43d6@changeid Signed-off-by: Mark Brown --- drivers/spi/spi-geni-qcom.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c index 512e925d5ea48..096edfbde4513 100644 --- a/drivers/spi/spi-geni-qcom.c +++ b/drivers/spi/spi-geni-qcom.c @@ -354,6 +354,12 @@ static bool geni_spi_handle_tx(struct spi_geni_master *mas) unsigned int bytes_per_fifo_word = geni_byte_per_fifo_word(mas); unsigned int i = 0; + /* Stop the watermark IRQ if nothing to send */ + if (!mas->cur_xfer) { + writel(0, se->base + SE_GENI_TX_WATERMARK_REG); + return false; + } + max_bytes = (mas->tx_fifo_depth - mas->tx_wm) * bytes_per_fifo_word; if (mas->tx_rem_bytes < max_bytes) max_bytes = mas->tx_rem_bytes; @@ -396,6 +402,14 @@ static void geni_spi_handle_rx(struct spi_geni_master *mas) if (rx_last_byte_valid && rx_last_byte_valid < 4) rx_bytes -= bytes_per_fifo_word - rx_last_byte_valid; } + + /* Clear out the FIFO and bail if nowhere to put it */ + if (!mas->cur_xfer) { + for (i = 0; i < DIV_ROUND_UP(rx_bytes, bytes_per_fifo_word); i++) + readl(se->base + SE_GENI_RX_FIFOn); + return; + } + if (mas->rx_rem_bytes < rx_bytes) rx_bytes = mas->rx_rem_bytes; -- GitLab From 690d8b917bbe64772cb0b652311bcd50908aea6b Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 17 Dec 2020 14:29:12 -0800 Subject: [PATCH 0805/1894] spi: spi-geni-qcom: Fail new xfers if xfer/cancel/abort pending If we got a timeout when trying to send an abort command then it means that we just got 3 timeouts in a row: 1. The original timeout that caused handle_fifo_timeout() to be called. 2. A one second timeout waiting for the cancel command to finish. 3. A one second timeout waiting for the abort command to finish. SPI is clocked by the controller, so nothing (aside from a hardware fault or a totally broken sequencer) should be causing the actual commands to fail in hardware. However, even though the hardware itself is not expected to fail (and it'd be hard to predict how we should handle things if it did), it's easy to hit the timeout case by simply blocking our interrupt handler from running for a long period of time. Obviously the system is in pretty bad shape if a interrupt handler is blocked for > 2 seconds, but there are certainly bugs (even bugs in other unrelated drivers) that can make this happen. Let's make things a bit more robust against this case. If we fail to abort we'll set a flag and then we'll block all future transfers until we have no more interrupts pending. Fixes: 561de45f72bd ("spi: spi-geni-qcom: Add SPI driver support for GENI based QUP") Signed-off-by: Douglas Anderson Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/20201217142842.v3.2.Ibade998ed587e070388b4bf58801f1107a40eb53@changeid Signed-off-by: Mark Brown --- drivers/spi/spi-geni-qcom.c | 59 +++++++++++++++++++++++++++++++++++-- 1 file changed, 57 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c index 096edfbde4513..32c053705dece 100644 --- a/drivers/spi/spi-geni-qcom.c +++ b/drivers/spi/spi-geni-qcom.c @@ -83,6 +83,7 @@ struct spi_geni_master { spinlock_t lock; int irq; bool cs_flag; + bool abort_failed; }; static int get_spi_clk_cfg(unsigned int speed_hz, @@ -141,8 +142,49 @@ static void handle_fifo_timeout(struct spi_master *spi, spin_unlock_irq(&mas->lock); time_left = wait_for_completion_timeout(&mas->abort_done, HZ); - if (!time_left) + if (!time_left) { dev_err(mas->dev, "Failed to cancel/abort m_cmd\n"); + + /* + * No need for a lock since SPI core has a lock and we never + * access this from an interrupt. + */ + mas->abort_failed = true; + } +} + +static bool spi_geni_is_abort_still_pending(struct spi_geni_master *mas) +{ + struct geni_se *se = &mas->se; + u32 m_irq, m_irq_en; + + if (!mas->abort_failed) + return false; + + /* + * The only known case where a transfer times out and then a cancel + * times out then an abort times out is if something is blocking our + * interrupt handler from running. Avoid starting any new transfers + * until that sorts itself out. + */ + spin_lock_irq(&mas->lock); + m_irq = readl(se->base + SE_GENI_M_IRQ_STATUS); + m_irq_en = readl(se->base + SE_GENI_M_IRQ_EN); + spin_unlock_irq(&mas->lock); + + if (m_irq & m_irq_en) { + dev_err(mas->dev, "Interrupts pending after abort: %#010x\n", + m_irq & m_irq_en); + return true; + } + + /* + * If we're here the problem resolved itself so no need to check more + * on future transfers. + */ + mas->abort_failed = false; + + return false; } static void spi_geni_set_cs(struct spi_device *slv, bool set_flag) @@ -158,9 +200,15 @@ static void spi_geni_set_cs(struct spi_device *slv, bool set_flag) if (set_flag == mas->cs_flag) return; + pm_runtime_get_sync(mas->dev); + + if (spi_geni_is_abort_still_pending(mas)) { + dev_err(mas->dev, "Can't set chip select\n"); + goto exit; + } + mas->cs_flag = set_flag; - pm_runtime_get_sync(mas->dev); spin_lock_irq(&mas->lock); reinit_completion(&mas->cs_done); if (set_flag) @@ -173,6 +221,7 @@ static void spi_geni_set_cs(struct spi_device *slv, bool set_flag) if (!time_left) handle_fifo_timeout(spi, NULL); +exit: pm_runtime_put(mas->dev); } @@ -280,6 +329,9 @@ static int spi_geni_prepare_message(struct spi_master *spi, int ret; struct spi_geni_master *mas = spi_master_get_devdata(spi); + if (spi_geni_is_abort_still_pending(mas)) + return -EBUSY; + ret = setup_fifo_params(spi_msg->spi, spi); if (ret) dev_err(mas->dev, "Couldn't select mode %d\n", ret); @@ -509,6 +561,9 @@ static int spi_geni_transfer_one(struct spi_master *spi, { struct spi_geni_master *mas = spi_master_get_devdata(spi); + if (spi_geni_is_abort_still_pending(mas)) + return -EBUSY; + /* Terminate and return success for 0 byte length transfer */ if (!xfer->len) return 0; -- GitLab From 3d7d916f9bc98ce88272b3e4405c7c685afbfcd6 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 17 Dec 2020 14:29:13 -0800 Subject: [PATCH 0806/1894] spi: spi-geni-qcom: Don't try to set CS if an xfer is pending If we get a timeout sending then this happens: spi_transfer_one_message() ->transfer_one() AKA spi_geni_transfer_one() setup_fifo_xfer() mas->cur_xfer = non-NULL spi_transfer_wait() => TIMES OUT if (msg->status != -EINPROGRESS) goto out if (ret != 0 ...) spi_set_cs() ->set_cs AKA spi_geni_set_cs() # mas->cur_xfer is non-NULL The above happens _before_ the SPI core calls ->handle_err() AKA handle_fifo_timeout(). Unfortunately that won't work so well on geni. If we got a timeout transferring then it's likely that our interrupt handler is blocked, but we need that same interrupt handler to run and the command channel to be unblocked in order to adjust the chip select. Trying to set the chip select doesn't crash us but ends up confusing our state machine and leads to messages like: Premature done. rx_rem = 32 bpw8 Let's just drop the chip select request in this case. We can detect the case because cur_xfer is non-NULL--it would have been set to NULL in the interrupt handler if the previous transfer had finished. Sure, we might leave the chip select in the wrong state but it's likely it was going to fail anyway and this avoids getting the driver even more confused about what it's doing. The SPI core in general assumes that setting chip select is a simple operation that doesn't fail. Yet another reason to just reconfigure the chip select line as GPIOs. Signed-off-by: Douglas Anderson Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/20201217142842.v3.3.I07afdedcc49655c5d26880f8df9170aac5792378@changeid Signed-off-by: Mark Brown --- drivers/spi/spi-geni-qcom.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c index 32c053705dece..365565d3a9983 100644 --- a/drivers/spi/spi-geni-qcom.c +++ b/drivers/spi/spi-geni-qcom.c @@ -207,9 +207,14 @@ static void spi_geni_set_cs(struct spi_device *slv, bool set_flag) goto exit; } - mas->cs_flag = set_flag; - spin_lock_irq(&mas->lock); + if (mas->cur_xfer) { + dev_err(mas->dev, "Can't set CS when prev xfer running\n"); + spin_unlock_irq(&mas->lock); + goto exit; + } + + mas->cs_flag = set_flag; reinit_completion(&mas->cs_done); if (set_flag) geni_se_setup_m_cmd(se, SPI_CS_ASSERT, 0); -- GitLab From 17fa81aa702ec118f2b835715897041675b06336 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 17 Dec 2020 14:29:14 -0800 Subject: [PATCH 0807/1894] spi: spi-geni-qcom: Print an error when we timeout setting the CS If we're using geni to manage the chip select line (don't do it--use a GPIO!) and we happen to get a timeout waiting for the chip select command to be completed, no errors are printed even though things might not be in the best shape. Let's add a print. Signed-off-by: Douglas Anderson Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/20201217142842.v3.4.I666b37646de9652cef438ac7c2c6c2053367fc6b@changeid Signed-off-by: Mark Brown --- drivers/spi/spi-geni-qcom.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c index 365565d3a9983..881f645661cc6 100644 --- a/drivers/spi/spi-geni-qcom.c +++ b/drivers/spi/spi-geni-qcom.c @@ -223,8 +223,10 @@ static void spi_geni_set_cs(struct spi_device *slv, bool set_flag) spin_unlock_irq(&mas->lock); time_left = wait_for_completion_timeout(&mas->cs_done, HZ); - if (!time_left) + if (!time_left) { + dev_warn(mas->dev, "Timeout setting chip select\n"); handle_fifo_timeout(spi, NULL); + } exit: pm_runtime_put(mas->dev); -- GitLab From 3970acf7ddb9aa01c4bdeef197495157c98a15f6 Mon Sep 17 00:00:00 2001 From: Boris Protopopov Date: Fri, 18 Dec 2020 11:30:12 -0600 Subject: [PATCH 0808/1894] SMB3: Add support for getting and setting SACLs Add SYSTEM_SECURITY access flag and use with smb2 when opening files for getting/setting SACLs. Add "system.cifs_ntsd_full" extended attribute to allow user-space access to the functionality. Avoid multiple server calls when setting owner, DACL, and SACL. Signed-off-by: Boris Protopopov Signed-off-by: Steve French --- fs/cifs/cifsacl.c | 15 ++++---- fs/cifs/cifsglob.h | 4 +-- fs/cifs/cifspdu.h | 2 ++ fs/cifs/cifsproto.h | 4 +-- fs/cifs/smb2ops.c | 31 ++++++++++------- fs/cifs/smb2pdu.c | 5 ++- fs/cifs/smb2proto.h | 4 +-- fs/cifs/xattr.c | 83 +++++++++++++++++++++++++++++++++------------ 8 files changed, 100 insertions(+), 48 deletions(-) diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 2f21f89871ccb..562913e2b3f2e 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -1195,7 +1195,8 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd, } struct cifs_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb, - const struct cifs_fid *cifsfid, u32 *pacllen) + const struct cifs_fid *cifsfid, u32 *pacllen, + u32 __maybe_unused unused) { struct cifs_ntsd *pntsd = NULL; unsigned int xid; @@ -1263,7 +1264,7 @@ static struct cifs_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, /* Retrieve an ACL from the server */ struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb, struct inode *inode, const char *path, - u32 *pacllen) + u32 *pacllen, u32 info) { struct cifs_ntsd *pntsd = NULL; struct cifsFileInfo *open_file = NULL; @@ -1273,7 +1274,7 @@ struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb, if (!open_file) return get_cifs_acl_by_path(cifs_sb, path, pacllen); - pntsd = get_cifs_acl_by_fid(cifs_sb, &open_file->fid, pacllen); + pntsd = get_cifs_acl_by_fid(cifs_sb, &open_file->fid, pacllen, info); cifsFileInfo_put(open_file); return pntsd; } @@ -1338,6 +1339,7 @@ cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr, int rc = 0; struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); struct smb_version_operations *ops; + const u32 info = 0; cifs_dbg(NOISY, "converting ACL to mode for %s\n", path); @@ -1347,9 +1349,9 @@ cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr, ops = tlink_tcon(tlink)->ses->server->ops; if (pfid && (ops->get_acl_by_fid)) - pntsd = ops->get_acl_by_fid(cifs_sb, pfid, &acllen); + pntsd = ops->get_acl_by_fid(cifs_sb, pfid, &acllen, info); else if (ops->get_acl) - pntsd = ops->get_acl(cifs_sb, inode, path, &acllen); + pntsd = ops->get_acl(cifs_sb, inode, path, &acllen, info); else { cifs_put_tlink(tlink); return -EOPNOTSUPP; @@ -1388,6 +1390,7 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode, struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); struct smb_version_operations *ops; bool mode_from_sid, id_from_sid; + const u32 info = 0; if (IS_ERR(tlink)) return PTR_ERR(tlink); @@ -1403,7 +1406,7 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode, return -EOPNOTSUPP; } - pntsd = ops->get_acl(cifs_sb, inode, path, &secdesclen); + pntsd = ops->get_acl(cifs_sb, inode, path, &secdesclen, info); if (IS_ERR(pntsd)) { rc = PTR_ERR(pntsd); cifs_dbg(VFS, "%s: error %d getting sec desc\n", __func__, rc); diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 720d0f6a982d3..50fcb65920e80 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -456,9 +456,9 @@ struct smb_version_operations { const char *, const void *, const __u16, const struct nls_table *, struct cifs_sb_info *); struct cifs_ntsd * (*get_acl)(struct cifs_sb_info *, struct inode *, - const char *, u32 *); + const char *, u32 *, u32); struct cifs_ntsd * (*get_acl_by_fid)(struct cifs_sb_info *, - const struct cifs_fid *, u32 *); + const struct cifs_fid *, u32 *, u32); int (*set_acl)(struct cifs_ntsd *, __u32, struct inode *, const char *, int); /* writepages retry size */ diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index ce51183ecaf40..64fe5a47b5e87 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -240,6 +240,8 @@ #define SYNCHRONIZE 0x00100000 /* The file handle can waited on to */ /* synchronize with the completion */ /* of an input/output request */ +#define SYSTEM_SECURITY 0x01000000 /* The system access control list */ + /* can be read and changed */ #define GENERIC_ALL 0x10000000 #define GENERIC_EXECUTE 0x20000000 #define GENERIC_WRITE 0x40000000 diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index bd1c9b038568d..340ff81ee87bf 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -218,9 +218,9 @@ extern int cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, extern int id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode, kuid_t uid, kgid_t gid); extern struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *, struct inode *, - const char *, u32 *); + const char *, u32 *, u32); extern struct cifs_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *, - const struct cifs_fid *, u32 *); + const struct cifs_fid *, u32 *, u32); extern int set_cifs_acl(struct cifs_ntsd *, __u32, struct inode *, const char *, int); extern unsigned int setup_authusers_ACE(struct cifs_ace *pace); diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 949cd1177147f..4a1761139e002 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -3214,7 +3214,7 @@ smb2_query_reparse_tag(const unsigned int xid, struct cifs_tcon *tcon, static struct cifs_ntsd * get_smb2_acl_by_fid(struct cifs_sb_info *cifs_sb, - const struct cifs_fid *cifsfid, u32 *pacllen) + const struct cifs_fid *cifsfid, u32 *pacllen, u32 info) { struct cifs_ntsd *pntsd = NULL; unsigned int xid; @@ -3228,7 +3228,8 @@ get_smb2_acl_by_fid(struct cifs_sb_info *cifs_sb, cifs_dbg(FYI, "trying to get acl\n"); rc = SMB2_query_acl(xid, tlink_tcon(tlink), cifsfid->persistent_fid, - cifsfid->volatile_fid, (void **)&pntsd, pacllen); + cifsfid->volatile_fid, (void **)&pntsd, pacllen, + info); free_xid(xid); cifs_put_tlink(tlink); @@ -3242,7 +3243,7 @@ get_smb2_acl_by_fid(struct cifs_sb_info *cifs_sb, static struct cifs_ntsd * get_smb2_acl_by_path(struct cifs_sb_info *cifs_sb, - const char *path, u32 *pacllen) + const char *path, u32 *pacllen, u32 info) { struct cifs_ntsd *pntsd = NULL; u8 oplock = SMB2_OPLOCK_LEVEL_NONE; @@ -3280,12 +3281,16 @@ get_smb2_acl_by_path(struct cifs_sb_info *cifs_sb, oparms.fid = &fid; oparms.reconnect = false; + if (info & SACL_SECINFO) + oparms.desired_access |= SYSTEM_SECURITY; + rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL, NULL, NULL); kfree(utf16_path); if (!rc) { rc = SMB2_query_acl(xid, tlink_tcon(tlink), fid.persistent_fid, - fid.volatile_fid, (void **)&pntsd, pacllen); + fid.volatile_fid, (void **)&pntsd, pacllen, + info); SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); } @@ -3319,10 +3324,12 @@ set_smb2_acl(struct cifs_ntsd *pnntsd, __u32 acllen, tcon = tlink_tcon(tlink); xid = get_xid(); - if (aclflag == CIFS_ACL_OWNER || aclflag == CIFS_ACL_GROUP) - access_flags = WRITE_OWNER; - else - access_flags = WRITE_DAC; + if (aclflag & CIFS_ACL_OWNER || aclflag & CIFS_ACL_GROUP) + access_flags |= WRITE_OWNER; + if (aclflag & CIFS_ACL_SACL) + access_flags |= SYSTEM_SECURITY; + if (aclflag & CIFS_ACL_DACL) + access_flags |= WRITE_DAC; utf16_path = cifs_convert_path_to_utf16(path, cifs_sb); if (!utf16_path) { @@ -3356,8 +3363,8 @@ set_smb2_acl(struct cifs_ntsd *pnntsd, __u32 acllen, /* Retrieve an ACL from the server */ static struct cifs_ntsd * get_smb2_acl(struct cifs_sb_info *cifs_sb, - struct inode *inode, const char *path, - u32 *pacllen) + struct inode *inode, const char *path, + u32 *pacllen, u32 info) { struct cifs_ntsd *pntsd = NULL; struct cifsFileInfo *open_file = NULL; @@ -3365,9 +3372,9 @@ get_smb2_acl(struct cifs_sb_info *cifs_sb, if (inode) open_file = find_readable_file(CIFS_I(inode), true); if (!open_file) - return get_smb2_acl_by_path(cifs_sb, path, pacllen); + return get_smb2_acl_by_path(cifs_sb, path, pacllen, info); - pntsd = get_smb2_acl_by_fid(cifs_sb, &open_file->fid, pacllen); + pntsd = get_smb2_acl_by_fid(cifs_sb, &open_file->fid, pacllen, info); cifsFileInfo_put(open_file); return pntsd; } diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index fc06c762fbbf6..202d8742d1493 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -3479,10 +3479,9 @@ SMB311_posix_query_info(const unsigned int xid, struct cifs_tcon *tcon, int SMB2_query_acl(const unsigned int xid, struct cifs_tcon *tcon, - u64 persistent_fid, u64 volatile_fid, - void **data, u32 *plen) + u64 persistent_fid, u64 volatile_fid, + void **data, u32 *plen, u32 additional_info) { - __u32 additional_info = OWNER_SECINFO | GROUP_SECINFO | DACL_SECINFO; *plen = 0; return query_info(xid, tcon, persistent_fid, volatile_fid, diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index d4110447ee3a8..9565e27681a54 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -200,8 +200,8 @@ extern int SMB2_query_info_init(struct cifs_tcon *tcon, size_t input_len, void *input); extern void SMB2_query_info_free(struct smb_rqst *rqst); extern int SMB2_query_acl(const unsigned int xid, struct cifs_tcon *tcon, - u64 persistent_file_id, u64 volatile_file_id, - void **data, unsigned int *plen); + u64 persistent_file_id, u64 volatile_file_id, + void **data, unsigned int *plen, u32 info); extern int SMB2_get_srv_num(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, __le64 *uniqueid); diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index b8299173ea7e3..9318a2acf4ee5 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c @@ -34,6 +34,7 @@ #define MAX_EA_VALUE_SIZE CIFSMaxBufSize #define CIFS_XATTR_CIFS_ACL "system.cifs_acl" /* DACL only */ #define CIFS_XATTR_CIFS_NTSD "system.cifs_ntsd" /* owner plus DACL */ +#define CIFS_XATTR_CIFS_NTSD_FULL "system.cifs_ntsd_full" /* owner/DACL/SACL */ #define CIFS_XATTR_ATTRIB "cifs.dosattrib" /* full name: user.cifs.dosattrib */ #define CIFS_XATTR_CREATETIME "cifs.creationtime" /* user.cifs.creationtime */ /* @@ -43,12 +44,13 @@ */ #define SMB3_XATTR_CIFS_ACL "system.smb3_acl" /* DACL only */ #define SMB3_XATTR_CIFS_NTSD "system.smb3_ntsd" /* owner plus DACL */ +#define SMB3_XATTR_CIFS_NTSD_FULL "system.smb3_ntsd_full" /* owner/DACL/SACL */ #define SMB3_XATTR_ATTRIB "smb3.dosattrib" /* full name: user.smb3.dosattrib */ #define SMB3_XATTR_CREATETIME "smb3.creationtime" /* user.smb3.creationtime */ /* BB need to add server (Samba e.g) support for security and trusted prefix */ enum { XATTR_USER, XATTR_CIFS_ACL, XATTR_ACL_ACCESS, XATTR_ACL_DEFAULT, - XATTR_CIFS_NTSD }; + XATTR_CIFS_NTSD, XATTR_CIFS_NTSD_FULL }; static int cifs_attrib_set(unsigned int xid, struct cifs_tcon *pTcon, struct inode *inode, char *full_path, @@ -164,7 +166,8 @@ static int cifs_xattr_set(const struct xattr_handler *handler, break; case XATTR_CIFS_ACL: - case XATTR_CIFS_NTSD: { + case XATTR_CIFS_NTSD: + case XATTR_CIFS_NTSD_FULL: { struct cifs_ntsd *pacl; if (!value) @@ -174,23 +177,27 @@ static int cifs_xattr_set(const struct xattr_handler *handler, rc = -ENOMEM; } else { memcpy(pacl, value, size); - if (value && - pTcon->ses->server->ops->set_acl) { + if (pTcon->ses->server->ops->set_acl) { + int aclflags = 0; rc = 0; - if (handler->flags == XATTR_CIFS_NTSD) { - /* set owner and DACL */ - rc = pTcon->ses->server->ops->set_acl( - pacl, size, inode, - full_path, - CIFS_ACL_OWNER); - } - if (rc == 0) { - /* set DACL */ - rc = pTcon->ses->server->ops->set_acl( - pacl, size, inode, - full_path, - CIFS_ACL_DACL); + + switch (handler->flags) { + case XATTR_CIFS_NTSD_FULL: + aclflags = (CIFS_ACL_OWNER | + CIFS_ACL_DACL | + CIFS_ACL_SACL); + break; + case XATTR_CIFS_NTSD: + aclflags = (CIFS_ACL_OWNER | + CIFS_ACL_DACL); + break; + case XATTR_CIFS_ACL: + default: + aclflags = CIFS_ACL_DACL; } + + rc = pTcon->ses->server->ops->set_acl(pacl, + size, inode, full_path, aclflags); } else { rc = -EOPNOTSUPP; } @@ -327,16 +334,27 @@ static int cifs_xattr_get(const struct xattr_handler *handler, break; case XATTR_CIFS_ACL: - case XATTR_CIFS_NTSD: { - /* the whole ntsd is fetched regardless */ - u32 acllen; + case XATTR_CIFS_NTSD: + case XATTR_CIFS_NTSD_FULL: { + /* + * fetch owner, DACL, and SACL if asked for full descriptor, + * fetch owner and DACL otherwise + */ + u32 acllen, additional_info = 0; struct cifs_ntsd *pacl; if (pTcon->ses->server->ops->get_acl == NULL) goto out; /* rc already EOPNOTSUPP */ + if (handler->flags == XATTR_CIFS_NTSD_FULL) { + additional_info = OWNER_SECINFO | GROUP_SECINFO | + DACL_SECINFO | SACL_SECINFO; + } else { + additional_info = OWNER_SECINFO | GROUP_SECINFO | + DACL_SECINFO; + } pacl = pTcon->ses->server->ops->get_acl(cifs_sb, - inode, full_path, &acllen); + inode, full_path, &acllen, additional_info); if (IS_ERR(pacl)) { rc = PTR_ERR(pacl); cifs_dbg(VFS, "%s: error %zd getting sec desc\n", @@ -486,6 +504,27 @@ static const struct xattr_handler smb3_ntsd_xattr_handler = { .set = cifs_xattr_set, }; +static const struct xattr_handler cifs_cifs_ntsd_full_xattr_handler = { + .name = CIFS_XATTR_CIFS_NTSD_FULL, + .flags = XATTR_CIFS_NTSD_FULL, + .get = cifs_xattr_get, + .set = cifs_xattr_set, +}; + +/* + * Although this is just an alias for the above, need to move away from + * confusing users and using the 20 year old term 'cifs' when it is no + * longer secure and was replaced by SMB2/SMB3 a long time ago, and + * SMB3 and later are highly secure. + */ +static const struct xattr_handler smb3_ntsd_full_xattr_handler = { + .name = SMB3_XATTR_CIFS_NTSD_FULL, + .flags = XATTR_CIFS_NTSD_FULL, + .get = cifs_xattr_get, + .set = cifs_xattr_set, +}; + + static const struct xattr_handler cifs_posix_acl_access_xattr_handler = { .name = XATTR_NAME_POSIX_ACL_ACCESS, .flags = XATTR_ACL_ACCESS, @@ -507,6 +546,8 @@ const struct xattr_handler *cifs_xattr_handlers[] = { &smb3_acl_xattr_handler, /* alias for above since avoiding "cifs" */ &cifs_cifs_ntsd_xattr_handler, &smb3_ntsd_xattr_handler, /* alias for above since avoiding "cifs" */ + &cifs_cifs_ntsd_full_xattr_handler, + &smb3_ntsd_full_xattr_handler, /* alias for above since avoiding "cifs" */ &cifs_posix_acl_access_xattr_handler, &cifs_posix_acl_default_xattr_handler, NULL -- GitLab From abdcd06c4dedbcabaec68c433c7f53f33307811f Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Wed, 16 Dec 2020 09:28:04 +0200 Subject: [PATCH 0809/1894] net: af_packet: fix procfs header for 64-bit pointers On 64-bit systems the packet procfs header field names following 'sk' are not aligned correctly: sk RefCnt Type Proto Iface R Rmem User Inode 00000000605d2c64 3 3 0003 7 1 450880 0 16643 00000000080e9b80 2 2 0000 0 0 0 0 17404 00000000b23b8a00 2 2 0000 0 0 0 0 17421 ... With this change field names are correctly aligned: sk RefCnt Type Proto Iface R Rmem User Inode 000000005c3b1d97 3 3 0003 7 1 21568 0 16178 000000007be55bb7 3 3 fbce 8 1 0 0 16250 00000000be62127d 3 3 fbcd 8 1 0 0 16254 ... Signed-off-by: Baruch Siach Link: https://lore.kernel.org/r/54917251d8433735d9a24e935a6cb8eb88b4058a.1608103684.git.baruch@tkos.co.il Signed-off-by: Jakub Kicinski --- net/packet/af_packet.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index de8e8dbbdeb8c..6bbc7a4485938 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4595,7 +4595,9 @@ static void packet_seq_stop(struct seq_file *seq, void *v) static int packet_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) - seq_puts(seq, "sk RefCnt Type Proto Iface R Rmem User Inode\n"); + seq_printf(seq, + "%*sRefCnt Type Proto Iface R Rmem User Inode\n", + IS_ENABLED(CONFIG_64BIT) ? -17 : -9, "sk"); else { struct sock *s = sk_entry(v); const struct packet_sock *po = pkt_sk(s); -- GitLab From 4bba4c4bb09ad4a2b70836725e08439c86d8f9e4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 18 Dec 2020 09:59:05 -0300 Subject: [PATCH 0810/1894] tools headers: Get tools's linux/compiler.h closer to the kernel's We're cherry picking stuff from the kernel to allow for the other headers that we keep in sync via tools/perf/check-headers.sh to work, so introduce linux/compiler_types.h and from there get the compiler specific stuff. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/compiler.h | 4 +--- tools/include/linux/compiler_types.h | 10 ++++++++++ 2 files changed, 11 insertions(+), 3 deletions(-) create mode 100644 tools/include/linux/compiler_types.h diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index d22a974372c0e..ff872dc2637c5 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -2,9 +2,7 @@ #ifndef _TOOLS_LINUX_COMPILER_H_ #define _TOOLS_LINUX_COMPILER_H_ -#ifdef __GNUC__ -#include -#endif +#include #ifndef __compiletime_error # define __compiletime_error(message) diff --git a/tools/include/linux/compiler_types.h b/tools/include/linux/compiler_types.h new file mode 100644 index 0000000000000..31fc2caa758a0 --- /dev/null +++ b/tools/include/linux/compiler_types.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_COMPILER_TYPES_H +#define __LINUX_COMPILER_TYPES_H + +/* Compiler specific macros. */ +#ifdef __GNUC__ +#include +#endif + +#endif /* __LINUX_COMPILER_TYPES_H */ -- GitLab From ffb9beb13e8daf3fcb6bab470d07962b05d619b7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 18 Dec 2020 10:16:08 -0300 Subject: [PATCH 0811/1894] tools headers: Add conditional __has_builtin() As it'll be used by the ctype.h sync with its kernel source original. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/compiler_types.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tools/include/linux/compiler_types.h b/tools/include/linux/compiler_types.h index 31fc2caa758a0..feea09029f610 100644 --- a/tools/include/linux/compiler_types.h +++ b/tools/include/linux/compiler_types.h @@ -2,6 +2,17 @@ #ifndef __LINUX_COMPILER_TYPES_H #define __LINUX_COMPILER_TYPES_H +/* Builtins */ + +/* + * __has_builtin is supported on gcc >= 10, clang >= 3 and icc >= 21. + * In the meantime, to support gcc < 10, we implement __has_builtin + * by hand. + */ +#ifndef __has_builtin +#define __has_builtin(x) (0) +#endif + /* Compiler specific macros. */ #ifdef __GNUC__ #include -- GitLab From 23cd9543a52b96ac75d666eee3576b47f1901248 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 17 Dec 2020 14:41:21 -0300 Subject: [PATCH 0812/1894] tools headers: Update linux/ctype.h with the kernel sources To pick up the changes in: caabdd0f59a9771e ("ctype.h: remove duplicate isdigit() helper") Addressing this perf build warning: Warning: Kernel ABI header at 'tools/include/linux/ctype.h' differs from latest version at 'include/linux/ctype.h' diff -u tools/include/linux/ctype.h include/linux/ctype.h And we need to continue using the combination of: inline __isdigit() #define isdigit() __isdigit When the __has_builtin() thing isn't available, as it is a builtin in older systems with it as a builtin but with compilers not hacinv __has_builtin(), rendering the __has_builtin() check useless otherwise. Cc: Adrian Hunter Cc: Arnd Bergmann Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/ctype.h | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/tools/include/linux/ctype.h b/tools/include/linux/ctype.h index 310090b4c4749..29ed3fe94404f 100644 --- a/tools/include/linux/ctype.h +++ b/tools/include/linux/ctype.h @@ -2,6 +2,8 @@ #ifndef _LINUX_CTYPE_H #define _LINUX_CTYPE_H +#include + /* * NOTE! This ctype does not handle EOF like the standard C * library is required to. @@ -23,11 +25,6 @@ extern const unsigned char _ctype[]; #define isalnum(c) ((__ismask(c)&(_U|_L|_D)) != 0) #define isalpha(c) ((__ismask(c)&(_U|_L)) != 0) #define iscntrl(c) ((__ismask(c)&(_C)) != 0) -static inline int __isdigit(int c) -{ - return '0' <= c && c <= '9'; -} -#define isdigit(c) __isdigit(c) #define isgraph(c) ((__ismask(c)&(_P|_U|_L|_D)) != 0) #define islower(c) ((__ismask(c)&(_L)) != 0) #define isprint(c) ((__ismask(c)&(_P|_U|_L|_D|_SP)) != 0) @@ -40,6 +37,16 @@ static inline int __isdigit(int c) #define isascii(c) (((unsigned char)(c))<=0x7f) #define toascii(c) (((unsigned char)(c))&0x7f) +#if __has_builtin(__builtin_isdigit) +#define isdigit(c) __builtin_isdigit(c) +#else +static inline int __isdigit(int c) +{ + return '0' <= c && c <= '9'; +} +#define isdigit(c) __isdigit(c) +#endif + static inline unsigned char __tolower(unsigned char c) { if (isupper(c)) -- GitLab From eb2842da77e1f7a3c46033f930524ab76dffe67a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 17 Dec 2020 14:42:48 -0300 Subject: [PATCH 0813/1894] perf trace beauty: Update copy of linux/socket.h with the kernel sources This just triggers the rebuilding of the syscall beautifiers that extract patterns from this file due to this cset: b713c195d5933227 ("net: provide __sys_shutdown_sock() that takes a socket") After updating it: CC /tmp/build/perf/trace/beauty/sockaddr.o Addressing this perf build warning: Warning: Kernel ABI header at 'tools/perf/trace/beauty/include/linux/socket.h' differs from latest version at 'include/linux/socket.h' diff -u tools/perf/trace/beauty/include/linux/socket.h include/linux/socket.h Cc: Adrian Hunter Cc: Ian Rogers Cc: Jens Axboe Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/include/linux/socket.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h index e9cb30d8cbfb1..385894b4a8bba 100644 --- a/tools/perf/trace/beauty/include/linux/socket.h +++ b/tools/perf/trace/beauty/include/linux/socket.h @@ -436,6 +436,7 @@ extern int __sys_getpeername(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len); extern int __sys_socketpair(int family, int type, int protocol, int __user *usockvec); +extern int __sys_shutdown_sock(struct socket *sock, int how); extern int __sys_shutdown(int fd, int how); extern struct ns_common *get_net_ns(struct ns_common *ns); -- GitLab From e9bde94f1eb53c5721ba8e477dee837632fedebe Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 17 Dec 2020 14:46:24 -0300 Subject: [PATCH 0814/1894] tools arch x86: Sync the msr-index.h copy with the kernel sources To pick up the changes in: d205e0f1426e0f99 ("x86/{cpufeatures,msr}: Add Intel SGX Launch Control hardware bits") e7b6385b01d8e9fb ("x86/cpufeatures: Add Intel SGX hardware bits") 43756a298928c9a4 ("powercap: Add AMD Fam17h RAPL support") 298ed2b31f552806 ("x86/msr-index: sort AMD RAPL MSRs by address") 68299a42f8428853 ("x86/mce: Enable additional error logging on certain Intel CPUs") That cause these changes in tooling: $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > before $ cp arch/x86/include/asm/msr-index.h tools/arch/x86/include/asm/msr-index.h $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > after $ diff -u before after --- before 2020-12-17 14:45:49.036994450 -0300 +++ after 2020-12-17 14:46:01.654256639 -0300 @@ -22,6 +22,10 @@ [0x00000060] = "LBR_CORE_TO", [0x00000079] = "IA32_UCODE_WRITE", [0x0000008b] = "IA32_UCODE_REV", + [0x0000008C] = "IA32_SGXLEPUBKEYHASH0", + [0x0000008D] = "IA32_SGXLEPUBKEYHASH1", + [0x0000008E] = "IA32_SGXLEPUBKEYHASH2", + [0x0000008F] = "IA32_SGXLEPUBKEYHASH3", [0x0000009b] = "IA32_SMM_MONITOR_CTL", [0x0000009e] = "IA32_SMBASE", [0x000000c1] = "IA32_PERFCTR0", @@ -59,6 +63,7 @@ [0x00000179] = "IA32_MCG_CAP", [0x0000017a] = "IA32_MCG_STATUS", [0x0000017b] = "IA32_MCG_CTL", + [0x0000017f] = "ERROR_CONTROL", [0x00000180] = "IA32_MCG_EAX", [0x00000181] = "IA32_MCG_EBX", [0x00000182] = "IA32_MCG_ECX", @@ -294,6 +299,7 @@ [0xc0010241 - x86_AMD_V_KVM_MSRs_offset] = "F15H_NB_PERF_CTR", [0xc0010280 - x86_AMD_V_KVM_MSRs_offset] = "F15H_PTSC", [0xc0010299 - x86_AMD_V_KVM_MSRs_offset] = "AMD_RAPL_POWER_UNIT", + [0xc001029a - x86_AMD_V_KVM_MSRs_offset] = "AMD_CORE_ENERGY_STATUS", [0xc001029b - x86_AMD_V_KVM_MSRs_offset] = "AMD_PKG_ENERGY_STATUS", [0xc00102f0 - x86_AMD_V_KVM_MSRs_offset] = "AMD_PPIN_CTL", [0xc00102f1 - x86_AMD_V_KVM_MSRs_offset] = "AMD_PPIN", $ Which causes these parts of tools/perf/ to be rebuilt: CC /tmp/build/perf/trace/beauty/tracepoints/x86_msr.o LD /tmp/build/perf/trace/beauty/tracepoints/perf-in.o LD /tmp/build/perf/trace/beauty/perf-in.o LD /tmp/build/perf/perf-in.o LINK /tmp/build/perf/perf At some point these should just be tables read by perf on demand. This allows 'perf trace' users to use those strings to translate from the msr ids provided by the msr: tracepoints. This addresses this perf tools build warning: diff -u tools/arch/x86/include/asm/msr-index.h arch/x86/include/asm/msr-index.h Warning: Kernel ABI header at 'tools/arch/x86/include/asm/msr-index.h' differs from latest version at 'arch/x86/include/asm/msr-index.h' Cc: Adrian Hunter Cc: Borislav Petkov Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: Rafael J. Wysocki Cc: Sean Christopherson Cc: Tony Luck Cc: Victor Ding Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/msr-index.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 972a34d935059..2b5fc9accec48 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -139,6 +139,7 @@ #define MSR_IA32_MCG_CAP 0x00000179 #define MSR_IA32_MCG_STATUS 0x0000017a #define MSR_IA32_MCG_CTL 0x0000017b +#define MSR_ERROR_CONTROL 0x0000017f #define MSR_IA32_MCG_EXT_CTL 0x000004d0 #define MSR_OFFCORE_RSP_0 0x000001a6 @@ -326,8 +327,9 @@ #define MSR_PP1_ENERGY_STATUS 0x00000641 #define MSR_PP1_POLICY 0x00000642 -#define MSR_AMD_PKG_ENERGY_STATUS 0xc001029b #define MSR_AMD_RAPL_POWER_UNIT 0xc0010299 +#define MSR_AMD_CORE_ENERGY_STATUS 0xc001029a +#define MSR_AMD_PKG_ENERGY_STATUS 0xc001029b /* Config TDP MSRs */ #define MSR_CONFIG_TDP_NOMINAL 0x00000648 @@ -609,6 +611,8 @@ #define FEAT_CTL_LOCKED BIT(0) #define FEAT_CTL_VMX_ENABLED_INSIDE_SMX BIT(1) #define FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX BIT(2) +#define FEAT_CTL_SGX_LC_ENABLED BIT(17) +#define FEAT_CTL_SGX_ENABLED BIT(18) #define FEAT_CTL_LMCE_ENABLED BIT(20) #define MSR_IA32_TSC_ADJUST 0x0000003b @@ -628,6 +632,12 @@ #define MSR_IA32_UCODE_WRITE 0x00000079 #define MSR_IA32_UCODE_REV 0x0000008b +/* Intel SGX Launch Enclave Public Key Hash MSRs */ +#define MSR_IA32_SGXLEPUBKEYHASH0 0x0000008C +#define MSR_IA32_SGXLEPUBKEYHASH1 0x0000008D +#define MSR_IA32_SGXLEPUBKEYHASH2 0x0000008E +#define MSR_IA32_SGXLEPUBKEYHASH3 0x0000008F + #define MSR_IA32_SMM_MONITOR_CTL 0x0000009b #define MSR_IA32_SMBASE 0x0000009e -- GitLab From 7ddcdea5b54492f54700f427f58690cf1e187e5e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 17 Dec 2020 14:55:01 -0300 Subject: [PATCH 0815/1894] tools headers UAPI: Sync linux/const.h with the kernel headers To pick up the changes in: a85cbe6159ffc973 ("uapi: move constants from to ") That causes no changes in tooling, just addresses this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/const.h' differs from latest version at 'include/uapi/linux/const.h' diff -u tools/include/uapi/linux/const.h include/uapi/linux/const.h Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: Petr Vorel Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/const.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/include/uapi/linux/const.h b/tools/include/uapi/linux/const.h index 5ed721ad5b198..af2a44c08683d 100644 --- a/tools/include/uapi/linux/const.h +++ b/tools/include/uapi/linux/const.h @@ -28,4 +28,9 @@ #define _BITUL(x) (_UL(1) << (x)) #define _BITULL(x) (_ULL(1) << (x)) +#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1) +#define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask)) + +#define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) + #endif /* _UAPI_LINUX_CONST_H */ -- GitLab From 4a443a51776ca9847942523cf987a330894d3a31 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 17 Dec 2020 14:58:51 -0300 Subject: [PATCH 0816/1894] tools headers UAPI: Sync linux/fscrypt.h with the kernel sources To pick the changes from: 3ceb6543e9cf6ed8 ("fscrypt: remove kernel-internal constants from UAPI header") That don't result in any changes in tooling, just addressing this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/fscrypt.h' differs from latest version at 'include/uapi/linux/fscrypt.h' diff -u tools/include/uapi/linux/fscrypt.h include/uapi/linux/fscrypt.h Cc: Adrian Hunter Cc: Eric Biggers Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/fscrypt.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/include/uapi/linux/fscrypt.h b/tools/include/uapi/linux/fscrypt.h index e5de603369381..9f4428be3e362 100644 --- a/tools/include/uapi/linux/fscrypt.h +++ b/tools/include/uapi/linux/fscrypt.h @@ -20,7 +20,6 @@ #define FSCRYPT_POLICY_FLAG_DIRECT_KEY 0x04 #define FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64 0x08 #define FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32 0x10 -#define FSCRYPT_POLICY_FLAGS_VALID 0x1F /* Encryption algorithms */ #define FSCRYPT_MODE_AES_256_XTS 1 @@ -28,7 +27,7 @@ #define FSCRYPT_MODE_AES_128_CBC 5 #define FSCRYPT_MODE_AES_128_CTS 6 #define FSCRYPT_MODE_ADIANTUM 9 -#define __FSCRYPT_MODE_MAX 9 +/* If adding a mode number > 9, update FSCRYPT_MODE_MAX in fscrypt_private.h */ /* * Legacy policy version; ad-hoc KDF and no key verification. @@ -177,7 +176,7 @@ struct fscrypt_get_key_status_arg { #define FS_POLICY_FLAGS_PAD_32 FSCRYPT_POLICY_FLAGS_PAD_32 #define FS_POLICY_FLAGS_PAD_MASK FSCRYPT_POLICY_FLAGS_PAD_MASK #define FS_POLICY_FLAG_DIRECT_KEY FSCRYPT_POLICY_FLAG_DIRECT_KEY -#define FS_POLICY_FLAGS_VALID FSCRYPT_POLICY_FLAGS_VALID +#define FS_POLICY_FLAGS_VALID 0x07 /* contains old flags only */ #define FS_ENCRYPTION_MODE_INVALID 0 /* never used */ #define FS_ENCRYPTION_MODE_AES_256_XTS FSCRYPT_MODE_AES_256_XTS #define FS_ENCRYPTION_MODE_AES_256_GCM 2 /* never used */ -- GitLab From d6dbfceec5dd41becbe8c47c402240925d31036a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 17 Dec 2020 15:01:08 -0300 Subject: [PATCH 0817/1894] tools headers UAPI: Sync linux/prctl.h with the kernel sources To pick a new prctl introduced in: 1446e1df9eb183fd ("kernel: Implement selective syscall userspace redirection") That results in: $ tools/perf/trace/beauty/prctl_option.sh > before $ cp include/uapi/linux/prctl.h tools/include/uapi/linux/prctl.h $ tools/perf/trace/beauty/prctl_option.sh > after $ diff -u before after --- before 2020-12-17 15:00:42.012537367 -0300 +++ after 2020-12-17 15:00:49.832699463 -0300 @@ -53,6 +53,7 @@ [56] = "GET_TAGGED_ADDR_CTRL", [57] = "SET_IO_FLUSHER", [58] = "GET_IO_FLUSHER", + [59] = "SET_SYSCALL_USER_DISPATCH", }; static const char *prctl_set_mm_options[] = { [1] = "START_CODE", $ Now users can do: # perf trace -e syscalls:sys_enter_prctl --filter "option==SET_SYSCALL_USER_DISPATCH" ^C# # trace -v -e syscalls:sys_enter_prctl --filter "option==SET_SYSCALL_USER_DISPATCH" New filter for syscalls:sys_enter_prctl: (option==0x3b) && (common_pid != 5519 && common_pid != 3404) ^C# And also when prctl appears in a session, its options will be translated to the string. Cc: Adrian Hunter Cc: Gabriel Krisman Bertazi Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: Thomas Gleixner Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/prctl.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index 7f0827705c9a4..90deb41c8a346 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -247,4 +247,9 @@ struct prctl_mm_map { #define PR_SET_IO_FLUSHER 57 #define PR_GET_IO_FLUSHER 58 +/* Dispatch syscalls to a userspace handler */ +#define PR_SET_SYSCALL_USER_DISPATCH 59 +# define PR_SYS_DISPATCH_OFF 0 +# define PR_SYS_DISPATCH_ON 1 + #endif /* _LINUX_PRCTL_H */ -- GitLab From f93c789a3e245707e3eddcaab5c2b7c62615692d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 17 Dec 2020 15:44:29 -0300 Subject: [PATCH 0818/1894] tools headers cpufeatures: Sync with the kernel sources To pick the changes in: e7b6385b01d8e9fb ("x86/cpufeatures: Add Intel SGX hardware bits") That causes only these 'perf bench' objects to rebuild: CC /tmp/build/perf/bench/mem-memcpy-x86-64-asm.o CC /tmp/build/perf/bench/mem-memset-x86-64-asm.o And addresses these perf build warnings: Warning: Kernel ABI header at 'tools/arch/x86/include/asm/disabled-features.h' differs from latest version at 'arch/x86/include/asm/disabled-features.h' diff -u tools/arch/x86/include/asm/disabled-features.h arch/x86/include/asm/disabled-features.h Warning: Kernel ABI header at 'tools/arch/x86/include/asm/cpufeatures.h' differs from latest version at 'arch/x86/include/asm/cpufeatures.h' diff -u tools/arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/cpufeatures.h Cc: Adrian Hunter Cc: Borislav Petkov Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: Sean Christopherson Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/cpufeatures.h | 2 ++ tools/arch/x86/include/asm/disabled-features.h | 8 +++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index dad350d42ecfb..f5ef2d5b9231c 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -241,6 +241,7 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ #define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ #define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3B */ +#define X86_FEATURE_SGX ( 9*32+ 2) /* Software Guard Extensions */ #define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */ #define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */ #define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */ @@ -356,6 +357,7 @@ #define X86_FEATURE_MOVDIRI (16*32+27) /* MOVDIRI instruction */ #define X86_FEATURE_MOVDIR64B (16*32+28) /* MOVDIR64B instruction */ #define X86_FEATURE_ENQCMD (16*32+29) /* ENQCMD and ENQCMDS instructions */ +#define X86_FEATURE_SGX_LC (16*32+30) /* Software Guard Extensions Launch Control */ /* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */ #define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */ diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h index 5861d34f97718..7947cb1782daf 100644 --- a/tools/arch/x86/include/asm/disabled-features.h +++ b/tools/arch/x86/include/asm/disabled-features.h @@ -62,6 +62,12 @@ # define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31)) #endif +#ifdef CONFIG_X86_SGX +# define DISABLE_SGX 0 +#else +# define DISABLE_SGX (1 << (X86_FEATURE_SGX & 31)) +#endif + /* * Make sure to add features to the correct mask */ @@ -74,7 +80,7 @@ #define DISABLED_MASK6 0 #define DISABLED_MASK7 (DISABLE_PTI) #define DISABLED_MASK8 0 -#define DISABLED_MASK9 (DISABLE_SMAP) +#define DISABLED_MASK9 (DISABLE_SMAP|DISABLE_SGX) #define DISABLED_MASK10 0 #define DISABLED_MASK11 0 #define DISABLED_MASK12 0 -- GitLab From b53d4872d2cfbce117abedee2a29a93e624e4e32 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 17 Dec 2020 15:48:06 -0300 Subject: [PATCH 0819/1894] tools headers UAPI: Update asm-generic/unistd.h Just a comment change, trivial. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/asm-generic/unistd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index 2056318988f77..fc48c64700eb8 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -517,7 +517,7 @@ __SC_COMP(__NR_settimeofday, sys_settimeofday, compat_sys_settimeofday) __SC_3264(__NR_adjtimex, sys_adjtimex_time32, sys_adjtimex) #endif -/* kernel/timer.c */ +/* kernel/sys.c */ #define __NR_getpid 172 __SYSCALL(__NR_getpid, sys_getpid) #define __NR_getppid 173 -- GitLab From 6725f21157b4b6a9fe689cdf07b040d21ea536dd Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:37:47 +0100 Subject: [PATCH 0820/1894] virtio-mem: determine nid only once using memory_add_physaddr_to_nid() Let's determine the target nid only once in case we have none specified - usually, we'll end up with node 0 either way. Reviewed-by: Wei Yang Reviewed-by: Pankaj Gupta Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-2-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 181e2f18beae5..a37fd73588daf 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -70,7 +70,7 @@ struct virtio_mem { /* The device block size (for communicating with the device). */ uint64_t device_block_size; - /* The translated node id. NUMA_NO_NODE in case not specified. */ + /* The determined node id for all memory of the device. */ int nid; /* Physical start address of the memory region. */ uint64_t addr; @@ -406,10 +406,6 @@ static int virtio_mem_sb_bitmap_prepare_next_mb(struct virtio_mem *vm) static int virtio_mem_mb_add(struct virtio_mem *vm, unsigned long mb_id) { const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id); - int nid = vm->nid; - - if (nid == NUMA_NO_NODE) - nid = memory_add_physaddr_to_nid(addr); /* * When force-unloading the driver and we still have memory added to @@ -423,7 +419,8 @@ static int virtio_mem_mb_add(struct virtio_mem *vm, unsigned long mb_id) } dev_dbg(&vm->vdev->dev, "adding memory block: %lu\n", mb_id); - return add_memory_driver_managed(nid, addr, memory_block_size_bytes(), + return add_memory_driver_managed(vm->nid, addr, + memory_block_size_bytes(), vm->resource_name, MEMHP_MERGE_RESOURCE); } @@ -440,13 +437,9 @@ static int virtio_mem_mb_add(struct virtio_mem *vm, unsigned long mb_id) static int virtio_mem_mb_remove(struct virtio_mem *vm, unsigned long mb_id) { const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id); - int nid = vm->nid; - - if (nid == NUMA_NO_NODE) - nid = memory_add_physaddr_to_nid(addr); dev_dbg(&vm->vdev->dev, "removing memory block: %lu\n", mb_id); - return remove_memory(nid, addr, memory_block_size_bytes()); + return remove_memory(vm->nid, addr, memory_block_size_bytes()); } /* @@ -461,14 +454,11 @@ static int virtio_mem_mb_offline_and_remove(struct virtio_mem *vm, unsigned long mb_id) { const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id); - int nid = vm->nid; - - if (nid == NUMA_NO_NODE) - nid = memory_add_physaddr_to_nid(addr); dev_dbg(&vm->vdev->dev, "offlining and removing memory block: %lu\n", mb_id); - return offline_and_remove_memory(nid, addr, memory_block_size_bytes()); + return offline_and_remove_memory(vm->nid, addr, + memory_block_size_bytes()); } /* @@ -1659,6 +1649,10 @@ static int virtio_mem_init(struct virtio_mem *vm) virtio_cread_le(vm->vdev, struct virtio_mem_config, region_size, &vm->region_size); + /* Determine the nid for the device based on the lowest address. */ + if (vm->nid == NUMA_NO_NODE) + vm->nid = memory_add_physaddr_to_nid(vm->addr); + /* * We always hotplug memory in memory block granularity. This way, * we have to wait for exactly one memory block to online. @@ -1707,7 +1701,7 @@ static int virtio_mem_init(struct virtio_mem *vm) memory_block_size_bytes()); dev_info(&vm->vdev->dev, "subblock size: 0x%llx", (unsigned long long)vm->subblock_size); - if (vm->nid != NUMA_NO_NODE) + if (vm->nid != NUMA_NO_NODE && IS_ENABLED(CONFIG_NUMA)) dev_info(&vm->vdev->dev, "nid: %d", vm->nid); return 0; -- GitLab From 347202dc04a110bdab8d4e1c38ceccd7758fe13e Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:37:48 +0100 Subject: [PATCH 0821/1894] virtio-mem: more precise calculation in virtio_mem_mb_state_prepare_next_mb() We actually need one byte less (next_mb_id is exclusive, first_mb_id is inclusive). While at it, compact the code. Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-3-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index a37fd73588daf..dee46865bae2b 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -257,10 +257,8 @@ static enum virtio_mem_mb_state virtio_mem_mb_get_state(struct virtio_mem *vm, */ static int virtio_mem_mb_state_prepare_next_mb(struct virtio_mem *vm) { - unsigned long old_bytes = vm->next_mb_id - vm->first_mb_id + 1; - unsigned long new_bytes = vm->next_mb_id - vm->first_mb_id + 2; - int old_pages = PFN_UP(old_bytes); - int new_pages = PFN_UP(new_bytes); + int old_pages = PFN_UP(vm->next_mb_id - vm->first_mb_id); + int new_pages = PFN_UP(vm->next_mb_id - vm->first_mb_id + 1); uint8_t *new_mb_state; if (vm->mb_state && old_pages == new_pages) -- GitLab From 20b9150225c8e9599999b4e161192d8a8d56a4cb Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:37:49 +0100 Subject: [PATCH 0822/1894] virtio-mem: simplify MAX_ORDER - 1 / pageblock_order handling Let's use pageblock_nr_pages and MAX_ORDER_NR_PAGES instead where possible to simplify. Add a comment why we have that restriction for now. Reviewed-by: Wei Yang Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-4-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index dee46865bae2b..0f9d854e8e424 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -753,14 +753,15 @@ static void virtio_mem_clear_fake_offline(unsigned long pfn, */ static void virtio_mem_fake_online(unsigned long pfn, unsigned int nr_pages) { - const int order = MAX_ORDER - 1; + const unsigned long max_nr_pages = MAX_ORDER_NR_PAGES; int i; /* - * We are always called with subblock granularity, which is at least - * aligned to MAX_ORDER - 1. + * We are always called at least with MAX_ORDER_NR_PAGES + * granularity/alignment (e.g., the way subblocks work). All pages + * inside such a block are alike. */ - for (i = 0; i < nr_pages; i += 1 << order) { + for (i = 0; i < nr_pages; i += max_nr_pages) { struct page *page = pfn_to_page(pfn + i); /* @@ -770,14 +771,14 @@ static void virtio_mem_fake_online(unsigned long pfn, unsigned int nr_pages) * alike. */ if (PageDirty(page)) { - virtio_mem_clear_fake_offline(pfn + i, 1 << order, + virtio_mem_clear_fake_offline(pfn + i, max_nr_pages, false); - generic_online_page(page, order); + generic_online_page(page, MAX_ORDER - 1); } else { - virtio_mem_clear_fake_offline(pfn + i, 1 << order, + virtio_mem_clear_fake_offline(pfn + i, max_nr_pages, true); - free_contig_range(pfn + i, 1 << order); - adjust_managed_page_count(page, 1 << order); + free_contig_range(pfn + i, max_nr_pages); + adjust_managed_page_count(page, max_nr_pages); } } } @@ -790,7 +791,7 @@ static void virtio_mem_online_page_cb(struct page *page, unsigned int order) int sb_id; /* - * We exploit here that subblocks have at least MAX_ORDER - 1 + * We exploit here that subblocks have at least MAX_ORDER_NR_PAGES. * size/alignment and that this callback is is called with such a * size/alignment. So we cannot cross subblocks and therefore * also not memory blocks. @@ -1673,13 +1674,15 @@ static int virtio_mem_init(struct virtio_mem *vm) "Some memory is not addressable. This can make some memory unusable.\n"); /* - * Calculate the subblock size: - * - At least MAX_ORDER - 1 / pageblock_order. - * - At least the device block size. - * In the worst case, a single subblock per memory block. + * We want subblocks to span at least MAX_ORDER_NR_PAGES and + * pageblock_nr_pages pages. This: + * - Simplifies our page onlining code (virtio_mem_online_page_cb) + * and fake page onlining code (virtio_mem_fake_online). + * - Is required for now for alloc_contig_range() to work reliably - + * it doesn't properly handle smaller granularity on ZONE_NORMAL. */ - vm->subblock_size = PAGE_SIZE * 1ul << max_t(uint32_t, MAX_ORDER - 1, - pageblock_order); + vm->subblock_size = max_t(uint64_t, MAX_ORDER_NR_PAGES, + pageblock_nr_pages) * PAGE_SIZE; vm->subblock_size = max_t(uint64_t, vm->device_block_size, vm->subblock_size); vm->nb_sb_per_mb = memory_block_size_bytes() / vm->subblock_size; -- GitLab From d76944f80d5f500c8be74feb7938edddf68ee931 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:37:50 +0100 Subject: [PATCH 0823/1894] virtio-mem: drop rc2 in virtio_mem_mb_plug_and_add() We can drop rc2, we don't actually need the value. Reviewed-by: Pankaj Gupta Reviewed-by: Wei Yang Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-5-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 0f9d854e8e424..4f18d9855a0ed 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -1070,7 +1070,7 @@ static int virtio_mem_mb_plug_and_add(struct virtio_mem *vm, uint64_t *nb_sb) { const int count = min_t(int, *nb_sb, vm->nb_sb_per_mb); - int rc, rc2; + int rc; if (WARN_ON_ONCE(!count)) return -EINVAL; @@ -1101,13 +1101,12 @@ static int virtio_mem_mb_plug_and_add(struct virtio_mem *vm, dev_err(&vm->vdev->dev, "adding memory block %lu failed with %d\n", mb_id, rc); - rc2 = virtio_mem_mb_unplug_sb(vm, mb_id, 0, count); /* * TODO: Linux MM does not properly clean up yet in all cases * where adding of memory failed - especially on -ENOMEM. */ - if (rc2) + if (virtio_mem_mb_unplug_sb(vm, mb_id, 0, count)) new_state = VIRTIO_MEM_MB_STATE_PLUGGED; virtio_mem_mb_set_state(vm, mb_id, new_state); return rc; -- GitLab From 2a6285114bc543b70612e2bc0fcf13d2dd6ce5b9 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:37:51 +0100 Subject: [PATCH 0824/1894] virtio-mem: use "unsigned long" for nr_pages when fake onlining/offlining No harm done, but let's be consistent. Reviewed-by: Pankaj Gupta Reviewed-by: Wei Yang Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-6-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 4f18d9855a0ed..94451b401fba6 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -717,7 +717,7 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, * (via generic_online_page()) using PageDirty(). */ static void virtio_mem_set_fake_offline(unsigned long pfn, - unsigned int nr_pages, bool onlined) + unsigned long nr_pages, bool onlined) { for (; nr_pages--; pfn++) { struct page *page = pfn_to_page(pfn); @@ -736,7 +736,7 @@ static void virtio_mem_set_fake_offline(unsigned long pfn, * (via generic_online_page()), clear PageDirty(). */ static void virtio_mem_clear_fake_offline(unsigned long pfn, - unsigned int nr_pages, bool onlined) + unsigned long nr_pages, bool onlined) { for (; nr_pages--; pfn++) { struct page *page = pfn_to_page(pfn); @@ -751,10 +751,10 @@ static void virtio_mem_clear_fake_offline(unsigned long pfn, * Release a range of fake-offline pages to the buddy, effectively * fake-onlining them. */ -static void virtio_mem_fake_online(unsigned long pfn, unsigned int nr_pages) +static void virtio_mem_fake_online(unsigned long pfn, unsigned long nr_pages) { const unsigned long max_nr_pages = MAX_ORDER_NR_PAGES; - int i; + unsigned long i; /* * We are always called at least with MAX_ORDER_NR_PAGES -- GitLab From 41e6215c6d29a7bbcee599411cdf0911fde1f09b Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:37:52 +0100 Subject: [PATCH 0825/1894] virtio-mem: factor out calculation of the bit number within the subblock bitmap The calculation is already complicated enough, let's limit it to one location. Reviewed-by: Wei Yang Reviewed-by: Pankaj Gupta Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-7-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 94451b401fba6..30b4d07f5263b 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -290,6 +290,16 @@ static int virtio_mem_mb_state_prepare_next_mb(struct virtio_mem *vm) _mb_id--) \ if (virtio_mem_mb_get_state(_vm, _mb_id) == _state) +/* + * Calculate the bit number in the subblock bitmap for the given subblock + * inside the given memory block. + */ +static int virtio_mem_sb_bitmap_bit_nr(struct virtio_mem *vm, + unsigned long mb_id, int sb_id) +{ + return (mb_id - vm->first_mb_id) * vm->nb_sb_per_mb + sb_id; +} + /* * Mark all selected subblocks plugged. * @@ -299,7 +309,7 @@ static void virtio_mem_mb_set_sb_plugged(struct virtio_mem *vm, unsigned long mb_id, int sb_id, int count) { - const int bit = (mb_id - vm->first_mb_id) * vm->nb_sb_per_mb + sb_id; + const int bit = virtio_mem_sb_bitmap_bit_nr(vm, mb_id, sb_id); __bitmap_set(vm->sb_bitmap, bit, count); } @@ -313,7 +323,7 @@ static void virtio_mem_mb_set_sb_unplugged(struct virtio_mem *vm, unsigned long mb_id, int sb_id, int count) { - const int bit = (mb_id - vm->first_mb_id) * vm->nb_sb_per_mb + sb_id; + const int bit = virtio_mem_sb_bitmap_bit_nr(vm, mb_id, sb_id); __bitmap_clear(vm->sb_bitmap, bit, count); } @@ -325,7 +335,7 @@ static bool virtio_mem_mb_test_sb_plugged(struct virtio_mem *vm, unsigned long mb_id, int sb_id, int count) { - const int bit = (mb_id - vm->first_mb_id) * vm->nb_sb_per_mb + sb_id; + const int bit = virtio_mem_sb_bitmap_bit_nr(vm, mb_id, sb_id); if (count == 1) return test_bit(bit, vm->sb_bitmap); @@ -342,7 +352,7 @@ static bool virtio_mem_mb_test_sb_unplugged(struct virtio_mem *vm, unsigned long mb_id, int sb_id, int count) { - const int bit = (mb_id - vm->first_mb_id) * vm->nb_sb_per_mb + sb_id; + const int bit = virtio_mem_sb_bitmap_bit_nr(vm, mb_id, sb_id); /* TODO: Helper similar to bitmap_set() */ return find_next_bit(vm->sb_bitmap, bit + count, bit) >= bit + count; @@ -355,7 +365,7 @@ static bool virtio_mem_mb_test_sb_unplugged(struct virtio_mem *vm, static int virtio_mem_mb_first_unplugged_sb(struct virtio_mem *vm, unsigned long mb_id) { - const int bit = (mb_id - vm->first_mb_id) * vm->nb_sb_per_mb; + const int bit = virtio_mem_sb_bitmap_bit_nr(vm, mb_id, 0); return find_next_zero_bit(vm->sb_bitmap, bit + vm->nb_sb_per_mb, bit) - bit; -- GitLab From 6beb3a9421fd81d36bd4d87a6b307fc744ea9dd2 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:37:53 +0100 Subject: [PATCH 0826/1894] virtio-mem: print debug messages from virtio_mem_send_*_request() Let's move the existing dev_dbg() into the functions, print if something went wrong, and also print for virtio_mem_send_unplug_all_request(). Reviewed-by: Wei Yang Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-8-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 50 ++++++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 15 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 30b4d07f5263b..4742497feff0f 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -869,23 +869,33 @@ static int virtio_mem_send_plug_request(struct virtio_mem *vm, uint64_t addr, .u.plug.addr = cpu_to_virtio64(vm->vdev, addr), .u.plug.nb_blocks = cpu_to_virtio16(vm->vdev, nb_vm_blocks), }; + int rc = -ENOMEM; if (atomic_read(&vm->config_changed)) return -EAGAIN; + dev_dbg(&vm->vdev->dev, "plugging memory: 0x%llx - 0x%llx\n", addr, + addr + size - 1); + switch (virtio_mem_send_request(vm, &req)) { case VIRTIO_MEM_RESP_ACK: vm->plugged_size += size; return 0; case VIRTIO_MEM_RESP_NACK: - return -EAGAIN; + rc = -EAGAIN; + break; case VIRTIO_MEM_RESP_BUSY: - return -ETXTBSY; + rc = -ETXTBSY; + break; case VIRTIO_MEM_RESP_ERROR: - return -EINVAL; + rc = -EINVAL; + break; default: - return -ENOMEM; + break; } + + dev_dbg(&vm->vdev->dev, "plugging memory failed: %d\n", rc); + return rc; } static int virtio_mem_send_unplug_request(struct virtio_mem *vm, uint64_t addr, @@ -897,21 +907,30 @@ static int virtio_mem_send_unplug_request(struct virtio_mem *vm, uint64_t addr, .u.unplug.addr = cpu_to_virtio64(vm->vdev, addr), .u.unplug.nb_blocks = cpu_to_virtio16(vm->vdev, nb_vm_blocks), }; + int rc = -ENOMEM; if (atomic_read(&vm->config_changed)) return -EAGAIN; + dev_dbg(&vm->vdev->dev, "unplugging memory: 0x%llx - 0x%llx\n", addr, + addr + size - 1); + switch (virtio_mem_send_request(vm, &req)) { case VIRTIO_MEM_RESP_ACK: vm->plugged_size -= size; return 0; case VIRTIO_MEM_RESP_BUSY: - return -ETXTBSY; + rc = -ETXTBSY; + break; case VIRTIO_MEM_RESP_ERROR: - return -EINVAL; + rc = -EINVAL; + break; default: - return -ENOMEM; + break; } + + dev_dbg(&vm->vdev->dev, "unplugging memory failed: %d\n", rc); + return rc; } static int virtio_mem_send_unplug_all_request(struct virtio_mem *vm) @@ -919,6 +938,9 @@ static int virtio_mem_send_unplug_all_request(struct virtio_mem *vm) const struct virtio_mem_req req = { .type = cpu_to_virtio16(vm->vdev, VIRTIO_MEM_REQ_UNPLUG_ALL), }; + int rc = -ENOMEM; + + dev_dbg(&vm->vdev->dev, "unplugging all memory"); switch (virtio_mem_send_request(vm, &req)) { case VIRTIO_MEM_RESP_ACK: @@ -928,10 +950,14 @@ static int virtio_mem_send_unplug_all_request(struct virtio_mem *vm) atomic_set(&vm->config_changed, 1); return 0; case VIRTIO_MEM_RESP_BUSY: - return -ETXTBSY; + rc = -ETXTBSY; + break; default: - return -ENOMEM; + break; } + + dev_dbg(&vm->vdev->dev, "unplugging all memory failed: %d\n", rc); + return rc; } /* @@ -946,9 +972,6 @@ static int virtio_mem_mb_plug_sb(struct virtio_mem *vm, unsigned long mb_id, const uint64_t size = count * vm->subblock_size; int rc; - dev_dbg(&vm->vdev->dev, "plugging memory block: %lu : %i - %i\n", mb_id, - sb_id, sb_id + count - 1); - rc = virtio_mem_send_plug_request(vm, addr, size); if (!rc) virtio_mem_mb_set_sb_plugged(vm, mb_id, sb_id, count); @@ -967,9 +990,6 @@ static int virtio_mem_mb_unplug_sb(struct virtio_mem *vm, unsigned long mb_id, const uint64_t size = count * vm->subblock_size; int rc; - dev_dbg(&vm->vdev->dev, "unplugging memory block: %lu : %i - %i\n", - mb_id, sb_id, sb_id + count - 1); - rc = virtio_mem_send_unplug_request(vm, addr, size); if (!rc) virtio_mem_mb_set_sb_unplugged(vm, mb_id, sb_id, count); -- GitLab From 89c486c47f2a450d7f064b4927b7f0ab911569a4 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:37:54 +0100 Subject: [PATCH 0827/1894] virtio-mem: factor out fake-offlining into virtio_mem_fake_offline() ... which now matches virtio_mem_fake_online(). We'll reuse this functionality soon. Reviewed-by: Wei Yang Reviewed-by: Pankaj Gupta Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-9-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 4742497feff0f..fedfea27967e9 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -793,6 +793,27 @@ static void virtio_mem_fake_online(unsigned long pfn, unsigned long nr_pages) } } +/* + * Try to allocate a range, marking pages fake-offline, effectively + * fake-offlining them. + */ +static int virtio_mem_fake_offline(unsigned long pfn, unsigned long nr_pages) +{ + int rc; + + rc = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_MOVABLE, + GFP_KERNEL); + if (rc == -ENOMEM) + /* whoops, out of memory */ + return rc; + if (rc) + return -EBUSY; + + virtio_mem_set_fake_offline(pfn, nr_pages, true); + adjust_managed_page_count(pfn_to_page(pfn), -nr_pages); + return 0; +} + static void virtio_mem_online_page_cb(struct page *page, unsigned int order) { const unsigned long addr = page_to_phys(page); @@ -1328,17 +1349,10 @@ static int virtio_mem_mb_unplug_sb_online(struct virtio_mem *vm, start_pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) + sb_id * vm->subblock_size); - rc = alloc_contig_range(start_pfn, start_pfn + nr_pages, - MIGRATE_MOVABLE, GFP_KERNEL); - if (rc == -ENOMEM) - /* whoops, out of memory */ - return rc; - if (rc) - return -EBUSY; - /* Mark it as fake-offline before unplugging it */ - virtio_mem_set_fake_offline(start_pfn, nr_pages, true); - adjust_managed_page_count(pfn_to_page(start_pfn), -nr_pages); + rc = virtio_mem_fake_offline(start_pfn, nr_pages); + if (rc) + return rc; /* Try to unplug the allocated memory */ rc = virtio_mem_mb_unplug_sb(vm, mb_id, sb_id, count); -- GitLab From 7a34c77dab7e0c7ecb58da8bf600b7aadb4d878c Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:37:55 +0100 Subject: [PATCH 0828/1894] virtio-mem: factor out handling of fake-offline pages in memory notifier Let's factor out the core pieces and place the implementation next to virtio_mem_fake_offline(). We'll reuse this functionality soon. Reviewed-by: Wei Yang Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-10-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 73 +++++++++++++++++++++++++------------ 1 file changed, 50 insertions(+), 23 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index fedfea27967e9..c24055248f9d9 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -160,6 +160,10 @@ static DEFINE_MUTEX(virtio_mem_mutex); static LIST_HEAD(virtio_mem_devices); static void virtio_mem_online_page_cb(struct page *page, unsigned int order); +static void virtio_mem_fake_offline_going_offline(unsigned long pfn, + unsigned long nr_pages); +static void virtio_mem_fake_offline_cancel_offline(unsigned long pfn, + unsigned long nr_pages); /* * Register a virtio-mem device so it will be considered for the online_page @@ -586,27 +590,15 @@ static void virtio_mem_notify_going_offline(struct virtio_mem *vm, unsigned long mb_id) { const unsigned long nr_pages = PFN_DOWN(vm->subblock_size); - struct page *page; unsigned long pfn; - int sb_id, i; + int sb_id; for (sb_id = 0; sb_id < vm->nb_sb_per_mb; sb_id++) { if (virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id, 1)) continue; - /* - * Drop our reference to the pages so the memory can get - * offlined and add the unplugged pages to the managed - * page counters (so offlining code can correctly subtract - * them again). - */ pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) + sb_id * vm->subblock_size); - adjust_managed_page_count(pfn_to_page(pfn), nr_pages); - for (i = 0; i < nr_pages; i++) { - page = pfn_to_page(pfn + i); - if (WARN_ON(!page_ref_dec_and_test(page))) - dump_page(page, "unplugged page referenced"); - } + virtio_mem_fake_offline_going_offline(pfn, nr_pages); } } @@ -615,21 +607,14 @@ static void virtio_mem_notify_cancel_offline(struct virtio_mem *vm, { const unsigned long nr_pages = PFN_DOWN(vm->subblock_size); unsigned long pfn; - int sb_id, i; + int sb_id; for (sb_id = 0; sb_id < vm->nb_sb_per_mb; sb_id++) { if (virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id, 1)) continue; - /* - * Get the reference we dropped when going offline and - * subtract the unplugged pages from the managed page - * counters. - */ pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) + sb_id * vm->subblock_size); - adjust_managed_page_count(pfn_to_page(pfn), -nr_pages); - for (i = 0; i < nr_pages; i++) - page_ref_inc(pfn_to_page(pfn + i)); + virtio_mem_fake_offline_cancel_offline(pfn, nr_pages); } } @@ -814,6 +799,48 @@ static int virtio_mem_fake_offline(unsigned long pfn, unsigned long nr_pages) return 0; } +/* + * Handle fake-offline pages when memory is going offline - such that the + * pages can be skipped by mm-core when offlining. + */ +static void virtio_mem_fake_offline_going_offline(unsigned long pfn, + unsigned long nr_pages) +{ + struct page *page; + unsigned long i; + + /* + * Drop our reference to the pages so the memory can get offlined + * and add the unplugged pages to the managed page counters (so + * offlining code can correctly subtract them again). + */ + adjust_managed_page_count(pfn_to_page(pfn), nr_pages); + /* Drop our reference to the pages so the memory can get offlined. */ + for (i = 0; i < nr_pages; i++) { + page = pfn_to_page(pfn + i); + if (WARN_ON(!page_ref_dec_and_test(page))) + dump_page(page, "fake-offline page referenced"); + } +} + +/* + * Handle fake-offline pages when memory offlining is canceled - to undo + * what we did in virtio_mem_fake_offline_going_offline(). + */ +static void virtio_mem_fake_offline_cancel_offline(unsigned long pfn, + unsigned long nr_pages) +{ + unsigned long i; + + /* + * Get the reference we dropped when going offline and subtract the + * unplugged pages from the managed page counters. + */ + adjust_managed_page_count(pfn_to_page(pfn), -nr_pages); + for (i = 0; i < nr_pages; i++) + page_ref_inc(pfn_to_page(pfn + i)); +} + static void virtio_mem_online_page_cb(struct page *page, unsigned int order) { const unsigned long addr = page_to_phys(page); -- GitLab From f2d799d591359685a3a74d28c2989c56f4bb9898 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:37:56 +0100 Subject: [PATCH 0829/1894] virtio-mem: retry fake-offlining via alloc_contig_range() on ZONE_MOVABLE ZONE_MOVABLE is supposed to give some guarantees, yet, alloc_contig_range() isn't prepared to properly deal with some racy cases properly (e.g., temporary page pinning when exiting processed, PCP). Retry 5 times for now. There is certainly room for improvement in the future. Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-11-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index c24055248f9d9..2f1ce4d4781b3 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -784,19 +784,34 @@ static void virtio_mem_fake_online(unsigned long pfn, unsigned long nr_pages) */ static int virtio_mem_fake_offline(unsigned long pfn, unsigned long nr_pages) { - int rc; + const bool is_movable = zone_idx(page_zone(pfn_to_page(pfn))) == + ZONE_MOVABLE; + int rc, retry_count; - rc = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_MOVABLE, - GFP_KERNEL); - if (rc == -ENOMEM) - /* whoops, out of memory */ - return rc; - if (rc) - return -EBUSY; + /* + * TODO: We want an alloc_contig_range() mode that tries to allocate + * harder (e.g., dealing with temporarily pinned pages, PCP), especially + * with ZONE_MOVABLE. So for now, retry a couple of times with + * ZONE_MOVABLE before giving up - because that zone is supposed to give + * some guarantees. + */ + for (retry_count = 0; retry_count < 5; retry_count++) { + rc = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_MOVABLE, + GFP_KERNEL); + if (rc == -ENOMEM) + /* whoops, out of memory */ + return rc; + else if (rc && !is_movable) + break; + else if (rc) + continue; - virtio_mem_set_fake_offline(pfn, nr_pages, true); - adjust_managed_page_count(pfn_to_page(pfn), -nr_pages); - return 0; + virtio_mem_set_fake_offline(pfn, nr_pages, true); + adjust_managed_page_count(pfn_to_page(pfn), -nr_pages); + return 0; + } + + return -EBUSY; } /* -- GitLab From 989ff82527074b79bc89ba1c390be1eda01784a5 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:37:57 +0100 Subject: [PATCH 0830/1894] virtio-mem: generalize check for added memory Let's check by traversing busy system RAM resources instead, to avoid relying on memory block states. Don't use walk_system_ram_range(), as that works on pages and we want to use the bare addresses we have easily at hand. This is a preparation for Big Block Mode (BBM), which won't have memory block states. Reviewed-by: Wei Yang Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-12-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 2f1ce4d4781b3..3731097cd9e80 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -1833,6 +1833,20 @@ static void virtio_mem_delete_resource(struct virtio_mem *vm) vm->parent_resource = NULL; } +static int virtio_mem_range_has_system_ram(struct resource *res, void *arg) +{ + return 1; +} + +static bool virtio_mem_has_memory_added(struct virtio_mem *vm) +{ + const unsigned long flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; + + return walk_iomem_res_desc(IORES_DESC_NONE, flags, vm->addr, + vm->addr + vm->region_size, NULL, + virtio_mem_range_has_system_ram) == 1; +} + static int virtio_mem_probe(struct virtio_device *vdev) { struct virtio_mem *vm; @@ -1954,10 +1968,7 @@ static void virtio_mem_remove(struct virtio_device *vdev) * the system. And there is no way to stop the driver/device from going * away. Warn at least. */ - if (vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE] || - vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL] || - vm->nb_mb_state[VIRTIO_MEM_MB_STATE_ONLINE] || - vm->nb_mb_state[VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL]) { + if (virtio_mem_has_memory_added(vm)) { dev_warn(&vdev->dev, "device still has system memory added\n"); } else { virtio_mem_delete_resource(vm); -- GitLab From 8464e3bdf208e86410e369601ca363b2a81683e3 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:37:58 +0100 Subject: [PATCH 0831/1894] virtio-mem: generalize virtio_mem_owned_mb() Avoid using memory block ids. Rename it to virtio_mem_contains_range(). This is a preparation for Big Block Mode (BBM). Reviewed-by: Pankaj Gupta Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-13-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 3731097cd9e80..2193c5172195d 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -512,12 +512,13 @@ static bool virtio_mem_overlaps_range(struct virtio_mem *vm, } /* - * Test if a virtio-mem device owns a memory block. Can be called from + * Test if a virtio-mem device contains a given range. Can be called from * (notifier) callbacks lockless. */ -static bool virtio_mem_owned_mb(struct virtio_mem *vm, unsigned long mb_id) +static bool virtio_mem_contains_range(struct virtio_mem *vm, uint64_t start, + uint64_t size) { - return mb_id >= vm->first_mb_id && mb_id <= vm->last_mb_id; + return start >= vm->addr && start + size <= vm->addr + vm->region_size; } static int virtio_mem_notify_going_online(struct virtio_mem *vm, @@ -871,7 +872,7 @@ static void virtio_mem_online_page_cb(struct page *page, unsigned int order) */ rcu_read_lock(); list_for_each_entry_rcu(vm, &virtio_mem_devices, next) { - if (!virtio_mem_owned_mb(vm, mb_id)) + if (!virtio_mem_contains_range(vm, addr, PFN_PHYS(1 << order))) continue; sb_id = virtio_mem_phys_to_sb_id(vm, addr); -- GitLab From 835491c554fbdbc18452f4b1546df21879b8b26d Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:37:59 +0100 Subject: [PATCH 0832/1894] virtio-mem: generalize virtio_mem_overlaps_range() Avoid using memory block ids. While at it, use uint64_t for address/size. This is a preparation for Big Block Mode (BBM). Reviewed-by: Pankaj Gupta Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-14-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 2193c5172195d..bd76aa79a82e3 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -501,14 +501,10 @@ static int virtio_mem_translate_node_id(struct virtio_mem *vm, uint16_t node_id) * Test if a virtio-mem device overlaps with the given range. Can be called * from (notifier) callbacks lockless. */ -static bool virtio_mem_overlaps_range(struct virtio_mem *vm, - unsigned long start, unsigned long size) +static bool virtio_mem_overlaps_range(struct virtio_mem *vm, uint64_t start, + uint64_t size) { - unsigned long dev_start = virtio_mem_mb_id_to_phys(vm->first_mb_id); - unsigned long dev_end = virtio_mem_mb_id_to_phys(vm->last_mb_id) + - memory_block_size_bytes(); - - return start < dev_end && dev_start < start + size; + return start < vm->addr + vm->region_size && vm->addr < start + size; } /* -- GitLab From 420066829bb614826115892e81f85b8c4341ee95 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:38:00 +0100 Subject: [PATCH 0833/1894] virtio-mem: drop last_mb_id No longer used, let's drop it. Reviewed-by: Wei Yang Reviewed-by: Pankaj Gupta Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-15-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index bd76aa79a82e3..a7beac5942e06 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -84,8 +84,6 @@ struct virtio_mem { /* Id of the first memory block of this device. */ unsigned long first_mb_id; - /* Id of the last memory block of this device. */ - unsigned long last_mb_id; /* Id of the last usable memory block of this device. */ unsigned long last_usable_mb_id; /* Id of the next memory bock to prepare when needed. */ @@ -1773,8 +1771,6 @@ static int virtio_mem_init(struct virtio_mem *vm) vm->first_mb_id = virtio_mem_phys_to_mb_id(vm->addr - 1 + memory_block_size_bytes()); vm->next_mb_id = vm->first_mb_id; - vm->last_mb_id = virtio_mem_phys_to_mb_id(vm->addr + - vm->region_size) - 1; dev_info(&vm->vdev->dev, "start address: 0x%llx", vm->addr); dev_info(&vm->vdev->dev, "region size: 0x%llx", vm->region_size); -- GitLab From 1d33c2caa8cbdc0f093a8cdad5a4c153ef9cbe8f Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:38:01 +0100 Subject: [PATCH 0834/1894] virtio-mem: don't always trigger the workqueue when offlining memory Let's trigger from offlining code only when we're not allowed to unplug online memory. Handle the other case (memmap possibly freeing up another memory block) when actually removing memory. We now also properly handle the case when removing already offline memory blocks via virtio_mem_mb_remove(). When removing via virtio_mem_remove(), when unloading the driver, virtio_mem_retry() is a NOP and safe to use. While at it, move retry handling when offlining out of virtio_mem_notify_offline(), to share it with Big Block Mode (BBM) soon. This is a preparation for Big Block Mode (BBM), whereby we can see some temporary offlining of memory blocks without actually making progress. Imagine you have a Big Block that spans to Linux memory blocks. Assume the first Linux memory blocks has no unmovable data on it. When we would call offline_and_remove_memory() on the big block, we would 1. Try to offline the first block. Works, notifiers triggered. virtio_mem_retry() called. 2. Try to offline the second block. Does not work. 3. Re-online first block. 4. Exit to main loop, exit workqueue. 5. Retry immediately (due to virtio_mem_retry()), go to 1. The result are endless retries. Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-16-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 40 ++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index a7beac5942e06..f86654af8b6bd 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -162,6 +162,7 @@ static void virtio_mem_fake_offline_going_offline(unsigned long pfn, unsigned long nr_pages); static void virtio_mem_fake_offline_cancel_offline(unsigned long pfn, unsigned long nr_pages); +static void virtio_mem_retry(struct virtio_mem *vm); /* * Register a virtio-mem device so it will be considered for the online_page @@ -447,9 +448,17 @@ static int virtio_mem_mb_add(struct virtio_mem *vm, unsigned long mb_id) static int virtio_mem_mb_remove(struct virtio_mem *vm, unsigned long mb_id) { const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id); + int rc; dev_dbg(&vm->vdev->dev, "removing memory block: %lu\n", mb_id); - return remove_memory(vm->nid, addr, memory_block_size_bytes()); + rc = remove_memory(vm->nid, addr, memory_block_size_bytes()); + if (!rc) + /* + * We might have freed up memory we can now unplug, retry + * immediately instead of waiting. + */ + virtio_mem_retry(vm); + return rc; } /* @@ -464,11 +473,19 @@ static int virtio_mem_mb_offline_and_remove(struct virtio_mem *vm, unsigned long mb_id) { const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id); + int rc; dev_dbg(&vm->vdev->dev, "offlining and removing memory block: %lu\n", mb_id); - return offline_and_remove_memory(vm->nid, addr, - memory_block_size_bytes()); + rc = offline_and_remove_memory(vm->nid, addr, + memory_block_size_bytes()); + if (!rc) + /* + * We might have freed up memory we can now unplug, retry + * immediately instead of waiting. + */ + virtio_mem_retry(vm); + return rc; } /* @@ -546,15 +563,6 @@ static void virtio_mem_notify_offline(struct virtio_mem *vm, BUG(); break; } - - /* - * Trigger the workqueue, maybe we can now unplug memory. Also, - * when we offline and remove a memory block, this will re-trigger - * us immediately - which is often nice because the removal of - * the memory block (e.g., memmap) might have freed up memory - * on other memory blocks we manage. - */ - virtio_mem_retry(vm); } static void virtio_mem_notify_online(struct virtio_mem *vm, unsigned long mb_id) @@ -672,6 +680,14 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, break; case MEM_OFFLINE: virtio_mem_notify_offline(vm, mb_id); + + /* + * Trigger the workqueue. Now that we have some offline memory, + * maybe we can handle pending unplug requests. + */ + if (!unplug_online) + virtio_mem_retry(vm); + vm->hotplug_active = false; mutex_unlock(&vm->hotplug_mutex); break; -- GitLab From 98ff9f9411860073f952f1e62a05afb9f6a9e77e Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:38:02 +0100 Subject: [PATCH 0835/1894] virtio-mem: generalize handling when memory is getting onlined deferred We don't want to add too much memory when it's not getting onlined immediately, to avoid running OOM. Generalize the handling, to avoid making use of memory block states. Use a threshold of 1 GiB for now. Properly adjust the offline size when adding/removing memory. As we are not always protected by a lock when touching the offline size, use an atomic64_t. We don't care about races (e.g., someone offlining memory while we are adding more), only about consistent values. (1 GiB needs a memmap of ~16MiB - which sounds reasonable even for setups with little boot memory and (possibly) one virtio-mem device per node) We don't want to retrigger when onlining is caused immediately by our action (e.g., adding memory which immediately gets onlined), so use a flag to indicate if the workqueue is active and use that as an indicator whether to trigger a retry. This will also be especially relevant for Big Block Mode (BBM), whereby we might re-online memory in case offlining of another memory block failed. Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-17-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 95 ++++++++++++++++++++++++------------- 1 file changed, 63 insertions(+), 32 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index f86654af8b6bd..cbd0aa5eb95c9 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -51,6 +51,7 @@ struct virtio_mem { /* Workqueue that processes the plug/unplug requests. */ struct work_struct wq; + atomic_t wq_active; atomic_t config_changed; /* Virtqueue for guest->host requests. */ @@ -99,7 +100,15 @@ struct virtio_mem { /* Summary of all memory block states. */ unsigned long nb_mb_state[VIRTIO_MEM_MB_STATE_COUNT]; -#define VIRTIO_MEM_NB_OFFLINE_THRESHOLD 10 + + /* + * We don't want to add too much memory if it's not getting onlined, + * to avoid running OOM. Besides this threshold, we allow to have at + * least two offline blocks at a time (whatever is bigger). + */ +#define VIRTIO_MEM_DEFAULT_OFFLINE_THRESHOLD (1024 * 1024 * 1024) + atomic64_t offline_size; + uint64_t offline_threshold; /* * One byte state per memory block. @@ -405,6 +414,18 @@ static int virtio_mem_sb_bitmap_prepare_next_mb(struct virtio_mem *vm) return 0; } +/* + * Test if we could add memory without creating too much offline memory - + * to avoid running OOM if memory is getting onlined deferred. + */ +static bool virtio_mem_could_add_memory(struct virtio_mem *vm, uint64_t size) +{ + if (WARN_ON_ONCE(size > vm->offline_threshold)) + return false; + + return atomic64_read(&vm->offline_size) + size <= vm->offline_threshold; +} + /* * Try to add a memory block to Linux. This will usually only fail * if out of memory. @@ -417,6 +438,8 @@ static int virtio_mem_sb_bitmap_prepare_next_mb(struct virtio_mem *vm) static int virtio_mem_mb_add(struct virtio_mem *vm, unsigned long mb_id) { const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id); + const uint64_t size = memory_block_size_bytes(); + int rc; /* * When force-unloading the driver and we still have memory added to @@ -430,10 +453,13 @@ static int virtio_mem_mb_add(struct virtio_mem *vm, unsigned long mb_id) } dev_dbg(&vm->vdev->dev, "adding memory block: %lu\n", mb_id); - return add_memory_driver_managed(vm->nid, addr, - memory_block_size_bytes(), - vm->resource_name, - MEMHP_MERGE_RESOURCE); + /* Memory might get onlined immediately. */ + atomic64_add(size, &vm->offline_size); + rc = add_memory_driver_managed(vm->nid, addr, size, vm->resource_name, + MEMHP_MERGE_RESOURCE); + if (rc) + atomic64_sub(size, &vm->offline_size); + return rc; } /* @@ -448,16 +474,19 @@ static int virtio_mem_mb_add(struct virtio_mem *vm, unsigned long mb_id) static int virtio_mem_mb_remove(struct virtio_mem *vm, unsigned long mb_id) { const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id); + const uint64_t size = memory_block_size_bytes(); int rc; dev_dbg(&vm->vdev->dev, "removing memory block: %lu\n", mb_id); - rc = remove_memory(vm->nid, addr, memory_block_size_bytes()); - if (!rc) + rc = remove_memory(vm->nid, addr, size); + if (!rc) { + atomic64_sub(size, &vm->offline_size); /* * We might have freed up memory we can now unplug, retry * immediately instead of waiting. */ virtio_mem_retry(vm); + } return rc; } @@ -473,18 +502,20 @@ static int virtio_mem_mb_offline_and_remove(struct virtio_mem *vm, unsigned long mb_id) { const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id); + const uint64_t size = memory_block_size_bytes(); int rc; dev_dbg(&vm->vdev->dev, "offlining and removing memory block: %lu\n", mb_id); - rc = offline_and_remove_memory(vm->nid, addr, - memory_block_size_bytes()); - if (!rc) + rc = offline_and_remove_memory(vm->nid, addr, size); + if (!rc) { + atomic64_sub(size, &vm->offline_size); /* * We might have freed up memory we can now unplug, retry * immediately instead of waiting. */ virtio_mem_retry(vm); + } return rc; } @@ -567,8 +598,6 @@ static void virtio_mem_notify_offline(struct virtio_mem *vm, static void virtio_mem_notify_online(struct virtio_mem *vm, unsigned long mb_id) { - unsigned long nb_offline; - switch (virtio_mem_mb_get_state(vm, mb_id)) { case VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL: virtio_mem_mb_set_state(vm, mb_id, @@ -581,12 +610,6 @@ static void virtio_mem_notify_online(struct virtio_mem *vm, unsigned long mb_id) BUG(); break; } - nb_offline = vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE] + - vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL]; - - /* see if we can add new blocks now that we onlined one block */ - if (nb_offline == VIRTIO_MEM_NB_OFFLINE_THRESHOLD - 1) - virtio_mem_retry(vm); } static void virtio_mem_notify_going_offline(struct virtio_mem *vm, @@ -681,6 +704,7 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, case MEM_OFFLINE: virtio_mem_notify_offline(vm, mb_id); + atomic64_add(size, &vm->offline_size); /* * Trigger the workqueue. Now that we have some offline memory, * maybe we can handle pending unplug requests. @@ -693,6 +717,18 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, break; case MEM_ONLINE: virtio_mem_notify_online(vm, mb_id); + + atomic64_sub(size, &vm->offline_size); + /* + * Start adding more memory once we onlined half of our + * threshold. Don't trigger if it's possibly due to our actipn + * (e.g., us adding memory which gets onlined immediately from + * the core). + */ + if (!atomic_read(&vm->wq_active) && + virtio_mem_could_add_memory(vm, vm->offline_threshold / 2)) + virtio_mem_retry(vm); + vm->hotplug_active = false; mutex_unlock(&vm->hotplug_mutex); break; @@ -1151,18 +1187,6 @@ static int virtio_mem_prepare_next_mb(struct virtio_mem *vm, return 0; } -/* - * Don't add too many blocks that are not onlined yet to avoid running OOM. - */ -static bool virtio_mem_too_many_mb_offline(struct virtio_mem *vm) -{ - unsigned long nb_offline; - - nb_offline = vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE] + - vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL]; - return nb_offline >= VIRTIO_MEM_NB_OFFLINE_THRESHOLD; -} - /* * Try to plug the desired number of subblocks and add the memory block * to Linux. @@ -1316,7 +1340,7 @@ static int virtio_mem_plug_request(struct virtio_mem *vm, uint64_t diff) /* Try to plug and add unused blocks */ virtio_mem_for_each_mb_state(vm, mb_id, VIRTIO_MEM_MB_STATE_UNUSED) { - if (virtio_mem_too_many_mb_offline(vm)) + if (!virtio_mem_could_add_memory(vm, memory_block_size_bytes())) return -ENOSPC; rc = virtio_mem_mb_plug_and_add(vm, mb_id, &nb_sb); @@ -1327,7 +1351,7 @@ static int virtio_mem_plug_request(struct virtio_mem *vm, uint64_t diff) /* Try to prepare, plug and add new blocks */ while (nb_sb) { - if (virtio_mem_too_many_mb_offline(vm)) + if (!virtio_mem_could_add_memory(vm, memory_block_size_bytes())) return -ENOSPC; rc = virtio_mem_prepare_next_mb(vm, &mb_id); @@ -1620,6 +1644,7 @@ static void virtio_mem_run_wq(struct work_struct *work) if (vm->broken) return; + atomic_set(&vm->wq_active, 1); retry: rc = 0; @@ -1680,6 +1705,8 @@ retry: "unknown error, marking device broken: %d\n", rc); vm->broken = true; } + + atomic_set(&vm->wq_active, 0); } static enum hrtimer_restart virtio_mem_timer_expired(struct hrtimer *timer) @@ -1788,6 +1815,10 @@ static int virtio_mem_init(struct virtio_mem *vm) memory_block_size_bytes()); vm->next_mb_id = vm->first_mb_id; + /* Prepare the offline threshold - make sure we can add two blocks. */ + vm->offline_threshold = max_t(uint64_t, 2 * memory_block_size_bytes(), + VIRTIO_MEM_DEFAULT_OFFLINE_THRESHOLD); + dev_info(&vm->vdev->dev, "start address: 0x%llx", vm->addr); dev_info(&vm->vdev->dev, "region size: 0x%llx", vm->region_size); dev_info(&vm->vdev->dev, "device block size: 0x%llx", -- GitLab From d5614944254cf288b8fd46fda8c86d916346131d Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:38:03 +0100 Subject: [PATCH 0836/1894] virito-mem: document Sub Block Mode (SBM) Let's add some documentation for the current mode - Sub Block Mode (SBM) - to prepare for a new mode - Big Block Mode (BBM). Follow-up patches will properly factor out the existing Sub Block Mode (SBM) and implement Big Block Mode (BBM). Reviewed-by: Wei Yang Reviewed-by: Pankaj Gupta Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-18-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index cbd0aa5eb95c9..4234bfc0cf529 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -27,6 +27,21 @@ static bool unplug_online = true; module_param(unplug_online, bool, 0644); MODULE_PARM_DESC(unplug_online, "Try to unplug online memory"); +/* + * virtio-mem currently supports the following modes of operation: + * + * * Sub Block Mode (SBM): A Linux memory block spans 1..X subblocks (SB). The + * size of a Sub Block (SB) is determined based on the device block size, the + * pageblock size, and the maximum allocation granularity of the buddy. + * Subblocks within a Linux memory block might either be plugged or unplugged. + * Memory is added/removed to Linux MM in Linux memory block granularity. + * + * User space / core MM (auto onlining) is responsible for onlining added + * Linux memory blocks - and for selecting a zone. Linux Memory Blocks are + * always onlined separately, and all memory within a Linux memory block is + * onlined to the same zone - virtio-mem relies on this behavior. + */ + enum virtio_mem_mb_state { /* Unplugged, not added to Linux. Can be reused later. */ VIRTIO_MEM_MB_STATE_UNUSED = 0, -- GitLab From 99f0b55ea6c3a2ed29776ca0dd549d523ae8d6d3 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:38:04 +0100 Subject: [PATCH 0837/1894] virtio-mem: memory block states are specific to Sub Block Mode (SBM) let's use a new "sbm" sub-struct to hold SBM-specific state and rename + move applicable definitions, functions, and variables (related to memory block states). While at it: - Drop the "_STATE" part from memory block states - Rename "nb_mb_state" to "mb_count" - "set_mb_state" / "get_mb_state" vs. "mb_set_state" / "mb_get_state" - Don't use lengthy "enum virtio_mem_smb_mb_state", simply use "uint8_t" Reviewed-by: Wei Yang Reviewed-by: Pankaj Gupta Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-19-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 215 ++++++++++++++++++------------------ 1 file changed, 109 insertions(+), 106 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 4234bfc0cf529..c6cc301c78e1f 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -42,20 +42,23 @@ MODULE_PARM_DESC(unplug_online, "Try to unplug online memory"); * onlined to the same zone - virtio-mem relies on this behavior. */ -enum virtio_mem_mb_state { +/* + * State of a Linux memory block in SBM. + */ +enum virtio_mem_sbm_mb_state { /* Unplugged, not added to Linux. Can be reused later. */ - VIRTIO_MEM_MB_STATE_UNUSED = 0, + VIRTIO_MEM_SBM_MB_UNUSED = 0, /* (Partially) plugged, not added to Linux. Error on add_memory(). */ - VIRTIO_MEM_MB_STATE_PLUGGED, + VIRTIO_MEM_SBM_MB_PLUGGED, /* Fully plugged, fully added to Linux, offline. */ - VIRTIO_MEM_MB_STATE_OFFLINE, + VIRTIO_MEM_SBM_MB_OFFLINE, /* Partially plugged, fully added to Linux, offline. */ - VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL, + VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL, /* Fully plugged, fully added to Linux, online. */ - VIRTIO_MEM_MB_STATE_ONLINE, + VIRTIO_MEM_SBM_MB_ONLINE, /* Partially plugged, fully added to Linux, online. */ - VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL, - VIRTIO_MEM_MB_STATE_COUNT + VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL, + VIRTIO_MEM_SBM_MB_COUNT }; struct virtio_mem { @@ -113,9 +116,6 @@ struct virtio_mem { */ const char *resource_name; - /* Summary of all memory block states. */ - unsigned long nb_mb_state[VIRTIO_MEM_MB_STATE_COUNT]; - /* * We don't want to add too much memory if it's not getting onlined, * to avoid running OOM. Besides this threshold, we allow to have at @@ -125,27 +125,29 @@ struct virtio_mem { atomic64_t offline_size; uint64_t offline_threshold; - /* - * One byte state per memory block. - * - * Allocated via vmalloc(). When preparing new blocks, resized - * (alloc+copy+free) when needed (crossing pages with the next mb). - * (when crossing pages). - * - * With 128MB memory blocks, we have states for 512GB of memory in one - * page. - */ - uint8_t *mb_state; + struct { + /* Summary of all memory block states. */ + unsigned long mb_count[VIRTIO_MEM_SBM_MB_COUNT]; + + /* + * One byte state per memory block. Allocated via vmalloc(). + * Resized (alloc+copy+free) on demand. + * + * With 128 MiB memory blocks, we have states for 512 GiB of + * memory in one 4 KiB page. + */ + uint8_t *mb_states; + } sbm; /* - * $nb_sb_per_mb bit per memory block. Handled similar to mb_state. + * $nb_sb_per_mb bit per memory block. Handled similar to sbm.mb_states. * * With 4MB subblocks, we manage 128GB of memory in one page. */ unsigned long *sb_bitmap; /* - * Mutex that protects the nb_mb_state, mb_state, and sb_bitmap. + * Mutex that protects the sbm.mb_count, sbm.mb_states, and sb_bitmap. * * When this lock is held the pointers can't change, ONLINE and * OFFLINE blocks can't change the state and no subblocks will get @@ -254,68 +256,68 @@ static unsigned long virtio_mem_phys_to_sb_id(struct virtio_mem *vm, /* * Set the state of a memory block, taking care of the state counter. */ -static void virtio_mem_mb_set_state(struct virtio_mem *vm, unsigned long mb_id, - enum virtio_mem_mb_state state) +static void virtio_mem_sbm_set_mb_state(struct virtio_mem *vm, + unsigned long mb_id, uint8_t state) { const unsigned long idx = mb_id - vm->first_mb_id; - enum virtio_mem_mb_state old_state; + uint8_t old_state; - old_state = vm->mb_state[idx]; - vm->mb_state[idx] = state; + old_state = vm->sbm.mb_states[idx]; + vm->sbm.mb_states[idx] = state; - BUG_ON(vm->nb_mb_state[old_state] == 0); - vm->nb_mb_state[old_state]--; - vm->nb_mb_state[state]++; + BUG_ON(vm->sbm.mb_count[old_state] == 0); + vm->sbm.mb_count[old_state]--; + vm->sbm.mb_count[state]++; } /* * Get the state of a memory block. */ -static enum virtio_mem_mb_state virtio_mem_mb_get_state(struct virtio_mem *vm, - unsigned long mb_id) +static uint8_t virtio_mem_sbm_get_mb_state(struct virtio_mem *vm, + unsigned long mb_id) { const unsigned long idx = mb_id - vm->first_mb_id; - return vm->mb_state[idx]; + return vm->sbm.mb_states[idx]; } /* * Prepare the state array for the next memory block. */ -static int virtio_mem_mb_state_prepare_next_mb(struct virtio_mem *vm) +static int virtio_mem_sbm_mb_states_prepare_next_mb(struct virtio_mem *vm) { int old_pages = PFN_UP(vm->next_mb_id - vm->first_mb_id); int new_pages = PFN_UP(vm->next_mb_id - vm->first_mb_id + 1); - uint8_t *new_mb_state; + uint8_t *new_array; - if (vm->mb_state && old_pages == new_pages) + if (vm->sbm.mb_states && old_pages == new_pages) return 0; - new_mb_state = vzalloc(new_pages * PAGE_SIZE); - if (!new_mb_state) + new_array = vzalloc(new_pages * PAGE_SIZE); + if (!new_array) return -ENOMEM; mutex_lock(&vm->hotplug_mutex); - if (vm->mb_state) - memcpy(new_mb_state, vm->mb_state, old_pages * PAGE_SIZE); - vfree(vm->mb_state); - vm->mb_state = new_mb_state; + if (vm->sbm.mb_states) + memcpy(new_array, vm->sbm.mb_states, old_pages * PAGE_SIZE); + vfree(vm->sbm.mb_states); + vm->sbm.mb_states = new_array; mutex_unlock(&vm->hotplug_mutex); return 0; } -#define virtio_mem_for_each_mb_state(_vm, _mb_id, _state) \ +#define virtio_mem_sbm_for_each_mb(_vm, _mb_id, _state) \ for (_mb_id = _vm->first_mb_id; \ - _mb_id < _vm->next_mb_id && _vm->nb_mb_state[_state]; \ + _mb_id < _vm->next_mb_id && _vm->sbm.mb_count[_state]; \ _mb_id++) \ - if (virtio_mem_mb_get_state(_vm, _mb_id) == _state) + if (virtio_mem_sbm_get_mb_state(_vm, _mb_id) == _state) -#define virtio_mem_for_each_mb_state_rev(_vm, _mb_id, _state) \ +#define virtio_mem_sbm_for_each_mb_rev(_vm, _mb_id, _state) \ for (_mb_id = _vm->next_mb_id - 1; \ - _mb_id >= _vm->first_mb_id && _vm->nb_mb_state[_state]; \ + _mb_id >= _vm->first_mb_id && _vm->sbm.mb_count[_state]; \ _mb_id--) \ - if (virtio_mem_mb_get_state(_vm, _mb_id) == _state) + if (virtio_mem_sbm_get_mb_state(_vm, _mb_id) == _state) /* * Calculate the bit number in the subblock bitmap for the given subblock @@ -581,9 +583,9 @@ static bool virtio_mem_contains_range(struct virtio_mem *vm, uint64_t start, static int virtio_mem_notify_going_online(struct virtio_mem *vm, unsigned long mb_id) { - switch (virtio_mem_mb_get_state(vm, mb_id)) { - case VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL: - case VIRTIO_MEM_MB_STATE_OFFLINE: + switch (virtio_mem_sbm_get_mb_state(vm, mb_id)) { + case VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL: + case VIRTIO_MEM_SBM_MB_OFFLINE: return NOTIFY_OK; default: break; @@ -596,14 +598,14 @@ static int virtio_mem_notify_going_online(struct virtio_mem *vm, static void virtio_mem_notify_offline(struct virtio_mem *vm, unsigned long mb_id) { - switch (virtio_mem_mb_get_state(vm, mb_id)) { - case VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL: - virtio_mem_mb_set_state(vm, mb_id, - VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL); + switch (virtio_mem_sbm_get_mb_state(vm, mb_id)) { + case VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL: + virtio_mem_sbm_set_mb_state(vm, mb_id, + VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL); break; - case VIRTIO_MEM_MB_STATE_ONLINE: - virtio_mem_mb_set_state(vm, mb_id, - VIRTIO_MEM_MB_STATE_OFFLINE); + case VIRTIO_MEM_SBM_MB_ONLINE: + virtio_mem_sbm_set_mb_state(vm, mb_id, + VIRTIO_MEM_SBM_MB_OFFLINE); break; default: BUG(); @@ -613,13 +615,14 @@ static void virtio_mem_notify_offline(struct virtio_mem *vm, static void virtio_mem_notify_online(struct virtio_mem *vm, unsigned long mb_id) { - switch (virtio_mem_mb_get_state(vm, mb_id)) { - case VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL: - virtio_mem_mb_set_state(vm, mb_id, - VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL); + switch (virtio_mem_sbm_get_mb_state(vm, mb_id)) { + case VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL: + virtio_mem_sbm_set_mb_state(vm, mb_id, + VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL); break; - case VIRTIO_MEM_MB_STATE_OFFLINE: - virtio_mem_mb_set_state(vm, mb_id, VIRTIO_MEM_MB_STATE_ONLINE); + case VIRTIO_MEM_SBM_MB_OFFLINE: + virtio_mem_sbm_set_mb_state(vm, mb_id, + VIRTIO_MEM_SBM_MB_ONLINE); break; default: BUG(); @@ -1188,7 +1191,7 @@ static int virtio_mem_prepare_next_mb(struct virtio_mem *vm, return -ENOSPC; /* Resize the state array if required. */ - rc = virtio_mem_mb_state_prepare_next_mb(vm); + rc = virtio_mem_sbm_mb_states_prepare_next_mb(vm); if (rc) return rc; @@ -1197,7 +1200,7 @@ static int virtio_mem_prepare_next_mb(struct virtio_mem *vm, if (rc) return rc; - vm->nb_mb_state[VIRTIO_MEM_MB_STATE_UNUSED]++; + vm->sbm.mb_count[VIRTIO_MEM_SBM_MB_UNUSED]++; *mb_id = vm->next_mb_id++; return 0; } @@ -1231,16 +1234,16 @@ static int virtio_mem_mb_plug_and_add(struct virtio_mem *vm, * so the memory notifiers will find the block in the right state. */ if (count == vm->nb_sb_per_mb) - virtio_mem_mb_set_state(vm, mb_id, - VIRTIO_MEM_MB_STATE_OFFLINE); + virtio_mem_sbm_set_mb_state(vm, mb_id, + VIRTIO_MEM_SBM_MB_OFFLINE); else - virtio_mem_mb_set_state(vm, mb_id, - VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL); + virtio_mem_sbm_set_mb_state(vm, mb_id, + VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL); /* Add the memory block to linux - if that fails, try to unplug. */ rc = virtio_mem_mb_add(vm, mb_id); if (rc) { - enum virtio_mem_mb_state new_state = VIRTIO_MEM_MB_STATE_UNUSED; + int new_state = VIRTIO_MEM_SBM_MB_UNUSED; dev_err(&vm->vdev->dev, "adding memory block %lu failed with %d\n", mb_id, rc); @@ -1250,8 +1253,8 @@ static int virtio_mem_mb_plug_and_add(struct virtio_mem *vm, * where adding of memory failed - especially on -ENOMEM. */ if (virtio_mem_mb_unplug_sb(vm, mb_id, 0, count)) - new_state = VIRTIO_MEM_MB_STATE_PLUGGED; - virtio_mem_mb_set_state(vm, mb_id, new_state); + new_state = VIRTIO_MEM_SBM_MB_PLUGGED; + virtio_mem_sbm_set_mb_state(vm, mb_id, new_state); return rc; } @@ -1304,11 +1307,11 @@ static int virtio_mem_mb_plug_any_sb(struct virtio_mem *vm, unsigned long mb_id, if (virtio_mem_mb_test_sb_plugged(vm, mb_id, 0, vm->nb_sb_per_mb)) { if (online) - virtio_mem_mb_set_state(vm, mb_id, - VIRTIO_MEM_MB_STATE_ONLINE); + virtio_mem_sbm_set_mb_state(vm, mb_id, + VIRTIO_MEM_SBM_MB_ONLINE); else - virtio_mem_mb_set_state(vm, mb_id, - VIRTIO_MEM_MB_STATE_OFFLINE); + virtio_mem_sbm_set_mb_state(vm, mb_id, + VIRTIO_MEM_SBM_MB_OFFLINE); } return 0; @@ -1330,8 +1333,8 @@ static int virtio_mem_plug_request(struct virtio_mem *vm, uint64_t diff) mutex_lock(&vm->hotplug_mutex); /* Try to plug subblocks of partially plugged online blocks. */ - virtio_mem_for_each_mb_state(vm, mb_id, - VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL) { + virtio_mem_sbm_for_each_mb(vm, mb_id, + VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL) { rc = virtio_mem_mb_plug_any_sb(vm, mb_id, &nb_sb, true); if (rc || !nb_sb) goto out_unlock; @@ -1339,8 +1342,8 @@ static int virtio_mem_plug_request(struct virtio_mem *vm, uint64_t diff) } /* Try to plug subblocks of partially plugged offline blocks. */ - virtio_mem_for_each_mb_state(vm, mb_id, - VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL) { + virtio_mem_sbm_for_each_mb(vm, mb_id, + VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL) { rc = virtio_mem_mb_plug_any_sb(vm, mb_id, &nb_sb, false); if (rc || !nb_sb) goto out_unlock; @@ -1354,7 +1357,7 @@ static int virtio_mem_plug_request(struct virtio_mem *vm, uint64_t diff) mutex_unlock(&vm->hotplug_mutex); /* Try to plug and add unused blocks */ - virtio_mem_for_each_mb_state(vm, mb_id, VIRTIO_MEM_MB_STATE_UNUSED) { + virtio_mem_sbm_for_each_mb(vm, mb_id, VIRTIO_MEM_SBM_MB_UNUSED) { if (!virtio_mem_could_add_memory(vm, memory_block_size_bytes())) return -ENOSPC; @@ -1403,8 +1406,8 @@ static int virtio_mem_mb_unplug_any_sb_offline(struct virtio_mem *vm, /* some subblocks might have been unplugged even on failure */ if (!virtio_mem_mb_test_sb_plugged(vm, mb_id, 0, vm->nb_sb_per_mb)) - virtio_mem_mb_set_state(vm, mb_id, - VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL); + virtio_mem_sbm_set_mb_state(vm, mb_id, + VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL); if (rc) return rc; @@ -1415,8 +1418,8 @@ static int virtio_mem_mb_unplug_any_sb_offline(struct virtio_mem *vm, * unplugged. Temporarily drop the mutex, so * any pending GOING_ONLINE requests can be serviced/rejected. */ - virtio_mem_mb_set_state(vm, mb_id, - VIRTIO_MEM_MB_STATE_UNUSED); + virtio_mem_sbm_set_mb_state(vm, mb_id, + VIRTIO_MEM_SBM_MB_UNUSED); mutex_unlock(&vm->hotplug_mutex); rc = virtio_mem_mb_remove(vm, mb_id); @@ -1454,8 +1457,8 @@ static int virtio_mem_mb_unplug_sb_online(struct virtio_mem *vm, return rc; } - virtio_mem_mb_set_state(vm, mb_id, - VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL); + virtio_mem_sbm_set_mb_state(vm, mb_id, + VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL); return 0; } @@ -1515,8 +1518,8 @@ unplugged: rc = virtio_mem_mb_offline_and_remove(vm, mb_id); mutex_lock(&vm->hotplug_mutex); if (!rc) - virtio_mem_mb_set_state(vm, mb_id, - VIRTIO_MEM_MB_STATE_UNUSED); + virtio_mem_sbm_set_mb_state(vm, mb_id, + VIRTIO_MEM_SBM_MB_UNUSED); } return 0; @@ -1542,8 +1545,8 @@ static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff) mutex_lock(&vm->hotplug_mutex); /* Try to unplug subblocks of partially plugged offline blocks. */ - virtio_mem_for_each_mb_state_rev(vm, mb_id, - VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL) { + virtio_mem_sbm_for_each_mb_rev(vm, mb_id, + VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL) { rc = virtio_mem_mb_unplug_any_sb_offline(vm, mb_id, &nb_sb); if (rc || !nb_sb) @@ -1552,8 +1555,7 @@ static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff) } /* Try to unplug subblocks of plugged offline blocks. */ - virtio_mem_for_each_mb_state_rev(vm, mb_id, - VIRTIO_MEM_MB_STATE_OFFLINE) { + virtio_mem_sbm_for_each_mb_rev(vm, mb_id, VIRTIO_MEM_SBM_MB_OFFLINE) { rc = virtio_mem_mb_unplug_any_sb_offline(vm, mb_id, &nb_sb); if (rc || !nb_sb) @@ -1567,8 +1569,8 @@ static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff) } /* Try to unplug subblocks of partially plugged online blocks. */ - virtio_mem_for_each_mb_state_rev(vm, mb_id, - VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL) { + virtio_mem_sbm_for_each_mb_rev(vm, mb_id, + VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL) { rc = virtio_mem_mb_unplug_any_sb_online(vm, mb_id, &nb_sb); if (rc || !nb_sb) @@ -1579,8 +1581,7 @@ static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff) } /* Try to unplug subblocks of plugged online blocks. */ - virtio_mem_for_each_mb_state_rev(vm, mb_id, - VIRTIO_MEM_MB_STATE_ONLINE) { + virtio_mem_sbm_for_each_mb_rev(vm, mb_id, VIRTIO_MEM_SBM_MB_ONLINE) { rc = virtio_mem_mb_unplug_any_sb_online(vm, mb_id, &nb_sb); if (rc || !nb_sb) @@ -1606,11 +1607,12 @@ static int virtio_mem_unplug_pending_mb(struct virtio_mem *vm) unsigned long mb_id; int rc; - virtio_mem_for_each_mb_state(vm, mb_id, VIRTIO_MEM_MB_STATE_PLUGGED) { + virtio_mem_sbm_for_each_mb(vm, mb_id, VIRTIO_MEM_SBM_MB_PLUGGED) { rc = virtio_mem_mb_unplug(vm, mb_id); if (rc) return rc; - virtio_mem_mb_set_state(vm, mb_id, VIRTIO_MEM_MB_STATE_UNUSED); + virtio_mem_sbm_set_mb_state(vm, mb_id, + VIRTIO_MEM_SBM_MB_UNUSED); } return 0; @@ -2002,11 +2004,12 @@ static void virtio_mem_remove(struct virtio_device *vdev) * After we unregistered our callbacks, user space can online partially * plugged offline blocks. Make sure to remove them. */ - virtio_mem_for_each_mb_state(vm, mb_id, - VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL) { + virtio_mem_sbm_for_each_mb(vm, mb_id, + VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL) { rc = virtio_mem_mb_remove(vm, mb_id); BUG_ON(rc); - virtio_mem_mb_set_state(vm, mb_id, VIRTIO_MEM_MB_STATE_UNUSED); + virtio_mem_sbm_set_mb_state(vm, mb_id, + VIRTIO_MEM_SBM_MB_UNUSED); } /* * After we unregistered our callbacks, user space can no longer @@ -2031,7 +2034,7 @@ static void virtio_mem_remove(struct virtio_device *vdev) } /* remove all tracking data - no locking needed */ - vfree(vm->mb_state); + vfree(vm->sbm.mb_states); vfree(vm->sb_bitmap); /* reset the device and cleanup the queues */ -- GitLab From 54c6a6ba75ba4c428b659b167f87c07100ba260e Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:38:05 +0100 Subject: [PATCH 0838/1894] virito-mem: subblock states are specific to Sub Block Mode (SBM) Let's rename and move accordingly. While at it, rename sb_bitmap to "sb_states". Reviewed-by: Wei Yang Reviewed-by: Pankaj Gupta Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-20-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 132 +++++++++++++++++++----------------- 1 file changed, 69 insertions(+), 63 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index c6cc301c78e1f..851cddf5c6064 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -137,17 +137,23 @@ struct virtio_mem { * memory in one 4 KiB page. */ uint8_t *mb_states; - } sbm; - /* - * $nb_sb_per_mb bit per memory block. Handled similar to sbm.mb_states. - * - * With 4MB subblocks, we manage 128GB of memory in one page. - */ - unsigned long *sb_bitmap; + /* + * Bitmap: one bit per subblock. Allocated similar to + * sbm.mb_states. + * + * A set bit means the corresponding subblock is plugged, + * otherwise it's unblocked. + * + * With 4 MiB subblocks, we manage 128 GiB of memory in one + * 4 KiB page. + */ + unsigned long *sb_states; + } sbm; /* - * Mutex that protects the sbm.mb_count, sbm.mb_states, and sb_bitmap. + * Mutex that protects the sbm.mb_count, sbm.mb_states, and + * sbm.sb_states. * * When this lock is held the pointers can't change, ONLINE and * OFFLINE blocks can't change the state and no subblocks will get @@ -323,8 +329,8 @@ static int virtio_mem_sbm_mb_states_prepare_next_mb(struct virtio_mem *vm) * Calculate the bit number in the subblock bitmap for the given subblock * inside the given memory block. */ -static int virtio_mem_sb_bitmap_bit_nr(struct virtio_mem *vm, - unsigned long mb_id, int sb_id) +static int virtio_mem_sbm_sb_state_bit_nr(struct virtio_mem *vm, + unsigned long mb_id, int sb_id) { return (mb_id - vm->first_mb_id) * vm->nb_sb_per_mb + sb_id; } @@ -334,13 +340,13 @@ static int virtio_mem_sb_bitmap_bit_nr(struct virtio_mem *vm, * * Will not modify the state of the memory block. */ -static void virtio_mem_mb_set_sb_plugged(struct virtio_mem *vm, - unsigned long mb_id, int sb_id, - int count) +static void virtio_mem_sbm_set_sb_plugged(struct virtio_mem *vm, + unsigned long mb_id, int sb_id, + int count) { - const int bit = virtio_mem_sb_bitmap_bit_nr(vm, mb_id, sb_id); + const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, sb_id); - __bitmap_set(vm->sb_bitmap, bit, count); + __bitmap_set(vm->sbm.sb_states, bit, count); } /* @@ -348,86 +354,87 @@ static void virtio_mem_mb_set_sb_plugged(struct virtio_mem *vm, * * Will not modify the state of the memory block. */ -static void virtio_mem_mb_set_sb_unplugged(struct virtio_mem *vm, - unsigned long mb_id, int sb_id, - int count) +static void virtio_mem_sbm_set_sb_unplugged(struct virtio_mem *vm, + unsigned long mb_id, int sb_id, + int count) { - const int bit = virtio_mem_sb_bitmap_bit_nr(vm, mb_id, sb_id); + const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, sb_id); - __bitmap_clear(vm->sb_bitmap, bit, count); + __bitmap_clear(vm->sbm.sb_states, bit, count); } /* * Test if all selected subblocks are plugged. */ -static bool virtio_mem_mb_test_sb_plugged(struct virtio_mem *vm, - unsigned long mb_id, int sb_id, - int count) +static bool virtio_mem_sbm_test_sb_plugged(struct virtio_mem *vm, + unsigned long mb_id, int sb_id, + int count) { - const int bit = virtio_mem_sb_bitmap_bit_nr(vm, mb_id, sb_id); + const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, sb_id); if (count == 1) - return test_bit(bit, vm->sb_bitmap); + return test_bit(bit, vm->sbm.sb_states); /* TODO: Helper similar to bitmap_set() */ - return find_next_zero_bit(vm->sb_bitmap, bit + count, bit) >= + return find_next_zero_bit(vm->sbm.sb_states, bit + count, bit) >= bit + count; } /* * Test if all selected subblocks are unplugged. */ -static bool virtio_mem_mb_test_sb_unplugged(struct virtio_mem *vm, - unsigned long mb_id, int sb_id, - int count) +static bool virtio_mem_sbm_test_sb_unplugged(struct virtio_mem *vm, + unsigned long mb_id, int sb_id, + int count) { - const int bit = virtio_mem_sb_bitmap_bit_nr(vm, mb_id, sb_id); + const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, sb_id); /* TODO: Helper similar to bitmap_set() */ - return find_next_bit(vm->sb_bitmap, bit + count, bit) >= bit + count; + return find_next_bit(vm->sbm.sb_states, bit + count, bit) >= + bit + count; } /* * Find the first unplugged subblock. Returns vm->nb_sb_per_mb in case there is * none. */ -static int virtio_mem_mb_first_unplugged_sb(struct virtio_mem *vm, +static int virtio_mem_sbm_first_unplugged_sb(struct virtio_mem *vm, unsigned long mb_id) { - const int bit = virtio_mem_sb_bitmap_bit_nr(vm, mb_id, 0); + const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, 0); - return find_next_zero_bit(vm->sb_bitmap, bit + vm->nb_sb_per_mb, bit) - - bit; + return find_next_zero_bit(vm->sbm.sb_states, + bit + vm->nb_sb_per_mb, bit) - bit; } /* * Prepare the subblock bitmap for the next memory block. */ -static int virtio_mem_sb_bitmap_prepare_next_mb(struct virtio_mem *vm) +static int virtio_mem_sbm_sb_states_prepare_next_mb(struct virtio_mem *vm) { const unsigned long old_nb_mb = vm->next_mb_id - vm->first_mb_id; const unsigned long old_nb_bits = old_nb_mb * vm->nb_sb_per_mb; const unsigned long new_nb_bits = (old_nb_mb + 1) * vm->nb_sb_per_mb; int old_pages = PFN_UP(BITS_TO_LONGS(old_nb_bits) * sizeof(long)); int new_pages = PFN_UP(BITS_TO_LONGS(new_nb_bits) * sizeof(long)); - unsigned long *new_sb_bitmap, *old_sb_bitmap; + unsigned long *new_bitmap, *old_bitmap; - if (vm->sb_bitmap && old_pages == new_pages) + if (vm->sbm.sb_states && old_pages == new_pages) return 0; - new_sb_bitmap = vzalloc(new_pages * PAGE_SIZE); - if (!new_sb_bitmap) + new_bitmap = vzalloc(new_pages * PAGE_SIZE); + if (!new_bitmap) return -ENOMEM; mutex_lock(&vm->hotplug_mutex); - if (new_sb_bitmap) - memcpy(new_sb_bitmap, vm->sb_bitmap, old_pages * PAGE_SIZE); + if (new_bitmap) + memcpy(new_bitmap, vm->sbm.sb_states, old_pages * PAGE_SIZE); - old_sb_bitmap = vm->sb_bitmap; - vm->sb_bitmap = new_sb_bitmap; + old_bitmap = vm->sbm.sb_states; + vm->sbm.sb_states = new_bitmap; mutex_unlock(&vm->hotplug_mutex); - vfree(old_sb_bitmap); + vfree(old_bitmap); return 0; } @@ -638,7 +645,7 @@ static void virtio_mem_notify_going_offline(struct virtio_mem *vm, int sb_id; for (sb_id = 0; sb_id < vm->nb_sb_per_mb; sb_id++) { - if (virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id, 1)) + if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id, 1)) continue; pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) + sb_id * vm->subblock_size); @@ -654,7 +661,7 @@ static void virtio_mem_notify_cancel_offline(struct virtio_mem *vm, int sb_id; for (sb_id = 0; sb_id < vm->nb_sb_per_mb; sb_id++) { - if (virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id, 1)) + if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id, 1)) continue; pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) + sb_id * vm->subblock_size); @@ -944,7 +951,7 @@ static void virtio_mem_online_page_cb(struct page *page, unsigned int order) * If plugged, online the pages, otherwise, set them fake * offline (PageOffline). */ - if (virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id, 1)) + if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id, 1)) generic_online_page(page, order); else virtio_mem_set_fake_offline(PFN_DOWN(addr), 1 << order, @@ -1102,7 +1109,7 @@ static int virtio_mem_mb_plug_sb(struct virtio_mem *vm, unsigned long mb_id, rc = virtio_mem_send_plug_request(vm, addr, size); if (!rc) - virtio_mem_mb_set_sb_plugged(vm, mb_id, sb_id, count); + virtio_mem_sbm_set_sb_plugged(vm, mb_id, sb_id, count); return rc; } @@ -1120,7 +1127,7 @@ static int virtio_mem_mb_unplug_sb(struct virtio_mem *vm, unsigned long mb_id, rc = virtio_mem_send_unplug_request(vm, addr, size); if (!rc) - virtio_mem_mb_set_sb_unplugged(vm, mb_id, sb_id, count); + virtio_mem_sbm_set_sb_unplugged(vm, mb_id, sb_id, count); return rc; } @@ -1143,14 +1150,14 @@ static int virtio_mem_mb_unplug_any_sb(struct virtio_mem *vm, while (*nb_sb) { /* Find the next candidate subblock */ while (sb_id >= 0 && - virtio_mem_mb_test_sb_unplugged(vm, mb_id, sb_id, 1)) + virtio_mem_sbm_test_sb_unplugged(vm, mb_id, sb_id, 1)) sb_id--; if (sb_id < 0) break; /* Try to unplug multiple subblocks at a time */ count = 1; while (count < *nb_sb && sb_id > 0 && - virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id - 1, 1)) { + virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id - 1, 1)) { count++; sb_id--; } @@ -1196,7 +1203,7 @@ static int virtio_mem_prepare_next_mb(struct virtio_mem *vm, return rc; /* Resize the subblock bitmap if required. */ - rc = virtio_mem_sb_bitmap_prepare_next_mb(vm); + rc = virtio_mem_sbm_sb_states_prepare_next_mb(vm); if (rc) return rc; @@ -1281,14 +1288,13 @@ static int virtio_mem_mb_plug_any_sb(struct virtio_mem *vm, unsigned long mb_id, return -EINVAL; while (*nb_sb) { - sb_id = virtio_mem_mb_first_unplugged_sb(vm, mb_id); + sb_id = virtio_mem_sbm_first_unplugged_sb(vm, mb_id); if (sb_id >= vm->nb_sb_per_mb) break; count = 1; while (count < *nb_sb && sb_id + count < vm->nb_sb_per_mb && - !virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id + count, - 1)) + !virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id + count, 1)) count++; rc = virtio_mem_mb_plug_sb(vm, mb_id, sb_id, count); @@ -1305,7 +1311,7 @@ static int virtio_mem_mb_plug_any_sb(struct virtio_mem *vm, unsigned long mb_id, virtio_mem_fake_online(pfn, nr_pages); } - if (virtio_mem_mb_test_sb_plugged(vm, mb_id, 0, vm->nb_sb_per_mb)) { + if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->nb_sb_per_mb)) { if (online) virtio_mem_sbm_set_mb_state(vm, mb_id, VIRTIO_MEM_SBM_MB_ONLINE); @@ -1405,13 +1411,13 @@ static int virtio_mem_mb_unplug_any_sb_offline(struct virtio_mem *vm, rc = virtio_mem_mb_unplug_any_sb(vm, mb_id, nb_sb); /* some subblocks might have been unplugged even on failure */ - if (!virtio_mem_mb_test_sb_plugged(vm, mb_id, 0, vm->nb_sb_per_mb)) + if (!virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->nb_sb_per_mb)) virtio_mem_sbm_set_mb_state(vm, mb_id, VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL); if (rc) return rc; - if (virtio_mem_mb_test_sb_unplugged(vm, mb_id, 0, vm->nb_sb_per_mb)) { + if (virtio_mem_sbm_test_sb_unplugged(vm, mb_id, 0, vm->nb_sb_per_mb)) { /* * Remove the block from Linux - this should never fail. * Hinder the block from getting onlined by marking it @@ -1480,7 +1486,7 @@ static int virtio_mem_mb_unplug_any_sb_online(struct virtio_mem *vm, /* If possible, try to unplug the complete block in one shot. */ if (*nb_sb >= vm->nb_sb_per_mb && - virtio_mem_mb_test_sb_plugged(vm, mb_id, 0, vm->nb_sb_per_mb)) { + virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->nb_sb_per_mb)) { rc = virtio_mem_mb_unplug_sb_online(vm, mb_id, 0, vm->nb_sb_per_mb); if (!rc) { @@ -1494,7 +1500,7 @@ static int virtio_mem_mb_unplug_any_sb_online(struct virtio_mem *vm, for (sb_id = vm->nb_sb_per_mb - 1; sb_id >= 0 && *nb_sb; sb_id--) { /* Find the next candidate subblock */ while (sb_id >= 0 && - !virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id, 1)) + !virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id, 1)) sb_id--; if (sb_id < 0) break; @@ -1513,7 +1519,7 @@ unplugged: * remove it. This will usually not fail, as no memory is in use * anymore - however some other notifiers might NACK the request. */ - if (virtio_mem_mb_test_sb_unplugged(vm, mb_id, 0, vm->nb_sb_per_mb)) { + if (virtio_mem_sbm_test_sb_unplugged(vm, mb_id, 0, vm->nb_sb_per_mb)) { mutex_unlock(&vm->hotplug_mutex); rc = virtio_mem_mb_offline_and_remove(vm, mb_id); mutex_lock(&vm->hotplug_mutex); @@ -2035,7 +2041,7 @@ static void virtio_mem_remove(struct virtio_device *vdev) /* remove all tracking data - no locking needed */ vfree(vm->sbm.mb_states); - vfree(vm->sb_bitmap); + vfree(vm->sbm.sb_states); /* reset the device and cleanup the queues */ vdev->config->reset(vdev); -- GitLab From 905c4c5146dcb1b1e0a534ae9b5da6c5e4f29c21 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:38:06 +0100 Subject: [PATCH 0839/1894] virtio-mem: nb_sb_per_mb and subblock_size are specific to Sub Block Mode (SBM) Let's rename to "sbs_per_mb" and "sb_size" and move accordingly. Reviewed-by: Wei Yang Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-21-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 96 ++++++++++++++++++------------------- 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 851cddf5c6064..6395c30902522 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -96,11 +96,6 @@ struct virtio_mem { /* Maximum region size in bytes. */ uint64_t region_size; - /* The subblock size. */ - uint64_t subblock_size; - /* The number of subblocks per memory block. */ - uint32_t nb_sb_per_mb; - /* Id of the first memory block of this device. */ unsigned long first_mb_id; /* Id of the last usable memory block of this device. */ @@ -126,6 +121,11 @@ struct virtio_mem { uint64_t offline_threshold; struct { + /* The subblock size. */ + uint64_t sb_size; + /* The number of subblocks per Linux memory block. */ + uint32_t sbs_per_mb; + /* Summary of all memory block states. */ unsigned long mb_count[VIRTIO_MEM_SBM_MB_COUNT]; @@ -256,7 +256,7 @@ static unsigned long virtio_mem_phys_to_sb_id(struct virtio_mem *vm, const unsigned long mb_id = virtio_mem_phys_to_mb_id(addr); const unsigned long mb_addr = virtio_mem_mb_id_to_phys(mb_id); - return (addr - mb_addr) / vm->subblock_size; + return (addr - mb_addr) / vm->sbm.sb_size; } /* @@ -332,7 +332,7 @@ static int virtio_mem_sbm_mb_states_prepare_next_mb(struct virtio_mem *vm) static int virtio_mem_sbm_sb_state_bit_nr(struct virtio_mem *vm, unsigned long mb_id, int sb_id) { - return (mb_id - vm->first_mb_id) * vm->nb_sb_per_mb + sb_id; + return (mb_id - vm->first_mb_id) * vm->sbm.sbs_per_mb + sb_id; } /* @@ -395,7 +395,7 @@ static bool virtio_mem_sbm_test_sb_unplugged(struct virtio_mem *vm, } /* - * Find the first unplugged subblock. Returns vm->nb_sb_per_mb in case there is + * Find the first unplugged subblock. Returns vm->sbm.sbs_per_mb in case there is * none. */ static int virtio_mem_sbm_first_unplugged_sb(struct virtio_mem *vm, @@ -404,7 +404,7 @@ static int virtio_mem_sbm_first_unplugged_sb(struct virtio_mem *vm, const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, 0); return find_next_zero_bit(vm->sbm.sb_states, - bit + vm->nb_sb_per_mb, bit) - bit; + bit + vm->sbm.sbs_per_mb, bit) - bit; } /* @@ -413,8 +413,8 @@ static int virtio_mem_sbm_first_unplugged_sb(struct virtio_mem *vm, static int virtio_mem_sbm_sb_states_prepare_next_mb(struct virtio_mem *vm) { const unsigned long old_nb_mb = vm->next_mb_id - vm->first_mb_id; - const unsigned long old_nb_bits = old_nb_mb * vm->nb_sb_per_mb; - const unsigned long new_nb_bits = (old_nb_mb + 1) * vm->nb_sb_per_mb; + const unsigned long old_nb_bits = old_nb_mb * vm->sbm.sbs_per_mb; + const unsigned long new_nb_bits = (old_nb_mb + 1) * vm->sbm.sbs_per_mb; int old_pages = PFN_UP(BITS_TO_LONGS(old_nb_bits) * sizeof(long)); int new_pages = PFN_UP(BITS_TO_LONGS(new_nb_bits) * sizeof(long)); unsigned long *new_bitmap, *old_bitmap; @@ -640,15 +640,15 @@ static void virtio_mem_notify_online(struct virtio_mem *vm, unsigned long mb_id) static void virtio_mem_notify_going_offline(struct virtio_mem *vm, unsigned long mb_id) { - const unsigned long nr_pages = PFN_DOWN(vm->subblock_size); + const unsigned long nr_pages = PFN_DOWN(vm->sbm.sb_size); unsigned long pfn; int sb_id; - for (sb_id = 0; sb_id < vm->nb_sb_per_mb; sb_id++) { + for (sb_id = 0; sb_id < vm->sbm.sbs_per_mb; sb_id++) { if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id, 1)) continue; pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) + - sb_id * vm->subblock_size); + sb_id * vm->sbm.sb_size); virtio_mem_fake_offline_going_offline(pfn, nr_pages); } } @@ -656,15 +656,15 @@ static void virtio_mem_notify_going_offline(struct virtio_mem *vm, static void virtio_mem_notify_cancel_offline(struct virtio_mem *vm, unsigned long mb_id) { - const unsigned long nr_pages = PFN_DOWN(vm->subblock_size); + const unsigned long nr_pages = PFN_DOWN(vm->sbm.sb_size); unsigned long pfn; int sb_id; - for (sb_id = 0; sb_id < vm->nb_sb_per_mb; sb_id++) { + for (sb_id = 0; sb_id < vm->sbm.sbs_per_mb; sb_id++) { if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id, 1)) continue; pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) + - sb_id * vm->subblock_size); + sb_id * vm->sbm.sb_size); virtio_mem_fake_offline_cancel_offline(pfn, nr_pages); } } @@ -1103,8 +1103,8 @@ static int virtio_mem_mb_plug_sb(struct virtio_mem *vm, unsigned long mb_id, int sb_id, int count) { const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id) + - sb_id * vm->subblock_size; - const uint64_t size = count * vm->subblock_size; + sb_id * vm->sbm.sb_size; + const uint64_t size = count * vm->sbm.sb_size; int rc; rc = virtio_mem_send_plug_request(vm, addr, size); @@ -1121,8 +1121,8 @@ static int virtio_mem_mb_unplug_sb(struct virtio_mem *vm, unsigned long mb_id, int sb_id, int count) { const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id) + - sb_id * vm->subblock_size; - const uint64_t size = count * vm->subblock_size; + sb_id * vm->sbm.sb_size; + const uint64_t size = count * vm->sbm.sb_size; int rc; rc = virtio_mem_send_unplug_request(vm, addr, size); @@ -1146,7 +1146,7 @@ static int virtio_mem_mb_unplug_any_sb(struct virtio_mem *vm, int sb_id, count; int rc; - sb_id = vm->nb_sb_per_mb - 1; + sb_id = vm->sbm.sbs_per_mb - 1; while (*nb_sb) { /* Find the next candidate subblock */ while (sb_id >= 0 && @@ -1181,7 +1181,7 @@ static int virtio_mem_mb_unplug_any_sb(struct virtio_mem *vm, */ static int virtio_mem_mb_unplug(struct virtio_mem *vm, unsigned long mb_id) { - uint64_t nb_sb = vm->nb_sb_per_mb; + uint64_t nb_sb = vm->sbm.sbs_per_mb; return virtio_mem_mb_unplug_any_sb(vm, mb_id, &nb_sb); } @@ -1222,7 +1222,7 @@ static int virtio_mem_mb_plug_and_add(struct virtio_mem *vm, unsigned long mb_id, uint64_t *nb_sb) { - const int count = min_t(int, *nb_sb, vm->nb_sb_per_mb); + const int count = min_t(int, *nb_sb, vm->sbm.sbs_per_mb); int rc; if (WARN_ON_ONCE(!count)) @@ -1240,7 +1240,7 @@ static int virtio_mem_mb_plug_and_add(struct virtio_mem *vm, * Mark the block properly offline before adding it to Linux, * so the memory notifiers will find the block in the right state. */ - if (count == vm->nb_sb_per_mb) + if (count == vm->sbm.sbs_per_mb) virtio_mem_sbm_set_mb_state(vm, mb_id, VIRTIO_MEM_SBM_MB_OFFLINE); else @@ -1289,11 +1289,11 @@ static int virtio_mem_mb_plug_any_sb(struct virtio_mem *vm, unsigned long mb_id, while (*nb_sb) { sb_id = virtio_mem_sbm_first_unplugged_sb(vm, mb_id); - if (sb_id >= vm->nb_sb_per_mb) + if (sb_id >= vm->sbm.sbs_per_mb) break; count = 1; while (count < *nb_sb && - sb_id + count < vm->nb_sb_per_mb && + sb_id + count < vm->sbm.sbs_per_mb && !virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id + count, 1)) count++; @@ -1306,12 +1306,12 @@ static int virtio_mem_mb_plug_any_sb(struct virtio_mem *vm, unsigned long mb_id, /* fake-online the pages if the memory block is online */ pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) + - sb_id * vm->subblock_size); - nr_pages = PFN_DOWN(count * vm->subblock_size); + sb_id * vm->sbm.sb_size); + nr_pages = PFN_DOWN(count * vm->sbm.sb_size); virtio_mem_fake_online(pfn, nr_pages); } - if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->nb_sb_per_mb)) { + if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) { if (online) virtio_mem_sbm_set_mb_state(vm, mb_id, VIRTIO_MEM_SBM_MB_ONLINE); @@ -1328,7 +1328,7 @@ static int virtio_mem_mb_plug_any_sb(struct virtio_mem *vm, unsigned long mb_id, */ static int virtio_mem_plug_request(struct virtio_mem *vm, uint64_t diff) { - uint64_t nb_sb = diff / vm->subblock_size; + uint64_t nb_sb = diff / vm->sbm.sb_size; unsigned long mb_id; int rc; @@ -1411,13 +1411,13 @@ static int virtio_mem_mb_unplug_any_sb_offline(struct virtio_mem *vm, rc = virtio_mem_mb_unplug_any_sb(vm, mb_id, nb_sb); /* some subblocks might have been unplugged even on failure */ - if (!virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->nb_sb_per_mb)) + if (!virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) virtio_mem_sbm_set_mb_state(vm, mb_id, VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL); if (rc) return rc; - if (virtio_mem_sbm_test_sb_unplugged(vm, mb_id, 0, vm->nb_sb_per_mb)) { + if (virtio_mem_sbm_test_sb_unplugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) { /* * Remove the block from Linux - this should never fail. * Hinder the block from getting onlined by marking it @@ -1444,12 +1444,12 @@ static int virtio_mem_mb_unplug_sb_online(struct virtio_mem *vm, unsigned long mb_id, int sb_id, int count) { - const unsigned long nr_pages = PFN_DOWN(vm->subblock_size) * count; + const unsigned long nr_pages = PFN_DOWN(vm->sbm.sb_size) * count; unsigned long start_pfn; int rc; start_pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) + - sb_id * vm->subblock_size); + sb_id * vm->sbm.sb_size); rc = virtio_mem_fake_offline(start_pfn, nr_pages); if (rc) @@ -1485,19 +1485,19 @@ static int virtio_mem_mb_unplug_any_sb_online(struct virtio_mem *vm, int rc, sb_id; /* If possible, try to unplug the complete block in one shot. */ - if (*nb_sb >= vm->nb_sb_per_mb && - virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->nb_sb_per_mb)) { + if (*nb_sb >= vm->sbm.sbs_per_mb && + virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) { rc = virtio_mem_mb_unplug_sb_online(vm, mb_id, 0, - vm->nb_sb_per_mb); + vm->sbm.sbs_per_mb); if (!rc) { - *nb_sb -= vm->nb_sb_per_mb; + *nb_sb -= vm->sbm.sbs_per_mb; goto unplugged; } else if (rc != -EBUSY) return rc; } /* Fallback to single subblocks. */ - for (sb_id = vm->nb_sb_per_mb - 1; sb_id >= 0 && *nb_sb; sb_id--) { + for (sb_id = vm->sbm.sbs_per_mb - 1; sb_id >= 0 && *nb_sb; sb_id--) { /* Find the next candidate subblock */ while (sb_id >= 0 && !virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id, 1)) @@ -1519,7 +1519,7 @@ unplugged: * remove it. This will usually not fail, as no memory is in use * anymore - however some other notifiers might NACK the request. */ - if (virtio_mem_sbm_test_sb_unplugged(vm, mb_id, 0, vm->nb_sb_per_mb)) { + if (virtio_mem_sbm_test_sb_unplugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) { mutex_unlock(&vm->hotplug_mutex); rc = virtio_mem_mb_offline_and_remove(vm, mb_id); mutex_lock(&vm->hotplug_mutex); @@ -1536,7 +1536,7 @@ unplugged: */ static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff) { - uint64_t nb_sb = diff / vm->subblock_size; + uint64_t nb_sb = diff / vm->sbm.sb_size; unsigned long mb_id; int rc; @@ -1827,11 +1827,11 @@ static int virtio_mem_init(struct virtio_mem *vm) * - Is required for now for alloc_contig_range() to work reliably - * it doesn't properly handle smaller granularity on ZONE_NORMAL. */ - vm->subblock_size = max_t(uint64_t, MAX_ORDER_NR_PAGES, - pageblock_nr_pages) * PAGE_SIZE; - vm->subblock_size = max_t(uint64_t, vm->device_block_size, - vm->subblock_size); - vm->nb_sb_per_mb = memory_block_size_bytes() / vm->subblock_size; + vm->sbm.sb_size = max_t(uint64_t, MAX_ORDER_NR_PAGES, + pageblock_nr_pages) * PAGE_SIZE; + vm->sbm.sb_size = max_t(uint64_t, vm->device_block_size, + vm->sbm.sb_size); + vm->sbm.sbs_per_mb = memory_block_size_bytes() / vm->sbm.sb_size; /* Round up to the next full memory block */ vm->first_mb_id = virtio_mem_phys_to_mb_id(vm->addr - 1 + @@ -1849,7 +1849,7 @@ static int virtio_mem_init(struct virtio_mem *vm) dev_info(&vm->vdev->dev, "memory block size: 0x%lx", memory_block_size_bytes()); dev_info(&vm->vdev->dev, "subblock size: 0x%llx", - (unsigned long long)vm->subblock_size); + (unsigned long long)vm->sbm.sb_size); if (vm->nid != NUMA_NO_NODE && IS_ENABLED(CONFIG_NUMA)) dev_info(&vm->vdev->dev, "nid: %d", vm->nid); -- GitLab From 8a6f082babea6744b876a23ff5ed6081bf12968d Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:38:07 +0100 Subject: [PATCH 0840/1894] virtio-mem: memory block ids are specific to Sub Block Mode (SBM) Let's move first_mb_id/next_mb_id/last_usable_mb_id accordingly. Reviewed-by: Wei Yang Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-22-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 46 ++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 6395c30902522..248d28e653a9b 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -96,13 +96,6 @@ struct virtio_mem { /* Maximum region size in bytes. */ uint64_t region_size; - /* Id of the first memory block of this device. */ - unsigned long first_mb_id; - /* Id of the last usable memory block of this device. */ - unsigned long last_usable_mb_id; - /* Id of the next memory bock to prepare when needed. */ - unsigned long next_mb_id; - /* The parent resource for all memory added via this device. */ struct resource *parent_resource; /* @@ -121,6 +114,13 @@ struct virtio_mem { uint64_t offline_threshold; struct { + /* Id of the first memory block of this device. */ + unsigned long first_mb_id; + /* Id of the last usable memory block of this device. */ + unsigned long last_usable_mb_id; + /* Id of the next memory bock to prepare when needed. */ + unsigned long next_mb_id; + /* The subblock size. */ uint64_t sb_size; /* The number of subblocks per Linux memory block. */ @@ -265,7 +265,7 @@ static unsigned long virtio_mem_phys_to_sb_id(struct virtio_mem *vm, static void virtio_mem_sbm_set_mb_state(struct virtio_mem *vm, unsigned long mb_id, uint8_t state) { - const unsigned long idx = mb_id - vm->first_mb_id; + const unsigned long idx = mb_id - vm->sbm.first_mb_id; uint8_t old_state; old_state = vm->sbm.mb_states[idx]; @@ -282,7 +282,7 @@ static void virtio_mem_sbm_set_mb_state(struct virtio_mem *vm, static uint8_t virtio_mem_sbm_get_mb_state(struct virtio_mem *vm, unsigned long mb_id) { - const unsigned long idx = mb_id - vm->first_mb_id; + const unsigned long idx = mb_id - vm->sbm.first_mb_id; return vm->sbm.mb_states[idx]; } @@ -292,8 +292,8 @@ static uint8_t virtio_mem_sbm_get_mb_state(struct virtio_mem *vm, */ static int virtio_mem_sbm_mb_states_prepare_next_mb(struct virtio_mem *vm) { - int old_pages = PFN_UP(vm->next_mb_id - vm->first_mb_id); - int new_pages = PFN_UP(vm->next_mb_id - vm->first_mb_id + 1); + int old_pages = PFN_UP(vm->sbm.next_mb_id - vm->sbm.first_mb_id); + int new_pages = PFN_UP(vm->sbm.next_mb_id - vm->sbm.first_mb_id + 1); uint8_t *new_array; if (vm->sbm.mb_states && old_pages == new_pages) @@ -314,14 +314,14 @@ static int virtio_mem_sbm_mb_states_prepare_next_mb(struct virtio_mem *vm) } #define virtio_mem_sbm_for_each_mb(_vm, _mb_id, _state) \ - for (_mb_id = _vm->first_mb_id; \ - _mb_id < _vm->next_mb_id && _vm->sbm.mb_count[_state]; \ + for (_mb_id = _vm->sbm.first_mb_id; \ + _mb_id < _vm->sbm.next_mb_id && _vm->sbm.mb_count[_state]; \ _mb_id++) \ if (virtio_mem_sbm_get_mb_state(_vm, _mb_id) == _state) #define virtio_mem_sbm_for_each_mb_rev(_vm, _mb_id, _state) \ - for (_mb_id = _vm->next_mb_id - 1; \ - _mb_id >= _vm->first_mb_id && _vm->sbm.mb_count[_state]; \ + for (_mb_id = _vm->sbm.next_mb_id - 1; \ + _mb_id >= _vm->sbm.first_mb_id && _vm->sbm.mb_count[_state]; \ _mb_id--) \ if (virtio_mem_sbm_get_mb_state(_vm, _mb_id) == _state) @@ -332,7 +332,7 @@ static int virtio_mem_sbm_mb_states_prepare_next_mb(struct virtio_mem *vm) static int virtio_mem_sbm_sb_state_bit_nr(struct virtio_mem *vm, unsigned long mb_id, int sb_id) { - return (mb_id - vm->first_mb_id) * vm->sbm.sbs_per_mb + sb_id; + return (mb_id - vm->sbm.first_mb_id) * vm->sbm.sbs_per_mb + sb_id; } /* @@ -412,7 +412,7 @@ static int virtio_mem_sbm_first_unplugged_sb(struct virtio_mem *vm, */ static int virtio_mem_sbm_sb_states_prepare_next_mb(struct virtio_mem *vm) { - const unsigned long old_nb_mb = vm->next_mb_id - vm->first_mb_id; + const unsigned long old_nb_mb = vm->sbm.next_mb_id - vm->sbm.first_mb_id; const unsigned long old_nb_bits = old_nb_mb * vm->sbm.sbs_per_mb; const unsigned long new_nb_bits = (old_nb_mb + 1) * vm->sbm.sbs_per_mb; int old_pages = PFN_UP(BITS_TO_LONGS(old_nb_bits) * sizeof(long)); @@ -1194,7 +1194,7 @@ static int virtio_mem_prepare_next_mb(struct virtio_mem *vm, { int rc; - if (vm->next_mb_id > vm->last_usable_mb_id) + if (vm->sbm.next_mb_id > vm->sbm.last_usable_mb_id) return -ENOSPC; /* Resize the state array if required. */ @@ -1208,7 +1208,7 @@ static int virtio_mem_prepare_next_mb(struct virtio_mem *vm, return rc; vm->sbm.mb_count[VIRTIO_MEM_SBM_MB_UNUSED]++; - *mb_id = vm->next_mb_id++; + *mb_id = vm->sbm.next_mb_id++; return 0; } @@ -1643,7 +1643,7 @@ static void virtio_mem_refresh_config(struct virtio_mem *vm) usable_region_size, &usable_region_size); end_addr = vm->addr + usable_region_size; end_addr = min(end_addr, phys_limit); - vm->last_usable_mb_id = virtio_mem_phys_to_mb_id(end_addr) - 1; + vm->sbm.last_usable_mb_id = virtio_mem_phys_to_mb_id(end_addr) - 1; /* see if there is a request to change the size */ virtio_cread_le(vm->vdev, struct virtio_mem_config, requested_size, @@ -1834,9 +1834,9 @@ static int virtio_mem_init(struct virtio_mem *vm) vm->sbm.sbs_per_mb = memory_block_size_bytes() / vm->sbm.sb_size; /* Round up to the next full memory block */ - vm->first_mb_id = virtio_mem_phys_to_mb_id(vm->addr - 1 + - memory_block_size_bytes()); - vm->next_mb_id = vm->first_mb_id; + vm->sbm.first_mb_id = virtio_mem_phys_to_mb_id(vm->addr - 1 + + memory_block_size_bytes()); + vm->sbm.next_mb_id = vm->sbm.first_mb_id; /* Prepare the offline threshold - make sure we can add two blocks. */ vm->offline_threshold = max_t(uint64_t, 2 * memory_block_size_bytes(), -- GitLab From 602ef89457173a24dde30874faec1f15a00e112a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:38:08 +0100 Subject: [PATCH 0841/1894] virito-mem: existing (un)plug functions are specific to Sub Block Mode (SBM) Let's rename them accordingly. virtio_mem_plug_request() and virtio_mem_unplug_request() will be handled separately. Reviewed-by: Wei Yang Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-23-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 90 ++++++++++++++++++------------------- 1 file changed, 43 insertions(+), 47 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 248d28e653a9b..ec81f9d4bccf5 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -1099,8 +1099,8 @@ static int virtio_mem_send_unplug_all_request(struct virtio_mem *vm) * Plug selected subblocks. Updates the plugged state, but not the state * of the memory block. */ -static int virtio_mem_mb_plug_sb(struct virtio_mem *vm, unsigned long mb_id, - int sb_id, int count) +static int virtio_mem_sbm_plug_sb(struct virtio_mem *vm, unsigned long mb_id, + int sb_id, int count) { const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id) + sb_id * vm->sbm.sb_size; @@ -1117,8 +1117,8 @@ static int virtio_mem_mb_plug_sb(struct virtio_mem *vm, unsigned long mb_id, * Unplug selected subblocks. Updates the plugged state, but not the state * of the memory block. */ -static int virtio_mem_mb_unplug_sb(struct virtio_mem *vm, unsigned long mb_id, - int sb_id, int count) +static int virtio_mem_sbm_unplug_sb(struct virtio_mem *vm, unsigned long mb_id, + int sb_id, int count) { const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id) + sb_id * vm->sbm.sb_size; @@ -1140,8 +1140,8 @@ static int virtio_mem_mb_unplug_sb(struct virtio_mem *vm, unsigned long mb_id, * * Note: can fail after some subblocks were unplugged. */ -static int virtio_mem_mb_unplug_any_sb(struct virtio_mem *vm, - unsigned long mb_id, uint64_t *nb_sb) +static int virtio_mem_sbm_unplug_any_sb(struct virtio_mem *vm, + unsigned long mb_id, uint64_t *nb_sb) { int sb_id, count; int rc; @@ -1162,7 +1162,7 @@ static int virtio_mem_mb_unplug_any_sb(struct virtio_mem *vm, sb_id--; } - rc = virtio_mem_mb_unplug_sb(vm, mb_id, sb_id, count); + rc = virtio_mem_sbm_unplug_sb(vm, mb_id, sb_id, count); if (rc) return rc; *nb_sb -= count; @@ -1179,18 +1179,18 @@ static int virtio_mem_mb_unplug_any_sb(struct virtio_mem *vm, * * Note: can fail after some subblocks were unplugged. */ -static int virtio_mem_mb_unplug(struct virtio_mem *vm, unsigned long mb_id) +static int virtio_mem_sbm_unplug_mb(struct virtio_mem *vm, unsigned long mb_id) { uint64_t nb_sb = vm->sbm.sbs_per_mb; - return virtio_mem_mb_unplug_any_sb(vm, mb_id, &nb_sb); + return virtio_mem_sbm_unplug_any_sb(vm, mb_id, &nb_sb); } /* * Prepare tracking data for the next memory block. */ -static int virtio_mem_prepare_next_mb(struct virtio_mem *vm, - unsigned long *mb_id) +static int virtio_mem_sbm_prepare_next_mb(struct virtio_mem *vm, + unsigned long *mb_id) { int rc; @@ -1218,9 +1218,8 @@ static int virtio_mem_prepare_next_mb(struct virtio_mem *vm, * * Will modify the state of the memory block. */ -static int virtio_mem_mb_plug_and_add(struct virtio_mem *vm, - unsigned long mb_id, - uint64_t *nb_sb) +static int virtio_mem_sbm_plug_and_add_mb(struct virtio_mem *vm, + unsigned long mb_id, uint64_t *nb_sb) { const int count = min_t(int, *nb_sb, vm->sbm.sbs_per_mb); int rc; @@ -1232,7 +1231,7 @@ static int virtio_mem_mb_plug_and_add(struct virtio_mem *vm, * Plug the requested number of subblocks before adding it to linux, * so that onlining will directly online all plugged subblocks. */ - rc = virtio_mem_mb_plug_sb(vm, mb_id, 0, count); + rc = virtio_mem_sbm_plug_sb(vm, mb_id, 0, count); if (rc) return rc; @@ -1259,7 +1258,7 @@ static int virtio_mem_mb_plug_and_add(struct virtio_mem *vm, * TODO: Linux MM does not properly clean up yet in all cases * where adding of memory failed - especially on -ENOMEM. */ - if (virtio_mem_mb_unplug_sb(vm, mb_id, 0, count)) + if (virtio_mem_sbm_unplug_sb(vm, mb_id, 0, count)) new_state = VIRTIO_MEM_SBM_MB_PLUGGED; virtio_mem_sbm_set_mb_state(vm, mb_id, new_state); return rc; @@ -1277,8 +1276,9 @@ static int virtio_mem_mb_plug_and_add(struct virtio_mem *vm, * * Note: Can fail after some subblocks were successfully plugged. */ -static int virtio_mem_mb_plug_any_sb(struct virtio_mem *vm, unsigned long mb_id, - uint64_t *nb_sb, bool online) +static int virtio_mem_sbm_plug_any_sb(struct virtio_mem *vm, + unsigned long mb_id, uint64_t *nb_sb, + bool online) { unsigned long pfn, nr_pages; int sb_id, count; @@ -1297,7 +1297,7 @@ static int virtio_mem_mb_plug_any_sb(struct virtio_mem *vm, unsigned long mb_id, !virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id + count, 1)) count++; - rc = virtio_mem_mb_plug_sb(vm, mb_id, sb_id, count); + rc = virtio_mem_sbm_plug_sb(vm, mb_id, sb_id, count); if (rc) return rc; *nb_sb -= count; @@ -1341,7 +1341,7 @@ static int virtio_mem_plug_request(struct virtio_mem *vm, uint64_t diff) /* Try to plug subblocks of partially plugged online blocks. */ virtio_mem_sbm_for_each_mb(vm, mb_id, VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL) { - rc = virtio_mem_mb_plug_any_sb(vm, mb_id, &nb_sb, true); + rc = virtio_mem_sbm_plug_any_sb(vm, mb_id, &nb_sb, true); if (rc || !nb_sb) goto out_unlock; cond_resched(); @@ -1350,7 +1350,7 @@ static int virtio_mem_plug_request(struct virtio_mem *vm, uint64_t diff) /* Try to plug subblocks of partially plugged offline blocks. */ virtio_mem_sbm_for_each_mb(vm, mb_id, VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL) { - rc = virtio_mem_mb_plug_any_sb(vm, mb_id, &nb_sb, false); + rc = virtio_mem_sbm_plug_any_sb(vm, mb_id, &nb_sb, false); if (rc || !nb_sb) goto out_unlock; cond_resched(); @@ -1367,7 +1367,7 @@ static int virtio_mem_plug_request(struct virtio_mem *vm, uint64_t diff) if (!virtio_mem_could_add_memory(vm, memory_block_size_bytes())) return -ENOSPC; - rc = virtio_mem_mb_plug_and_add(vm, mb_id, &nb_sb); + rc = virtio_mem_sbm_plug_and_add_mb(vm, mb_id, &nb_sb); if (rc || !nb_sb) return rc; cond_resched(); @@ -1378,10 +1378,10 @@ static int virtio_mem_plug_request(struct virtio_mem *vm, uint64_t diff) if (!virtio_mem_could_add_memory(vm, memory_block_size_bytes())) return -ENOSPC; - rc = virtio_mem_prepare_next_mb(vm, &mb_id); + rc = virtio_mem_sbm_prepare_next_mb(vm, &mb_id); if (rc) return rc; - rc = virtio_mem_mb_plug_and_add(vm, mb_id, &nb_sb); + rc = virtio_mem_sbm_plug_and_add_mb(vm, mb_id, &nb_sb); if (rc) return rc; cond_resched(); @@ -1402,13 +1402,13 @@ out_unlock: * * Note: Can fail after some subblocks were successfully unplugged. */ -static int virtio_mem_mb_unplug_any_sb_offline(struct virtio_mem *vm, - unsigned long mb_id, - uint64_t *nb_sb) +static int virtio_mem_sbm_unplug_any_sb_offline(struct virtio_mem *vm, + unsigned long mb_id, + uint64_t *nb_sb) { int rc; - rc = virtio_mem_mb_unplug_any_sb(vm, mb_id, nb_sb); + rc = virtio_mem_sbm_unplug_any_sb(vm, mb_id, nb_sb); /* some subblocks might have been unplugged even on failure */ if (!virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) @@ -1440,9 +1440,9 @@ static int virtio_mem_mb_unplug_any_sb_offline(struct virtio_mem *vm, * * Will modify the state of the memory block. */ -static int virtio_mem_mb_unplug_sb_online(struct virtio_mem *vm, - unsigned long mb_id, int sb_id, - int count) +static int virtio_mem_sbm_unplug_sb_online(struct virtio_mem *vm, + unsigned long mb_id, int sb_id, + int count) { const unsigned long nr_pages = PFN_DOWN(vm->sbm.sb_size) * count; unsigned long start_pfn; @@ -1456,7 +1456,7 @@ static int virtio_mem_mb_unplug_sb_online(struct virtio_mem *vm, return rc; /* Try to unplug the allocated memory */ - rc = virtio_mem_mb_unplug_sb(vm, mb_id, sb_id, count); + rc = virtio_mem_sbm_unplug_sb(vm, mb_id, sb_id, count); if (rc) { /* Return the memory to the buddy. */ virtio_mem_fake_online(start_pfn, nr_pages); @@ -1478,17 +1478,17 @@ static int virtio_mem_mb_unplug_sb_online(struct virtio_mem *vm, * Note: Can fail after some subblocks were successfully unplugged. Can * return 0 even if subblocks were busy and could not get unplugged. */ -static int virtio_mem_mb_unplug_any_sb_online(struct virtio_mem *vm, - unsigned long mb_id, - uint64_t *nb_sb) +static int virtio_mem_sbm_unplug_any_sb_online(struct virtio_mem *vm, + unsigned long mb_id, + uint64_t *nb_sb) { int rc, sb_id; /* If possible, try to unplug the complete block in one shot. */ if (*nb_sb >= vm->sbm.sbs_per_mb && virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) { - rc = virtio_mem_mb_unplug_sb_online(vm, mb_id, 0, - vm->sbm.sbs_per_mb); + rc = virtio_mem_sbm_unplug_sb_online(vm, mb_id, 0, + vm->sbm.sbs_per_mb); if (!rc) { *nb_sb -= vm->sbm.sbs_per_mb; goto unplugged; @@ -1505,7 +1505,7 @@ static int virtio_mem_mb_unplug_any_sb_online(struct virtio_mem *vm, if (sb_id < 0) break; - rc = virtio_mem_mb_unplug_sb_online(vm, mb_id, sb_id, 1); + rc = virtio_mem_sbm_unplug_sb_online(vm, mb_id, sb_id, 1); if (rc == -EBUSY) continue; else if (rc) @@ -1553,8 +1553,7 @@ static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff) /* Try to unplug subblocks of partially plugged offline blocks. */ virtio_mem_sbm_for_each_mb_rev(vm, mb_id, VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL) { - rc = virtio_mem_mb_unplug_any_sb_offline(vm, mb_id, - &nb_sb); + rc = virtio_mem_sbm_unplug_any_sb_offline(vm, mb_id, &nb_sb); if (rc || !nb_sb) goto out_unlock; cond_resched(); @@ -1562,8 +1561,7 @@ static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff) /* Try to unplug subblocks of plugged offline blocks. */ virtio_mem_sbm_for_each_mb_rev(vm, mb_id, VIRTIO_MEM_SBM_MB_OFFLINE) { - rc = virtio_mem_mb_unplug_any_sb_offline(vm, mb_id, - &nb_sb); + rc = virtio_mem_sbm_unplug_any_sb_offline(vm, mb_id, &nb_sb); if (rc || !nb_sb) goto out_unlock; cond_resched(); @@ -1577,8 +1575,7 @@ static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff) /* Try to unplug subblocks of partially plugged online blocks. */ virtio_mem_sbm_for_each_mb_rev(vm, mb_id, VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL) { - rc = virtio_mem_mb_unplug_any_sb_online(vm, mb_id, - &nb_sb); + rc = virtio_mem_sbm_unplug_any_sb_online(vm, mb_id, &nb_sb); if (rc || !nb_sb) goto out_unlock; mutex_unlock(&vm->hotplug_mutex); @@ -1588,8 +1585,7 @@ static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff) /* Try to unplug subblocks of plugged online blocks. */ virtio_mem_sbm_for_each_mb_rev(vm, mb_id, VIRTIO_MEM_SBM_MB_ONLINE) { - rc = virtio_mem_mb_unplug_any_sb_online(vm, mb_id, - &nb_sb); + rc = virtio_mem_sbm_unplug_any_sb_online(vm, mb_id, &nb_sb); if (rc || !nb_sb) goto out_unlock; mutex_unlock(&vm->hotplug_mutex); @@ -1614,7 +1610,7 @@ static int virtio_mem_unplug_pending_mb(struct virtio_mem *vm) int rc; virtio_mem_sbm_for_each_mb(vm, mb_id, VIRTIO_MEM_SBM_MB_PLUGGED) { - rc = virtio_mem_mb_unplug(vm, mb_id); + rc = virtio_mem_sbm_unplug_mb(vm, mb_id); if (rc) return rc; virtio_mem_sbm_set_mb_state(vm, mb_id, -- GitLab From d46dfb62f676f949352c7fd8b7a0fa3b7fe1b933 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:38:09 +0100 Subject: [PATCH 0842/1894] virtio-mem: memory notifier callbacks are specific to Sub Block Mode (SBM) Let's rename accordingly. Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-24-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index ec81f9d4bccf5..cdcf67e55a56b 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -587,8 +587,8 @@ static bool virtio_mem_contains_range(struct virtio_mem *vm, uint64_t start, return start >= vm->addr && start + size <= vm->addr + vm->region_size; } -static int virtio_mem_notify_going_online(struct virtio_mem *vm, - unsigned long mb_id) +static int virtio_mem_sbm_notify_going_online(struct virtio_mem *vm, + unsigned long mb_id) { switch (virtio_mem_sbm_get_mb_state(vm, mb_id)) { case VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL: @@ -602,8 +602,8 @@ static int virtio_mem_notify_going_online(struct virtio_mem *vm, return NOTIFY_BAD; } -static void virtio_mem_notify_offline(struct virtio_mem *vm, - unsigned long mb_id) +static void virtio_mem_sbm_notify_offline(struct virtio_mem *vm, + unsigned long mb_id) { switch (virtio_mem_sbm_get_mb_state(vm, mb_id)) { case VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL: @@ -620,7 +620,8 @@ static void virtio_mem_notify_offline(struct virtio_mem *vm, } } -static void virtio_mem_notify_online(struct virtio_mem *vm, unsigned long mb_id) +static void virtio_mem_sbm_notify_online(struct virtio_mem *vm, + unsigned long mb_id) { switch (virtio_mem_sbm_get_mb_state(vm, mb_id)) { case VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL: @@ -637,8 +638,8 @@ static void virtio_mem_notify_online(struct virtio_mem *vm, unsigned long mb_id) } } -static void virtio_mem_notify_going_offline(struct virtio_mem *vm, - unsigned long mb_id) +static void virtio_mem_sbm_notify_going_offline(struct virtio_mem *vm, + unsigned long mb_id) { const unsigned long nr_pages = PFN_DOWN(vm->sbm.sb_size); unsigned long pfn; @@ -653,8 +654,8 @@ static void virtio_mem_notify_going_offline(struct virtio_mem *vm, } } -static void virtio_mem_notify_cancel_offline(struct virtio_mem *vm, - unsigned long mb_id) +static void virtio_mem_sbm_notify_cancel_offline(struct virtio_mem *vm, + unsigned long mb_id) { const unsigned long nr_pages = PFN_DOWN(vm->sbm.sb_size); unsigned long pfn; @@ -714,7 +715,7 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, break; } vm->hotplug_active = true; - virtio_mem_notify_going_offline(vm, mb_id); + virtio_mem_sbm_notify_going_offline(vm, mb_id); break; case MEM_GOING_ONLINE: mutex_lock(&vm->hotplug_mutex); @@ -724,10 +725,10 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, break; } vm->hotplug_active = true; - rc = virtio_mem_notify_going_online(vm, mb_id); + rc = virtio_mem_sbm_notify_going_online(vm, mb_id); break; case MEM_OFFLINE: - virtio_mem_notify_offline(vm, mb_id); + virtio_mem_sbm_notify_offline(vm, mb_id); atomic64_add(size, &vm->offline_size); /* @@ -741,7 +742,7 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, mutex_unlock(&vm->hotplug_mutex); break; case MEM_ONLINE: - virtio_mem_notify_online(vm, mb_id); + virtio_mem_sbm_notify_online(vm, mb_id); atomic64_sub(size, &vm->offline_size); /* @@ -760,7 +761,7 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, case MEM_CANCEL_OFFLINE: if (!vm->hotplug_active) break; - virtio_mem_notify_cancel_offline(vm, mb_id); + virtio_mem_sbm_notify_cancel_offline(vm, mb_id); vm->hotplug_active = false; mutex_unlock(&vm->hotplug_mutex); break; -- GitLab From 01afdee29aef144ad956d1d5302aaaeabf498f48 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:38:10 +0100 Subject: [PATCH 0843/1894] virtio-mem: factor out adding/removing memory from Linux Let's use wrappers for the low-level functions that dev_dbg/dev_warn and work on addr + size, such that we can reuse them for adding/removing in other granularity. We only warn when adding memory failed, because that's something to pay attention to. We won't warn when removing failed, we'll reuse that in racy context soon (and we do have proper BUG_ON() statements in the current cases where it must never happen). Reviewed-by: Wei Yang Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-25-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 107 ++++++++++++++++++++++++------------ 1 file changed, 73 insertions(+), 34 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index cdcf67e55a56b..95fa0262af1de 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -451,18 +451,16 @@ static bool virtio_mem_could_add_memory(struct virtio_mem *vm, uint64_t size) } /* - * Try to add a memory block to Linux. This will usually only fail - * if out of memory. + * Try adding memory to Linux. Will usually only fail if out of memory. * * Must not be called with the vm->hotplug_mutex held (possible deadlock with * onlining code). * - * Will not modify the state of the memory block. + * Will not modify the state of memory blocks in virtio-mem. */ -static int virtio_mem_mb_add(struct virtio_mem *vm, unsigned long mb_id) +static int virtio_mem_add_memory(struct virtio_mem *vm, uint64_t addr, + uint64_t size) { - const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id); - const uint64_t size = memory_block_size_bytes(); int rc; /* @@ -476,32 +474,50 @@ static int virtio_mem_mb_add(struct virtio_mem *vm, unsigned long mb_id) return -ENOMEM; } - dev_dbg(&vm->vdev->dev, "adding memory block: %lu\n", mb_id); + dev_dbg(&vm->vdev->dev, "adding memory: 0x%llx - 0x%llx\n", addr, + addr + size - 1); /* Memory might get onlined immediately. */ atomic64_add(size, &vm->offline_size); rc = add_memory_driver_managed(vm->nid, addr, size, vm->resource_name, MEMHP_MERGE_RESOURCE); - if (rc) + if (rc) { atomic64_sub(size, &vm->offline_size); + dev_warn(&vm->vdev->dev, "adding memory failed: %d\n", rc); + /* + * TODO: Linux MM does not properly clean up yet in all cases + * where adding of memory failed - especially on -ENOMEM. + */ + } return rc; } /* - * Try to remove a memory block from Linux. Will only fail if the memory block - * is not offline. + * See virtio_mem_add_memory(): Try adding a single Linux memory block. + */ +static int virtio_mem_sbm_add_mb(struct virtio_mem *vm, unsigned long mb_id) +{ + const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id); + const uint64_t size = memory_block_size_bytes(); + + return virtio_mem_add_memory(vm, addr, size); +} + +/* + * Try removing memory from Linux. Will only fail if memory blocks aren't + * offline. * * Must not be called with the vm->hotplug_mutex held (possible deadlock with * onlining code). * - * Will not modify the state of the memory block. + * Will not modify the state of memory blocks in virtio-mem. */ -static int virtio_mem_mb_remove(struct virtio_mem *vm, unsigned long mb_id) +static int virtio_mem_remove_memory(struct virtio_mem *vm, uint64_t addr, + uint64_t size) { - const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id); - const uint64_t size = memory_block_size_bytes(); int rc; - dev_dbg(&vm->vdev->dev, "removing memory block: %lu\n", mb_id); + dev_dbg(&vm->vdev->dev, "removing memory: 0x%llx - 0x%llx\n", addr, + addr + size - 1); rc = remove_memory(vm->nid, addr, size); if (!rc) { atomic64_sub(size, &vm->offline_size); @@ -510,27 +526,41 @@ static int virtio_mem_mb_remove(struct virtio_mem *vm, unsigned long mb_id) * immediately instead of waiting. */ virtio_mem_retry(vm); + } else { + dev_dbg(&vm->vdev->dev, "removing memory failed: %d\n", rc); } return rc; } /* - * Try to offline and remove a memory block from Linux. + * See virtio_mem_remove_memory(): Try removing a single Linux memory block. + */ +static int virtio_mem_sbm_remove_mb(struct virtio_mem *vm, unsigned long mb_id) +{ + const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id); + const uint64_t size = memory_block_size_bytes(); + + return virtio_mem_remove_memory(vm, addr, size); +} + +/* + * Try offlining and removing memory from Linux. * * Must not be called with the vm->hotplug_mutex held (possible deadlock with * onlining code). * - * Will not modify the state of the memory block. + * Will not modify the state of memory blocks in virtio-mem. */ -static int virtio_mem_mb_offline_and_remove(struct virtio_mem *vm, - unsigned long mb_id) +static int virtio_mem_offline_and_remove_memory(struct virtio_mem *vm, + uint64_t addr, + uint64_t size) { - const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id); - const uint64_t size = memory_block_size_bytes(); int rc; - dev_dbg(&vm->vdev->dev, "offlining and removing memory block: %lu\n", - mb_id); + dev_dbg(&vm->vdev->dev, + "offlining and removing memory: 0x%llx - 0x%llx\n", addr, + addr + size - 1); + rc = offline_and_remove_memory(vm->nid, addr, size); if (!rc) { atomic64_sub(size, &vm->offline_size); @@ -539,10 +569,26 @@ static int virtio_mem_mb_offline_and_remove(struct virtio_mem *vm, * immediately instead of waiting. */ virtio_mem_retry(vm); + } else { + dev_dbg(&vm->vdev->dev, + "offlining and removing memory failed: %d\n", rc); } return rc; } +/* + * See virtio_mem_offline_and_remove_memory(): Try offlining and removing + * a single Linux memory block. + */ +static int virtio_mem_sbm_offline_and_remove_mb(struct virtio_mem *vm, + unsigned long mb_id) +{ + const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id); + const uint64_t size = memory_block_size_bytes(); + + return virtio_mem_offline_and_remove_memory(vm, addr, size); +} + /* * Trigger the workqueue so the device can perform its magic. */ @@ -1248,17 +1294,10 @@ static int virtio_mem_sbm_plug_and_add_mb(struct virtio_mem *vm, VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL); /* Add the memory block to linux - if that fails, try to unplug. */ - rc = virtio_mem_mb_add(vm, mb_id); + rc = virtio_mem_sbm_add_mb(vm, mb_id); if (rc) { int new_state = VIRTIO_MEM_SBM_MB_UNUSED; - dev_err(&vm->vdev->dev, - "adding memory block %lu failed with %d\n", mb_id, rc); - - /* - * TODO: Linux MM does not properly clean up yet in all cases - * where adding of memory failed - especially on -ENOMEM. - */ if (virtio_mem_sbm_unplug_sb(vm, mb_id, 0, count)) new_state = VIRTIO_MEM_SBM_MB_PLUGGED; virtio_mem_sbm_set_mb_state(vm, mb_id, new_state); @@ -1429,7 +1468,7 @@ static int virtio_mem_sbm_unplug_any_sb_offline(struct virtio_mem *vm, VIRTIO_MEM_SBM_MB_UNUSED); mutex_unlock(&vm->hotplug_mutex); - rc = virtio_mem_mb_remove(vm, mb_id); + rc = virtio_mem_sbm_remove_mb(vm, mb_id); BUG_ON(rc); mutex_lock(&vm->hotplug_mutex); } @@ -1522,7 +1561,7 @@ unplugged: */ if (virtio_mem_sbm_test_sb_unplugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) { mutex_unlock(&vm->hotplug_mutex); - rc = virtio_mem_mb_offline_and_remove(vm, mb_id); + rc = virtio_mem_sbm_offline_and_remove_mb(vm, mb_id); mutex_lock(&vm->hotplug_mutex); if (!rc) virtio_mem_sbm_set_mb_state(vm, mb_id, @@ -2009,7 +2048,7 @@ static void virtio_mem_remove(struct virtio_device *vdev) */ virtio_mem_sbm_for_each_mb(vm, mb_id, VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL) { - rc = virtio_mem_mb_remove(vm, mb_id); + rc = virtio_mem_sbm_remove_mb(vm, mb_id); BUG_ON(rc); virtio_mem_sbm_set_mb_state(vm, mb_id, VIRTIO_MEM_SBM_MB_UNUSED); -- GitLab From 4ba50cd3355d742c8befbfe38dcbe559f2b0f758 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:38:11 +0100 Subject: [PATCH 0844/1894] virtio-mem: Big Block Mode (BBM) memory hotplug Currently, we do not support device block sizes that exceed the Linux memory block size. For example, having a device block size of 1 GiB (e.g., gigantic pages in the hypervisor) won't work with 128 MiB Linux memory blocks. Let's implement Big Block Mode (BBM), whereby we add/remove at least one Linux memory block at a time. With a 1 GiB device block size, a Big Block (BB) will cover 8 Linux memory blocks. We'll keep registering the online_page_callback machinery, it will be used for safe memory hotunplug in BBM next. Note: BBM is properly prepared for variable-sized Linux memory blocks that we might see in the future. So we won't care how many Linux memory blocks a big block actually spans, and how the memory notifier is called. Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Cc: Michal Hocko Cc: Oscar Salvador Cc: Wei Yang Cc: Andrew Morton Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-26-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 560 ++++++++++++++++++++++++++++-------- 1 file changed, 441 insertions(+), 119 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 95fa0262af1de..8a4f735360ac3 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -30,12 +30,18 @@ MODULE_PARM_DESC(unplug_online, "Try to unplug online memory"); /* * virtio-mem currently supports the following modes of operation: * - * * Sub Block Mode (SBM): A Linux memory block spans 1..X subblocks (SB). The + * * Sub Block Mode (SBM): A Linux memory block spans 2..X subblocks (SB). The * size of a Sub Block (SB) is determined based on the device block size, the * pageblock size, and the maximum allocation granularity of the buddy. * Subblocks within a Linux memory block might either be plugged or unplugged. * Memory is added/removed to Linux MM in Linux memory block granularity. * + * * Big Block Mode (BBM): A Big Block (BB) spans 1..X Linux memory blocks. + * Memory is added/removed to Linux MM in Big Block granularity. + * + * The mode is determined automatically based on the Linux memory block size + * and the device block size. + * * User space / core MM (auto onlining) is responsible for onlining added * Linux memory blocks - and for selecting a zone. Linux Memory Blocks are * always onlined separately, and all memory within a Linux memory block is @@ -61,6 +67,19 @@ enum virtio_mem_sbm_mb_state { VIRTIO_MEM_SBM_MB_COUNT }; +/* + * State of a Big Block (BB) in BBM, covering 1..X Linux memory blocks. + */ +enum virtio_mem_bbm_bb_state { + /* Unplugged, not added to Linux. Can be reused later. */ + VIRTIO_MEM_BBM_BB_UNUSED = 0, + /* Plugged, not added to Linux. Error on add_memory(). */ + VIRTIO_MEM_BBM_BB_PLUGGED, + /* Plugged and added to Linux. */ + VIRTIO_MEM_BBM_BB_ADDED, + VIRTIO_MEM_BBM_BB_COUNT +}; + struct virtio_mem { struct virtio_device *vdev; @@ -113,47 +132,70 @@ struct virtio_mem { atomic64_t offline_size; uint64_t offline_threshold; - struct { - /* Id of the first memory block of this device. */ - unsigned long first_mb_id; - /* Id of the last usable memory block of this device. */ - unsigned long last_usable_mb_id; - /* Id of the next memory bock to prepare when needed. */ - unsigned long next_mb_id; - - /* The subblock size. */ - uint64_t sb_size; - /* The number of subblocks per Linux memory block. */ - uint32_t sbs_per_mb; - - /* Summary of all memory block states. */ - unsigned long mb_count[VIRTIO_MEM_SBM_MB_COUNT]; - - /* - * One byte state per memory block. Allocated via vmalloc(). - * Resized (alloc+copy+free) on demand. - * - * With 128 MiB memory blocks, we have states for 512 GiB of - * memory in one 4 KiB page. - */ - uint8_t *mb_states; - - /* - * Bitmap: one bit per subblock. Allocated similar to - * sbm.mb_states. - * - * A set bit means the corresponding subblock is plugged, - * otherwise it's unblocked. - * - * With 4 MiB subblocks, we manage 128 GiB of memory in one - * 4 KiB page. - */ - unsigned long *sb_states; - } sbm; + /* If set, the driver is in SBM, otherwise in BBM. */ + bool in_sbm; + + union { + struct { + /* Id of the first memory block of this device. */ + unsigned long first_mb_id; + /* Id of the last usable memory block of this device. */ + unsigned long last_usable_mb_id; + /* Id of the next memory bock to prepare when needed. */ + unsigned long next_mb_id; + + /* The subblock size. */ + uint64_t sb_size; + /* The number of subblocks per Linux memory block. */ + uint32_t sbs_per_mb; + + /* Summary of all memory block states. */ + unsigned long mb_count[VIRTIO_MEM_SBM_MB_COUNT]; + + /* + * One byte state per memory block. Allocated via + * vmalloc(). Resized (alloc+copy+free) on demand. + * + * With 128 MiB memory blocks, we have states for 512 + * GiB of memory in one 4 KiB page. + */ + uint8_t *mb_states; + + /* + * Bitmap: one bit per subblock. Allocated similar to + * sbm.mb_states. + * + * A set bit means the corresponding subblock is + * plugged, otherwise it's unblocked. + * + * With 4 MiB subblocks, we manage 128 GiB of memory + * in one 4 KiB page. + */ + unsigned long *sb_states; + } sbm; + + struct { + /* Id of the first big block of this device. */ + unsigned long first_bb_id; + /* Id of the last usable big block of this device. */ + unsigned long last_usable_bb_id; + /* Id of the next device bock to prepare when needed. */ + unsigned long next_bb_id; + + /* Summary of all big block states. */ + unsigned long bb_count[VIRTIO_MEM_BBM_BB_COUNT]; + + /* One byte state per big block. See sbm.mb_states. */ + uint8_t *bb_states; + + /* The block size used for plugging/adding/removing. */ + uint64_t bb_size; + } bbm; + }; /* - * Mutex that protects the sbm.mb_count, sbm.mb_states, and - * sbm.sb_states. + * Mutex that protects the sbm.mb_count, sbm.mb_states, + * sbm.sb_states, bbm.bb_count, and bbm.bb_states * * When this lock is held the pointers can't change, ONLINE and * OFFLINE blocks can't change the state and no subblocks will get @@ -247,6 +289,24 @@ static unsigned long virtio_mem_mb_id_to_phys(unsigned long mb_id) return mb_id * memory_block_size_bytes(); } +/* + * Calculate the big block id of a given address. + */ +static unsigned long virtio_mem_phys_to_bb_id(struct virtio_mem *vm, + uint64_t addr) +{ + return addr / vm->bbm.bb_size; +} + +/* + * Calculate the physical start address of a given big block id. + */ +static uint64_t virtio_mem_bb_id_to_phys(struct virtio_mem *vm, + unsigned long bb_id) +{ + return bb_id * vm->bbm.bb_size; +} + /* * Calculate the subblock id of a given address. */ @@ -259,6 +319,67 @@ static unsigned long virtio_mem_phys_to_sb_id(struct virtio_mem *vm, return (addr - mb_addr) / vm->sbm.sb_size; } +/* + * Set the state of a big block, taking care of the state counter. + */ +static void virtio_mem_bbm_set_bb_state(struct virtio_mem *vm, + unsigned long bb_id, + enum virtio_mem_bbm_bb_state state) +{ + const unsigned long idx = bb_id - vm->bbm.first_bb_id; + enum virtio_mem_bbm_bb_state old_state; + + old_state = vm->bbm.bb_states[idx]; + vm->bbm.bb_states[idx] = state; + + BUG_ON(vm->bbm.bb_count[old_state] == 0); + vm->bbm.bb_count[old_state]--; + vm->bbm.bb_count[state]++; +} + +/* + * Get the state of a big block. + */ +static enum virtio_mem_bbm_bb_state virtio_mem_bbm_get_bb_state(struct virtio_mem *vm, + unsigned long bb_id) +{ + return vm->bbm.bb_states[bb_id - vm->bbm.first_bb_id]; +} + +/* + * Prepare the big block state array for the next big block. + */ +static int virtio_mem_bbm_bb_states_prepare_next_bb(struct virtio_mem *vm) +{ + unsigned long old_bytes = vm->bbm.next_bb_id - vm->bbm.first_bb_id; + unsigned long new_bytes = old_bytes + 1; + int old_pages = PFN_UP(old_bytes); + int new_pages = PFN_UP(new_bytes); + uint8_t *new_array; + + if (vm->bbm.bb_states && old_pages == new_pages) + return 0; + + new_array = vzalloc(new_pages * PAGE_SIZE); + if (!new_array) + return -ENOMEM; + + mutex_lock(&vm->hotplug_mutex); + if (vm->bbm.bb_states) + memcpy(new_array, vm->bbm.bb_states, old_pages * PAGE_SIZE); + vfree(vm->bbm.bb_states); + vm->bbm.bb_states = new_array; + mutex_unlock(&vm->hotplug_mutex); + + return 0; +} + +#define virtio_mem_bbm_for_each_bb(_vm, _bb_id, _state) \ + for (_bb_id = vm->bbm.first_bb_id; \ + _bb_id < vm->bbm.next_bb_id && _vm->bbm.bb_count[_state]; \ + _bb_id++) \ + if (virtio_mem_bbm_get_bb_state(_vm, _bb_id) == _state) + /* * Set the state of a memory block, taking care of the state counter. */ @@ -502,6 +623,17 @@ static int virtio_mem_sbm_add_mb(struct virtio_mem *vm, unsigned long mb_id) return virtio_mem_add_memory(vm, addr, size); } +/* + * See virtio_mem_add_memory(): Try adding a big block. + */ +static int virtio_mem_bbm_add_bb(struct virtio_mem *vm, unsigned long bb_id) +{ + const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id); + const uint64_t size = vm->bbm.bb_size; + + return virtio_mem_add_memory(vm, addr, size); +} + /* * Try removing memory from Linux. Will only fail if memory blocks aren't * offline. @@ -729,20 +861,33 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, struct memory_notify *mhp = arg; const unsigned long start = PFN_PHYS(mhp->start_pfn); const unsigned long size = PFN_PHYS(mhp->nr_pages); - const unsigned long mb_id = virtio_mem_phys_to_mb_id(start); int rc = NOTIFY_OK; + unsigned long id; if (!virtio_mem_overlaps_range(vm, start, size)) return NOTIFY_DONE; - /* - * Memory is onlined/offlined in memory block granularity. We cannot - * cross virtio-mem device boundaries and memory block boundaries. Bail - * out if this ever changes. - */ - if (WARN_ON_ONCE(size != memory_block_size_bytes() || - !IS_ALIGNED(start, memory_block_size_bytes()))) - return NOTIFY_BAD; + if (vm->in_sbm) { + id = virtio_mem_phys_to_mb_id(start); + /* + * In SBM, we add memory in separate memory blocks - we expect + * it to be onlined/offlined in the same granularity. Bail out + * if this ever changes. + */ + if (WARN_ON_ONCE(size != memory_block_size_bytes() || + !IS_ALIGNED(start, memory_block_size_bytes()))) + return NOTIFY_BAD; + } else { + id = virtio_mem_phys_to_bb_id(vm, start); + /* + * In BBM, we only care about onlining/offlining happening + * within a single big block, we don't care about the + * actual granularity as we don't track individual Linux + * memory blocks. + */ + if (WARN_ON_ONCE(id != virtio_mem_phys_to_bb_id(vm, start + size - 1))) + return NOTIFY_BAD; + } /* * Avoid circular locking lockdep warnings. We lock the mutex @@ -761,7 +906,8 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, break; } vm->hotplug_active = true; - virtio_mem_sbm_notify_going_offline(vm, mb_id); + if (vm->in_sbm) + virtio_mem_sbm_notify_going_offline(vm, id); break; case MEM_GOING_ONLINE: mutex_lock(&vm->hotplug_mutex); @@ -771,10 +917,12 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, break; } vm->hotplug_active = true; - rc = virtio_mem_sbm_notify_going_online(vm, mb_id); + if (vm->in_sbm) + rc = virtio_mem_sbm_notify_going_online(vm, id); break; case MEM_OFFLINE: - virtio_mem_sbm_notify_offline(vm, mb_id); + if (vm->in_sbm) + virtio_mem_sbm_notify_offline(vm, id); atomic64_add(size, &vm->offline_size); /* @@ -788,7 +936,8 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, mutex_unlock(&vm->hotplug_mutex); break; case MEM_ONLINE: - virtio_mem_sbm_notify_online(vm, mb_id); + if (vm->in_sbm) + virtio_mem_sbm_notify_online(vm, id); atomic64_sub(size, &vm->offline_size); /* @@ -807,7 +956,8 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, case MEM_CANCEL_OFFLINE: if (!vm->hotplug_active) break; - virtio_mem_sbm_notify_cancel_offline(vm, mb_id); + if (vm->in_sbm) + virtio_mem_sbm_notify_cancel_offline(vm, id); vm->hotplug_active = false; mutex_unlock(&vm->hotplug_mutex); break; @@ -978,27 +1128,29 @@ static void virtio_mem_fake_offline_cancel_offline(unsigned long pfn, static void virtio_mem_online_page_cb(struct page *page, unsigned int order) { const unsigned long addr = page_to_phys(page); - const unsigned long mb_id = virtio_mem_phys_to_mb_id(addr); + unsigned long id, sb_id; struct virtio_mem *vm; - int sb_id; + bool do_online; - /* - * We exploit here that subblocks have at least MAX_ORDER_NR_PAGES. - * size/alignment and that this callback is is called with such a - * size/alignment. So we cannot cross subblocks and therefore - * also not memory blocks. - */ rcu_read_lock(); list_for_each_entry_rcu(vm, &virtio_mem_devices, next) { if (!virtio_mem_contains_range(vm, addr, PFN_PHYS(1 << order))) continue; - sb_id = virtio_mem_phys_to_sb_id(vm, addr); - /* - * If plugged, online the pages, otherwise, set them fake - * offline (PageOffline). - */ - if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id, 1)) + if (vm->in_sbm) { + /* + * We exploit here that subblocks have at least + * MAX_ORDER_NR_PAGES size/alignment - so we cannot + * cross subblocks within one call. + */ + id = virtio_mem_phys_to_mb_id(addr); + sb_id = virtio_mem_phys_to_sb_id(vm, addr); + do_online = virtio_mem_sbm_test_sb_plugged(vm, id, + sb_id, 1); + } else { + do_online = true; + } + if (do_online) generic_online_page(page, order); else virtio_mem_set_fake_offline(PFN_DOWN(addr), 1 << order, @@ -1178,6 +1330,32 @@ static int virtio_mem_sbm_unplug_sb(struct virtio_mem *vm, unsigned long mb_id, return rc; } +/* + * Request to unplug a big block. + * + * Will not modify the state of the big block. + */ +static int virtio_mem_bbm_unplug_bb(struct virtio_mem *vm, unsigned long bb_id) +{ + const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id); + const uint64_t size = vm->bbm.bb_size; + + return virtio_mem_send_unplug_request(vm, addr, size); +} + +/* + * Request to plug a big block. + * + * Will not modify the state of the big block. + */ +static int virtio_mem_bbm_plug_bb(struct virtio_mem *vm, unsigned long bb_id) +{ + const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id); + const uint64_t size = vm->bbm.bb_size; + + return virtio_mem_send_plug_request(vm, addr, size); +} + /* * Unplug the desired number of plugged subblocks of a offline or not-added * memory block. Will fail if any subblock cannot get unplugged (instead of @@ -1363,10 +1541,7 @@ static int virtio_mem_sbm_plug_any_sb(struct virtio_mem *vm, return 0; } -/* - * Try to plug the requested amount of memory. - */ -static int virtio_mem_plug_request(struct virtio_mem *vm, uint64_t diff) +static int virtio_mem_sbm_plug_request(struct virtio_mem *vm, uint64_t diff) { uint64_t nb_sb = diff / vm->sbm.sb_size; unsigned long mb_id; @@ -1433,6 +1608,112 @@ out_unlock: return rc; } +/* + * Plug a big block and add it to Linux. + * + * Will modify the state of the big block. + */ +static int virtio_mem_bbm_plug_and_add_bb(struct virtio_mem *vm, + unsigned long bb_id) +{ + int rc; + + if (WARN_ON_ONCE(virtio_mem_bbm_get_bb_state(vm, bb_id) != + VIRTIO_MEM_BBM_BB_UNUSED)) + return -EINVAL; + + rc = virtio_mem_bbm_plug_bb(vm, bb_id); + if (rc) + return rc; + virtio_mem_bbm_set_bb_state(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED); + + rc = virtio_mem_bbm_add_bb(vm, bb_id); + if (rc) { + if (!virtio_mem_bbm_unplug_bb(vm, bb_id)) + virtio_mem_bbm_set_bb_state(vm, bb_id, + VIRTIO_MEM_BBM_BB_UNUSED); + else + /* Retry from the main loop. */ + virtio_mem_bbm_set_bb_state(vm, bb_id, + VIRTIO_MEM_BBM_BB_PLUGGED); + return rc; + } + return 0; +} + +/* + * Prepare tracking data for the next big block. + */ +static int virtio_mem_bbm_prepare_next_bb(struct virtio_mem *vm, + unsigned long *bb_id) +{ + int rc; + + if (vm->bbm.next_bb_id > vm->bbm.last_usable_bb_id) + return -ENOSPC; + + /* Resize the big block state array if required. */ + rc = virtio_mem_bbm_bb_states_prepare_next_bb(vm); + if (rc) + return rc; + + vm->bbm.bb_count[VIRTIO_MEM_BBM_BB_UNUSED]++; + *bb_id = vm->bbm.next_bb_id; + vm->bbm.next_bb_id++; + return 0; +} + +static int virtio_mem_bbm_plug_request(struct virtio_mem *vm, uint64_t diff) +{ + uint64_t nb_bb = diff / vm->bbm.bb_size; + unsigned long bb_id; + int rc; + + if (!nb_bb) + return 0; + + /* Try to plug and add unused big blocks */ + virtio_mem_bbm_for_each_bb(vm, bb_id, VIRTIO_MEM_BBM_BB_UNUSED) { + if (!virtio_mem_could_add_memory(vm, vm->bbm.bb_size)) + return -ENOSPC; + + rc = virtio_mem_bbm_plug_and_add_bb(vm, bb_id); + if (!rc) + nb_bb--; + if (rc || !nb_bb) + return rc; + cond_resched(); + } + + /* Try to prepare, plug and add new big blocks */ + while (nb_bb) { + if (!virtio_mem_could_add_memory(vm, vm->bbm.bb_size)) + return -ENOSPC; + + rc = virtio_mem_bbm_prepare_next_bb(vm, &bb_id); + if (rc) + return rc; + rc = virtio_mem_bbm_plug_and_add_bb(vm, bb_id); + if (!rc) + nb_bb--; + if (rc) + return rc; + cond_resched(); + } + + return 0; +} + +/* + * Try to plug the requested amount of memory. + */ +static int virtio_mem_plug_request(struct virtio_mem *vm, uint64_t diff) +{ + if (vm->in_sbm) + return virtio_mem_sbm_plug_request(vm, diff); + return virtio_mem_bbm_plug_request(vm, diff); +} + /* * Unplug the desired number of plugged subblocks of an offline memory block. * Will fail if any subblock cannot get unplugged (instead of skipping it). @@ -1571,10 +1852,7 @@ unplugged: return 0; } -/* - * Try to unplug the requested amount of memory. - */ -static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff) +static int virtio_mem_sbm_unplug_request(struct virtio_mem *vm, uint64_t diff) { uint64_t nb_sb = diff / vm->sbm.sb_size; unsigned long mb_id; @@ -1640,20 +1918,42 @@ out_unlock: return rc; } +/* + * Try to unplug the requested amount of memory. + */ +static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff) +{ + if (vm->in_sbm) + return virtio_mem_sbm_unplug_request(vm, diff); + return -EBUSY; +} + /* * Try to unplug all blocks that couldn't be unplugged before, for example, * because the hypervisor was busy. */ static int virtio_mem_unplug_pending_mb(struct virtio_mem *vm) { - unsigned long mb_id; + unsigned long id; int rc; - virtio_mem_sbm_for_each_mb(vm, mb_id, VIRTIO_MEM_SBM_MB_PLUGGED) { - rc = virtio_mem_sbm_unplug_mb(vm, mb_id); + if (!vm->in_sbm) { + virtio_mem_bbm_for_each_bb(vm, id, + VIRTIO_MEM_BBM_BB_PLUGGED) { + rc = virtio_mem_bbm_unplug_bb(vm, id); + if (rc) + return rc; + virtio_mem_bbm_set_bb_state(vm, id, + VIRTIO_MEM_BBM_BB_UNUSED); + } + return 0; + } + + virtio_mem_sbm_for_each_mb(vm, id, VIRTIO_MEM_SBM_MB_PLUGGED) { + rc = virtio_mem_sbm_unplug_mb(vm, id); if (rc) return rc; - virtio_mem_sbm_set_mb_state(vm, mb_id, + virtio_mem_sbm_set_mb_state(vm, id, VIRTIO_MEM_SBM_MB_UNUSED); } @@ -1679,7 +1979,13 @@ static void virtio_mem_refresh_config(struct virtio_mem *vm) usable_region_size, &usable_region_size); end_addr = vm->addr + usable_region_size; end_addr = min(end_addr, phys_limit); - vm->sbm.last_usable_mb_id = virtio_mem_phys_to_mb_id(end_addr) - 1; + + if (vm->in_sbm) + vm->sbm.last_usable_mb_id = + virtio_mem_phys_to_mb_id(end_addr) - 1; + else + vm->bbm.last_usable_bb_id = + virtio_mem_phys_to_bb_id(vm, end_addr) - 1; /* see if there is a request to change the size */ virtio_cread_le(vm->vdev, struct virtio_mem_config, requested_size, @@ -1802,6 +2108,7 @@ static int virtio_mem_init_vq(struct virtio_mem *vm) static int virtio_mem_init(struct virtio_mem *vm) { const uint64_t phys_limit = 1UL << MAX_PHYSMEM_BITS; + uint64_t sb_size, addr; uint16_t node_id; if (!vm->vdev->config->get) { @@ -1834,16 +2141,6 @@ static int virtio_mem_init(struct virtio_mem *vm) if (vm->nid == NUMA_NO_NODE) vm->nid = memory_add_physaddr_to_nid(vm->addr); - /* - * We always hotplug memory in memory block granularity. This way, - * we have to wait for exactly one memory block to online. - */ - if (vm->device_block_size > memory_block_size_bytes()) { - dev_err(&vm->vdev->dev, - "The block size is not supported (too big).\n"); - return -EINVAL; - } - /* bad device setup - warn only */ if (!IS_ALIGNED(vm->addr, memory_block_size_bytes())) dev_warn(&vm->vdev->dev, @@ -1863,20 +2160,35 @@ static int virtio_mem_init(struct virtio_mem *vm) * - Is required for now for alloc_contig_range() to work reliably - * it doesn't properly handle smaller granularity on ZONE_NORMAL. */ - vm->sbm.sb_size = max_t(uint64_t, MAX_ORDER_NR_PAGES, - pageblock_nr_pages) * PAGE_SIZE; - vm->sbm.sb_size = max_t(uint64_t, vm->device_block_size, - vm->sbm.sb_size); - vm->sbm.sbs_per_mb = memory_block_size_bytes() / vm->sbm.sb_size; + sb_size = max_t(uint64_t, MAX_ORDER_NR_PAGES, + pageblock_nr_pages) * PAGE_SIZE; + sb_size = max_t(uint64_t, vm->device_block_size, sb_size); + + if (sb_size < memory_block_size_bytes()) { + /* SBM: At least two subblocks per Linux memory block. */ + vm->in_sbm = true; + vm->sbm.sb_size = sb_size; + vm->sbm.sbs_per_mb = memory_block_size_bytes() / + vm->sbm.sb_size; + + /* Round up to the next full memory block */ + addr = vm->addr + memory_block_size_bytes() - 1; + vm->sbm.first_mb_id = virtio_mem_phys_to_mb_id(addr); + vm->sbm.next_mb_id = vm->sbm.first_mb_id; + } else { + /* BBM: At least one Linux memory block. */ + vm->bbm.bb_size = vm->device_block_size; - /* Round up to the next full memory block */ - vm->sbm.first_mb_id = virtio_mem_phys_to_mb_id(vm->addr - 1 + - memory_block_size_bytes()); - vm->sbm.next_mb_id = vm->sbm.first_mb_id; + vm->bbm.first_bb_id = virtio_mem_phys_to_bb_id(vm, vm->addr); + vm->bbm.next_bb_id = vm->bbm.first_bb_id; + } /* Prepare the offline threshold - make sure we can add two blocks. */ vm->offline_threshold = max_t(uint64_t, 2 * memory_block_size_bytes(), VIRTIO_MEM_DEFAULT_OFFLINE_THRESHOLD); + /* In BBM, we also want at least two big blocks. */ + vm->offline_threshold = max_t(uint64_t, 2 * vm->bbm.bb_size, + vm->offline_threshold); dev_info(&vm->vdev->dev, "start address: 0x%llx", vm->addr); dev_info(&vm->vdev->dev, "region size: 0x%llx", vm->region_size); @@ -1884,8 +2196,12 @@ static int virtio_mem_init(struct virtio_mem *vm) (unsigned long long)vm->device_block_size); dev_info(&vm->vdev->dev, "memory block size: 0x%lx", memory_block_size_bytes()); - dev_info(&vm->vdev->dev, "subblock size: 0x%llx", - (unsigned long long)vm->sbm.sb_size); + if (vm->in_sbm) + dev_info(&vm->vdev->dev, "subblock size: 0x%llx", + (unsigned long long)vm->sbm.sb_size); + else + dev_info(&vm->vdev->dev, "big block size: 0x%llx", + (unsigned long long)vm->bbm.bb_size); if (vm->nid != NUMA_NO_NODE && IS_ENABLED(CONFIG_NUMA)) dev_info(&vm->vdev->dev, "nid: %d", vm->nid); @@ -2042,22 +2358,24 @@ static void virtio_mem_remove(struct virtio_device *vdev) cancel_work_sync(&vm->wq); hrtimer_cancel(&vm->retry_timer); - /* - * After we unregistered our callbacks, user space can online partially - * plugged offline blocks. Make sure to remove them. - */ - virtio_mem_sbm_for_each_mb(vm, mb_id, - VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL) { - rc = virtio_mem_sbm_remove_mb(vm, mb_id); - BUG_ON(rc); - virtio_mem_sbm_set_mb_state(vm, mb_id, - VIRTIO_MEM_SBM_MB_UNUSED); + if (vm->in_sbm) { + /* + * After we unregistered our callbacks, user space can online + * partially plugged offline blocks. Make sure to remove them. + */ + virtio_mem_sbm_for_each_mb(vm, mb_id, + VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL) { + rc = virtio_mem_sbm_remove_mb(vm, mb_id); + BUG_ON(rc); + virtio_mem_sbm_set_mb_state(vm, mb_id, + VIRTIO_MEM_SBM_MB_UNUSED); + } + /* + * After we unregistered our callbacks, user space can no longer + * offline partially plugged online memory blocks. No need to + * worry about them. + */ } - /* - * After we unregistered our callbacks, user space can no longer - * offline partially plugged online memory blocks. No need to worry - * about them. - */ /* unregister callbacks */ unregister_virtio_mem_device(vm); @@ -2076,8 +2394,12 @@ static void virtio_mem_remove(struct virtio_device *vdev) } /* remove all tracking data - no locking needed */ - vfree(vm->sbm.mb_states); - vfree(vm->sbm.sb_states); + if (vm->in_sbm) { + vfree(vm->sbm.mb_states); + vfree(vm->sbm.sb_states); + } else { + vfree(vm->bbm.bb_states); + } /* reset the device and cleanup the queues */ vdev->config->reset(vdev); -- GitLab From faa45ff4ce885af93a3233a408c5a74b2943226b Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:38:12 +0100 Subject: [PATCH 0845/1894] virtio-mem: allow to force Big Block Mode (BBM) and set the big block size Let's allow to force BBM, even if subblocks would be possible. Take care of properly calculating the first big block id, because the start address might no longer be aligned to the big block size. Also, allow to manually configure the size of Big Blocks. Reviewed-by: Wei Yang Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Cc: Michal Hocko Cc: Oscar Salvador Cc: Wei Yang Cc: Andrew Morton Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-27-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 8a4f735360ac3..861149acafe5d 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -27,6 +27,16 @@ static bool unplug_online = true; module_param(unplug_online, bool, 0644); MODULE_PARM_DESC(unplug_online, "Try to unplug online memory"); +static bool force_bbm; +module_param(force_bbm, bool, 0444); +MODULE_PARM_DESC(force_bbm, + "Force Big Block Mode. Default is 0 (auto-selection)"); + +static unsigned long bbm_block_size; +module_param(bbm_block_size, ulong, 0444); +MODULE_PARM_DESC(bbm_block_size, + "Big Block size in bytes. Default is 0 (auto-detection)."); + /* * virtio-mem currently supports the following modes of operation: * @@ -2164,7 +2174,7 @@ static int virtio_mem_init(struct virtio_mem *vm) pageblock_nr_pages) * PAGE_SIZE; sb_size = max_t(uint64_t, vm->device_block_size, sb_size); - if (sb_size < memory_block_size_bytes()) { + if (sb_size < memory_block_size_bytes() && !force_bbm) { /* SBM: At least two subblocks per Linux memory block. */ vm->in_sbm = true; vm->sbm.sb_size = sb_size; @@ -2177,9 +2187,24 @@ static int virtio_mem_init(struct virtio_mem *vm) vm->sbm.next_mb_id = vm->sbm.first_mb_id; } else { /* BBM: At least one Linux memory block. */ - vm->bbm.bb_size = vm->device_block_size; + vm->bbm.bb_size = max_t(uint64_t, vm->device_block_size, + memory_block_size_bytes()); + + if (bbm_block_size) { + if (!is_power_of_2(bbm_block_size)) { + dev_warn(&vm->vdev->dev, + "bbm_block_size is not a power of 2"); + } else if (bbm_block_size < vm->bbm.bb_size) { + dev_warn(&vm->vdev->dev, + "bbm_block_size is too small"); + } else { + vm->bbm.bb_size = bbm_block_size; + } + } - vm->bbm.first_bb_id = virtio_mem_phys_to_bb_id(vm, vm->addr); + /* Round up to the next aligned big block */ + addr = vm->addr + vm->bbm.bb_size - 1; + vm->bbm.first_bb_id = virtio_mem_phys_to_bb_id(vm, addr); vm->bbm.next_bb_id = vm->bbm.first_bb_id; } -- GitLab From 8dc4bb58a146655eb057247d7c9d19e73928715b Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:38:13 +0100 Subject: [PATCH 0846/1894] mm/memory_hotplug: extend offline_and_remove_memory() to handle more than one memory block virtio-mem soon wants to use offline_and_remove_memory() memory that exceeds a single Linux memory block (memory_block_size_bytes()). Let's remove that restriction. Let's remember the old state and try to restore that if anything goes wrong. While re-onlining can, in general, fail, it's highly unlikely to happen (usually only when a notifier fails to allocate memory, and these are rather rare). This will be used by virtio-mem to offline+remove memory ranges that are bigger than a single memory block - for example, with a device block size of 1 GiB (e.g., gigantic pages in the hypervisor) and a Linux memory block size of 128MB. While we could compress the state into 2 bit, using 8 bit is much easier. This handling is similar, but different to acpi_scan_try_to_offline(): a) We don't try to offline twice. I am not sure if this CONFIG_MEMCG optimization is still relevant - it should only apply to ZONE_NORMAL (where we have no guarantees). If relevant, we can always add it. b) acpi_scan_try_to_offline() simply onlines all memory in case something goes wrong. It doesn't restore previous online type. Let's do that, so we won't overwrite what e.g., user space configured. Reviewed-by: Wei Yang Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Cc: Michal Hocko Cc: Oscar Salvador Cc: Wei Yang Cc: Andrew Morton Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-28-david@redhat.com Signed-off-by: Michael S. Tsirkin Acked-by: Andrew Morton --- mm/memory_hotplug.c | 105 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 89 insertions(+), 16 deletions(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 63b2e46b65552..2b6cc42ba0a37 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1788,39 +1788,112 @@ int remove_memory(int nid, u64 start, u64 size) } EXPORT_SYMBOL_GPL(remove_memory); +static int try_offline_memory_block(struct memory_block *mem, void *arg) +{ + uint8_t online_type = MMOP_ONLINE_KERNEL; + uint8_t **online_types = arg; + struct page *page; + int rc; + + /* + * Sense the online_type via the zone of the memory block. Offlining + * with multiple zones within one memory block will be rejected + * by offlining code ... so we don't care about that. + */ + page = pfn_to_online_page(section_nr_to_pfn(mem->start_section_nr)); + if (page && zone_idx(page_zone(page)) == ZONE_MOVABLE) + online_type = MMOP_ONLINE_MOVABLE; + + rc = device_offline(&mem->dev); + /* + * Default is MMOP_OFFLINE - change it only if offlining succeeded, + * so try_reonline_memory_block() can do the right thing. + */ + if (!rc) + **online_types = online_type; + + (*online_types)++; + /* Ignore if already offline. */ + return rc < 0 ? rc : 0; +} + +static int try_reonline_memory_block(struct memory_block *mem, void *arg) +{ + uint8_t **online_types = arg; + int rc; + + if (**online_types != MMOP_OFFLINE) { + mem->online_type = **online_types; + rc = device_online(&mem->dev); + if (rc < 0) + pr_warn("%s: Failed to re-online memory: %d", + __func__, rc); + } + + /* Continue processing all remaining memory blocks. */ + (*online_types)++; + return 0; +} + /* - * Try to offline and remove a memory block. Might take a long time to - * finish in case memory is still in use. Primarily useful for memory devices - * that logically unplugged all memory (so it's no longer in use) and want to - * offline + remove the memory block. + * Try to offline and remove memory. Might take a long time to finish in case + * memory is still in use. Primarily useful for memory devices that logically + * unplugged all memory (so it's no longer in use) and want to offline + remove + * that memory. */ int offline_and_remove_memory(int nid, u64 start, u64 size) { - struct memory_block *mem; - int rc = -EINVAL; + const unsigned long mb_count = size / memory_block_size_bytes(); + uint8_t *online_types, *tmp; + int rc; if (!IS_ALIGNED(start, memory_block_size_bytes()) || - size != memory_block_size_bytes()) - return rc; + !IS_ALIGNED(size, memory_block_size_bytes()) || !size) + return -EINVAL; + + /* + * We'll remember the old online type of each memory block, so we can + * try to revert whatever we did when offlining one memory block fails + * after offlining some others succeeded. + */ + online_types = kmalloc_array(mb_count, sizeof(*online_types), + GFP_KERNEL); + if (!online_types) + return -ENOMEM; + /* + * Initialize all states to MMOP_OFFLINE, so when we abort processing in + * try_offline_memory_block(), we'll skip all unprocessed blocks in + * try_reonline_memory_block(). + */ + memset(online_types, MMOP_OFFLINE, mb_count); lock_device_hotplug(); - mem = find_memory_block(__pfn_to_section(PFN_DOWN(start))); - if (mem) - rc = device_offline(&mem->dev); - /* Ignore if the device is already offline. */ - if (rc > 0) - rc = 0; + + tmp = online_types; + rc = walk_memory_blocks(start, size, &tmp, try_offline_memory_block); /* - * In case we succeeded to offline the memory block, remove it. + * In case we succeeded to offline all memory, remove it. * This cannot fail as it cannot get onlined in the meantime. */ if (!rc) { rc = try_remove_memory(nid, start, size); - WARN_ON_ONCE(rc); + if (rc) + pr_err("%s: Failed to remove memory: %d", __func__, rc); + } + + /* + * Rollback what we did. While memory onlining might theoretically fail + * (nacked by a notifier), it barely ever happens. + */ + if (rc) { + tmp = online_types; + walk_memory_blocks(start, size, &tmp, + try_reonline_memory_block); } unlock_device_hotplug(); + kfree(online_types); return rc; } EXPORT_SYMBOL_GPL(offline_and_remove_memory); -- GitLab From 269ac9389db4854f7b05c4749ff051763e7578d3 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:38:14 +0100 Subject: [PATCH 0847/1894] virtio-mem: Big Block Mode (BBM) - basic memory hotunplug Let's try to unplug completely offline big blocks first. Then, (if enabled via unplug_offline) try to offline and remove whole big blocks. No locking necessary - we can deal with concurrent onlining/offlining just fine. Note1: This is sub-optimal and might be dangerous in some environments: we could end up in an infinite loop when offlining (e.g., long-term pinnings), similar as with DIMMs. We'll introduce safe memory hotunplug via fake-offlining next, and use this basic mode only when explicitly enabled. Note2: Without ZONE_MOVABLE, memory unplug will be extremely unreliable with bigger block sizes. Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Cc: Michal Hocko Cc: Oscar Salvador Cc: Wei Yang Cc: Andrew Morton Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-29-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 156 +++++++++++++++++++++++++++++++++++- 1 file changed, 155 insertions(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 861149acafe5d..f1696cdb7b0cd 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -390,6 +390,12 @@ static int virtio_mem_bbm_bb_states_prepare_next_bb(struct virtio_mem *vm) _bb_id++) \ if (virtio_mem_bbm_get_bb_state(_vm, _bb_id) == _state) +#define virtio_mem_bbm_for_each_bb_rev(_vm, _bb_id, _state) \ + for (_bb_id = vm->bbm.next_bb_id - 1; \ + _bb_id >= vm->bbm.first_bb_id && _vm->bbm.bb_count[_state]; \ + _bb_id--) \ + if (virtio_mem_bbm_get_bb_state(_vm, _bb_id) == _state) + /* * Set the state of a memory block, taking care of the state counter. */ @@ -685,6 +691,18 @@ static int virtio_mem_sbm_remove_mb(struct virtio_mem *vm, unsigned long mb_id) return virtio_mem_remove_memory(vm, addr, size); } +/* + * See virtio_mem_remove_memory(): Try to remove all Linux memory blocks covered + * by the big block. + */ +static int virtio_mem_bbm_remove_bb(struct virtio_mem *vm, unsigned long bb_id) +{ + const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id); + const uint64_t size = vm->bbm.bb_size; + + return virtio_mem_remove_memory(vm, addr, size); +} + /* * Try offlining and removing memory from Linux. * @@ -731,6 +749,19 @@ static int virtio_mem_sbm_offline_and_remove_mb(struct virtio_mem *vm, return virtio_mem_offline_and_remove_memory(vm, addr, size); } +/* + * See virtio_mem_offline_and_remove_memory(): Try to offline and remove a + * all Linux memory blocks covered by the big block. + */ +static int virtio_mem_bbm_offline_and_remove_bb(struct virtio_mem *vm, + unsigned long bb_id) +{ + const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id); + const uint64_t size = vm->bbm.bb_size; + + return virtio_mem_offline_and_remove_memory(vm, addr, size); +} + /* * Trigger the workqueue so the device can perform its magic. */ @@ -1928,6 +1959,129 @@ out_unlock: return rc; } +/* + * Try to offline and remove a big block from Linux and unplug it. Will fail + * with -EBUSY if some memory is busy and cannot get unplugged. + * + * Will modify the state of the memory block. Might temporarily drop the + * hotplug_mutex. + */ +static int virtio_mem_bbm_offline_remove_and_unplug_bb(struct virtio_mem *vm, + unsigned long bb_id) +{ + int rc; + + if (WARN_ON_ONCE(virtio_mem_bbm_get_bb_state(vm, bb_id) != + VIRTIO_MEM_BBM_BB_ADDED)) + return -EINVAL; + + rc = virtio_mem_bbm_offline_and_remove_bb(vm, bb_id); + if (rc) + return rc; + + rc = virtio_mem_bbm_unplug_bb(vm, bb_id); + if (rc) + virtio_mem_bbm_set_bb_state(vm, bb_id, + VIRTIO_MEM_BBM_BB_PLUGGED); + else + virtio_mem_bbm_set_bb_state(vm, bb_id, + VIRTIO_MEM_BBM_BB_UNUSED); + return rc; +} + +/* + * Try to remove a big block from Linux and unplug it. Will fail with + * -EBUSY if some memory is online. + * + * Will modify the state of the memory block. + */ +static int virtio_mem_bbm_remove_and_unplug_bb(struct virtio_mem *vm, + unsigned long bb_id) +{ + int rc; + + if (WARN_ON_ONCE(virtio_mem_bbm_get_bb_state(vm, bb_id) != + VIRTIO_MEM_BBM_BB_ADDED)) + return -EINVAL; + + rc = virtio_mem_bbm_remove_bb(vm, bb_id); + if (rc) + return -EBUSY; + + rc = virtio_mem_bbm_unplug_bb(vm, bb_id); + if (rc) + virtio_mem_bbm_set_bb_state(vm, bb_id, + VIRTIO_MEM_BBM_BB_PLUGGED); + else + virtio_mem_bbm_set_bb_state(vm, bb_id, + VIRTIO_MEM_BBM_BB_UNUSED); + return rc; +} + +/* + * Test if a big block is completely offline. + */ +static bool virtio_mem_bbm_bb_is_offline(struct virtio_mem *vm, + unsigned long bb_id) +{ + const unsigned long start_pfn = PFN_DOWN(virtio_mem_bb_id_to_phys(vm, bb_id)); + const unsigned long nr_pages = PFN_DOWN(vm->bbm.bb_size); + unsigned long pfn; + + for (pfn = start_pfn; pfn < start_pfn + nr_pages; + pfn += PAGES_PER_SECTION) { + if (pfn_to_online_page(pfn)) + return false; + } + + return true; +} + +static int virtio_mem_bbm_unplug_request(struct virtio_mem *vm, uint64_t diff) +{ + uint64_t nb_bb = diff / vm->bbm.bb_size; + uint64_t bb_id; + int rc; + + if (!nb_bb) + return 0; + + /* Try to unplug completely offline big blocks first. */ + virtio_mem_bbm_for_each_bb_rev(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED) { + cond_resched(); + /* + * As we're holding no locks, this check is racy as memory + * can get onlined in the meantime - but we'll fail gracefully. + */ + if (!virtio_mem_bbm_bb_is_offline(vm, bb_id)) + continue; + rc = virtio_mem_bbm_remove_and_unplug_bb(vm, bb_id); + if (rc == -EBUSY) + continue; + if (!rc) + nb_bb--; + if (rc || !nb_bb) + return rc; + } + + if (!unplug_online) + return 0; + + /* Try to unplug any big blocks. */ + virtio_mem_bbm_for_each_bb_rev(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED) { + cond_resched(); + rc = virtio_mem_bbm_offline_remove_and_unplug_bb(vm, bb_id); + if (rc == -EBUSY) + continue; + if (!rc) + nb_bb--; + if (rc || !nb_bb) + return rc; + } + + return nb_bb ? -EBUSY : 0; +} + /* * Try to unplug the requested amount of memory. */ @@ -1935,7 +2089,7 @@ static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff) { if (vm->in_sbm) return virtio_mem_sbm_unplug_request(vm, diff); - return -EBUSY; + return virtio_mem_bbm_unplug_request(vm, diff); } /* -- GitLab From 3711387a7543f2716e52ce5a5d92e3d580423a40 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:38:15 +0100 Subject: [PATCH 0848/1894] virtio-mem: Big Block Mode (BBM) - safe memory hotunplug Let's add a safe mechanism to unplug memory, avoiding long/endless loops when trying to offline memory - similar to in SBM. Fake-offline all memory (via alloc_contig_range()) before trying to offline+remove it. Use this mode as default, but allow to enable the other mode explicitly (which could give better memory hotunplug guarantees in some environments). The "unsafe" mode can be enabled e.g., via virtio_mem.bbm_safe_unplug=0 on the cmdline. Reviewed-by: Wei Yang Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Cc: Michal Hocko Cc: Oscar Salvador Cc: Wei Yang Cc: Andrew Morton Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-30-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 97 ++++++++++++++++++++++++++++++++++++- 1 file changed, 95 insertions(+), 2 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index f1696cdb7b0cd..9fc9ec4a25f56 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -37,6 +37,11 @@ module_param(bbm_block_size, ulong, 0444); MODULE_PARM_DESC(bbm_block_size, "Big Block size in bytes. Default is 0 (auto-detection)."); +static bool bbm_safe_unplug = true; +module_param(bbm_safe_unplug, bool, 0444); +MODULE_PARM_DESC(bbm_safe_unplug, + "Use a safe unplug mechanism in BBM, avoiding long/endless loops"); + /* * virtio-mem currently supports the following modes of operation: * @@ -87,6 +92,8 @@ enum virtio_mem_bbm_bb_state { VIRTIO_MEM_BBM_BB_PLUGGED, /* Plugged and added to Linux. */ VIRTIO_MEM_BBM_BB_ADDED, + /* All online parts are fake-offline, ready to remove. */ + VIRTIO_MEM_BBM_BB_FAKE_OFFLINE, VIRTIO_MEM_BBM_BB_COUNT }; @@ -889,6 +896,32 @@ static void virtio_mem_sbm_notify_cancel_offline(struct virtio_mem *vm, } } +static void virtio_mem_bbm_notify_going_offline(struct virtio_mem *vm, + unsigned long bb_id, + unsigned long pfn, + unsigned long nr_pages) +{ + /* + * When marked as "fake-offline", all online memory of this device block + * is allocated by us. Otherwise, we don't have any memory allocated. + */ + if (virtio_mem_bbm_get_bb_state(vm, bb_id) != + VIRTIO_MEM_BBM_BB_FAKE_OFFLINE) + return; + virtio_mem_fake_offline_going_offline(pfn, nr_pages); +} + +static void virtio_mem_bbm_notify_cancel_offline(struct virtio_mem *vm, + unsigned long bb_id, + unsigned long pfn, + unsigned long nr_pages) +{ + if (virtio_mem_bbm_get_bb_state(vm, bb_id) != + VIRTIO_MEM_BBM_BB_FAKE_OFFLINE) + return; + virtio_mem_fake_offline_cancel_offline(pfn, nr_pages); +} + /* * This callback will either be called synchronously from add_memory() or * asynchronously (e.g., triggered via user space). We have to be careful @@ -949,6 +982,10 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, vm->hotplug_active = true; if (vm->in_sbm) virtio_mem_sbm_notify_going_offline(vm, id); + else + virtio_mem_bbm_notify_going_offline(vm, id, + mhp->start_pfn, + mhp->nr_pages); break; case MEM_GOING_ONLINE: mutex_lock(&vm->hotplug_mutex); @@ -999,6 +1036,10 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, break; if (vm->in_sbm) virtio_mem_sbm_notify_cancel_offline(vm, id); + else + virtio_mem_bbm_notify_cancel_offline(vm, id, + mhp->start_pfn, + mhp->nr_pages); vm->hotplug_active = false; mutex_unlock(&vm->hotplug_mutex); break; @@ -1189,7 +1230,13 @@ static void virtio_mem_online_page_cb(struct page *page, unsigned int order) do_online = virtio_mem_sbm_test_sb_plugged(vm, id, sb_id, 1); } else { - do_online = true; + /* + * If the whole block is marked fake offline, keep + * everything that way. + */ + id = virtio_mem_phys_to_bb_id(vm, addr); + do_online = virtio_mem_bbm_get_bb_state(vm, id) != + VIRTIO_MEM_BBM_BB_FAKE_OFFLINE; } if (do_online) generic_online_page(page, order); @@ -1969,15 +2016,50 @@ out_unlock: static int virtio_mem_bbm_offline_remove_and_unplug_bb(struct virtio_mem *vm, unsigned long bb_id) { + const unsigned long start_pfn = PFN_DOWN(virtio_mem_bb_id_to_phys(vm, bb_id)); + const unsigned long nr_pages = PFN_DOWN(vm->bbm.bb_size); + unsigned long end_pfn = start_pfn + nr_pages; + unsigned long pfn; + struct page *page; int rc; if (WARN_ON_ONCE(virtio_mem_bbm_get_bb_state(vm, bb_id) != VIRTIO_MEM_BBM_BB_ADDED)) return -EINVAL; + if (bbm_safe_unplug) { + /* + * Start by fake-offlining all memory. Once we marked the device + * block as fake-offline, all newly onlined memory will + * automatically be kept fake-offline. Protect from concurrent + * onlining/offlining until we have a consistent state. + */ + mutex_lock(&vm->hotplug_mutex); + virtio_mem_bbm_set_bb_state(vm, bb_id, + VIRTIO_MEM_BBM_BB_FAKE_OFFLINE); + + for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { + page = pfn_to_online_page(pfn); + if (!page) + continue; + + rc = virtio_mem_fake_offline(pfn, PAGES_PER_SECTION); + if (rc) { + end_pfn = pfn; + goto rollback_safe_unplug; + } + } + mutex_unlock(&vm->hotplug_mutex); + } + rc = virtio_mem_bbm_offline_and_remove_bb(vm, bb_id); - if (rc) + if (rc) { + if (bbm_safe_unplug) { + mutex_lock(&vm->hotplug_mutex); + goto rollback_safe_unplug; + } return rc; + } rc = virtio_mem_bbm_unplug_bb(vm, bb_id); if (rc) @@ -1987,6 +2069,17 @@ static int virtio_mem_bbm_offline_remove_and_unplug_bb(struct virtio_mem *vm, virtio_mem_bbm_set_bb_state(vm, bb_id, VIRTIO_MEM_BBM_BB_UNUSED); return rc; + +rollback_safe_unplug: + for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { + page = pfn_to_online_page(pfn); + if (!page) + continue; + virtio_mem_fake_online(pfn, PAGES_PER_SECTION); + } + virtio_mem_bbm_set_bb_state(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED); + mutex_unlock(&vm->hotplug_mutex); + return rc; } /* -- GitLab From 0ab4b8901a8edda4fd1c2aded36192566d89353f Mon Sep 17 00:00:00 2001 From: Tian Tao Date: Wed, 11 Nov 2020 09:14:48 +0800 Subject: [PATCH 0849/1894] vhost_vdpa: switch to vmemdup_user() Replace opencoded alloc and copy with vmemdup_user() Signed-off-by: Tian Tao Link: https://lore.kernel.org/r/1605057288-60400-1-git-send-email-tiantao6@hisilicon.com Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefano Garzarella --- drivers/vhost/vdpa.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 29ed4173f04e6..ef688c8c0e0e6 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -245,14 +245,10 @@ static long vhost_vdpa_set_config(struct vhost_vdpa *v, return -EFAULT; if (vhost_vdpa_config_validate(v, &config)) return -EINVAL; - buf = kvzalloc(config.len, GFP_KERNEL); - if (!buf) - return -ENOMEM; - if (copy_from_user(buf, c->buf, config.len)) { - kvfree(buf); - return -EFAULT; - } + buf = vmemdup_user(c->buf, config.len); + if (IS_ERR(buf)) + return PTR_ERR(buf); ops->set_config(vdpa, config.off, buf, config.len); -- GitLab From 4d10367fd411437d55850357e471d9d5f9f47e72 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 29 Nov 2020 13:54:34 +0100 Subject: [PATCH 0850/1894] vdpa: ifcvf: Use dma_set_mask_and_coherent to simplify code 'pci_set_dma_mask()' + 'pci_set_consistent_dma_mask()' can be replaced by an equivalent 'dma_set_mask_and_coherent()' which is much less verbose. While at it, fix a typo (s/confiugration/configuration) Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/20201129125434.1462638-1-christophe.jaillet@wanadoo.fr Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vdpa/ifcvf/ifcvf_main.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c index 8b4028556cb66..fa1af301cf559 100644 --- a/drivers/vdpa/ifcvf/ifcvf_main.c +++ b/drivers/vdpa/ifcvf/ifcvf_main.c @@ -417,16 +417,9 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id) return ret; } - ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)); if (ret) { - IFCVF_ERR(pdev, "No usable DMA confiugration\n"); - return ret; - } - - ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); - if (ret) { - IFCVF_ERR(pdev, - "No usable coherent DMA confiugration\n"); + IFCVF_ERR(pdev, "No usable DMA configuration\n"); return ret; } -- GitLab From 29b90f92ee64f4cae2d8ef83922286567da6c2c1 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 15 Dec 2020 15:42:39 +0100 Subject: [PATCH 0851/1894] vdpa: remove unnecessary 'default n' in Kconfig entries 'default n' is not necessary since it is already the default when nothing is specified. Suggested-by: Jason Wang Acked-by: Jason Wang Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20201215144256.155342-2-sgarzare@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/Kconfig | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/vdpa/Kconfig b/drivers/vdpa/Kconfig index 6caf539091e55..2c892e890b9e3 100644 --- a/drivers/vdpa/Kconfig +++ b/drivers/vdpa/Kconfig @@ -14,7 +14,6 @@ config VDPA_SIM select DMA_OPS select VHOST_RING select GENERIC_NET_UTILS - default n help vDPA networking device simulator which loop TX traffic back to RX. This device is used for testing, prototyping and @@ -23,7 +22,6 @@ config VDPA_SIM config IFCVF tristate "Intel IFC VF vDPA driver" depends on PCI_MSI - default n help This kernel module can drive Intel IFC VF NIC to offload virtio dataplane traffic to hardware. @@ -42,7 +40,6 @@ config MLX5_VDPA_NET tristate "vDPA driver for ConnectX devices" select MLX5_VDPA depends on MLX5_CORE - default n help VDPA network driver for ConnectX6 and newer. Provides offloading of virtio net datapath such that descriptors put on the ring will -- GitLab From cc3d42386d14176e392d61da1de05c1d87c18b93 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 15 Dec 2020 15:42:40 +0100 Subject: [PATCH 0852/1894] vdpa_sim: remove unnecessary headers inclusion Some headers are not necessary, so let's remove them to do some cleaning. Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20201215144256.155342-3-sgarzare@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 6a90fdb9cbfc6..63b85541fb5e6 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -11,16 +11,9 @@ #include #include #include -#include -#include #include #include -#include -#include -#include #include -#include -#include #include #include #include -- GitLab From 423248d60d2b655321fc49eca1545f95a1bc9d6c Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Tue, 15 Dec 2020 15:42:41 +0100 Subject: [PATCH 0853/1894] vdpa_sim: remove hard-coded virtq count Add a new attribute that will define the number of virt queues to be created for the vdpasim device. Signed-off-by: Max Gurtovoy [sgarzare: replace kmalloc_array() with kcalloc()] Acked-by: Jason Wang Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20201215144256.155342-4-sgarzare@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 63b85541fb5e6..07ccc8609784f 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -63,7 +63,7 @@ static u64 vdpasim_features = (1ULL << VIRTIO_F_ANY_LAYOUT) | /* State of each vdpasim device */ struct vdpasim { struct vdpa_device vdpa; - struct vdpasim_virtqueue vqs[VDPASIM_VQ_NUM]; + struct vdpasim_virtqueue *vqs; struct work_struct work; /* spinlock to synchronize virtqueue state */ spinlock_t lock; @@ -73,6 +73,7 @@ struct vdpasim { u32 status; u32 generation; u64 features; + int nvqs; /* spinlock to synchronize iommu table */ spinlock_t iommu_lock; }; @@ -137,7 +138,7 @@ static void vdpasim_reset(struct vdpasim *vdpasim) { int i; - for (i = 0; i < VDPASIM_VQ_NUM; i++) + for (i = 0; i < vdpasim->nvqs; i++) vdpasim_vq_reset(&vdpasim->vqs[i]); spin_lock(&vdpasim->iommu_lock); @@ -343,7 +344,7 @@ static struct vdpasim *vdpasim_create(void) const struct vdpa_config_ops *ops; struct vdpasim *vdpasim; struct device *dev; - int ret = -ENOMEM; + int i, ret = -ENOMEM; if (batch_mapping) ops = &vdpasim_net_batch_config_ops; @@ -354,6 +355,7 @@ static struct vdpasim *vdpasim_create(void) if (!vdpasim) goto err_alloc; + vdpasim->nvqs = VDPASIM_VQ_NUM; INIT_WORK(&vdpasim->work, vdpasim_work); spin_lock_init(&vdpasim->lock); spin_lock_init(&vdpasim->iommu_lock); @@ -364,6 +366,11 @@ static struct vdpasim *vdpasim_create(void) goto err_iommu; set_dma_ops(dev, &vdpasim_dma_ops); + vdpasim->vqs = kcalloc(vdpasim->nvqs, sizeof(struct vdpasim_virtqueue), + GFP_KERNEL); + if (!vdpasim->vqs) + goto err_iommu; + vdpasim->iommu = vhost_iotlb_alloc(2048, 0); if (!vdpasim->iommu) goto err_iommu; @@ -382,8 +389,8 @@ static struct vdpasim *vdpasim_create(void) eth_random_addr(vdpasim->config.mac); } - vringh_set_iotlb(&vdpasim->vqs[0].vring, vdpasim->iommu); - vringh_set_iotlb(&vdpasim->vqs[1].vring, vdpasim->iommu); + for (i = 0; i < vdpasim->nvqs; i++) + vringh_set_iotlb(&vdpasim->vqs[i].vring, vdpasim->iommu); vdpasim->vdpa.dma_dev = dev; ret = vdpa_register_device(&vdpasim->vdpa); @@ -652,6 +659,7 @@ static void vdpasim_free(struct vdpa_device *vdpa) kfree(vdpasim->buffer); if (vdpasim->iommu) vhost_iotlb_free(vdpasim->iommu); + kfree(vdpasim->vqs); } static const struct vdpa_config_ops vdpasim_net_config_ops = { -- GitLab From 2fc0ebfa039025d88009e8f275ea8bcd177a9cd9 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 15 Dec 2020 15:42:42 +0100 Subject: [PATCH 0854/1894] vdpa_sim: make IOTLB entries limit configurable Some devices may require a higher limit for the number of IOTLB entries, so let's make it configurable through a module parameter. By default, it's initialized with the current limit (2048). Suggested-by: Jason Wang Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20201215144256.155342-5-sgarzare@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 07ccc8609784f..d716bfaadb3be 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -31,6 +31,11 @@ static int batch_mapping = 1; module_param(batch_mapping, int, 0444); MODULE_PARM_DESC(batch_mapping, "Batched mapping 1 -Enable; 0 - Disable"); +static int max_iotlb_entries = 2048; +module_param(max_iotlb_entries, int, 0444); +MODULE_PARM_DESC(max_iotlb_entries, + "Maximum number of iotlb entries. 0 means unlimited. (default: 2048)"); + static char *macaddr; module_param(macaddr, charp, 0); MODULE_PARM_DESC(macaddr, "Ethernet MAC address"); @@ -371,7 +376,7 @@ static struct vdpasim *vdpasim_create(void) if (!vdpasim->vqs) goto err_iommu; - vdpasim->iommu = vhost_iotlb_alloc(2048, 0); + vdpasim->iommu = vhost_iotlb_alloc(max_iotlb_entries, 0); if (!vdpasim->iommu) goto err_iommu; -- GitLab From 36a9c30630256629e62a9186793c28735ade3ffc Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 15 Dec 2020 15:42:43 +0100 Subject: [PATCH 0855/1894] vdpa_sim: rename vdpasim_config_ops variables These variables store generic callbacks used by the vDPA simulator core, so we can remove the 'net' word in their names. Co-developed-by: Max Gurtovoy Signed-off-by: Max Gurtovoy Acked-by: Jason Wang Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20201215144256.155342-6-sgarzare@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index d716bfaadb3be..923f29076b1b7 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -341,8 +341,8 @@ static const struct dma_map_ops vdpasim_dma_ops = { .free = vdpasim_free_coherent, }; -static const struct vdpa_config_ops vdpasim_net_config_ops; -static const struct vdpa_config_ops vdpasim_net_batch_config_ops; +static const struct vdpa_config_ops vdpasim_config_ops; +static const struct vdpa_config_ops vdpasim_batch_config_ops; static struct vdpasim *vdpasim_create(void) { @@ -352,9 +352,9 @@ static struct vdpasim *vdpasim_create(void) int i, ret = -ENOMEM; if (batch_mapping) - ops = &vdpasim_net_batch_config_ops; + ops = &vdpasim_batch_config_ops; else - ops = &vdpasim_net_config_ops; + ops = &vdpasim_config_ops; vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops, VDPASIM_VQ_NUM); if (!vdpasim) @@ -667,7 +667,7 @@ static void vdpasim_free(struct vdpa_device *vdpa) kfree(vdpasim->vqs); } -static const struct vdpa_config_ops vdpasim_net_config_ops = { +static const struct vdpa_config_ops vdpasim_config_ops = { .set_vq_address = vdpasim_set_vq_address, .set_vq_num = vdpasim_set_vq_num, .kick_vq = vdpasim_kick_vq, @@ -694,7 +694,7 @@ static const struct vdpa_config_ops vdpasim_net_config_ops = { .free = vdpasim_free, }; -static const struct vdpa_config_ops vdpasim_net_batch_config_ops = { +static const struct vdpa_config_ops vdpasim_batch_config_ops = { .set_vq_address = vdpasim_set_vq_address, .set_vq_num = vdpasim_set_vq_num, .kick_vq = vdpasim_kick_vq, -- GitLab From 6c6e28fe45794054410ad8cd2770af69fbe0338d Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 15 Dec 2020 15:42:44 +0100 Subject: [PATCH 0856/1894] vdpa_sim: add struct vdpasim_dev_attr for device attributes vdpasim_dev_attr will contain device specific attributes. We starting moving the number of virtqueues (i.e. nvqs) to vdpasim_dev_attr. vdpasim_create() creates a new vDPA simulator following the device attributes defined in the vdpasim_dev_attr parameter. Co-developed-by: Max Gurtovoy Signed-off-by: Max Gurtovoy Acked-by: Jason Wang Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20201215144256.155342-7-sgarzare@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 923f29076b1b7..ae295dc57d7db 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -65,11 +65,16 @@ static u64 vdpasim_features = (1ULL << VIRTIO_F_ANY_LAYOUT) | (1ULL << VIRTIO_F_ACCESS_PLATFORM) | (1ULL << VIRTIO_NET_F_MAC); +struct vdpasim_dev_attr { + int nvqs; +}; + /* State of each vdpasim device */ struct vdpasim { struct vdpa_device vdpa; struct vdpasim_virtqueue *vqs; struct work_struct work; + struct vdpasim_dev_attr dev_attr; /* spinlock to synchronize virtqueue state */ spinlock_t lock; struct virtio_net_config config; @@ -78,7 +83,6 @@ struct vdpasim { u32 status; u32 generation; u64 features; - int nvqs; /* spinlock to synchronize iommu table */ spinlock_t iommu_lock; }; @@ -143,7 +147,7 @@ static void vdpasim_reset(struct vdpasim *vdpasim) { int i; - for (i = 0; i < vdpasim->nvqs; i++) + for (i = 0; i < vdpasim->dev_attr.nvqs; i++) vdpasim_vq_reset(&vdpasim->vqs[i]); spin_lock(&vdpasim->iommu_lock); @@ -344,7 +348,7 @@ static const struct dma_map_ops vdpasim_dma_ops = { static const struct vdpa_config_ops vdpasim_config_ops; static const struct vdpa_config_ops vdpasim_batch_config_ops; -static struct vdpasim *vdpasim_create(void) +static struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr) { const struct vdpa_config_ops *ops; struct vdpasim *vdpasim; @@ -356,11 +360,12 @@ static struct vdpasim *vdpasim_create(void) else ops = &vdpasim_config_ops; - vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops, VDPASIM_VQ_NUM); + vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops, + dev_attr->nvqs); if (!vdpasim) goto err_alloc; - vdpasim->nvqs = VDPASIM_VQ_NUM; + vdpasim->dev_attr = *dev_attr; INIT_WORK(&vdpasim->work, vdpasim_work); spin_lock_init(&vdpasim->lock); spin_lock_init(&vdpasim->iommu_lock); @@ -371,7 +376,7 @@ static struct vdpasim *vdpasim_create(void) goto err_iommu; set_dma_ops(dev, &vdpasim_dma_ops); - vdpasim->vqs = kcalloc(vdpasim->nvqs, sizeof(struct vdpasim_virtqueue), + vdpasim->vqs = kcalloc(dev_attr->nvqs, sizeof(struct vdpasim_virtqueue), GFP_KERNEL); if (!vdpasim->vqs) goto err_iommu; @@ -394,7 +399,7 @@ static struct vdpasim *vdpasim_create(void) eth_random_addr(vdpasim->config.mac); } - for (i = 0; i < vdpasim->nvqs; i++) + for (i = 0; i < dev_attr->nvqs; i++) vringh_set_iotlb(&vdpasim->vqs[i].vring, vdpasim->iommu); vdpasim->vdpa.dma_dev = dev; @@ -722,7 +727,11 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = { static int __init vdpasim_dev_init(void) { - vdpasim_dev = vdpasim_create(); + struct vdpasim_dev_attr dev_attr = {}; + + dev_attr.nvqs = VDPASIM_VQ_NUM; + + vdpasim_dev = vdpasim_create(&dev_attr); if (!IS_ERR(vdpasim_dev)) return 0; -- GitLab From 2f8f461888052f1b92ebe6419514355538f7cd68 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 15 Dec 2020 15:42:45 +0100 Subject: [PATCH 0857/1894] vdpa_sim: add device id field in vdpasim_dev_attr Remove VDPASIM_DEVICE_ID macro and add 'id' field in vdpasim_dev_attr, that will be returned by vdpasim_get_device_id(). Use VIRTIO_ID_NET for vDPA-net simulator device id. Co-developed-by: Max Gurtovoy Signed-off-by: Max Gurtovoy Acked-by: Jason Wang Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20201215144256.155342-8-sgarzare@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index ae295dc57d7db..3a8e57ac7762a 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -55,7 +55,6 @@ struct vdpasim_virtqueue { #define VDPASIM_QUEUE_ALIGN PAGE_SIZE #define VDPASIM_QUEUE_MAX 256 -#define VDPASIM_DEVICE_ID 0x1 #define VDPASIM_VENDOR_ID 0 #define VDPASIM_VQ_NUM 0x2 #define VDPASIM_NAME "vdpasim-netdev" @@ -67,6 +66,7 @@ static u64 vdpasim_features = (1ULL << VIRTIO_F_ANY_LAYOUT) | struct vdpasim_dev_attr { int nvqs; + u32 id; }; /* State of each vdpasim device */ @@ -546,7 +546,9 @@ static u16 vdpasim_get_vq_num_max(struct vdpa_device *vdpa) static u32 vdpasim_get_device_id(struct vdpa_device *vdpa) { - return VDPASIM_DEVICE_ID; + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + + return vdpasim->dev_attr.id; } static u32 vdpasim_get_vendor_id(struct vdpa_device *vdpa) @@ -729,6 +731,7 @@ static int __init vdpasim_dev_init(void) { struct vdpasim_dev_attr dev_attr = {}; + dev_attr.id = VIRTIO_ID_NET; dev_attr.nvqs = VDPASIM_VQ_NUM; vdpasim_dev = vdpasim_create(&dev_attr); -- GitLab From 011c35bac5ef25f701d9a79bc731782889c0ff58 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 15 Dec 2020 15:42:46 +0100 Subject: [PATCH 0858/1894] vdpa_sim: add supported_features field in vdpasim_dev_attr Introduce a new VDPASIM_FEATURES macro with the generic features supported by the vDPA simulator, and VDPASIM_NET_FEATURES macro with vDPA-net features. Add 'supported_features' field in vdpasim_dev_attr, to allow devices to specify their features. Co-developed-by: Max Gurtovoy Signed-off-by: Max Gurtovoy Acked-by: Jason Wang Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20201215144256.155342-9-sgarzare@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 3a8e57ac7762a..6cf3c78b0e33c 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -59,12 +59,15 @@ struct vdpasim_virtqueue { #define VDPASIM_VQ_NUM 0x2 #define VDPASIM_NAME "vdpasim-netdev" -static u64 vdpasim_features = (1ULL << VIRTIO_F_ANY_LAYOUT) | - (1ULL << VIRTIO_F_VERSION_1) | - (1ULL << VIRTIO_F_ACCESS_PLATFORM) | - (1ULL << VIRTIO_NET_F_MAC); +#define VDPASIM_FEATURES ((1ULL << VIRTIO_F_ANY_LAYOUT) | \ + (1ULL << VIRTIO_F_VERSION_1) | \ + (1ULL << VIRTIO_F_ACCESS_PLATFORM)) + +#define VDPASIM_NET_FEATURES (VDPASIM_FEATURES | \ + (1ULL << VIRTIO_NET_F_MAC)) struct vdpasim_dev_attr { + u64 supported_features; int nvqs; u32 id; }; @@ -122,7 +125,7 @@ static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx) { struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; - vringh_init_iotlb(&vq->vring, vdpasim_features, + vringh_init_iotlb(&vq->vring, vdpasim->dev_attr.supported_features, VDPASIM_QUEUE_MAX, false, (struct vring_desc *)(uintptr_t)vq->desc_addr, (struct vring_avail *) @@ -131,7 +134,8 @@ static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx) (uintptr_t)vq->device_addr); } -static void vdpasim_vq_reset(struct vdpasim_virtqueue *vq) +static void vdpasim_vq_reset(struct vdpasim *vdpasim, + struct vdpasim_virtqueue *vq) { vq->ready = false; vq->desc_addr = 0; @@ -139,8 +143,8 @@ static void vdpasim_vq_reset(struct vdpasim_virtqueue *vq) vq->device_addr = 0; vq->cb = NULL; vq->private = NULL; - vringh_init_iotlb(&vq->vring, vdpasim_features, VDPASIM_QUEUE_MAX, - false, NULL, NULL, NULL); + vringh_init_iotlb(&vq->vring, vdpasim->dev_attr.supported_features, + VDPASIM_QUEUE_MAX, false, NULL, NULL, NULL); } static void vdpasim_reset(struct vdpasim *vdpasim) @@ -148,7 +152,7 @@ static void vdpasim_reset(struct vdpasim *vdpasim) int i; for (i = 0; i < vdpasim->dev_attr.nvqs; i++) - vdpasim_vq_reset(&vdpasim->vqs[i]); + vdpasim_vq_reset(vdpasim, &vdpasim->vqs[i]); spin_lock(&vdpasim->iommu_lock); vhost_iotlb_reset(vdpasim->iommu); @@ -508,7 +512,9 @@ static u32 vdpasim_get_vq_align(struct vdpa_device *vdpa) static u64 vdpasim_get_features(struct vdpa_device *vdpa) { - return vdpasim_features; + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + + return vdpasim->dev_attr.supported_features; } static int vdpasim_set_features(struct vdpa_device *vdpa, u64 features) @@ -520,7 +526,7 @@ static int vdpasim_set_features(struct vdpa_device *vdpa, u64 features) if (!(features & (1ULL << VIRTIO_F_ACCESS_PLATFORM))) return -EINVAL; - vdpasim->features = features & vdpasim_features; + vdpasim->features = features & vdpasim->dev_attr.supported_features; /* We generally only know whether guest is using the legacy interface * here, so generally that's the earliest we can set config fields. @@ -732,6 +738,7 @@ static int __init vdpasim_dev_init(void) struct vdpasim_dev_attr dev_attr = {}; dev_attr.id = VIRTIO_ID_NET; + dev_attr.supported_features = VDPASIM_NET_FEATURES; dev_attr.nvqs = VDPASIM_VQ_NUM; vdpasim_dev = vdpasim_create(&dev_attr); -- GitLab From a13b5918fdd0dd7987aa5f3c202f68ed6ad468bb Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 15 Dec 2020 15:42:47 +0100 Subject: [PATCH 0859/1894] vdpa_sim: add work_fn in vdpasim_dev_attr Rename vdpasim_work() in vdpasim_net_work() and add it to the vdpasim_dev_attr structure. Co-developed-by: Max Gurtovoy Signed-off-by: Max Gurtovoy Acked-by: Jason Wang Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20201215144256.155342-10-sgarzare@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 6cf3c78b0e33c..d356929f9dd38 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -70,6 +70,8 @@ struct vdpasim_dev_attr { u64 supported_features; int nvqs; u32 id; + + work_func_t work_fn; }; /* State of each vdpasim device */ @@ -163,7 +165,7 @@ static void vdpasim_reset(struct vdpasim *vdpasim) ++vdpasim->generation; } -static void vdpasim_work(struct work_struct *work) +static void vdpasim_net_work(struct work_struct *work) { struct vdpasim *vdpasim = container_of(work, struct vdpasim, work); @@ -370,7 +372,7 @@ static struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr) goto err_alloc; vdpasim->dev_attr = *dev_attr; - INIT_WORK(&vdpasim->work, vdpasim_work); + INIT_WORK(&vdpasim->work, dev_attr->work_fn); spin_lock_init(&vdpasim->lock); spin_lock_init(&vdpasim->iommu_lock); @@ -740,6 +742,7 @@ static int __init vdpasim_dev_init(void) dev_attr.id = VIRTIO_ID_NET; dev_attr.supported_features = VDPASIM_NET_FEATURES; dev_attr.nvqs = VDPASIM_VQ_NUM; + dev_attr.work_fn = vdpasim_net_work; vdpasim_dev = vdpasim_create(&dev_attr); -- GitLab From cf1a3b35382c10ce315c32bd2b3d7789897fbe13 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 15 Dec 2020 15:42:48 +0100 Subject: [PATCH 0860/1894] vdpa_sim: store parsed MAC address in a buffer As preparation for the next patches, we store the MAC address, parsed during the vdpasim_create(), in a buffer that will be used to fill 'config' together with other configurations. Acked-by: Jason Wang Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20201215144256.155342-11-sgarzare@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index d356929f9dd38..ccb40501cbd7c 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -40,6 +40,8 @@ static char *macaddr; module_param(macaddr, charp, 0); MODULE_PARM_DESC(macaddr, "Ethernet MAC address"); +u8 macaddr_buf[ETH_ALEN]; + struct vdpasim_virtqueue { struct vringh vring; struct vringh_kiov iov; @@ -396,13 +398,13 @@ static struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr) goto err_iommu; if (macaddr) { - mac_pton(macaddr, vdpasim->config.mac); - if (!is_valid_ether_addr(vdpasim->config.mac)) { + mac_pton(macaddr, macaddr_buf); + if (!is_valid_ether_addr(macaddr_buf)) { ret = -EADDRNOTAVAIL; goto err_iommu; } } else { - eth_random_addr(vdpasim->config.mac); + eth_random_addr(macaddr_buf); } for (i = 0; i < dev_attr->nvqs; i++) @@ -538,6 +540,8 @@ static int vdpasim_set_features(struct vdpa_device *vdpa, u64 features) config->mtu = cpu_to_vdpasim16(vdpasim, 1500); config->status = cpu_to_vdpasim16(vdpasim, VIRTIO_NET_S_LINK_UP); + memcpy(config->mac, macaddr_buf, ETH_ALEN); + return 0; } -- GitLab From f37cbbc65178e0a45823d281d290c4c02da9631c Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 15 Dec 2020 15:42:49 +0100 Subject: [PATCH 0861/1894] vdpa_sim: make 'config' generic and usable for any device type Add new 'config_size' attribute in 'vdpasim_dev_attr' and allocates 'config' dynamically to support any device types. Acked-by: Jason Wang Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20201215144256.155342-12-sgarzare@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index ccb40501cbd7c..4a0a6cadb9ff7 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -70,6 +70,7 @@ struct vdpasim_virtqueue { struct vdpasim_dev_attr { u64 supported_features; + size_t config_size; int nvqs; u32 id; @@ -84,7 +85,8 @@ struct vdpasim { struct vdpasim_dev_attr dev_attr; /* spinlock to synchronize virtqueue state */ spinlock_t lock; - struct virtio_net_config config; + /* virtio config according to device type */ + void *config; struct vhost_iotlb *iommu; void *buffer; u32 status; @@ -384,6 +386,10 @@ static struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr) goto err_iommu; set_dma_ops(dev, &vdpasim_dma_ops); + vdpasim->config = kzalloc(dev_attr->config_size, GFP_KERNEL); + if (!vdpasim->config) + goto err_iommu; + vdpasim->vqs = kcalloc(dev_attr->nvqs, sizeof(struct vdpasim_virtqueue), GFP_KERNEL); if (!vdpasim->vqs) @@ -524,7 +530,8 @@ static u64 vdpasim_get_features(struct vdpa_device *vdpa) static int vdpasim_set_features(struct vdpa_device *vdpa, u64 features) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); - struct virtio_net_config *config = &vdpasim->config; + struct virtio_net_config *config = + (struct virtio_net_config *)vdpasim->config; /* DMA mapping must be done by driver */ if (!(features & (1ULL << VIRTIO_F_ACCESS_PLATFORM))) @@ -596,8 +603,8 @@ static void vdpasim_get_config(struct vdpa_device *vdpa, unsigned int offset, { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); - if (offset + len < sizeof(struct virtio_net_config)) - memcpy(buf, (u8 *)&vdpasim->config + offset, len); + if (offset + len < vdpasim->dev_attr.config_size) + memcpy(buf, vdpasim->config + offset, len); } static void vdpasim_set_config(struct vdpa_device *vdpa, unsigned int offset, @@ -684,6 +691,7 @@ static void vdpasim_free(struct vdpa_device *vdpa) if (vdpasim->iommu) vhost_iotlb_free(vdpasim->iommu); kfree(vdpasim->vqs); + kfree(vdpasim->config); } static const struct vdpa_config_ops vdpasim_config_ops = { @@ -746,6 +754,7 @@ static int __init vdpasim_dev_init(void) dev_attr.id = VIRTIO_ID_NET; dev_attr.supported_features = VDPASIM_NET_FEATURES; dev_attr.nvqs = VDPASIM_VQ_NUM; + dev_attr.config_size = sizeof(struct virtio_net_config); dev_attr.work_fn = vdpasim_net_work; vdpasim_dev = vdpasim_create(&dev_attr); -- GitLab From 65b709586e222fa6ffd4166ac7fdb5d5dad113ee Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 15 Dec 2020 15:42:50 +0100 Subject: [PATCH 0862/1894] vdpa_sim: add get_config callback in vdpasim_dev_attr The get_config callback can be used by the device to fill the config structure. The callback will be invoked in vdpasim_get_config() before copying bytes into caller buffer. Move vDPA-net config updates from vdpasim_set_features() in the new vdpasim_net_get_config() callback. This is safe since in vdpa_get_config() we already check that .set_features() callback is called before .get_config(). Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20201215144256.155342-13-sgarzare@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 35 +++++++++++++++++++------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 4a0a6cadb9ff7..5eadcd19ab6f6 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -68,6 +68,8 @@ struct vdpasim_virtqueue { #define VDPASIM_NET_FEATURES (VDPASIM_FEATURES | \ (1ULL << VIRTIO_NET_F_MAC)) +struct vdpasim; + struct vdpasim_dev_attr { u64 supported_features; size_t config_size; @@ -75,6 +77,7 @@ struct vdpasim_dev_attr { u32 id; work_func_t work_fn; + void (*get_config)(struct vdpasim *vdpasim, void *config); }; /* State of each vdpasim device */ @@ -530,8 +533,6 @@ static u64 vdpasim_get_features(struct vdpa_device *vdpa) static int vdpasim_set_features(struct vdpa_device *vdpa, u64 features) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); - struct virtio_net_config *config = - (struct virtio_net_config *)vdpasim->config; /* DMA mapping must be done by driver */ if (!(features & (1ULL << VIRTIO_F_ACCESS_PLATFORM))) @@ -539,16 +540,6 @@ static int vdpasim_set_features(struct vdpa_device *vdpa, u64 features) vdpasim->features = features & vdpasim->dev_attr.supported_features; - /* We generally only know whether guest is using the legacy interface - * here, so generally that's the earliest we can set config fields. - * Note: We actually require VIRTIO_F_ACCESS_PLATFORM above which - * implies VIRTIO_F_VERSION_1, but let's not try to be clever here. - */ - - config->mtu = cpu_to_vdpasim16(vdpasim, 1500); - config->status = cpu_to_vdpasim16(vdpasim, VIRTIO_NET_S_LINK_UP); - memcpy(config->mac, macaddr_buf, ETH_ALEN); - return 0; } @@ -603,8 +594,13 @@ static void vdpasim_get_config(struct vdpa_device *vdpa, unsigned int offset, { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); - if (offset + len < vdpasim->dev_attr.config_size) - memcpy(buf, vdpasim->config + offset, len); + if (offset + len > vdpasim->dev_attr.config_size) + return; + + if (vdpasim->dev_attr.get_config) + vdpasim->dev_attr.get_config(vdpasim, vdpasim->config); + + memcpy(buf, vdpasim->config + offset, len); } static void vdpasim_set_config(struct vdpa_device *vdpa, unsigned int offset, @@ -747,6 +743,16 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = { .free = vdpasim_free, }; +static void vdpasim_net_get_config(struct vdpasim *vdpasim, void *config) +{ + struct virtio_net_config *net_config = + (struct virtio_net_config *)config; + + net_config->mtu = cpu_to_vdpasim16(vdpasim, 1500); + net_config->status = cpu_to_vdpasim16(vdpasim, VIRTIO_NET_S_LINK_UP); + memcpy(net_config->mac, macaddr_buf, ETH_ALEN); +} + static int __init vdpasim_dev_init(void) { struct vdpasim_dev_attr dev_attr = {}; @@ -755,6 +761,7 @@ static int __init vdpasim_dev_init(void) dev_attr.supported_features = VDPASIM_NET_FEATURES; dev_attr.nvqs = VDPASIM_VQ_NUM; dev_attr.config_size = sizeof(struct virtio_net_config); + dev_attr.get_config = vdpasim_net_get_config; dev_attr.work_fn = vdpasim_net_work; vdpasim_dev = vdpasim_create(&dev_attr); -- GitLab From c124a95e304bc5d37144e2fff6e52bb904d41810 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 15 Dec 2020 15:42:51 +0100 Subject: [PATCH 0863/1894] vdpa_sim: add set_config callback in vdpasim_dev_attr The set_config callback can be used by the device to parse the config structure modified by the driver. The callback will be invoked, if set, in vdpasim_set_config() after copying bytes from caller buffer into vdpasim->config buffer. Acked-by: Jason Wang Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20201215144256.155342-14-sgarzare@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 5eadcd19ab6f6..e219aa852ef89 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -78,6 +78,7 @@ struct vdpasim_dev_attr { work_func_t work_fn; void (*get_config)(struct vdpasim *vdpasim, void *config); + void (*set_config)(struct vdpasim *vdpasim, const void *config); }; /* State of each vdpasim device */ @@ -606,7 +607,15 @@ static void vdpasim_get_config(struct vdpa_device *vdpa, unsigned int offset, static void vdpasim_set_config(struct vdpa_device *vdpa, unsigned int offset, const void *buf, unsigned int len) { - /* No writable config supportted by vdpasim */ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + + if (offset + len > vdpasim->dev_attr.config_size) + return; + + memcpy(vdpasim->config + offset, buf, len); + + if (vdpasim->dev_attr.set_config) + vdpasim->dev_attr.set_config(vdpasim, vdpasim->config); } static u32 vdpasim_get_generation(struct vdpa_device *vdpa) -- GitLab From b240491b7a48028fb67e5377ffd1be21e9260c4e Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 15 Dec 2020 15:42:52 +0100 Subject: [PATCH 0864/1894] vdpa_sim: set vringh notify callback Instead of calling the vq callback directly, we can leverage the vringh_notify() function, adding vdpasim_vq_notify() and setting it in the vringh notify callback. Suggested-by: Jason Wang Acked-by: Jason Wang Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20201215144256.155342-15-sgarzare@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index e219aa852ef89..19ff5e3527827 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -131,6 +131,17 @@ static struct vdpasim *dev_to_sim(struct device *dev) return vdpa_to_sim(vdpa); } +static void vdpasim_vq_notify(struct vringh *vring) +{ + struct vdpasim_virtqueue *vq = + container_of(vring, struct vdpasim_virtqueue, vring); + + if (!vq->cb) + return; + + vq->cb(vq->private); +} + static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx) { struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; @@ -142,6 +153,8 @@ static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx) (uintptr_t)vq->driver_addr, (struct vring_used *) (uintptr_t)vq->device_addr); + + vq->vring.notify = vdpasim_vq_notify; } static void vdpasim_vq_reset(struct vdpasim *vdpasim, @@ -155,6 +168,8 @@ static void vdpasim_vq_reset(struct vdpasim *vdpasim, vq->private = NULL; vringh_init_iotlb(&vq->vring, vdpasim->dev_attr.supported_features, VDPASIM_QUEUE_MAX, false, NULL, NULL, NULL); + + vq->vring.notify = NULL; } static void vdpasim_reset(struct vdpasim *vdpasim) @@ -231,10 +246,10 @@ static void vdpasim_net_work(struct work_struct *work) smp_wmb(); local_bh_disable(); - if (txq->cb) - txq->cb(txq->private); - if (rxq->cb) - rxq->cb(rxq->private); + if (vringh_need_notify_iotlb(&txq->vring) > 0) + vringh_notify(&txq->vring); + if (vringh_need_notify_iotlb(&rxq->vring) > 0) + vringh_notify(&rxq->vring); local_bh_enable(); if (++pkts > 4) { -- GitLab From 165be1f80b8807687f7426d3f36f1031d633e979 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 15 Dec 2020 15:42:53 +0100 Subject: [PATCH 0865/1894] vdpa_sim: use kvmalloc to allocate vdpasim->buffer The next patch will make the buffer size configurable from each device. Since the buffer could be larger than a page, we use kvmalloc() instead of kmalloc(). Acked-by: Jason Wang Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20201215144256.155342-16-sgarzare@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 19ff5e3527827..87529899033ac 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -418,7 +418,7 @@ static struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr) if (!vdpasim->iommu) goto err_iommu; - vdpasim->buffer = kmalloc(PAGE_SIZE, GFP_KERNEL); + vdpasim->buffer = kvmalloc(PAGE_SIZE, GFP_KERNEL); if (!vdpasim->buffer) goto err_iommu; @@ -707,7 +707,7 @@ static void vdpasim_free(struct vdpa_device *vdpa) struct vdpasim *vdpasim = vdpa_to_sim(vdpa); cancel_work_sync(&vdpasim->work); - kfree(vdpasim->buffer); + kvfree(vdpasim->buffer); if (vdpasim->iommu) vhost_iotlb_free(vdpasim->iommu); kfree(vdpasim->vqs); -- GitLab From da7af6967c6e9815f8da60a8db1d0fe35b8e97b9 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 15 Dec 2020 15:42:54 +0100 Subject: [PATCH 0866/1894] vdpa_sim: make vdpasim->buffer size configurable Allow each device to specify the size of the buffer allocated in vdpa_sim. Acked-by: Jason Wang Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20201215144256.155342-17-sgarzare@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 87529899033ac..60e45db29b158 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -73,6 +73,7 @@ struct vdpasim; struct vdpasim_dev_attr { u64 supported_features; size_t config_size; + size_t buffer_size; int nvqs; u32 id; @@ -418,7 +419,7 @@ static struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr) if (!vdpasim->iommu) goto err_iommu; - vdpasim->buffer = kvmalloc(PAGE_SIZE, GFP_KERNEL); + vdpasim->buffer = kvmalloc(dev_attr->buffer_size, GFP_KERNEL); if (!vdpasim->buffer) goto err_iommu; @@ -787,6 +788,7 @@ static int __init vdpasim_dev_init(void) dev_attr.config_size = sizeof(struct virtio_net_config); dev_attr.get_config = vdpasim_net_get_config; dev_attr.work_fn = vdpasim_net_work; + dev_attr.buffer_size = PAGE_SIZE; vdpasim_dev = vdpasim_create(&dev_attr); -- GitLab From 275900dfa17c32f0f52b460e1fbd769cf694ecd3 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 15 Dec 2020 15:42:55 +0100 Subject: [PATCH 0867/1894] vdpa_sim: split vdpasim_virtqueue's iov field in out_iov and in_iov vringh_getdesc_iotlb() manages 2 iovs for writable and readable descriptors. This is very useful for the block device, where for each request we have both types of descriptor. Let's split the vdpasim_virtqueue's iov field in out_iov and in_iov to use them with vringh_getdesc_iotlb(). We are using VIRTIO terminology for "out" (readable by the device) and "in" (writable by the device) descriptors. Acked-by: Jason Wang Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20201215144256.155342-18-sgarzare@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 60e45db29b158..875e42390a134 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -44,7 +44,8 @@ u8 macaddr_buf[ETH_ALEN]; struct vdpasim_virtqueue { struct vringh vring; - struct vringh_kiov iov; + struct vringh_kiov in_iov; + struct vringh_kiov out_iov; unsigned short head; bool ready; u64 desc_addr; @@ -210,12 +211,12 @@ static void vdpasim_net_work(struct work_struct *work) while (true) { total_write = 0; - err = vringh_getdesc_iotlb(&txq->vring, &txq->iov, NULL, + err = vringh_getdesc_iotlb(&txq->vring, &txq->out_iov, NULL, &txq->head, GFP_ATOMIC); if (err <= 0) break; - err = vringh_getdesc_iotlb(&rxq->vring, NULL, &rxq->iov, + err = vringh_getdesc_iotlb(&rxq->vring, NULL, &rxq->in_iov, &rxq->head, GFP_ATOMIC); if (err <= 0) { vringh_complete_iotlb(&txq->vring, txq->head, 0); @@ -223,13 +224,13 @@ static void vdpasim_net_work(struct work_struct *work) } while (true) { - read = vringh_iov_pull_iotlb(&txq->vring, &txq->iov, + read = vringh_iov_pull_iotlb(&txq->vring, &txq->out_iov, vdpasim->buffer, PAGE_SIZE); if (read <= 0) break; - write = vringh_iov_push_iotlb(&rxq->vring, &rxq->iov, + write = vringh_iov_push_iotlb(&rxq->vring, &rxq->in_iov, vdpasim->buffer, read); if (write <= 0) break; -- GitLab From db1e8bb6c63a77b74b0c6b49662fc50d49d5f90b Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Tue, 15 Dec 2020 15:42:56 +0100 Subject: [PATCH 0868/1894] vdpa: split vdpasim to core and net modules Introduce new vdpa_sim_net and vdpa_sim (core) drivers. This is a preparation for adding a vdpa simulator module for block devices. Signed-off-by: Max Gurtovoy [sgarzare: various cleanups/fixes] Acked-by: Jason Wang Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20201215144256.155342-19-sgarzare@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/Kconfig | 13 +- drivers/vdpa/vdpa_sim/Makefile | 1 + drivers/vdpa/vdpa_sim/vdpa_sim.c | 221 +-------------------------- drivers/vdpa/vdpa_sim/vdpa_sim.h | 105 +++++++++++++ drivers/vdpa/vdpa_sim/vdpa_sim_net.c | 177 +++++++++++++++++++++ 5 files changed, 298 insertions(+), 219 deletions(-) create mode 100644 drivers/vdpa/vdpa_sim/vdpa_sim.h create mode 100644 drivers/vdpa/vdpa_sim/vdpa_sim_net.c diff --git a/drivers/vdpa/Kconfig b/drivers/vdpa/Kconfig index 2c892e890b9e3..92a6396f8a738 100644 --- a/drivers/vdpa/Kconfig +++ b/drivers/vdpa/Kconfig @@ -9,15 +9,20 @@ menuconfig VDPA if VDPA config VDPA_SIM - tristate "vDPA device simulator" + tristate "vDPA device simulator core" depends on RUNTIME_TESTING_MENU && HAS_DMA select DMA_OPS select VHOST_RING + help + Enable this module to support vDPA device simulators. These devices + are used for testing, prototyping and development of vDPA. + +config VDPA_SIM_NET + tristate "vDPA simulator for networking device" + depends on VDPA_SIM select GENERIC_NET_UTILS help - vDPA networking device simulator which loop TX traffic back - to RX. This device is used for testing, prototyping and - development of vDPA. + vDPA networking device simulator which loops TX traffic back to RX. config IFCVF tristate "Intel IFC VF vDPA driver" diff --git a/drivers/vdpa/vdpa_sim/Makefile b/drivers/vdpa/vdpa_sim/Makefile index b40278f65e04f..79d4536d347ec 100644 --- a/drivers/vdpa/vdpa_sim/Makefile +++ b/drivers/vdpa/vdpa_sim/Makefile @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_VDPA_SIM) += vdpa_sim.o +obj-$(CONFIG_VDPA_SIM_NET) += vdpa_sim_net.o diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 875e42390a134..b3fcc67bfdf0c 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * VDPA networking device simulator. + * VDPA device simulator core. * * Copyright (c) 2020, Red Hat Inc. All rights reserved. * Author: Jason Wang @@ -14,17 +14,15 @@ #include #include #include -#include #include #include -#include #include -#include -#include + +#include "vdpa_sim.h" #define DRV_VERSION "0.1" #define DRV_AUTHOR "Jason Wang " -#define DRV_DESC "vDPA Device Simulator" +#define DRV_DESC "vDPA Device Simulator core" #define DRV_LICENSE "GPL v2" static int batch_mapping = 1; @@ -36,90 +34,9 @@ module_param(max_iotlb_entries, int, 0444); MODULE_PARM_DESC(max_iotlb_entries, "Maximum number of iotlb entries. 0 means unlimited. (default: 2048)"); -static char *macaddr; -module_param(macaddr, charp, 0); -MODULE_PARM_DESC(macaddr, "Ethernet MAC address"); - -u8 macaddr_buf[ETH_ALEN]; - -struct vdpasim_virtqueue { - struct vringh vring; - struct vringh_kiov in_iov; - struct vringh_kiov out_iov; - unsigned short head; - bool ready; - u64 desc_addr; - u64 device_addr; - u64 driver_addr; - u32 num; - void *private; - irqreturn_t (*cb)(void *data); -}; - #define VDPASIM_QUEUE_ALIGN PAGE_SIZE #define VDPASIM_QUEUE_MAX 256 #define VDPASIM_VENDOR_ID 0 -#define VDPASIM_VQ_NUM 0x2 -#define VDPASIM_NAME "vdpasim-netdev" - -#define VDPASIM_FEATURES ((1ULL << VIRTIO_F_ANY_LAYOUT) | \ - (1ULL << VIRTIO_F_VERSION_1) | \ - (1ULL << VIRTIO_F_ACCESS_PLATFORM)) - -#define VDPASIM_NET_FEATURES (VDPASIM_FEATURES | \ - (1ULL << VIRTIO_NET_F_MAC)) - -struct vdpasim; - -struct vdpasim_dev_attr { - u64 supported_features; - size_t config_size; - size_t buffer_size; - int nvqs; - u32 id; - - work_func_t work_fn; - void (*get_config)(struct vdpasim *vdpasim, void *config); - void (*set_config)(struct vdpasim *vdpasim, const void *config); -}; - -/* State of each vdpasim device */ -struct vdpasim { - struct vdpa_device vdpa; - struct vdpasim_virtqueue *vqs; - struct work_struct work; - struct vdpasim_dev_attr dev_attr; - /* spinlock to synchronize virtqueue state */ - spinlock_t lock; - /* virtio config according to device type */ - void *config; - struct vhost_iotlb *iommu; - void *buffer; - u32 status; - u32 generation; - u64 features; - /* spinlock to synchronize iommu table */ - spinlock_t iommu_lock; -}; - -/* TODO: cross-endian support */ -static inline bool vdpasim_is_little_endian(struct vdpasim *vdpasim) -{ - return virtio_legacy_is_little_endian() || - (vdpasim->features & (1ULL << VIRTIO_F_VERSION_1)); -} - -static inline u16 vdpasim16_to_cpu(struct vdpasim *vdpasim, __virtio16 val) -{ - return __virtio16_to_cpu(vdpasim_is_little_endian(vdpasim), val); -} - -static inline __virtio16 cpu_to_vdpasim16(struct vdpasim *vdpasim, u16 val) -{ - return __cpu_to_virtio16(vdpasim_is_little_endian(vdpasim), val); -} - -static struct vdpasim *vdpasim_dev; static struct vdpasim *vdpa_to_sim(struct vdpa_device *vdpa) { @@ -190,80 +107,6 @@ static void vdpasim_reset(struct vdpasim *vdpasim) ++vdpasim->generation; } -static void vdpasim_net_work(struct work_struct *work) -{ - struct vdpasim *vdpasim = container_of(work, struct - vdpasim, work); - struct vdpasim_virtqueue *txq = &vdpasim->vqs[1]; - struct vdpasim_virtqueue *rxq = &vdpasim->vqs[0]; - ssize_t read, write; - size_t total_write; - int pkts = 0; - int err; - - spin_lock(&vdpasim->lock); - - if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK)) - goto out; - - if (!txq->ready || !rxq->ready) - goto out; - - while (true) { - total_write = 0; - err = vringh_getdesc_iotlb(&txq->vring, &txq->out_iov, NULL, - &txq->head, GFP_ATOMIC); - if (err <= 0) - break; - - err = vringh_getdesc_iotlb(&rxq->vring, NULL, &rxq->in_iov, - &rxq->head, GFP_ATOMIC); - if (err <= 0) { - vringh_complete_iotlb(&txq->vring, txq->head, 0); - break; - } - - while (true) { - read = vringh_iov_pull_iotlb(&txq->vring, &txq->out_iov, - vdpasim->buffer, - PAGE_SIZE); - if (read <= 0) - break; - - write = vringh_iov_push_iotlb(&rxq->vring, &rxq->in_iov, - vdpasim->buffer, read); - if (write <= 0) - break; - - total_write += write; - } - - /* Make sure data is wrote before advancing index */ - smp_wmb(); - - vringh_complete_iotlb(&txq->vring, txq->head, 0); - vringh_complete_iotlb(&rxq->vring, rxq->head, total_write); - - /* Make sure used is visible before rasing the interrupt. */ - smp_wmb(); - - local_bh_disable(); - if (vringh_need_notify_iotlb(&txq->vring) > 0) - vringh_notify(&txq->vring); - if (vringh_need_notify_iotlb(&rxq->vring) > 0) - vringh_notify(&rxq->vring); - local_bh_enable(); - - if (++pkts > 4) { - schedule_work(&vdpasim->work); - goto out; - } - } - -out: - spin_unlock(&vdpasim->lock); -} - static int dir_to_perm(enum dma_data_direction dir) { int perm = -EFAULT; @@ -379,7 +222,7 @@ static const struct dma_map_ops vdpasim_dma_ops = { static const struct vdpa_config_ops vdpasim_config_ops; static const struct vdpa_config_ops vdpasim_batch_config_ops; -static struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr) +struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr) { const struct vdpa_config_ops *ops; struct vdpasim *vdpasim; @@ -424,23 +267,10 @@ static struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr) if (!vdpasim->buffer) goto err_iommu; - if (macaddr) { - mac_pton(macaddr, macaddr_buf); - if (!is_valid_ether_addr(macaddr_buf)) { - ret = -EADDRNOTAVAIL; - goto err_iommu; - } - } else { - eth_random_addr(macaddr_buf); - } - for (i = 0; i < dev_attr->nvqs; i++) vringh_set_iotlb(&vdpasim->vqs[i].vring, vdpasim->iommu); vdpasim->vdpa.dma_dev = dev; - ret = vdpa_register_device(&vdpasim->vdpa); - if (ret) - goto err_iommu; return vdpasim; @@ -449,6 +279,7 @@ err_iommu: err_alloc: return ERR_PTR(ret); } +EXPORT_SYMBOL_GPL(vdpasim_create); static int vdpasim_set_vq_address(struct vdpa_device *vdpa, u16 idx, u64 desc_area, u64 driver_area, @@ -769,46 +600,6 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = { .free = vdpasim_free, }; -static void vdpasim_net_get_config(struct vdpasim *vdpasim, void *config) -{ - struct virtio_net_config *net_config = - (struct virtio_net_config *)config; - - net_config->mtu = cpu_to_vdpasim16(vdpasim, 1500); - net_config->status = cpu_to_vdpasim16(vdpasim, VIRTIO_NET_S_LINK_UP); - memcpy(net_config->mac, macaddr_buf, ETH_ALEN); -} - -static int __init vdpasim_dev_init(void) -{ - struct vdpasim_dev_attr dev_attr = {}; - - dev_attr.id = VIRTIO_ID_NET; - dev_attr.supported_features = VDPASIM_NET_FEATURES; - dev_attr.nvqs = VDPASIM_VQ_NUM; - dev_attr.config_size = sizeof(struct virtio_net_config); - dev_attr.get_config = vdpasim_net_get_config; - dev_attr.work_fn = vdpasim_net_work; - dev_attr.buffer_size = PAGE_SIZE; - - vdpasim_dev = vdpasim_create(&dev_attr); - - if (!IS_ERR(vdpasim_dev)) - return 0; - - return PTR_ERR(vdpasim_dev); -} - -static void __exit vdpasim_dev_exit(void) -{ - struct vdpa_device *vdpa = &vdpasim_dev->vdpa; - - vdpa_unregister_device(vdpa); -} - -module_init(vdpasim_dev_init) -module_exit(vdpasim_dev_exit) - MODULE_VERSION(DRV_VERSION); MODULE_LICENSE(DRV_LICENSE); MODULE_AUTHOR(DRV_AUTHOR); diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.h b/drivers/vdpa/vdpa_sim/vdpa_sim.h new file mode 100644 index 0000000000000..b02142293d5bc --- /dev/null +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.h @@ -0,0 +1,105 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2020, Red Hat Inc. All rights reserved. + */ + +#ifndef _VDPA_SIM_H +#define _VDPA_SIM_H + +#include +#include +#include +#include +#include + +#define VDPASIM_FEATURES ((1ULL << VIRTIO_F_ANY_LAYOUT) | \ + (1ULL << VIRTIO_F_VERSION_1) | \ + (1ULL << VIRTIO_F_ACCESS_PLATFORM)) + +struct vdpasim; + +struct vdpasim_virtqueue { + struct vringh vring; + struct vringh_kiov in_iov; + struct vringh_kiov out_iov; + unsigned short head; + bool ready; + u64 desc_addr; + u64 device_addr; + u64 driver_addr; + u32 num; + void *private; + irqreturn_t (*cb)(void *data); +}; + +struct vdpasim_dev_attr { + u64 supported_features; + size_t config_size; + size_t buffer_size; + int nvqs; + u32 id; + + work_func_t work_fn; + void (*get_config)(struct vdpasim *vdpasim, void *config); + void (*set_config)(struct vdpasim *vdpasim, const void *config); +}; + +/* State of each vdpasim device */ +struct vdpasim { + struct vdpa_device vdpa; + struct vdpasim_virtqueue *vqs; + struct work_struct work; + struct vdpasim_dev_attr dev_attr; + /* spinlock to synchronize virtqueue state */ + spinlock_t lock; + /* virtio config according to device type */ + void *config; + struct vhost_iotlb *iommu; + void *buffer; + u32 status; + u32 generation; + u64 features; + /* spinlock to synchronize iommu table */ + spinlock_t iommu_lock; +}; + +struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *attr); + +/* TODO: cross-endian support */ +static inline bool vdpasim_is_little_endian(struct vdpasim *vdpasim) +{ + return virtio_legacy_is_little_endian() || + (vdpasim->features & (1ULL << VIRTIO_F_VERSION_1)); +} + +static inline u16 vdpasim16_to_cpu(struct vdpasim *vdpasim, __virtio16 val) +{ + return __virtio16_to_cpu(vdpasim_is_little_endian(vdpasim), val); +} + +static inline __virtio16 cpu_to_vdpasim16(struct vdpasim *vdpasim, u16 val) +{ + return __cpu_to_virtio16(vdpasim_is_little_endian(vdpasim), val); +} + +static inline u32 vdpasim32_to_cpu(struct vdpasim *vdpasim, __virtio32 val) +{ + return __virtio32_to_cpu(vdpasim_is_little_endian(vdpasim), val); +} + +static inline __virtio32 cpu_to_vdpasim32(struct vdpasim *vdpasim, u32 val) +{ + return __cpu_to_virtio32(vdpasim_is_little_endian(vdpasim), val); +} + +static inline u64 vdpasim64_to_cpu(struct vdpasim *vdpasim, __virtio64 val) +{ + return __virtio64_to_cpu(vdpasim_is_little_endian(vdpasim), val); +} + +static inline __virtio64 cpu_to_vdpasim64(struct vdpasim *vdpasim, u64 val) +{ + return __cpu_to_virtio64(vdpasim_is_little_endian(vdpasim), val); +} + +#endif diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c new file mode 100644 index 0000000000000..c10b6981fdabf --- /dev/null +++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * VDPA simulator for networking device. + * + * Copyright (c) 2020, Red Hat Inc. All rights reserved. + * Author: Jason Wang + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "vdpa_sim.h" + +#define DRV_VERSION "0.1" +#define DRV_AUTHOR "Jason Wang " +#define DRV_DESC "vDPA Device Simulator for networking device" +#define DRV_LICENSE "GPL v2" + +#define VDPASIM_NET_FEATURES (VDPASIM_FEATURES | \ + (1ULL << VIRTIO_NET_F_MAC)) + +#define VDPASIM_NET_VQ_NUM 2 + +static char *macaddr; +module_param(macaddr, charp, 0); +MODULE_PARM_DESC(macaddr, "Ethernet MAC address"); + +u8 macaddr_buf[ETH_ALEN]; + +static struct vdpasim *vdpasim_net_dev; + +static void vdpasim_net_work(struct work_struct *work) +{ + struct vdpasim *vdpasim = container_of(work, struct vdpasim, work); + struct vdpasim_virtqueue *txq = &vdpasim->vqs[1]; + struct vdpasim_virtqueue *rxq = &vdpasim->vqs[0]; + ssize_t read, write; + size_t total_write; + int pkts = 0; + int err; + + spin_lock(&vdpasim->lock); + + if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK)) + goto out; + + if (!txq->ready || !rxq->ready) + goto out; + + while (true) { + total_write = 0; + err = vringh_getdesc_iotlb(&txq->vring, &txq->out_iov, NULL, + &txq->head, GFP_ATOMIC); + if (err <= 0) + break; + + err = vringh_getdesc_iotlb(&rxq->vring, NULL, &rxq->in_iov, + &rxq->head, GFP_ATOMIC); + if (err <= 0) { + vringh_complete_iotlb(&txq->vring, txq->head, 0); + break; + } + + while (true) { + read = vringh_iov_pull_iotlb(&txq->vring, &txq->out_iov, + vdpasim->buffer, + PAGE_SIZE); + if (read <= 0) + break; + + write = vringh_iov_push_iotlb(&rxq->vring, &rxq->in_iov, + vdpasim->buffer, read); + if (write <= 0) + break; + + total_write += write; + } + + /* Make sure data is wrote before advancing index */ + smp_wmb(); + + vringh_complete_iotlb(&txq->vring, txq->head, 0); + vringh_complete_iotlb(&rxq->vring, rxq->head, total_write); + + /* Make sure used is visible before rasing the interrupt. */ + smp_wmb(); + + local_bh_disable(); + if (vringh_need_notify_iotlb(&txq->vring) > 0) + vringh_notify(&txq->vring); + if (vringh_need_notify_iotlb(&rxq->vring) > 0) + vringh_notify(&rxq->vring); + local_bh_enable(); + + if (++pkts > 4) { + schedule_work(&vdpasim->work); + goto out; + } + } + +out: + spin_unlock(&vdpasim->lock); +} + +static void vdpasim_net_get_config(struct vdpasim *vdpasim, void *config) +{ + struct virtio_net_config *net_config = + (struct virtio_net_config *)config; + + net_config->mtu = cpu_to_vdpasim16(vdpasim, 1500); + net_config->status = cpu_to_vdpasim16(vdpasim, VIRTIO_NET_S_LINK_UP); + memcpy(net_config->mac, macaddr_buf, ETH_ALEN); +} + +static int __init vdpasim_net_init(void) +{ + struct vdpasim_dev_attr dev_attr = {}; + int ret; + + if (macaddr) { + mac_pton(macaddr, macaddr_buf); + if (!is_valid_ether_addr(macaddr_buf)) { + ret = -EADDRNOTAVAIL; + goto out; + } + } else { + eth_random_addr(macaddr_buf); + } + + dev_attr.id = VIRTIO_ID_NET; + dev_attr.supported_features = VDPASIM_NET_FEATURES; + dev_attr.nvqs = VDPASIM_NET_VQ_NUM; + dev_attr.config_size = sizeof(struct virtio_net_config); + dev_attr.get_config = vdpasim_net_get_config; + dev_attr.work_fn = vdpasim_net_work; + dev_attr.buffer_size = PAGE_SIZE; + + vdpasim_net_dev = vdpasim_create(&dev_attr); + if (IS_ERR(vdpasim_net_dev)) { + ret = PTR_ERR(vdpasim_net_dev); + goto out; + } + + ret = vdpa_register_device(&vdpasim_net_dev->vdpa); + if (ret) + goto put_dev; + + return 0; + +put_dev: + put_device(&vdpasim_net_dev->vdpa.dev); +out: + return ret; +} + +static void __exit vdpasim_net_exit(void) +{ + struct vdpa_device *vdpa = &vdpasim_net_dev->vdpa; + + vdpa_unregister_device(vdpa); +} + +module_init(vdpasim_net_init); +module_exit(vdpasim_net_exit); + +MODULE_VERSION(DRV_VERSION); +MODULE_LICENSE(DRV_LICENSE); +MODULE_AUTHOR(DRV_AUTHOR); +MODULE_DESCRIPTION(DRV_DESC); -- GitLab From 83ef73b27eb2363f44faf9c3ee28a3fe752cfd15 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 9 Dec 2020 16:00:04 +0200 Subject: [PATCH 0869/1894] vdpa/mlx5: Use write memory barrier after updating CQ index Make sure to put dma write memory barrier after updating CQ consumer index so the hardware knows that there are available CQE slots in the queue. Failure to do this can cause the update of the RX doorbell record to get updated before the CQ consumer index resulting in CQ overrun. Fixes: 1a86b377aa21 ("vdpa/mlx5: Add VDPA driver for supported mlx5 devices") Signed-off-by: Eli Cohen Link: https://lore.kernel.org/r/20201209140004.15892-1-elic@nvidia.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 1fa6fcac82992..81b932f72e103 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -464,6 +464,11 @@ static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq) static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num) { mlx5_cq_set_ci(&mvq->cq.mcq); + + /* make sure CQ cosumer update is visible to the hardware before updating + * RX doorbell record. + */ + dma_wmb(); rx_post(&mvq->vqqp, num); if (mvq->event_cb.callback) mvq->event_cb.callback(mvq->event_cb.private); -- GitLab From 697d1549140cdcdc4cfcd0bf94e62643008972b7 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Wed, 9 Dec 2020 16:42:03 +0800 Subject: [PATCH 0870/1894] tools/virtio: include asm/bug.h WARN_ON is used in drivers/vhost/vringh.c, to avoid build failure, need include asm/bug.h Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/20201209084205.24062-2-peng.fan@oss.nxp.com Signed-off-by: Michael S. Tsirkin --- tools/virtio/linux/bug.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/virtio/linux/bug.h b/tools/virtio/linux/bug.h index b14c2c3b6b857..813baf13f62a2 100644 --- a/tools/virtio/linux/bug.h +++ b/tools/virtio/linux/bug.h @@ -2,6 +2,8 @@ #ifndef BUG_H #define BUG_H +#include + #define BUG_ON(__BUG_ON_cond) assert(!(__BUG_ON_cond)) #define BUILD_BUG_ON(x) -- GitLab From b9ca93bcd186ec4144df91c619f6084cdad500ec Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Wed, 9 Dec 2020 16:42:04 +0800 Subject: [PATCH 0871/1894] tools/virtio: add krealloc_array krealloc_array is used in drivers/vhost/vringh.c, add it to avoid build failure. Drop WARN_ON_ONCE, because duplicated with the one in bug.h Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/20201209084205.24062-3-peng.fan@oss.nxp.com Signed-off-by: Michael S. Tsirkin --- tools/virtio/linux/kernel.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h index 315e85cabedab..0b493542e61a6 100644 --- a/tools/virtio/linux/kernel.h +++ b/tools/virtio/linux/kernel.h @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -117,6 +118,16 @@ static inline void free_page(unsigned long addr) # define unlikely(x) (__builtin_expect(!!(x), 0)) # endif +static inline void *krealloc_array(void *p, size_t new_n, size_t new_size, gfp_t gfp) +{ + size_t bytes; + + if (unlikely(check_mul_overflow(new_n, new_size, &bytes))) + return NULL; + + return krealloc(p, bytes, gfp); +} + #define pr_err(format, ...) fprintf (stderr, format, ## __VA_ARGS__) #ifdef DEBUG #define pr_debug(format, ...) fprintf (stderr, format, ## __VA_ARGS__) @@ -126,8 +137,6 @@ static inline void free_page(unsigned long addr) #define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__) #define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__) -#define WARN_ON_ONCE(cond) (unlikely(cond) ? fprintf (stderr, "WARNING\n") : 0) - #define min(x, y) ({ \ typeof(x) _min1 = (x); \ typeof(y) _min2 = (y); \ -- GitLab From 1a5514cbb09aaf694d26ef26fd6da5c5d495cc22 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Wed, 9 Dec 2020 16:42:05 +0800 Subject: [PATCH 0872/1894] tools/virtio: add barrier for aarch64 Add barrier for aarch64 for cross compiling, and most are from Linux Kernel. Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/20201209084205.24062-4-peng.fan@oss.nxp.com Signed-off-by: Michael S. Tsirkin --- tools/virtio/asm/barrier.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/virtio/asm/barrier.h b/tools/virtio/asm/barrier.h index 04d563fc9b95c..468435ed64e6b 100644 --- a/tools/virtio/asm/barrier.h +++ b/tools/virtio/asm/barrier.h @@ -16,6 +16,16 @@ # define mb() abort() # define dma_rmb() abort() # define dma_wmb() abort() +#elif defined(__aarch64__) +#define dmb(opt) asm volatile("dmb " #opt : : : "memory") +#define virt_mb() __sync_synchronize() +#define virt_rmb() dmb(ishld) +#define virt_wmb() dmb(ishst) +#define virt_store_mb(var, value) do { WRITE_ONCE(var, value); dmb(ish); } while (0) +/* Weak barriers should be used. If not - it's a bug */ +# define mb() abort() +# define dma_rmb() abort() +# define dma_wmb() abort() #else #error Please fill in barrier macros #endif -- GitLab From ae93d8ea0fa701e84ab9df0db9fb60ec6c80d7b8 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 4 Dec 2020 17:23:00 +0300 Subject: [PATCH 0873/1894] virtio_ring: Cut and paste bugs in vring_create_virtqueue_packed() There is a copy and paste bug in the error handling of this code and it uses "ring_dma_addr" three times instead of "device_event_dma_addr" and "driver_event_dma_addr". Fixes: 1ce9e6055fa0 (" virtio_ring: introduce packed ring support") Reported-by: Robert Buhren Reported-by: Felicitas Hetzelt Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/X8pGRJlEzyn+04u2@mwanda Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/virtio/virtio_ring.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index becc776979602..924b6b85376bd 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -1676,9 +1676,9 @@ err_desc_extra: err_desc_state: kfree(vq); err_vq: - vring_free_queue(vdev, event_size_in_bytes, device, ring_dma_addr); + vring_free_queue(vdev, event_size_in_bytes, device, device_event_dma_addr); err_device: - vring_free_queue(vdev, event_size_in_bytes, driver, ring_dma_addr); + vring_free_queue(vdev, event_size_in_bytes, driver, driver_event_dma_addr); err_driver: vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr); err_ring: -- GitLab From 411ea23a76526e6efed0b601abb603d3c981b333 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 4 Dec 2020 17:23:16 +0300 Subject: [PATCH 0874/1894] virtio_net: Fix error code in probe() Set a negative error code intead of returning success if the MTU has been changed to something invalid. Fixes: fe36cbe0671e ("virtio_net: clear MTU when out of range") Reported-by: Robert Buhren Reported-by: Felicitas Hetzelt Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/X8pGVJSeeCdII1Ys@mwanda Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/net/virtio_net.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 21b71148c5324..34bb95dd92392 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3072,6 +3072,7 @@ static int virtnet_probe(struct virtio_device *vdev) dev_err(&vdev->dev, "device MTU appears to have changed it is now %d < %d", mtu, dev->min_mtu); + err = -EINVAL; goto free; } -- GitLab From e152d8af4220a05c9797591609151d404866beaa Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 4 Dec 2020 17:23:36 +0300 Subject: [PATCH 0875/1894] virtio_ring: Fix two use after free bugs The "vq" struct is added to the "vdev->vqs" list prematurely. If we encounter an error later in the function then the "vq" is freed, but since it is still on the list that could lead to a use after free bug. Fixes: cbeedb72b97a ("virtio_ring: allocate desc state for split ring separately") Reported-by: Robert Buhren Reported-by: Felicitas Hetzelt Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/X8pGaG/zkI3jk8mk@mwanda Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/virtio/virtio_ring.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 924b6b85376bd..71e16b53e9c18 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -1608,7 +1608,6 @@ static struct virtqueue *vring_create_virtqueue_packed( vq->num_added = 0; vq->packed_ring = true; vq->use_dma_api = vring_use_dma_api(vdev); - list_add_tail(&vq->vq.list, &vdev->vqs); #ifdef DEBUG vq->in_use = false; vq->last_add_time_valid = false; @@ -1669,6 +1668,7 @@ static struct virtqueue *vring_create_virtqueue_packed( cpu_to_le16(vq->packed.event_flags_shadow); } + list_add_tail(&vq->vq.list, &vdev->vqs); return &vq->vq; err_desc_extra: @@ -2085,7 +2085,6 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, vq->last_used_idx = 0; vq->num_added = 0; vq->use_dma_api = vring_use_dma_api(vdev); - list_add_tail(&vq->vq.list, &vdev->vqs); #ifdef DEBUG vq->in_use = false; vq->last_add_time_valid = false; @@ -2127,6 +2126,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, memset(vq->split.desc_state, 0, vring.num * sizeof(struct vring_desc_state_split)); + list_add_tail(&vq->vq.list, &vdev->vqs); return &vq->vq; } EXPORT_SYMBOL_GPL(__vring_new_virtqueue); -- GitLab From 2e1139d613c7fb0956e82f72a8281c0a475ad4f8 Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Fri, 4 Dec 2020 16:43:30 +0800 Subject: [PATCH 0876/1894] vhost scsi: fix error return code in vhost_scsi_set_endpoint() Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: 25b98b64e284 ("vhost scsi: alloc cmds per vq instead of session") Reported-by: Hulk Robot Signed-off-by: Zhang Changzhong Link: https://lore.kernel.org/r/1607071411-33484-1-git-send-email-zhangchangzhong@huawei.com Signed-off-by: Michael S. Tsirkin --- drivers/vhost/scsi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 6ff8a50966915..4ce9f00ae10e8 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1643,7 +1643,8 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, if (!vhost_vq_is_setup(vq)) continue; - if (vhost_scsi_setup_vq_cmds(vq, vq->num)) + ret = vhost_scsi_setup_vq_cmds(vq, vq->num); + if (ret) goto destroy_vq_cmds; } -- GitLab From 1e38f0031c3055c9c7e5ffcb3bb09c95f69614ee Mon Sep 17 00:00:00 2001 From: "Enrico Weigelt, metux IT consult" Date: Wed, 2 Dec 2020 12:19:30 +0100 Subject: [PATCH 0877/1894] uapi: virtio_ids.h: consistent indentions Fixing the differing indentions to be consistent and properly aligned. Signed-off-by: Enrico Weigelt, metux IT consult Link: https://lore.kernel.org/r/20201202111931.31953-1-info@metux.net Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_ids.h | 38 ++++++++++++++++----------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index b052355ac7a32..3cb55e5277a11 100644 --- a/include/uapi/linux/virtio_ids.h +++ b/include/uapi/linux/virtio_ids.h @@ -29,24 +29,24 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ -#define VIRTIO_ID_NET 1 /* virtio net */ -#define VIRTIO_ID_BLOCK 2 /* virtio block */ -#define VIRTIO_ID_CONSOLE 3 /* virtio console */ -#define VIRTIO_ID_RNG 4 /* virtio rng */ -#define VIRTIO_ID_BALLOON 5 /* virtio balloon */ -#define VIRTIO_ID_RPMSG 7 /* virtio remote processor messaging */ -#define VIRTIO_ID_SCSI 8 /* virtio scsi */ -#define VIRTIO_ID_9P 9 /* 9p virtio console */ -#define VIRTIO_ID_RPROC_SERIAL 11 /* virtio remoteproc serial link */ -#define VIRTIO_ID_CAIF 12 /* Virtio caif */ -#define VIRTIO_ID_GPU 16 /* virtio GPU */ -#define VIRTIO_ID_INPUT 18 /* virtio input */ -#define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */ -#define VIRTIO_ID_CRYPTO 20 /* virtio crypto */ -#define VIRTIO_ID_IOMMU 23 /* virtio IOMMU */ -#define VIRTIO_ID_MEM 24 /* virtio mem */ -#define VIRTIO_ID_FS 26 /* virtio filesystem */ -#define VIRTIO_ID_PMEM 27 /* virtio pmem */ -#define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */ +#define VIRTIO_ID_NET 1 /* virtio net */ +#define VIRTIO_ID_BLOCK 2 /* virtio block */ +#define VIRTIO_ID_CONSOLE 3 /* virtio console */ +#define VIRTIO_ID_RNG 4 /* virtio rng */ +#define VIRTIO_ID_BALLOON 5 /* virtio balloon */ +#define VIRTIO_ID_RPMSG 7 /* virtio remote processor messaging */ +#define VIRTIO_ID_SCSI 8 /* virtio scsi */ +#define VIRTIO_ID_9P 9 /* 9p virtio console */ +#define VIRTIO_ID_RPROC_SERIAL 11 /* virtio remoteproc serial link */ +#define VIRTIO_ID_CAIF 12 /* Virtio caif */ +#define VIRTIO_ID_GPU 16 /* virtio GPU */ +#define VIRTIO_ID_INPUT 18 /* virtio input */ +#define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */ +#define VIRTIO_ID_CRYPTO 20 /* virtio crypto */ +#define VIRTIO_ID_IOMMU 23 /* virtio IOMMU */ +#define VIRTIO_ID_MEM 24 /* virtio mem */ +#define VIRTIO_ID_FS 26 /* virtio filesystem */ +#define VIRTIO_ID_PMEM 27 /* virtio pmem */ +#define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */ #endif /* _LINUX_VIRTIO_IDS_H */ -- GitLab From be618636de4186521ffba2cbe5105e9c3481b9cb Mon Sep 17 00:00:00 2001 From: "Enrico Weigelt, metux IT consult" Date: Wed, 2 Dec 2020 12:19:31 +0100 Subject: [PATCH 0878/1894] uapi: virtio_ids: add missing device type IDs from OASIS spec The OASIS virtio spec (1.1) defines several IDs that aren't reflected in the header yet. Fixing this by adding the missing IDs, even though they're not yet used by the kernel yet. Signed-off-by: Enrico Weigelt, metux IT consult Link: https://lore.kernel.org/r/20201202111931.31953-2-info@metux.net Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_ids.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index 3cb55e5277a11..bc1c0621f5edd 100644 --- a/include/uapi/linux/virtio_ids.h +++ b/include/uapi/linux/virtio_ids.h @@ -34,15 +34,21 @@ #define VIRTIO_ID_CONSOLE 3 /* virtio console */ #define VIRTIO_ID_RNG 4 /* virtio rng */ #define VIRTIO_ID_BALLOON 5 /* virtio balloon */ +#define VIRTIO_ID_IOMEM 6 /* virtio ioMemory */ #define VIRTIO_ID_RPMSG 7 /* virtio remote processor messaging */ #define VIRTIO_ID_SCSI 8 /* virtio scsi */ #define VIRTIO_ID_9P 9 /* 9p virtio console */ +#define VIRTIO_ID_MAC80211_WLAN 10 /* virtio WLAN MAC */ #define VIRTIO_ID_RPROC_SERIAL 11 /* virtio remoteproc serial link */ #define VIRTIO_ID_CAIF 12 /* Virtio caif */ +#define VIRTIO_ID_MEMORY_BALLOON 13 /* virtio memory balloon */ #define VIRTIO_ID_GPU 16 /* virtio GPU */ +#define VIRTIO_ID_CLOCK 17 /* virtio clock/timer */ #define VIRTIO_ID_INPUT 18 /* virtio input */ #define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */ #define VIRTIO_ID_CRYPTO 20 /* virtio crypto */ +#define VIRTIO_ID_SIGNAL_DIST 21 /* virtio signal distribution device */ +#define VIRTIO_ID_PSTORE 22 /* virtio pstore device */ #define VIRTIO_ID_IOMMU 23 /* virtio IOMMU */ #define VIRTIO_ID_MEM 24 /* virtio mem */ #define VIRTIO_ID_FS 26 /* virtio filesystem */ -- GitLab From 476c135e321716ad7a8a5d4a19a636e2dcc50526 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 12 Nov 2020 08:39:59 +0200 Subject: [PATCH 0879/1894] vdpa: Add missing comment for virtqueue count Add missing comment for number of virtqueue. Signed-off-by: Parav Pandit Reviewed-by: Eli Cohen Acked-by: Jason Wang Link: https://lore.kernel.org/r/20201112064005.349268-2-parav@nvidia.com Signed-off-by: Michael S. Tsirkin --- include/linux/vdpa.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 30bc7a7223bb7..0fefeb9768777 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -42,6 +42,7 @@ struct vdpa_vq_state { * @config: the configuration ops for this device. * @index: device index * @features_valid: were features initialized? for legacy guests + * @nvqs: maximum number of supported virtqueues */ struct vdpa_device { struct device dev; -- GitLab From 418eddef050d5f6393c303a94e3173847ab85466 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 12 Nov 2020 08:40:00 +0200 Subject: [PATCH 0880/1894] vdpa: Use simpler version of ida allocation vdpa doesn't have any specific need to define start and end range of the device index. Hence use the simper version of the ida allocator. Signed-off-by: Parav Pandit Reviewed-by: Eli Cohen Acked-by: Jason Wang Link: https://lore.kernel.org/r/20201112064005.349268-3-parav@nvidia.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index a69ffc991e130..c0825650c0559 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -89,7 +89,7 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent, if (!vdev) goto err; - err = ida_simple_get(&vdpa_index_ida, 0, 0, GFP_KERNEL); + err = ida_alloc(&vdpa_index_ida, GFP_KERNEL); if (err < 0) goto err_ida; -- GitLab From d69c6ddd019f31081cc0232fa8ad8ea1cabdf22c Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 17 Dec 2020 16:34:29 -0600 Subject: [PATCH 0881/1894] dt-bindings: Fix JSON pointers The correct syntax for JSON pointers begins with a '/' after the '#'. Without a '/', the string should be interpreted as a subschema identifier. The jsonschema module currently doesn't handle subschema identifiers and incorrectly allows JSON pointers to begin without a '/'. Let's fix this before it becomes a problem when jsonschema module is fixed. Converted with: perl -p -i -e 's/yaml#definitions/yaml#\/definitions/g' `find Documentation/devicetree/bindings/ -name "*.yaml"` Cc: Maxime Ripard Cc: Jonathan Cameron Cc: Lars-Peter Clausen Cc: Daniel Thompson Cc: Jingoo Han Cc: Pavel Machek Cc: "David S. Miller" Cc: Jakub Kicinski Cc: Andrew Lunn Cc: Florian Fainelli Cc: Greg Kroah-Hartman Cc: Mark Brown Cc: netdev@vger.kernel.org Acked-By: Vinod Koul Acked-by: Lee Jones Acked-by: Guenter Roeck Acked-by: Sebastian Reichel Acked-by: Florian Fainelli Acked-by: Mark Brown Acked-by: Jakub Kicinski Link: https://lore.kernel.org/r/20201217223429.354283-1-robh@kernel.org Signed-off-by: Rob Herring --- .../devicetree/bindings/arm/idle-states.yaml | 2 +- .../bus/allwinner,sun50i-a64-de2.yaml | 2 +- .../bindings/bus/baikal,bt1-axi.yaml | 2 +- .../bindings/connector/usb-connector.yaml | 10 ++--- .../devicetree/bindings/dma/dma-common.yaml | 4 +- .../devicetree/bindings/dma/dma-router.yaml | 2 +- .../devicetree/bindings/dma/ingenic,dma.yaml | 2 +- .../bindings/dma/snps,dma-spear1340.yaml | 10 ++--- .../devicetree/bindings/eeprom/at24.yaml | 4 +- .../devicetree/bindings/eeprom/at25.yaml | 4 +- .../bindings/hwmon/moortec,mr75203.yaml | 2 +- .../bindings/hwmon/sensirion,shtc1.yaml | 4 +- .../devicetree/bindings/hwmon/ti,tmp513.yaml | 2 +- .../bindings/iio/light/upisemi,us5182.yaml | 2 +- .../iio/proximity/semtech,sx9310.yaml | 6 +-- .../devicetree/bindings/input/gpio-keys.yaml | 12 +++--- .../interrupt-controller/mti,gic.yaml | 4 +- .../interrupt-controller/ti,pruss-intc.yaml | 2 +- .../interrupt-controller/ti,sci-inta.yaml | 2 +- .../bindings/leds/backlight/common.yaml | 4 +- .../devicetree/bindings/leds/common.yaml | 16 ++++---- .../devicetree/bindings/leds/leds-lp55xx.yaml | 10 ++--- .../net/allwinner,sun8i-a83t-emac.yaml | 6 +-- .../bindings/net/amlogic,meson-dwmac.yaml | 2 +- .../devicetree/bindings/net/dsa/dsa.yaml | 6 +-- .../bindings/net/ethernet-controller.yaml | 24 ++++++------ .../devicetree/bindings/net/ethernet-phy.yaml | 20 +++++----- .../bindings/net/fsl,qoriq-mc-dpmac.yaml | 2 +- .../devicetree/bindings/net/mdio.yaml | 2 +- .../bindings/net/mediatek,star-emac.yaml | 2 +- .../devicetree/bindings/net/qcom,ipa.yaml | 2 +- .../devicetree/bindings/net/snps,dwmac.yaml | 38 +++++++++---------- .../bindings/net/socionext,uniphier-ave4.yaml | 2 +- .../bindings/net/ti,cpsw-switch.yaml | 2 +- .../devicetree/bindings/net/ti,dp83867.yaml | 12 +++--- .../devicetree/bindings/net/ti,dp83869.yaml | 8 ++-- .../bindings/net/ti,k3-am654-cpsw-nuss.yaml | 4 +- .../bindings/net/wireless/qcom,ath11k.yaml | 2 +- .../devicetree/bindings/phy/ti,omap-usb2.yaml | 4 +- .../power/mediatek,power-controller.yaml | 12 +++--- .../bindings/power/supply/cw2015_battery.yaml | 2 +- .../devicetree/bindings/powerpc/sleep.yaml | 2 +- .../devicetree/bindings/serial/8250.yaml | 6 +-- .../bindings/soc/ti/k3-ringacc.yaml | 2 +- .../sound/allwinner,sun4i-a10-codec.yaml | 2 +- .../bindings/sound/st,stm32-sai.yaml | 4 +- 46 files changed, 138 insertions(+), 138 deletions(-) diff --git a/Documentation/devicetree/bindings/arm/idle-states.yaml b/Documentation/devicetree/bindings/arm/idle-states.yaml index ea805c1e6b20a..52bce5dbb11f5 100644 --- a/Documentation/devicetree/bindings/arm/idle-states.yaml +++ b/Documentation/devicetree/bindings/arm/idle-states.yaml @@ -313,7 +313,7 @@ patternProperties: wakeup-latency-us by this duration. idle-state-name: - $ref: /schemas/types.yaml#definitions/string + $ref: /schemas/types.yaml#/definitions/string description: A string used as a descriptive name for the idle state. diff --git a/Documentation/devicetree/bindings/bus/allwinner,sun50i-a64-de2.yaml b/Documentation/devicetree/bindings/bus/allwinner,sun50i-a64-de2.yaml index 0503651cd214e..863a287ebc7e2 100644 --- a/Documentation/devicetree/bindings/bus/allwinner,sun50i-a64-de2.yaml +++ b/Documentation/devicetree/bindings/bus/allwinner,sun50i-a64-de2.yaml @@ -34,7 +34,7 @@ properties: description: The SRAM that needs to be claimed to access the display engine bus. - $ref: /schemas/types.yaml#definitions/phandle-array + $ref: /schemas/types.yaml#/definitions/phandle-array maxItems: 1 ranges: true diff --git a/Documentation/devicetree/bindings/bus/baikal,bt1-axi.yaml b/Documentation/devicetree/bindings/bus/baikal,bt1-axi.yaml index 0bee4694578a1..4ac78b44e45eb 100644 --- a/Documentation/devicetree/bindings/bus/baikal,bt1-axi.yaml +++ b/Documentation/devicetree/bindings/bus/baikal,bt1-axi.yaml @@ -46,7 +46,7 @@ properties: const: 1 syscon: - $ref: /schemas/types.yaml#definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle description: Phandle to the Baikal-T1 System Controller DT node interrupts: diff --git a/Documentation/devicetree/bindings/connector/usb-connector.yaml b/Documentation/devicetree/bindings/connector/usb-connector.yaml index a84464b3e1f22..4286ed767a0a5 100644 --- a/Documentation/devicetree/bindings/connector/usb-connector.yaml +++ b/Documentation/devicetree/bindings/connector/usb-connector.yaml @@ -37,7 +37,7 @@ properties: description: Size of the connector, should be specified in case of non-fullsize 'usb-a-connector' or 'usb-b-connector' compatible connectors. - $ref: /schemas/types.yaml#definitions/string + $ref: /schemas/types.yaml#/definitions/string enum: - mini @@ -67,7 +67,7 @@ properties: power-role: description: Determines the power role that the Type C connector will support. "dual" refers to Dual Role Port (DRP). - $ref: /schemas/types.yaml#definitions/string + $ref: /schemas/types.yaml#/definitions/string enum: - source @@ -76,7 +76,7 @@ properties: try-power-role: description: Preferred power role. - $ref: /schemas/types.yaml#definitions/string + $ref: /schemas/types.yaml#/definitions/string enum: - source @@ -86,7 +86,7 @@ properties: data-role: description: Data role if Type C connector supports USB data. "dual" refers Dual Role Device (DRD). - $ref: /schemas/types.yaml#definitions/string + $ref: /schemas/types.yaml#/definitions/string enum: - host @@ -105,7 +105,7 @@ properties: Type-C Cable and Connector specification, when Power Delivery is not supported. allOf: - - $ref: /schemas/types.yaml#definitions/string + - $ref: /schemas/types.yaml#/definitions/string enum: - default - 1.5A diff --git a/Documentation/devicetree/bindings/dma/dma-common.yaml b/Documentation/devicetree/bindings/dma/dma-common.yaml index 307b499e89686..ad06d36af208e 100644 --- a/Documentation/devicetree/bindings/dma/dma-common.yaml +++ b/Documentation/devicetree/bindings/dma/dma-common.yaml @@ -38,12 +38,12 @@ properties: maxItems: 255 dma-channels: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: Number of DMA channels supported by the controller. dma-requests: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: Number of DMA request signals supported by the controller. diff --git a/Documentation/devicetree/bindings/dma/dma-router.yaml b/Documentation/devicetree/bindings/dma/dma-router.yaml index 4cee5667b8a8f..e72748496fd91 100644 --- a/Documentation/devicetree/bindings/dma/dma-router.yaml +++ b/Documentation/devicetree/bindings/dma/dma-router.yaml @@ -23,7 +23,7 @@ properties: pattern: "^dma-router(@.*)?$" dma-masters: - $ref: /schemas/types.yaml#definitions/phandle-array + $ref: /schemas/types.yaml#/definitions/phandle-array description: Array of phandles to the DMA controllers the router can direct the signal to. diff --git a/Documentation/devicetree/bindings/dma/ingenic,dma.yaml b/Documentation/devicetree/bindings/dma/ingenic,dma.yaml index 00f19b3cac313..6a2043721b95f 100644 --- a/Documentation/devicetree/bindings/dma/ingenic,dma.yaml +++ b/Documentation/devicetree/bindings/dma/ingenic,dma.yaml @@ -48,7 +48,7 @@ properties: ingenic,reserved-channels property. ingenic,reserved-channels: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: > Bitmask of channels to reserve for devices that need a specific channel. These channels will only be assigned when explicitely diff --git a/Documentation/devicetree/bindings/dma/snps,dma-spear1340.yaml b/Documentation/devicetree/bindings/dma/snps,dma-spear1340.yaml index ef1d6879c158d..6b35089ac0172 100644 --- a/Documentation/devicetree/bindings/dma/snps,dma-spear1340.yaml +++ b/Documentation/devicetree/bindings/dma/snps,dma-spear1340.yaml @@ -54,7 +54,7 @@ properties: maximum: 16 dma-masters: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: | Number of DMA masters supported by the controller. In case if not specified the driver will try to auto-detect this and @@ -63,7 +63,7 @@ properties: maximum: 4 chan_allocation_order: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: | DMA channels allocation order specifier. Zero means ascending order (first free allocated), while one - descending (last free allocated). @@ -71,7 +71,7 @@ properties: enum: [0, 1] chan_priority: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: | DMA channels priority order. Zero means ascending channels priority so the very first channel has the highest priority. While 1 means @@ -80,7 +80,7 @@ properties: enum: [0, 1] block_size: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: Maximum block size supported by the DMA controller. enum: [3, 7, 15, 31, 63, 127, 255, 511, 1023, 2047, 4095] @@ -139,7 +139,7 @@ properties: default: 256 snps,dma-protection-control: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: | Bits one-to-one passed to the AHB HPROT[3:1] bus. Each bit setting indicates the following features: bit 0 - privileged mode, diff --git a/Documentation/devicetree/bindings/eeprom/at24.yaml b/Documentation/devicetree/bindings/eeprom/at24.yaml index 6edfa705b4860..d5117c638b75c 100644 --- a/Documentation/devicetree/bindings/eeprom/at24.yaml +++ b/Documentation/devicetree/bindings/eeprom/at24.yaml @@ -131,7 +131,7 @@ properties: default: 1 read-only: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: Disables writes to the eeprom. @@ -141,7 +141,7 @@ properties: Total eeprom size in bytes. no-read-rollover: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: Indicates that the multi-address eeprom does not automatically roll over reads to the next slave address. Please consult the manual of diff --git a/Documentation/devicetree/bindings/eeprom/at25.yaml b/Documentation/devicetree/bindings/eeprom/at25.yaml index 7449736376788..121a601db22e4 100644 --- a/Documentation/devicetree/bindings/eeprom/at25.yaml +++ b/Documentation/devicetree/bindings/eeprom/at25.yaml @@ -45,13 +45,13 @@ properties: spi-max-frequency: true pagesize: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 enum: [1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072] description: Size of the eeprom page. size: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: Total eeprom size in bytes. diff --git a/Documentation/devicetree/bindings/hwmon/moortec,mr75203.yaml b/Documentation/devicetree/bindings/hwmon/moortec,mr75203.yaml index 6f3e3c01f7172..b79f069a04c2a 100644 --- a/Documentation/devicetree/bindings/hwmon/moortec,mr75203.yaml +++ b/Documentation/devicetree/bindings/hwmon/moortec,mr75203.yaml @@ -32,7 +32,7 @@ properties: PVT controller has 5 VM (voltage monitor) sensors. vm-map defines CPU core to VM instance mapping. A value of 0xff means that VM sensor is unused. - $ref: /schemas/types.yaml#definitions/uint8-array + $ref: /schemas/types.yaml#/definitions/uint8-array maxItems: 5 clocks: diff --git a/Documentation/devicetree/bindings/hwmon/sensirion,shtc1.yaml b/Documentation/devicetree/bindings/hwmon/sensirion,shtc1.yaml index c523a1beb2b7c..7d49478d9668e 100644 --- a/Documentation/devicetree/bindings/hwmon/sensirion,shtc1.yaml +++ b/Documentation/devicetree/bindings/hwmon/sensirion,shtc1.yaml @@ -29,12 +29,12 @@ properties: const: 0x70 sensirion,blocking-io: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: If set, the driver hold the i2c bus until measurement is finished. sensirion,low-precision: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: If set, the sensor aquire data with low precision (not recommended). The driver aquire data with high precision by default. diff --git a/Documentation/devicetree/bindings/hwmon/ti,tmp513.yaml b/Documentation/devicetree/bindings/hwmon/ti,tmp513.yaml index c17e5d3ee3f14..8020d739a078e 100644 --- a/Documentation/devicetree/bindings/hwmon/ti,tmp513.yaml +++ b/Documentation/devicetree/bindings/hwmon/ti,tmp513.yaml @@ -61,7 +61,7 @@ properties: Array of three(TMP513) or two(TMP512) n-Factor value for each remote temperature channel. See datasheet Table 11 for n-Factor range list and value interpretation. - $ref: /schemas/types.yaml#definitions/uint32-array + $ref: /schemas/types.yaml#/definitions/uint32-array minItems: 2 maxItems: 3 items: diff --git a/Documentation/devicetree/bindings/iio/light/upisemi,us5182.yaml b/Documentation/devicetree/bindings/iio/light/upisemi,us5182.yaml index 4a9b2827cf7b5..de5882cb33603 100644 --- a/Documentation/devicetree/bindings/iio/light/upisemi,us5182.yaml +++ b/Documentation/devicetree/bindings/iio/light/upisemi,us5182.yaml @@ -45,7 +45,7 @@ properties: default: 0x16 upisemi,continuous: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: | This chip has two power modes: one-shot (chip takes one measurement and then shuts itself down) and continuous (chip takes continuous diff --git a/Documentation/devicetree/bindings/iio/proximity/semtech,sx9310.yaml b/Documentation/devicetree/bindings/iio/proximity/semtech,sx9310.yaml index ccfb163f3d341..5de0bb2180e62 100644 --- a/Documentation/devicetree/bindings/iio/proximity/semtech,sx9310.yaml +++ b/Documentation/devicetree/bindings/iio/proximity/semtech,sx9310.yaml @@ -72,7 +72,7 @@ properties: - finest semtech,startup-sensor: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 enum: [0, 1, 2, 3] default: 0 description: @@ -81,7 +81,7 @@ properties: compensation. semtech,proxraw-strength: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 enum: [0, 2, 4, 8] default: 2 description: @@ -89,7 +89,7 @@ properties: represent 1-1/N. semtech,avg-pos-strength: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 enum: [0, 16, 64, 128, 256, 512, 1024, 4294967295] default: 16 description: diff --git a/Documentation/devicetree/bindings/input/gpio-keys.yaml b/Documentation/devicetree/bindings/input/gpio-keys.yaml index 6966ab009fa3d..060a309ff8e7c 100644 --- a/Documentation/devicetree/bindings/input/gpio-keys.yaml +++ b/Documentation/devicetree/bindings/input/gpio-keys.yaml @@ -34,13 +34,13 @@ patternProperties: linux,code: description: Key / Axis code to emit. - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 linux,input-type: description: Specify event type this button/key generates. If not specified defaults to <1> == EV_KEY. - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 default: 1 @@ -56,12 +56,12 @@ patternProperties: linux,input-value = <0xffffffff>; /* -1 */ - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 debounce-interval: description: Debouncing interval time in milliseconds. If not specified defaults to 5. - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 default: 5 @@ -79,7 +79,7 @@ patternProperties: EV_ACT_ANY - both asserted and deasserted EV_ACT_ASSERTED - asserted EV_ACT_DEASSERTED - deasserted - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 enum: [0, 1, 2] linux,can-disable: @@ -118,7 +118,7 @@ then: poll-interval: description: Poll interval time in milliseconds - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 required: - poll-interval diff --git a/Documentation/devicetree/bindings/interrupt-controller/mti,gic.yaml b/Documentation/devicetree/bindings/interrupt-controller/mti,gic.yaml index 039e08af98bb7..91bb3c2307a79 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/mti,gic.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/mti,gic.yaml @@ -42,7 +42,7 @@ properties: Specifies the list of CPU interrupt vectors to which the GIC may not route interrupts. This property is ignored if the CPU is started in EIC mode. - $ref: /schemas/types.yaml#definitions/uint32-array + $ref: /schemas/types.yaml#/definitions/uint32-array minItems: 1 maxItems: 6 uniqueItems: true @@ -56,7 +56,7 @@ properties: It accepts two values: the 1st is the starting interrupt and the 2nd is the size of the reserved range. If not specified, the driver will allocate the last (2 * number of VPEs in the system). - $ref: /schemas/types.yaml#definitions/uint32-array + $ref: /schemas/types.yaml#/definitions/uint32-array items: - minimum: 0 maximum: 254 diff --git a/Documentation/devicetree/bindings/interrupt-controller/ti,pruss-intc.yaml b/Documentation/devicetree/bindings/interrupt-controller/ti,pruss-intc.yaml index 1c4c009dedd04..c2ce215501a58 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/ti,pruss-intc.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/ti,pruss-intc.yaml @@ -80,7 +80,7 @@ properties: mapping is provided. ti,irqs-reserved: - $ref: /schemas/types.yaml#definitions/uint8 + $ref: /schemas/types.yaml#/definitions/uint8 description: | Bitmask of host interrupts between 0 and 7 (corresponding to PRUSS INTC output interrupts 2 through 9) that are not connected to the Arm interrupt diff --git a/Documentation/devicetree/bindings/interrupt-controller/ti,sci-inta.yaml b/Documentation/devicetree/bindings/interrupt-controller/ti,sci-inta.yaml index b5af120114990..3d89668573e88 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/ti,sci-inta.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/ti,sci-inta.yaml @@ -76,7 +76,7 @@ properties: "limit" specifies the limit for translation ti,unmapped-event-sources: - $ref: /schemas/types.yaml#definitions/phandle-array + $ref: /schemas/types.yaml#/definitions/phandle-array description: Array of phandles to DMA controllers where the unmapped events originate. diff --git a/Documentation/devicetree/bindings/leds/backlight/common.yaml b/Documentation/devicetree/bindings/leds/backlight/common.yaml index bc817f77d2b10..702ba350d8698 100644 --- a/Documentation/devicetree/bindings/leds/backlight/common.yaml +++ b/Documentation/devicetree/bindings/leds/backlight/common.yaml @@ -22,7 +22,7 @@ properties: The default brightness that should be applied to the LED by the operating system on start-up. The brightness should not exceed the brightness the LED can provide. - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 max-brightness: description: @@ -31,6 +31,6 @@ properties: on the brightness apart from what the driver says, as it could happen that a LED can be made so bright that it gets damaged or causes damage due to restrictions in a specific system, such as mounting conditions. - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 additionalProperties: true diff --git a/Documentation/devicetree/bindings/leds/common.yaml b/Documentation/devicetree/bindings/leds/common.yaml index f1211e7045f12..b1f363747a628 100644 --- a/Documentation/devicetree/bindings/leds/common.yaml +++ b/Documentation/devicetree/bindings/leds/common.yaml @@ -27,21 +27,21 @@ properties: List of device current outputs the LED is connected to. The outputs are identified by the numbers that must be defined in the LED device binding documentation. - $ref: /schemas/types.yaml#definitions/uint32-array + $ref: /schemas/types.yaml#/definitions/uint32-array function: description: LED function. Use one of the LED_FUNCTION_* prefixed definitions from the header include/dt-bindings/leds/common.h. If there is no matching LED_FUNCTION available, add a new one. - $ref: /schemas/types.yaml#definitions/string + $ref: /schemas/types.yaml#/definitions/string color: description: Color of the LED. Use one of the LED_COLOR_ID_* prefixed definitions from the header include/dt-bindings/leds/common.h. If there is no matching LED_COLOR_ID available, add a new one. - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 minimum: 0 maximum: 9 @@ -49,7 +49,7 @@ properties: description: Integer to be used when more than one instance of the same function is needed, differing only with an ordinal number. - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 label: description: @@ -66,7 +66,7 @@ properties: produced where the LED momentarily turns off (or on). The "keep" setting will keep the LED at whatever its current state is, without producing a glitch. - $ref: /schemas/types.yaml#definitions/string + $ref: /schemas/types.yaml#/definitions/string enum: - on - off @@ -77,7 +77,7 @@ properties: description: This parameter, if present, is a string defining the trigger assigned to the LED. - $ref: /schemas/types.yaml#definitions/string + $ref: /schemas/types.yaml#/definitions/string enum: # LED will act as a back-light, controlled by the framebuffer system @@ -109,7 +109,7 @@ properties: brightness and duration (in ms). The exact format is described in: Documentation/devicetree/bindings/leds/leds-trigger-pattern.txt - $ref: /schemas/types.yaml#definitions/uint32-matrix + $ref: /schemas/types.yaml#/definitions/uint32-matrix items: minItems: 2 maxItems: 2 @@ -143,7 +143,7 @@ properties: the device tree and be referenced by a phandle and a set of phandle arguments. A length of arguments should be specified by the #trigger-source-cells property in the source node. - $ref: /schemas/types.yaml#definitions/phandle-array + $ref: /schemas/types.yaml#/definitions/phandle-array # Required properties for flash LED child nodes: flash-max-microamp: diff --git a/Documentation/devicetree/bindings/leds/leds-lp55xx.yaml b/Documentation/devicetree/bindings/leds/leds-lp55xx.yaml index 58e974793a797..f552cd143d5be 100644 --- a/Documentation/devicetree/bindings/leds/leds-lp55xx.yaml +++ b/Documentation/devicetree/bindings/leds/leds-lp55xx.yaml @@ -35,7 +35,7 @@ properties: description: I2C slave address clock-mode: - $ref: /schemas/types.yaml#definitions/uint8 + $ref: /schemas/types.yaml#/definitions/uint8 description: | Input clock mode enum: @@ -49,7 +49,7 @@ properties: GPIO attached to the chip's enable pin pwr-sel: - $ref: /schemas/types.yaml#definitions/uint8 + $ref: /schemas/types.yaml#/definitions/uint8 description: | LP8501 specific property. Power selection for output channels. enum: @@ -70,14 +70,14 @@ patternProperties: $ref: common.yaml# properties: led-cur: - $ref: /schemas/types.yaml#definitions/uint8 + $ref: /schemas/types.yaml#/definitions/uint8 description: | Current setting at each LED channel (mA x10, 0 if LED is not connected) minimum: 0 maximum: 255 max-cur: - $ref: /schemas/types.yaml#definitions/uint8 + $ref: /schemas/types.yaml#/definitions/uint8 description: Maximun current at each LED channel. reg: @@ -97,7 +97,7 @@ patternProperties: - 8 # LED output D9 chan-name: - $ref: /schemas/types.yaml#definitions/string + $ref: /schemas/types.yaml#/definitions/string description: name of channel required: diff --git a/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml b/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml index c7c9ad4e3f9fc..7f2578d48e3f0 100644 --- a/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml +++ b/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml @@ -38,7 +38,7 @@ properties: const: stmmaceth syscon: - $ref: /schemas/types.yaml#definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle description: Phandle to the device containing the EMAC or GMAC clock register @@ -114,7 +114,7 @@ allOf: then: properties: allwinner,leds-active-low: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: EPHY LEDs are active low. @@ -126,7 +126,7 @@ allOf: const: allwinner,sun8i-h3-mdio-mux mdio-parent-bus: - $ref: /schemas/types.yaml#definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle description: Phandle to EMAC MDIO. diff --git a/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml b/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml index 6b057b117aa03..1f133f4a2924c 100644 --- a/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml +++ b/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml @@ -60,7 +60,7 @@ allOf: - const: timing-adjustment amlogic,tx-delay-ns: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: The internal RGMII TX clock delay (provided by this driver) in nanoseconds. Allowed values are 0ns, 2ns, 4ns, 6ns. diff --git a/Documentation/devicetree/bindings/net/dsa/dsa.yaml b/Documentation/devicetree/bindings/net/dsa/dsa.yaml index 8e044631bcf73..8a3494db4d8d3 100644 --- a/Documentation/devicetree/bindings/net/dsa/dsa.yaml +++ b/Documentation/devicetree/bindings/net/dsa/dsa.yaml @@ -54,7 +54,7 @@ patternProperties: description: Describes the label associated with this port, which will become the netdev name - $ref: /schemas/types.yaml#definitions/string + $ref: /schemas/types.yaml#/definitions/string link: description: @@ -62,13 +62,13 @@ patternProperties: port is used as the outgoing port towards the phandle ports. The full routing information must be given, not just the one hop routes to neighbouring switches - $ref: /schemas/types.yaml#definitions/phandle-array + $ref: /schemas/types.yaml#/definitions/phandle-array ethernet: description: Should be a phandle to a valid Ethernet device node. This host device is what the switch port is connected to - $ref: /schemas/types.yaml#definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle phy-handle: true diff --git a/Documentation/devicetree/bindings/net/ethernet-controller.yaml b/Documentation/devicetree/bindings/net/ethernet-controller.yaml index cc93063a8f39f..0965f6515f9ec 100644 --- a/Documentation/devicetree/bindings/net/ethernet-controller.yaml +++ b/Documentation/devicetree/bindings/net/ethernet-controller.yaml @@ -16,7 +16,7 @@ properties: local-mac-address: description: Specifies the MAC address that was assigned to the network device. - $ref: /schemas/types.yaml#definitions/uint8-array + $ref: /schemas/types.yaml#/definitions/uint8-array items: - minItems: 6 maxItems: 6 @@ -27,20 +27,20 @@ properties: program; should be used in cases where the MAC address assigned to the device by the boot program is different from the local-mac-address property. - $ref: /schemas/types.yaml#definitions/uint8-array + $ref: /schemas/types.yaml#/definitions/uint8-array items: - minItems: 6 maxItems: 6 max-frame-size: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: Maximum transfer unit (IEEE defined MTU), rather than the maximum frame size (there\'s contradiction in the Devicetree Specification). max-speed: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: Specifies maximum speed in Mbit/s supported by the device. @@ -101,7 +101,7 @@ properties: $ref: "#/properties/phy-connection-type" phy-handle: - $ref: /schemas/types.yaml#definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle description: Specifies a reference to a node representing a PHY device. @@ -114,7 +114,7 @@ properties: deprecated: true rx-fifo-depth: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: The size of the controller\'s receive fifo in bytes. This is used for components that can have configurable receive fifo sizes, @@ -129,12 +129,12 @@ properties: If this property is present then the MAC applies the RX delay. sfp: - $ref: /schemas/types.yaml#definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle description: Specifies a reference to a node representing a SFP cage. tx-fifo-depth: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: The size of the controller\'s transmit fifo in bytes. This is used for components that can have configurable fifo sizes. @@ -150,7 +150,7 @@ properties: description: Specifies the PHY management type. If auto is set and fixed-link is not specified, it uses MDIO for management. - $ref: /schemas/types.yaml#definitions/string + $ref: /schemas/types.yaml#/definitions/string default: auto enum: - auto @@ -198,17 +198,17 @@ properties: speed: description: Link speed. - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 enum: [10, 100, 1000] full-duplex: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: Indicates that full-duplex is used. When absent, half duplex is assumed. asym-pause: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: Indicates that asym_pause should be enabled. diff --git a/Documentation/devicetree/bindings/net/ethernet-phy.yaml b/Documentation/devicetree/bindings/net/ethernet-phy.yaml index 6dd72faebd896..2766fe45bb98b 100644 --- a/Documentation/devicetree/bindings/net/ethernet-phy.yaml +++ b/Documentation/devicetree/bindings/net/ethernet-phy.yaml @@ -78,57 +78,57 @@ properties: Maximum PHY supported speed in Mbits / seconds. broken-turn-around: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: If set, indicates the PHY device does not correctly release the turn around line low at end of the control phase of the MDIO transaction. enet-phy-lane-swap: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: If set, indicates the PHY will swap the TX/RX lanes to compensate for the board being designed with the lanes swapped. eee-broken-100tx: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: Mark the corresponding energy efficient ethernet mode as broken and request the ethernet to stop advertising it. eee-broken-1000t: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: Mark the corresponding energy efficient ethernet mode as broken and request the ethernet to stop advertising it. eee-broken-10gt: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: Mark the corresponding energy efficient ethernet mode as broken and request the ethernet to stop advertising it. eee-broken-1000kx: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: Mark the corresponding energy efficient ethernet mode as broken and request the ethernet to stop advertising it. eee-broken-10gkx4: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: Mark the corresponding energy efficient ethernet mode as broken and request the ethernet to stop advertising it. eee-broken-10gkr: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: Mark the corresponding energy efficient ethernet mode as broken and request the ethernet to stop advertising it. phy-is-integrated: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: If set, indicates that the PHY is integrated into the same physical package as the Ethernet MAC. If needed, muxers @@ -158,7 +158,7 @@ properties: this property is missing the delay will be skipped. sfp: - $ref: /schemas/types.yaml#definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle description: Specifies a reference to a node representing a SFP cage. diff --git a/Documentation/devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml b/Documentation/devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml index 2159b7d1f5370..7f620a71a9726 100644 --- a/Documentation/devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml +++ b/Documentation/devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml @@ -31,7 +31,7 @@ properties: phy-mode: true pcs-handle: - $ref: /schemas/types.yaml#definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle description: A reference to a node representing a PCS PHY device found on the internal MDIO bus. diff --git a/Documentation/devicetree/bindings/net/mdio.yaml b/Documentation/devicetree/bindings/net/mdio.yaml index e811e0fd851c1..08e15fb1584f3 100644 --- a/Documentation/devicetree/bindings/net/mdio.yaml +++ b/Documentation/devicetree/bindings/net/mdio.yaml @@ -70,7 +70,7 @@ patternProperties: The ID number for the device. broken-turn-around: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: If set, indicates the MDIO device does not correctly release the turn around line low at end of the control phase of the diff --git a/Documentation/devicetree/bindings/net/mediatek,star-emac.yaml b/Documentation/devicetree/bindings/net/mediatek,star-emac.yaml index 0bbd598704e98..e6a5ff2082536 100644 --- a/Documentation/devicetree/bindings/net/mediatek,star-emac.yaml +++ b/Documentation/devicetree/bindings/net/mediatek,star-emac.yaml @@ -42,7 +42,7 @@ properties: - const: trans mediatek,pericfg: - $ref: /schemas/types.yaml#definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle description: Phandle to the device containing the PERICFG register range. This is used to control the MII mode. diff --git a/Documentation/devicetree/bindings/net/qcom,ipa.yaml b/Documentation/devicetree/bindings/net/qcom,ipa.yaml index 4d8464b2676d9..d0cbbcf1b0e59 100644 --- a/Documentation/devicetree/bindings/net/qcom,ipa.yaml +++ b/Documentation/devicetree/bindings/net/qcom,ipa.yaml @@ -114,7 +114,7 @@ properties: validating firwmare used by the GSI. modem-remoteproc: - $ref: /schemas/types.yaml#definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle description: This defines the phandle to the remoteproc node representing the modem subsystem. This is requied so the IPA driver can diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml index 11a6fdb657c93..b2f6083f556af 100644 --- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml +++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml @@ -126,7 +126,7 @@ properties: in a different mode than the PHY in order to function. snps,axi-config: - $ref: /schemas/types.yaml#definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle description: AXI BUS Mode parameters. Phandle to a node that can contain the following properties @@ -141,7 +141,7 @@ properties: * snps,rb, rebuild INCRx Burst snps,mtl-rx-config: - $ref: /schemas/types.yaml#definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle description: Multiple RX Queues parameters. Phandle to a node that can contain the following properties @@ -164,7 +164,7 @@ properties: * snps,priority, RX queue priority (Range 0x0 to 0xF) snps,mtl-tx-config: - $ref: /schemas/types.yaml#definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle description: Multiple TX Queues parameters. Phandle to a node that can contain the following properties @@ -198,7 +198,7 @@ properties: snps,reset-active-low: deprecated: true - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: Indicates that the PHY Reset is active low @@ -208,55 +208,55 @@ properties: Triplet of delays. The 1st cell is reset pre-delay in micro seconds. The 2nd cell is reset pulse in micro seconds. The 3rd cell is reset post-delay in micro seconds. - $ref: /schemas/types.yaml#definitions/uint32-array + $ref: /schemas/types.yaml#/definitions/uint32-array minItems: 3 maxItems: 3 snps,aal: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: Use Address-Aligned Beats snps,fixed-burst: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: Program the DMA to use the fixed burst mode snps,mixed-burst: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: Program the DMA to use the mixed burst mode snps,force_thresh_dma_mode: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: Force DMA to use the threshold mode for both tx and rx snps,force_sf_dma_mode: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: Force DMA to use the Store and Forward mode for both tx and rx. This flag is ignored if force_thresh_dma_mode is set. snps,en-tx-lpi-clockgating: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: Enable gating of the MAC TX clock during TX low-power mode snps,multicast-filter-bins: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: Number of multicast filter hash bins supported by this device instance snps,perfect-filter-entries: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: Number of perfect filter entries supported by this device instance snps,ps-speed: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: Port selection speed that can be passed to the core when PCS is supported. For example, this is used in case of SGMII and @@ -307,25 +307,25 @@ allOf: snps,pbl: description: Programmable Burst Length (tx and rx) - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 enum: [2, 4, 8] snps,txpbl: description: Tx Programmable Burst Length. If set, DMA tx will use this value rather than snps,pbl. - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 enum: [2, 4, 8] snps,rxpbl: description: Rx Programmable Burst Length. If set, DMA rx will use this value rather than snps,pbl. - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 enum: [2, 4, 8] snps,no-pbl-x8: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: Don\'t multiply the pbl/txpbl/rxpbl values by 8. For core rev < 3.50, don\'t multiply the values by 4. @@ -351,7 +351,7 @@ allOf: then: properties: snps,tso: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: Enables the TSO feature otherwise it will be managed by MAC HW capability register. diff --git a/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml b/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml index cbacc04fc9e69..8a03a24a2019b 100644 --- a/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml +++ b/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml @@ -64,7 +64,7 @@ properties: - const: ether # for others socionext,syscon-phy-mode: - $ref: /schemas/types.yaml#definitions/phandle-array + $ref: /schemas/types.yaml#/definitions/phandle-array description: A phandle to syscon with one argument that configures phy mode. The argument is the ID of MAC instance. diff --git a/Documentation/devicetree/bindings/net/ti,cpsw-switch.yaml b/Documentation/devicetree/bindings/net/ti,cpsw-switch.yaml index dadeb8f811c0b..07a00f53adbf9 100644 --- a/Documentation/devicetree/bindings/net/ti,cpsw-switch.yaml +++ b/Documentation/devicetree/bindings/net/ti,cpsw-switch.yaml @@ -70,7 +70,7 @@ properties: pinctrl-names: true syscon: - $ref: /schemas/types.yaml#definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle description: Phandle to the system control device node which provides access to efuse IO range with MAC addresses diff --git a/Documentation/devicetree/bindings/net/ti,dp83867.yaml b/Documentation/devicetree/bindings/net/ti,dp83867.yaml index 4050a36086585..047d757e8d828 100644 --- a/Documentation/devicetree/bindings/net/ti,dp83867.yaml +++ b/Documentation/devicetree/bindings/net/ti,dp83867.yaml @@ -47,31 +47,31 @@ properties: takes precedence. tx-fifo-depth: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: | Transmitt FIFO depth see dt-bindings/net/ti-dp83867.h for values rx-fifo-depth: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: | Receive FIFO depth see dt-bindings/net/ti-dp83867.h for values ti,clk-output-sel: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: | Muxing option for CLK_OUT pin. See dt-bindings/net/ti-dp83867.h for applicable values. The CLK_OUT pin can also be disabled by this property. When omitted, the PHY's default will be left as is. ti,rx-internal-delay: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: | RGMII Receive Clock Delay - see dt-bindings/net/ti-dp83867.h for applicable values. Required only if interface type is PHY_INTERFACE_MODE_RGMII_ID or PHY_INTERFACE_MODE_RGMII_RXID. ti,tx-internal-delay: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: | RGMII Transmit Clock Delay - see dt-bindings/net/ti-dp83867.h for applicable values. Required only if interface type is @@ -101,7 +101,7 @@ properties: ti,fifo-depth: deprecated: true - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: | Transmitt FIFO depth- see dt-bindings/net/ti-dp83867.h for applicable values. diff --git a/Documentation/devicetree/bindings/net/ti,dp83869.yaml b/Documentation/devicetree/bindings/net/ti,dp83869.yaml index c3235f08e3263..70a1209cb13b9 100644 --- a/Documentation/devicetree/bindings/net/ti,dp83869.yaml +++ b/Documentation/devicetree/bindings/net/ti,dp83869.yaml @@ -44,22 +44,22 @@ properties: to a maximum value (70 ohms). tx-fifo-depth: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: | Transmitt FIFO depth see dt-bindings/net/ti-dp83869.h for values rx-fifo-depth: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: | Receive FIFO depth see dt-bindings/net/ti-dp83869.h for values ti,clk-output-sel: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: | Muxing option for CLK_OUT pin see dt-bindings/net/ti-dp83869.h for values. ti,op-mode: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: | Operational mode for the PHY. If this is not set then the operational mode is set by the straps. see dt-bindings/net/ti-dp83869.h for values diff --git a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml index 227270cbf892b..c47b58f3e3f6e 100644 --- a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml +++ b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml @@ -119,12 +119,12 @@ properties: description: label associated with this port ti,mac-only: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: Specifies the port works in mac-only mode. ti,syscon-efuse: - $ref: /schemas/types.yaml#definitions/phandle-array + $ref: /schemas/types.yaml#/definitions/phandle-array description: Phandle to the system control device node which provides access to efuse IO range with MAC addresses diff --git a/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml b/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml index 6af9991915598..85c2f699d602c 100644 --- a/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml +++ b/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml @@ -136,7 +136,7 @@ properties: - const: tcl2host-status-ring qcom,rproc: - $ref: /schemas/types.yaml#definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle description: DT entry of q6v5-wcss remoteproc driver. Phandle to a node that can contain the following properties diff --git a/Documentation/devicetree/bindings/phy/ti,omap-usb2.yaml b/Documentation/devicetree/bindings/phy/ti,omap-usb2.yaml index 83d5d0aceb04e..cbbf5e8b11973 100644 --- a/Documentation/devicetree/bindings/phy/ti,omap-usb2.yaml +++ b/Documentation/devicetree/bindings/phy/ti,omap-usb2.yaml @@ -44,13 +44,13 @@ properties: - const: refclk syscon-phy-power: - $ref: /schemas/types.yaml#definitions/phandle-array + $ref: /schemas/types.yaml#/definitions/phandle-array description: phandle/offset pair. Phandle to the system control module and register offset to power on/off the PHY. ctrl-module: - $ref: /schemas/types.yaml#definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle description: (deprecated) phandle of the control module used by PHY driver to power on the PHY. Use syscon-phy-power instead. diff --git a/Documentation/devicetree/bindings/power/mediatek,power-controller.yaml b/Documentation/devicetree/bindings/power/mediatek,power-controller.yaml index fd12bafe35487..d14cb9bac8497 100644 --- a/Documentation/devicetree/bindings/power/mediatek,power-controller.yaml +++ b/Documentation/devicetree/bindings/power/mediatek,power-controller.yaml @@ -83,11 +83,11 @@ patternProperties: SUSBSYS clocks. mediatek,infracfg: - $ref: /schemas/types.yaml#definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle description: phandle to the device containing the INFRACFG register range. mediatek,smi: - $ref: /schemas/types.yaml#definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle description: phandle to the device containing the SMI register range. patternProperties: @@ -131,11 +131,11 @@ patternProperties: SUSBSYS clocks. mediatek,infracfg: - $ref: /schemas/types.yaml#definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle description: phandle to the device containing the INFRACFG register range. mediatek,smi: - $ref: /schemas/types.yaml#definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle description: phandle to the device containing the SMI register range. patternProperties: @@ -179,11 +179,11 @@ patternProperties: SUSBSYS clocks. mediatek,infracfg: - $ref: /schemas/types.yaml#definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle description: phandle to the device containing the INFRACFG register range. mediatek,smi: - $ref: /schemas/types.yaml#definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle description: phandle to the device containing the SMI register range. required: diff --git a/Documentation/devicetree/bindings/power/supply/cw2015_battery.yaml b/Documentation/devicetree/bindings/power/supply/cw2015_battery.yaml index ee92e6a076ac8..5fcdf58015368 100644 --- a/Documentation/devicetree/bindings/power/supply/cw2015_battery.yaml +++ b/Documentation/devicetree/bindings/power/supply/cw2015_battery.yaml @@ -27,7 +27,7 @@ properties: of this binary blob is kept secret by CellWise. The only way to obtain it is to mail two batteries to a test facility of CellWise and receive back a test report with the binary blob. - $ref: /schemas/types.yaml#definitions/uint8-array + $ref: /schemas/types.yaml#/definitions/uint8-array minItems: 64 maxItems: 64 diff --git a/Documentation/devicetree/bindings/powerpc/sleep.yaml b/Documentation/devicetree/bindings/powerpc/sleep.yaml index 6494c7d08b93a..1b0936a5beec3 100644 --- a/Documentation/devicetree/bindings/powerpc/sleep.yaml +++ b/Documentation/devicetree/bindings/powerpc/sleep.yaml @@ -42,6 +42,6 @@ select: true properties: sleep: - $ref: /schemas/types.yaml#definitions/phandle-array + $ref: /schemas/types.yaml#/definitions/phandle-array additionalProperties: true diff --git a/Documentation/devicetree/bindings/serial/8250.yaml b/Documentation/devicetree/bindings/serial/8250.yaml index c1d4c196f005b..f54cae9ff7b28 100644 --- a/Documentation/devicetree/bindings/serial/8250.yaml +++ b/Documentation/devicetree/bindings/serial/8250.yaml @@ -126,7 +126,7 @@ properties: maxItems: 1 current-speed: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: The current active speed of the UART. reg-offset: @@ -154,7 +154,7 @@ properties: Set to indicate that the port does not implement loopback test mode. fifo-size: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: The fifo size of the UART. auto-flow-control: @@ -165,7 +165,7 @@ properties: property. tx-threshold: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: | Specify the TX FIFO low water indication for parts with programmable TX FIFO thresholds. diff --git a/Documentation/devicetree/bindings/soc/ti/k3-ringacc.yaml b/Documentation/devicetree/bindings/soc/ti/k3-ringacc.yaml index c3c595e235a86..ddea3d41971d0 100644 --- a/Documentation/devicetree/bindings/soc/ti/k3-ringacc.yaml +++ b/Documentation/devicetree/bindings/soc/ti/k3-ringacc.yaml @@ -55,7 +55,7 @@ properties: description: TI-SCI RM subtype for GP ring range ti,sci: - $ref: /schemas/types.yaml#definitions/phandle-array + $ref: /schemas/types.yaml#/definitions/phandle-array description: phandle on TI-SCI compatible System controller node ti,sci-dev-id: diff --git a/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-codec.yaml b/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-codec.yaml index be390accdd07d..dd47fef9854d7 100644 --- a/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-codec.yaml +++ b/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-codec.yaml @@ -57,7 +57,7 @@ properties: A list of the connections between audio components. Each entry is a pair of strings, the first being the connection's sink, the second being the connection's source. - $ref: /schemas/types.yaml#definitions/non-unique-string-array + $ref: /schemas/types.yaml#/definitions/non-unique-string-array minItems: 2 maxItems: 18 items: diff --git a/Documentation/devicetree/bindings/sound/st,stm32-sai.yaml b/Documentation/devicetree/bindings/sound/st,stm32-sai.yaml index 6ad48c7681c1a..f2443b6512826 100644 --- a/Documentation/devicetree/bindings/sound/st,stm32-sai.yaml +++ b/Documentation/devicetree/bindings/sound/st,stm32-sai.yaml @@ -106,7 +106,7 @@ patternProperties: Must contain the phandle and index of the SAI sub-block providing the synchronization. allOf: - - $ref: /schemas/types.yaml#definitions/phandle-array + - $ref: /schemas/types.yaml#/definitions/phandle-array - maxItems: 1 st,iec60958: @@ -117,7 +117,7 @@ patternProperties: configured according to protocol defined in related DAI link node, such as i2s, left justified, right justified, dsp and pdm protocols. allOf: - - $ref: /schemas/types.yaml#definitions/flag + - $ref: /schemas/types.yaml#/definitions/flag "#clock-cells": description: Configure the SAI device as master clock provider. -- GitLab From 479a41748fdd8aa3eb933b0fac554fb2b7931334 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 17 Nov 2020 14:07:52 -0600 Subject: [PATCH 0882/1894] media: dt-bindings: coda: Add missing 'additionalProperties' 'additionalProperties' is now required by the meta-schema. Add it for coda. As a result, 'interrupts', 'interrupt-names' and 'power-domains' need to be reworked to be defined at the top level. Cc: Philipp Zabel Cc: Mauro Carvalho Chehab Cc: linux-media@vger.kernel.org Signed-off-by: Rob Herring Reviewed-by: Philipp Zabel Link: https://lore.kernel.org/r/20201117200752.4004368-1-robh@kernel.org Signed-off-by: Rob Herring --- .../devicetree/bindings/media/coda.yaml | 42 +++++++++---------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/Documentation/devicetree/bindings/media/coda.yaml b/Documentation/devicetree/bindings/media/coda.yaml index 7bac0057faf73..36781ee4617f9 100644 --- a/Documentation/devicetree/bindings/media/coda.yaml +++ b/Documentation/devicetree/bindings/media/coda.yaml @@ -44,6 +44,21 @@ properties: - const: per - const: ahb + interrupts: + minItems: 1 + items: + - description: BIT processor interrupt + - description: JPEG unit interrupt + + interrupt-names: + minItems: 1 + items: + - const: bit + - const: jpeg + + power-domains: + maxItems: 1 + resets: maxItems: 1 @@ -59,6 +74,8 @@ required: - clocks - clock-names +additionalProperties: false + allOf: - if: properties: @@ -68,34 +85,17 @@ allOf: then: properties: interrupts: - items: - - description: BIT processor interrupt - - description: JPEG unit interrupt + minItems: 2 interrupt-names: - items: - - const: bit - - const: jpeg + minItems: 2 else: properties: interrupts: - items: - - description: BIT processor interrupt - - - if: - properties: - compatible: - contains: - enum: - - fsl,imx6dl-vpu - - fsl,imx6q-vpu - then: - properties: - power-domains: - $ref: /schemas/types.yaml#/definitions/phandle - description: phandle pointing to the PU power domain maxItems: 1 + power-domains: false + examples: - | vpu: video-codec@63ff4000 { -- GitLab From 64a21a18f55ebafc9e805787770df4e0518db887 Mon Sep 17 00:00:00 2001 From: Michael Tretter Date: Wed, 2 Dec 2020 10:05:22 +0100 Subject: [PATCH 0883/1894] dt-bindings: xlnx,vcu-settings: fix dt_binding_check warnings When running make dt_binding_check, the xlnx,vcu-settings binding triggers the following two warnings: 'additionalProperties' is a required property example-0: vcu@a0041000:reg:0: [0, 2684620800, 0, 4096] is too long Fix the binding and make the checker happy. Signed-off-by: Michael Tretter Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20201202090522.251607-1-m.tretter@pengutronix.de Signed-off-by: Rob Herring --- .../bindings/soc/xilinx/xlnx,vcu-settings.yaml | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/soc/xilinx/xlnx,vcu-settings.yaml b/Documentation/devicetree/bindings/soc/xilinx/xlnx,vcu-settings.yaml index 378d0ced43c86..cb245f4002874 100644 --- a/Documentation/devicetree/bindings/soc/xilinx/xlnx,vcu-settings.yaml +++ b/Documentation/devicetree/bindings/soc/xilinx/xlnx,vcu-settings.yaml @@ -26,9 +26,18 @@ required: - compatible - reg +additionalProperties: false + examples: - | - xlnx_vcu: vcu@a0041000 { - compatible = "xlnx,vcu-settings", "syscon"; - reg = <0x0 0xa0041000 0x0 0x1000>; + fpga { + #address-cells = <2>; + #size-cells = <2>; + + xlnx_vcu: vcu@a0041000 { + compatible = "xlnx,vcu-settings", "syscon"; + reg = <0x0 0xa0041000 0x0 0x1000>; + }; }; + +... -- GitLab From c1efde3f9780ad337df1cc393f6471ac8e24f50f Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Fri, 4 Dec 2020 17:38:10 +0800 Subject: [PATCH 0884/1894] dt-bindings: serial: add the required property 'additionalProperties' When I do dt_binding_check for any YAML file, below wanring is always reported: xxx/serial/litex,liteuart.yaml: 'additionalProperties' is a required property xxx/serial/litex,liteuart.yaml: ignoring, error in schema: warning: no schema found in file: xxx/serial/litex,liteuart.yaml Signed-off-by: Zhen Lei Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20201204093813.1275-3-thunder.leizhen@huawei.com Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/serial/litex,liteuart.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/serial/litex,liteuart.yaml b/Documentation/devicetree/bindings/serial/litex,liteuart.yaml index bc79b3cca5424..c4f1f489dc2d0 100644 --- a/Documentation/devicetree/bindings/serial/litex,liteuart.yaml +++ b/Documentation/devicetree/bindings/serial/litex,liteuart.yaml @@ -29,6 +29,8 @@ required: - compatible - reg +additionalProperties: false + examples: - | uart0: serial@e0001800 { -- GitLab From c8f054f10507dc133c9aa51f478dabe772f16288 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Fri, 4 Dec 2020 17:38:11 +0800 Subject: [PATCH 0885/1894] dt-bindings: soc: add the required property 'additionalProperties' When I do dt_binding_check for any YAML file, below wanring is always reported: xxx/soc/litex/litex,soc-controller.yaml: 'additionalProperties' is a required property xxx/soc/litex/litex,soc-controller.yaml: ignoring, error in schema: warning: no schema found in file: xxx/soc/litex/litex,soc-controller.yaml Signed-off-by: Zhen Lei Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20201204093813.1275-4-thunder.leizhen@huawei.com Signed-off-by: Rob Herring --- .../devicetree/bindings/soc/litex/litex,soc-controller.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/soc/litex/litex,soc-controller.yaml b/Documentation/devicetree/bindings/soc/litex/litex,soc-controller.yaml index e2b788796e791..c8b57c7fd08c2 100644 --- a/Documentation/devicetree/bindings/soc/litex/litex,soc-controller.yaml +++ b/Documentation/devicetree/bindings/soc/litex/litex,soc-controller.yaml @@ -28,6 +28,8 @@ required: - compatible - reg +additionalProperties: false + examples: - | soc_ctrl0: soc-controller@f0000000 { -- GitLab From d73982be2b00bbe76b53433cc56a1cd9555b9091 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Fri, 4 Dec 2020 17:38:12 +0800 Subject: [PATCH 0886/1894] dt-bindings: devapc: add the required property 'additionalProperties' When I do dt_binding_check for any YAML file, below wanring is always reported: xxx/soc/mediatek/devapc.yaml: 'additionalProperties' is a required property xxx/soc/mediatek/devapc.yaml: ignoring, error in schema: warning: no schema found in file: xxx/soc/mediatek/devapc.yaml Signed-off-by: Zhen Lei Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20201204093813.1275-5-thunder.leizhen@huawei.com Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/soc/mediatek/devapc.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/soc/mediatek/devapc.yaml b/Documentation/devicetree/bindings/soc/mediatek/devapc.yaml index 6c763f873a632..31e4d3c339bfe 100644 --- a/Documentation/devicetree/bindings/soc/mediatek/devapc.yaml +++ b/Documentation/devicetree/bindings/soc/mediatek/devapc.yaml @@ -44,6 +44,8 @@ required: - clocks - clock-names +additionalProperties: false + examples: - | #include -- GitLab From c4b8c562a75f568026038c001cfa7737dac272da Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Mon, 7 Dec 2020 12:23:58 +0800 Subject: [PATCH 0887/1894] dt-bindings: media: nokia,smia: eliminate yamllint warnings Eliminate the following yamllint warnings: ./Documentation/devicetree/bindings/media/i2c/mipi-ccs.yaml :4:1: [error] missing document start "---" (document-start) :29:9: [warning] wrong indentation: expected 10 but found 8 (indentation) :32:9: [warning] wrong indentation: expected 10 but found 8 (indentation) Signed-off-by: Zhen Lei Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20201207042400.1498-3-thunder.leizhen@huawei.com Signed-off-by: Rob Herring --- .../devicetree/bindings/media/i2c/mipi-ccs.yaml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/Documentation/devicetree/bindings/media/i2c/mipi-ccs.yaml b/Documentation/devicetree/bindings/media/i2c/mipi-ccs.yaml index d94bd67ccea11..0df0334d2d0db 100644 --- a/Documentation/devicetree/bindings/media/i2c/mipi-ccs.yaml +++ b/Documentation/devicetree/bindings/media/i2c/mipi-ccs.yaml @@ -1,6 +1,7 @@ # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) # Copyright (C) 2014--2020 Intel Corporation - +%YAML 1.2 +--- $id: http://devicetree.org/schemas/media/i2c/mipi-ccs.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# @@ -26,11 +27,11 @@ properties: compatible: oneOf: - items: - - const: mipi-ccs-1.1 - - const: mipi-ccs + - const: mipi-ccs-1.1 + - const: mipi-ccs - items: - - const: mipi-ccs-1.0 - - const: mipi-ccs + - const: mipi-ccs-1.0 + - const: mipi-ccs - const: nokia,smia reg: -- GitLab From aeefc1a01e7c3905580a981e93032cd452275c99 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Mon, 7 Dec 2020 12:48:30 +0800 Subject: [PATCH 0888/1894] dt-bindings: display: eliminate yamllint warnings Eliminate the following yamllint warnings: ./Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml :52:9: [warning] wrong indentation: expected 6 but found 8 (indentation) ./Documentation/devicetree/bindings/display/bridge/intel,keembay-dsi.yaml :42:8: [warning] wrong indentation: expected 8 but found 7 (indentation) :45:8: [warning] wrong indentation: expected 8 but found 7 (indentation) ./Documentation/devicetree/bindings/display/intel,keembay-msscam.yaml :21:6: [warning] wrong indentation: expected 6 but found 5 (indentation) ./Documentation/devicetree/bindings/display/panel/novatek,nt36672a.yaml :25:10: [warning] wrong indentation: expected 10 but found 9 (indentation) Signed-off-by: Zhen Lei Acked-by: Sam Ravnborg Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20201207044830.1551-2-thunder.leizhen@huawei.com Signed-off-by: Rob Herring --- .../devicetree/bindings/display/bridge/analogix,anx7625.yaml | 4 ++-- .../devicetree/bindings/display/bridge/intel,keembay-dsi.yaml | 4 ++-- .../devicetree/bindings/display/intel,keembay-msscam.yaml | 4 ++-- .../devicetree/bindings/display/panel/novatek,nt36672a.yaml | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml b/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml index 60585a4fc22bc..9392b5502a329 100644 --- a/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml +++ b/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml @@ -49,8 +49,8 @@ properties: Video port for panel or connector. required: - - port@0 - - port@1 + - port@0 + - port@1 required: - compatible diff --git a/Documentation/devicetree/bindings/display/bridge/intel,keembay-dsi.yaml b/Documentation/devicetree/bindings/display/bridge/intel,keembay-dsi.yaml index ab5be26252240..35c9dfd866501 100644 --- a/Documentation/devicetree/bindings/display/bridge/intel,keembay-dsi.yaml +++ b/Documentation/devicetree/bindings/display/bridge/intel,keembay-dsi.yaml @@ -39,10 +39,10 @@ properties: properties: '#address-cells': - const: 1 + const: 1 '#size-cells': - const: 0 + const: 0 port@0: type: object diff --git a/Documentation/devicetree/bindings/display/intel,keembay-msscam.yaml b/Documentation/devicetree/bindings/display/intel,keembay-msscam.yaml index 40caa61188098..a222b52d8b8ff 100644 --- a/Documentation/devicetree/bindings/display/intel,keembay-msscam.yaml +++ b/Documentation/devicetree/bindings/display/intel,keembay-msscam.yaml @@ -18,8 +18,8 @@ description: | properties: compatible: items: - - const: intel,keembay-msscam - - const: syscon + - const: intel,keembay-msscam + - const: syscon reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/display/panel/novatek,nt36672a.yaml b/Documentation/devicetree/bindings/display/panel/novatek,nt36672a.yaml index d2170de6b7230..2f5df1d235aea 100644 --- a/Documentation/devicetree/bindings/display/panel/novatek,nt36672a.yaml +++ b/Documentation/devicetree/bindings/display/panel/novatek,nt36672a.yaml @@ -22,7 +22,7 @@ properties: compatible: items: - enum: - - tianma,fhd-video + - tianma,fhd-video - const: novatek,nt36672a description: This indicates the panel manufacturer of the panel that is in turn using the NT36672A panel driver. This compatible string -- GitLab From 246e18ba725c3b39d9d45b91fd93ce67e772fef4 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Mon, 7 Dec 2020 12:55:27 +0800 Subject: [PATCH 0889/1894] dt-bindings: clock: imx8qxp-lpcg: eliminate yamllint warnings Eliminate the following yamllint warnings: ./Documentation/devicetree/bindings/clock/imx8qxp-lpcg.yaml :32:13:[warning] wrong indentation: expected 14 but found 12 (indentation) :35:9: [warning] wrong indentation: expected 10 but found 8 (indentation) Signed-off-by: Zhen Lei Reviewed-by: Rob Herring Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/20201207045527.1607-2-thunder.leizhen@huawei.com Signed-off-by: Rob Herring --- .../bindings/clock/imx8qxp-lpcg.yaml | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/Documentation/devicetree/bindings/clock/imx8qxp-lpcg.yaml b/Documentation/devicetree/bindings/clock/imx8qxp-lpcg.yaml index e709e530e17a2..940486ef1051d 100644 --- a/Documentation/devicetree/bindings/clock/imx8qxp-lpcg.yaml +++ b/Documentation/devicetree/bindings/clock/imx8qxp-lpcg.yaml @@ -29,18 +29,18 @@ properties: - const: fsl,imx8qxp-lpcg - items: - enum: - - fsl,imx8qm-lpcg + - fsl,imx8qm-lpcg - const: fsl,imx8qxp-lpcg - enum: - - fsl,imx8qxp-lpcg-adma - - fsl,imx8qxp-lpcg-conn - - fsl,imx8qxp-lpcg-dc - - fsl,imx8qxp-lpcg-dsp - - fsl,imx8qxp-lpcg-gpu - - fsl,imx8qxp-lpcg-hsio - - fsl,imx8qxp-lpcg-img - - fsl,imx8qxp-lpcg-lsio - - fsl,imx8qxp-lpcg-vpu + - fsl,imx8qxp-lpcg-adma + - fsl,imx8qxp-lpcg-conn + - fsl,imx8qxp-lpcg-dc + - fsl,imx8qxp-lpcg-dsp + - fsl,imx8qxp-lpcg-gpu + - fsl,imx8qxp-lpcg-hsio + - fsl,imx8qxp-lpcg-img + - fsl,imx8qxp-lpcg-lsio + - fsl,imx8qxp-lpcg-vpu deprecated: true reg: maxItems: 1 -- GitLab From 21df8683b85611c8267fdf87ebb7b4056b88ad3a Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Thu, 17 Dec 2020 00:59:45 +0000 Subject: [PATCH 0890/1894] dt-bindings/display: abt,y030xx067a: Fix binding The binding should use "unevaluatedProperties" instead of "additionalProperties", since it is a SPI device and may have SPI-related Device Tree properties, for instance the "spi-max-frequency" property that is present in the example. Fixes: e366a644c69d ("dt-bindings: display: Add ABT Y030XX067A panel bindings") Signed-off-by: Paul Cercueil Reviewed-by: Sam Ravnborg Link: https://lore.kernel.org/r/20201217005945.335111-1-paul%40crapouillou.net Signed-off-by: Rob Herring --- .../devicetree/bindings/display/panel/abt,y030xx067a.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/display/panel/abt,y030xx067a.yaml b/Documentation/devicetree/bindings/display/panel/abt,y030xx067a.yaml index 91cb4c3e01986..a108029ecfabc 100644 --- a/Documentation/devicetree/bindings/display/panel/abt,y030xx067a.yaml +++ b/Documentation/devicetree/bindings/display/panel/abt,y030xx067a.yaml @@ -32,7 +32,7 @@ required: - power-supply - reset-gpios -additionalProperties: false +unevaluatedProperties: false examples: - | -- GitLab From f6f5cd840ae782680c5e94048c72420e4e6857f9 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 17 Dec 2020 17:17:05 +0000 Subject: [PATCH 0891/1894] timekeeping: Fix spelling mistake in Kconfig "fullfill" -> "fulfill" There is a spelling mistake in the Kconfig help text. Fix it. Signed-off-by: Colin Ian King Signed-off-by: Thomas Gleixner Acked-by: Linus Walleij Link: https://lore.kernel.org/r/20201217171705.57586-1-colin.king@canonical.com --- kernel/time/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index a09b1d61df6a5..64051f47475c9 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -141,7 +141,7 @@ config CONTEXT_TRACKING_FORCE dynticks working. This option stands for testing when an arch implements the - context tracking backend but doesn't yet fullfill all the + context tracking backend but doesn't yet fulfill all the requirements to make the full dynticks feature working. Without the full dynticks, there is no way to test the support for context tracking and the subsystems that rely on it: RCU -- GitLab From 698285da79f5b0b099db15a37ac661ac408c80eb Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Thu, 17 Dec 2020 22:29:46 +0100 Subject: [PATCH 0892/1894] net/sched: sch_taprio: ensure to reset/destroy all child qdiscs taprio_graft() can insert a NULL element in the array of child qdiscs. As a consquence, taprio_reset() might not reset child qdiscs completely, and taprio_destroy() might leak resources. Fix it by ensuring that loops that iterate over q->qdiscs[] don't end when they find the first NULL item. Fixes: 44d4775ca518 ("net/sched: sch_taprio: reset child qdiscs before freeing them") Fixes: 5a781ccbd19e ("tc: Add support for configuring the taprio scheduler") Suggested-by: Jakub Kicinski Signed-off-by: Davide Caratti Link: https://lore.kernel.org/r/13edef6778fef03adc751582562fba4a13e06d6a.1608240532.git.dcaratti@redhat.com Signed-off-by: Jakub Kicinski --- net/sched/sch_taprio.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index c74817ec9964b..6f775275826a4 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1605,8 +1605,9 @@ static void taprio_reset(struct Qdisc *sch) hrtimer_cancel(&q->advance_timer); if (q->qdiscs) { - for (i = 0; i < dev->num_tx_queues && q->qdiscs[i]; i++) - qdisc_reset(q->qdiscs[i]); + for (i = 0; i < dev->num_tx_queues; i++) + if (q->qdiscs[i]) + qdisc_reset(q->qdiscs[i]); } sch->qstats.backlog = 0; sch->q.qlen = 0; @@ -1626,7 +1627,7 @@ static void taprio_destroy(struct Qdisc *sch) taprio_disable_offload(dev, q, NULL); if (q->qdiscs) { - for (i = 0; i < dev->num_tx_queues && q->qdiscs[i]; i++) + for (i = 0; i < dev->num_tx_queues; i++) qdisc_put(q->qdiscs[i]); kfree(q->qdiscs); -- GitLab From 320d159e2d63a97a40f24cd6dfda5a57eec65b91 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 18 Dec 2020 11:10:53 +0100 Subject: [PATCH 0893/1894] dt-bindings: rtc: add reset-source property Some RTCs, e.g. the pcf2127, can be used as a hardware watchdog. But if the reset pin is not actually wired up, the driver exposes a watchdog device that doesn't actually work. Provide a standard binding that can be used to indicate that a given RTC can perform a reset of the machine, similar to wakeup-source. Suggested-by: Alexandre Belloni Signed-off-by: Rasmus Villemoes Reviewed-by: Rob Herring Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201218101054.25416-2-rasmus.villemoes@prevas.dk --- Documentation/devicetree/bindings/rtc/rtc.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/devicetree/bindings/rtc/rtc.yaml b/Documentation/devicetree/bindings/rtc/rtc.yaml index 8acd2de3de3ad..d30dc045aac64 100644 --- a/Documentation/devicetree/bindings/rtc/rtc.yaml +++ b/Documentation/devicetree/bindings/rtc/rtc.yaml @@ -63,6 +63,11 @@ properties: description: Enables wake up of host system on alarm. + reset-source: + $ref: /schemas/types.yaml#/definitions/flag + description: + The RTC is able to reset the machine. + additionalProperties: true ... -- GitLab From 71ac13457d9d1007effde65b54818106b2c2b525 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Dec 2020 11:10:54 +0100 Subject: [PATCH 0894/1894] rtc: pcf2127: only use watchdog when explicitly available MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Most boards using the pcf2127 chip (in my bubble) don't make use of the watchdog functionality and the respective output is not connected. The effect on such a board is that there is a watchdog device provided that doesn't work. So only register the watchdog if the device tree has a "reset-source" property. Signed-off-by: Uwe Kleine-König [RV: s/has-watchdog/reset-source/] Signed-off-by: Rasmus Villemoes Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201218101054.25416-3-rasmus.villemoes@prevas.dk --- drivers/rtc/rtc-pcf2127.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-pcf2127.c b/drivers/rtc/rtc-pcf2127.c index 33fa8b17b79c4..39a7b5116aa4b 100644 --- a/drivers/rtc/rtc-pcf2127.c +++ b/drivers/rtc/rtc-pcf2127.c @@ -336,7 +336,8 @@ static int pcf2127_watchdog_init(struct device *dev, struct pcf2127 *pcf2127) u32 wdd_timeout; int ret; - if (!IS_ENABLED(CONFIG_WATCHDOG)) + if (!IS_ENABLED(CONFIG_WATCHDOG) || + !device_property_read_bool(dev, "reset-source")) return 0; pcf2127->wdd.parent = dev; -- GitLab From 9541b81322e60120b299222919957becd7a13683 Mon Sep 17 00:00:00 2001 From: Boris Protopopov Date: Thu, 17 Dec 2020 20:58:08 +0000 Subject: [PATCH 0895/1894] Add SMB 2 support for getting and setting SACLs Fix passing of the additional security info via version operations. Force new open when getting SACL and avoid reuse of files that were previously open without sufficient privileges to access SACLs. Signed-off-by: Boris Protopopov Reviewed-by: Shyam Prasad N Signed-off-by: Steve French --- fs/cifs/smb2ops.c | 4 ++-- fs/cifs/smb2pdu.c | 4 +++- fs/cifs/xattr.c | 10 ++++------ 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 4a1761139e002..f19274857292b 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -3369,9 +3369,9 @@ get_smb2_acl(struct cifs_sb_info *cifs_sb, struct cifs_ntsd *pntsd = NULL; struct cifsFileInfo *open_file = NULL; - if (inode) + if (inode && !(info & SACL_SECINFO)) open_file = find_readable_file(CIFS_I(inode), true); - if (!open_file) + if (!open_file || (info & SACL_SECINFO)) return get_smb2_acl_by_path(cifs_sb, path, pacllen, info); pntsd = get_smb2_acl_by_fid(cifs_sb, &open_file->fid, pacllen, info); diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 202d8742d1493..067eb44c7baa8 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -3480,8 +3480,10 @@ SMB311_posix_query_info(const unsigned int xid, struct cifs_tcon *tcon, int SMB2_query_acl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, - void **data, u32 *plen, u32 additional_info) + void **data, u32 *plen, u32 extra_info) { + __u32 additional_info = OWNER_SECINFO | GROUP_SECINFO | DACL_SECINFO | + extra_info; *plen = 0; return query_info(xid, tcon, persistent_fid, volatile_fid, diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index 9318a2acf4ee5..6b658a1172ef0 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c @@ -340,21 +340,19 @@ static int cifs_xattr_get(const struct xattr_handler *handler, * fetch owner, DACL, and SACL if asked for full descriptor, * fetch owner and DACL otherwise */ - u32 acllen, additional_info = 0; + u32 acllen, extra_info; struct cifs_ntsd *pacl; if (pTcon->ses->server->ops->get_acl == NULL) goto out; /* rc already EOPNOTSUPP */ if (handler->flags == XATTR_CIFS_NTSD_FULL) { - additional_info = OWNER_SECINFO | GROUP_SECINFO | - DACL_SECINFO | SACL_SECINFO; + extra_info = SACL_SECINFO; } else { - additional_info = OWNER_SECINFO | GROUP_SECINFO | - DACL_SECINFO; + extra_info = 0; } pacl = pTcon->ses->server->ops->get_acl(cifs_sb, - inode, full_path, &acllen, additional_info); + inode, full_path, &acllen, extra_info); if (IS_ERR(pacl)) { rc = PTR_ERR(pacl); cifs_dbg(VFS, "%s: error %zd getting sec desc\n", -- GitLab From dd20166236953c8cd14f4c668bf972af32f0c6be Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 19 Dec 2020 03:15:43 +0000 Subject: [PATCH 0896/1894] io_uring: fix 0-iov read buffer select Doing vectored buf-select read with 0 iovec passed is meaningless and utterly broken, forbid it. Cc: # 5.7+ Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index b74957856e681..f3690dfdd564f 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -3125,9 +3125,7 @@ static ssize_t io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov, iov[0].iov_len = kbuf->len; return 0; } - if (!req->rw.len) - return 0; - else if (req->rw.len > 1) + if (req->rw.len != 1) return -EINVAL; #ifdef CONFIG_COMPAT -- GitLab From 87508224485323ce2d4e7fb929ec80f51adcc238 Mon Sep 17 00:00:00 2001 From: Stefan Chulski Date: Thu, 17 Dec 2020 16:52:15 +0200 Subject: [PATCH 0897/1894] net: mvpp2: disable force link UP during port init procedure Force link UP can be enabled by bootloader during tftpboot and breaks NFS support. Force link UP disabled during port init procedure. Fixes: f84bf386f395 ("net: mvpp2: initialize the GoP") Signed-off-by: Stefan Chulski Acked-by: Marcin Wojtas Link: https://lore.kernel.org/r/1608216735-14501-1-git-send-email-stefanc@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index afdd22827223b..0f18d034ce3e8 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -5487,7 +5487,7 @@ static int mvpp2_port_init(struct mvpp2_port *port) struct mvpp2 *priv = port->priv; struct mvpp2_txq_pcpu *txq_pcpu; unsigned int thread; - int queue, err; + int queue, err, val; /* Checks for hardware constraints */ if (port->first_rxq + port->nrxqs > @@ -5501,6 +5501,18 @@ static int mvpp2_port_init(struct mvpp2_port *port) mvpp2_egress_disable(port); mvpp2_port_disable(port); + if (mvpp2_is_xlg(port->phy_interface)) { + val = readl(port->base + MVPP22_XLG_CTRL0_REG); + val &= ~MVPP22_XLG_CTRL0_FORCE_LINK_PASS; + val |= MVPP22_XLG_CTRL0_FORCE_LINK_DOWN; + writel(val, port->base + MVPP22_XLG_CTRL0_REG); + } else { + val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG); + val &= ~MVPP2_GMAC_FORCE_LINK_PASS; + val |= MVPP2_GMAC_FORCE_LINK_DOWN; + writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG); + } + port->tx_time_coal = MVPP2_TXDONE_COAL_USEC; port->txqs = devm_kcalloc(dev, port->ntxqs, sizeof(*port->txqs), -- GitLab From 3f48fab62bb81a7f9d01e9d43c40395fad011dd5 Mon Sep 17 00:00:00 2001 From: Stefan Chulski Date: Thu, 17 Dec 2020 20:30:17 +0200 Subject: [PATCH 0898/1894] net: mvpp2: Add TCAM entry to drop flow control pause frames Issue: Flow control frame used to pause GoP(MAC) was delivered to the CPU and created a load on the CPU. Since XOFF/XON frames are used only by MAC, these frames should be dropped inside MAC. Fix: According to 802.3-2012 - IEEE Standard for Ethernet pause frame has unique destination MAC address 01-80-C2-00-00-01. Add TCAM parser entry to track and drop pause frames by destination MAC. Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit") Signed-off-by: Stefan Chulski Link: https://lore.kernel.org/r/1608229817-21951-1-git-send-email-stefanc@marvell.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/marvell/mvpp2/mvpp2_prs.c | 33 +++++++++++++++++++ .../net/ethernet/marvell/mvpp2/mvpp2_prs.h | 2 +- 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c index 5692c6087bbb0..f069593d7e50c 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c @@ -405,6 +405,38 @@ static int mvpp2_prs_tcam_first_free(struct mvpp2 *priv, unsigned char start, return -EINVAL; } +/* Drop flow control pause frames */ +static void mvpp2_prs_drop_fc(struct mvpp2 *priv) +{ + unsigned char da[ETH_ALEN] = { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x01 }; + struct mvpp2_prs_entry pe; + unsigned int len; + + memset(&pe, 0, sizeof(pe)); + + /* For all ports - drop flow control frames */ + pe.index = MVPP2_PE_FC_DROP; + mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_MAC); + + /* Set match on DA */ + len = ETH_ALEN; + while (len--) + mvpp2_prs_tcam_data_byte_set(&pe, len, da[len], 0xff); + + mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_DROP_MASK, + MVPP2_PRS_RI_DROP_MASK); + + mvpp2_prs_sram_bits_set(&pe, MVPP2_PRS_SRAM_LU_GEN_BIT, 1); + mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_FLOWS); + + /* Mask all ports */ + mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK); + + /* Update shadow table and hw entry */ + mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_MAC); + mvpp2_prs_hw_write(priv, &pe); +} + /* Enable/disable dropping all mac da's */ static void mvpp2_prs_mac_drop_all_set(struct mvpp2 *priv, int port, bool add) { @@ -1162,6 +1194,7 @@ static void mvpp2_prs_mac_init(struct mvpp2 *priv) mvpp2_prs_hw_write(priv, &pe); /* Create dummy entries for drop all and promiscuous modes */ + mvpp2_prs_drop_fc(priv); mvpp2_prs_mac_drop_all_set(priv, 0, false); mvpp2_prs_mac_promisc_set(priv, 0, MVPP2_PRS_L2_UNI_CAST, false); mvpp2_prs_mac_promisc_set(priv, 0, MVPP2_PRS_L2_MULTI_CAST, false); diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.h index e22f6c85d3803..4b68dd3747338 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.h +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.h @@ -129,7 +129,7 @@ #define MVPP2_PE_VID_EDSA_FLTR_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 7) #define MVPP2_PE_VLAN_DBL (MVPP2_PRS_TCAM_SRAM_SIZE - 6) #define MVPP2_PE_VLAN_NONE (MVPP2_PRS_TCAM_SRAM_SIZE - 5) -/* reserved */ +#define MVPP2_PE_FC_DROP (MVPP2_PRS_TCAM_SRAM_SIZE - 4) #define MVPP2_PE_MAC_MC_PROMISCUOUS (MVPP2_PRS_TCAM_SRAM_SIZE - 3) #define MVPP2_PE_MAC_UC_PROMISCUOUS (MVPP2_PRS_TCAM_SRAM_SIZE - 2) #define MVPP2_PE_MAC_NON_PROMISCUOUS (MVPP2_PRS_TCAM_SRAM_SIZE - 1) -- GitLab From fec6079b2eeab319d9e3d074f54d3b6f623e9701 Mon Sep 17 00:00:00 2001 From: Stefan Chulski Date: Thu, 17 Dec 2020 20:37:46 +0200 Subject: [PATCH 0899/1894] net: mvpp2: prs: fix PPPoE with ipv6 packet parse Current PPPoE+IPv6 entry is jumping to 'next-hdr' field and not to 'DIP' field as done for IPv4. Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit") Reported-by: Liron Himi Signed-off-by: Stefan Chulski Link: https://lore.kernel.org/r/1608230266-22111-1-git-send-email-stefanc@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c index f069593d7e50c..a30eb90ba3d28 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c @@ -1680,8 +1680,9 @@ static int mvpp2_prs_pppoe_init(struct mvpp2 *priv) mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_IP6); mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L3_IP6, MVPP2_PRS_RI_L3_PROTO_MASK); - /* Skip eth_type + 4 bytes of IPv6 header */ - mvpp2_prs_sram_shift_set(&pe, MVPP2_ETH_TYPE_LEN + 4, + /* Jump to DIP of IPV6 header */ + mvpp2_prs_sram_shift_set(&pe, MVPP2_ETH_TYPE_LEN + 8 + + MVPP2_MAX_L3_ADDR_SIZE, MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD); /* Set L3 offset */ mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L3, -- GitLab From c82cf05190d482bb3546dffd6a337f38e105daf7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 20 Nov 2020 14:21:21 +0100 Subject: [PATCH 0900/1894] clk: bcm: dvp: drop a variable that is assigned to only MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The third parameter to devm_platform_get_and_ioremap_resource() is used only to provide the used resource. As this variable isn't used afterwards, switch to the function devm_platform_ioremap_resource() which doesn't provide this output parameter. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20201120132121.2678997-1-u.kleine-koenig@pengutronix.de Reviewed-by: Nicolas Saenz Julienne Signed-off-by: Stephen Boyd --- drivers/clk/bcm/clk-bcm2711-dvp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/clk/bcm/clk-bcm2711-dvp.c b/drivers/clk/bcm/clk-bcm2711-dvp.c index 8333e20dc9d22..6696200ecf7e2 100644 --- a/drivers/clk/bcm/clk-bcm2711-dvp.c +++ b/drivers/clk/bcm/clk-bcm2711-dvp.c @@ -25,7 +25,6 @@ static const struct clk_parent_data clk_dvp_parent = { static int clk_dvp_probe(struct platform_device *pdev) { struct clk_hw_onecell_data *data; - struct resource *res; struct clk_dvp *dvp; void __iomem *base; int ret; @@ -42,7 +41,7 @@ static int clk_dvp_probe(struct platform_device *pdev) return -ENOMEM; data = dvp->data; - base = devm_platform_get_and_ioremap_resource(pdev, 0, &res); + base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(base)) return PTR_ERR(base); -- GitLab From be439cc4c404f646a8ba090fa786d53c10926b12 Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Wed, 2 Dec 2020 11:35:18 +0100 Subject: [PATCH 0901/1894] clk: bcm: dvp: Add MODULE_DEVICE_TABLE() Add MODULE_DEVICE_TABLE() so as to be able to use the driver as a module. More precisely, for the driver to be loaded automatically at boot. Fixes: 1bc95972715a ("clk: bcm: Add BCM2711 DVP driver") Signed-off-by: Nicolas Saenz Julienne Link: https://lore.kernel.org/r/20201202103518.21889-1-nsaenzjulienne@suse.de Reviewed-by: Maxime Ripard Signed-off-by: Stephen Boyd --- drivers/clk/bcm/clk-bcm2711-dvp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/bcm/clk-bcm2711-dvp.c b/drivers/clk/bcm/clk-bcm2711-dvp.c index 6696200ecf7e2..e63a42618ac2c 100644 --- a/drivers/clk/bcm/clk-bcm2711-dvp.c +++ b/drivers/clk/bcm/clk-bcm2711-dvp.c @@ -107,6 +107,7 @@ static const struct of_device_id clk_dvp_dt_ids[] = { { .compatible = "brcm,brcm2711-dvp", }, { /* sentinel */ } }; +MODULE_DEVICE_TABLE(of, clk_dvp_dt_ids); static struct platform_driver clk_dvp_driver = { .probe = clk_dvp_probe, -- GitLab From 91274497c79170aaadc491d4ffe4de35495a060d Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Thu, 19 Nov 2020 17:43:07 +0200 Subject: [PATCH 0902/1894] clk: at91: sama7g5: fix compilation error pmc_data_allocate() has been changed. pmc_data_free() was removed. Adapt the code taking this into consideration. With this the programmable clocks were also saved in sama7g5_pmc so that they could be later referenced. Fixes: cb783bbbcf54 ("clk: at91: sama7g5: add clock support for sama7g5") Signed-off-by: Claudiu Beznea Reviewed-by: Tudor Ambarus Tested-by: Tudor Ambarus Link: https://lore.kernel.org/r/1605800597-16720-2-git-send-email-claudiu.beznea@microchip.com Signed-off-by: Stephen Boyd --- drivers/clk/at91/sama7g5.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/clk/at91/sama7g5.c b/drivers/clk/at91/sama7g5.c index 0db2ab3eca147..a092a940baa40 100644 --- a/drivers/clk/at91/sama7g5.c +++ b/drivers/clk/at91/sama7g5.c @@ -838,7 +838,7 @@ static void __init sama7g5_pmc_setup(struct device_node *np) sama7g5_pmc = pmc_data_allocate(PMC_I2S1_MUX + 1, nck(sama7g5_systemck), nck(sama7g5_periphck), - nck(sama7g5_gck)); + nck(sama7g5_gck), 8); if (!sama7g5_pmc) return; @@ -980,6 +980,8 @@ static void __init sama7g5_pmc_setup(struct device_node *np) sama7g5_prog_mux_table); if (IS_ERR(hw)) goto err_free; + + sama7g5_pmc->pchws[i] = hw; } for (i = 0; i < ARRAY_SIZE(sama7g5_systemck); i++) { @@ -1052,7 +1054,7 @@ err_free: kfree(alloc_mem); } - pmc_data_free(sama7g5_pmc); + kfree(sama7g5_pmc); } /* Some clks are used for a clocksource */ -- GitLab From 3d86ee17d4670406d07f92da6fa4f2aa82cdc5a2 Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Thu, 19 Nov 2020 17:43:08 +0200 Subject: [PATCH 0903/1894] dt-bindings: clock: at91: add sama7g5 pll defines Add SAMA7G5 specific PLL defines to be referenced in a phandle as a PMC_TYPE_CORE clock. Suggested-by: Claudiu Beznea Signed-off-by: Eugen Hristev [claudiu.beznea@microchip.com: adapt comit message, adapt sama7g5.c] Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/1605800597-16720-3-git-send-email-claudiu.beznea@microchip.com Signed-off-by: Stephen Boyd --- drivers/clk/at91/sama7g5.c | 6 +++--- include/dt-bindings/clock/at91.h | 10 ++++++++++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/clk/at91/sama7g5.c b/drivers/clk/at91/sama7g5.c index a092a940baa40..7ef7963126b67 100644 --- a/drivers/clk/at91/sama7g5.c +++ b/drivers/clk/at91/sama7g5.c @@ -182,13 +182,13 @@ static const struct { .p = "audiopll_fracck", .l = &pll_layout_divpmc, .t = PLL_TYPE_DIV, - .eid = PMC_I2S0_MUX, }, + .eid = PMC_AUDIOPMCPLL, }, { .n = "audiopll_diviock", .p = "audiopll_fracck", .l = &pll_layout_divio, .t = PLL_TYPE_DIV, - .eid = PMC_I2S1_MUX, }, + .eid = PMC_AUDIOIOPLL, }, }, [PLL_ID_ETH] = { @@ -835,7 +835,7 @@ static void __init sama7g5_pmc_setup(struct device_node *np) if (IS_ERR(regmap)) return; - sama7g5_pmc = pmc_data_allocate(PMC_I2S1_MUX + 1, + sama7g5_pmc = pmc_data_allocate(PMC_ETHPLL + 1, nck(sama7g5_systemck), nck(sama7g5_periphck), nck(sama7g5_gck), 8); diff --git a/include/dt-bindings/clock/at91.h b/include/dt-bindings/clock/at91.h index eba17106608b3..fab313f62e8f5 100644 --- a/include/dt-bindings/clock/at91.h +++ b/include/dt-bindings/clock/at91.h @@ -25,6 +25,16 @@ #define PMC_PLLBCK 8 #define PMC_AUDIOPLLCK 9 +/* SAMA7G5 */ +#define PMC_CPUPLL (PMC_MAIN + 1) +#define PMC_SYSPLL (PMC_MAIN + 2) +#define PMC_DDRPLL (PMC_MAIN + 3) +#define PMC_IMGPLL (PMC_MAIN + 4) +#define PMC_BAUDPLL (PMC_MAIN + 5) +#define PMC_AUDIOPMCPLL (PMC_MAIN + 6) +#define PMC_AUDIOIOPLL (PMC_MAIN + 7) +#define PMC_ETHPLL (PMC_MAIN + 8) + #ifndef AT91_PMC_MOSCS #define AT91_PMC_MOSCS 0 /* MOSCS Flag */ #define AT91_PMC_LOCKA 1 /* PLLA Lock */ -- GitLab From 83d002877365afac2cb65ef4ad36b445652ebda3 Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Thu, 19 Nov 2020 17:43:09 +0200 Subject: [PATCH 0904/1894] clk: at91: sama7g5: allow SYS and CPU PLLs to be exported and referenced in DT Allow SYSPLL and CPUPLL to be referenced as a PMC_TYPE_CORE clock from phandle in DT. Suggested-by: Claudiu Beznea Signed-off-by: Eugen Hristev [claudiu.beznea@microchip.com: adapt commit message, add CPU PLL] Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/1605800597-16720-4-git-send-email-claudiu.beznea@microchip.com Signed-off-by: Stephen Boyd --- drivers/clk/at91/sama7g5.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/clk/at91/sama7g5.c b/drivers/clk/at91/sama7g5.c index 7ef7963126b67..d3c3469d47d98 100644 --- a/drivers/clk/at91/sama7g5.c +++ b/drivers/clk/at91/sama7g5.c @@ -117,7 +117,8 @@ static const struct { .p = "cpupll_fracck", .l = &pll_layout_divpmc, .t = PLL_TYPE_DIV, - .c = 1, }, + .c = 1, + .eid = PMC_CPUPLL, }, }, [PLL_ID_SYS] = { @@ -131,7 +132,8 @@ static const struct { .p = "syspll_fracck", .l = &pll_layout_divpmc, .t = PLL_TYPE_DIV, - .c = 1, }, + .c = 1, + .eid = PMC_SYSPLL, }, }, [PLL_ID_DDR] = { -- GitLab From e26b3006ff07020e509fb9e0e560e462ff4077c9 Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Thu, 19 Nov 2020 17:43:10 +0200 Subject: [PATCH 0905/1894] clk: at91: clk-master: add 5th divisor for mck master clk-master can have 5 divisors with a field width of 3 bits on some products. Change the mask and number of divisors accordingly. Reported-by: Mihai Sain Signed-off-by: Eugen Hristev Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/1605800597-16720-5-git-send-email-claudiu.beznea@microchip.com Signed-off-by: Stephen Boyd --- drivers/clk/at91/clk-master.c | 2 +- drivers/clk/at91/pmc.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/at91/clk-master.c b/drivers/clk/at91/clk-master.c index bd0d8a69a2cf8..aafd003b30cfb 100644 --- a/drivers/clk/at91/clk-master.c +++ b/drivers/clk/at91/clk-master.c @@ -15,7 +15,7 @@ #define MASTER_PRES_MASK 0x7 #define MASTER_PRES_MAX MASTER_PRES_MASK #define MASTER_DIV_SHIFT 8 -#define MASTER_DIV_MASK 0x3 +#define MASTER_DIV_MASK 0x7 #define PMC_MCR 0x30 #define PMC_MCR_ID_MSK GENMASK(3, 0) diff --git a/drivers/clk/at91/pmc.h b/drivers/clk/at91/pmc.h index 7b86affc6d7c9..0a9364bde339c 100644 --- a/drivers/clk/at91/pmc.h +++ b/drivers/clk/at91/pmc.h @@ -48,7 +48,7 @@ extern const struct clk_master_layout at91sam9x5_master_layout; struct clk_master_characteristics { struct clk_range output; - u32 divisors[4]; + u32 divisors[5]; u8 have_div3_pres; }; -- GitLab From 0bb4623f13d46b2ea054777accff0c41af8036be Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Thu, 19 Nov 2020 17:43:11 +0200 Subject: [PATCH 0906/1894] clk: at91: sama7g5: add 5th divisor for mck0 layout and characteristics This SoC has the 5th divisor for the mck0 master clock. Adapt the characteristics accordingly. Reported-by: Mihai Sain Signed-off-by: Eugen Hristev Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/1605800597-16720-6-git-send-email-claudiu.beznea@microchip.com Signed-off-by: Stephen Boyd --- drivers/clk/at91/sama7g5.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/at91/sama7g5.c b/drivers/clk/at91/sama7g5.c index d3c3469d47d98..d685e22b20146 100644 --- a/drivers/clk/at91/sama7g5.c +++ b/drivers/clk/at91/sama7g5.c @@ -775,13 +775,13 @@ static const struct clk_pll_characteristics pll_characteristics = { /* MCK0 characteristics. */ static const struct clk_master_characteristics mck0_characteristics = { .output = { .min = 140000000, .max = 200000000 }, - .divisors = { 1, 2, 4, 3 }, + .divisors = { 1, 2, 4, 3, 5 }, .have_div3_pres = 1, }; /* MCK0 layout. */ static const struct clk_master_layout mck0_layout = { - .mask = 0x373, + .mask = 0x773, .pres_shift = 4, .offset = 0x28, }; -- GitLab From 8dc4af8bef127425271e06d09370a2479dae69c3 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Thu, 19 Nov 2020 17:43:12 +0200 Subject: [PATCH 0907/1894] clk: at91: clk-sam9x60-pll: allow runtime changes for pll Allow runtime frequency changes for PLLs registered with proper flags. This is necessary for CPU PLL on SAMA7G5 which is used by DVFS. Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/1605800597-16720-7-git-send-email-claudiu.beznea@microchip.com Signed-off-by: Stephen Boyd --- drivers/clk/at91/clk-sam9x60-pll.c | 145 +++++++++++++++++++++++++---- drivers/clk/at91/pmc.h | 4 +- drivers/clk/at91/sam9x60.c | 22 ++++- drivers/clk/at91/sama7g5.c | 67 +++++++++---- 4 files changed, 197 insertions(+), 41 deletions(-) diff --git a/drivers/clk/at91/clk-sam9x60-pll.c b/drivers/clk/at91/clk-sam9x60-pll.c index 78f458a7b2ef4..34e3ab13741ac 100644 --- a/drivers/clk/at91/clk-sam9x60-pll.c +++ b/drivers/clk/at91/clk-sam9x60-pll.c @@ -229,6 +229,57 @@ static int sam9x60_frac_pll_set_rate(struct clk_hw *hw, unsigned long rate, return sam9x60_frac_pll_compute_mul_frac(core, rate, parent_rate, true); } +static int sam9x60_frac_pll_set_rate_chg(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + struct sam9x60_pll_core *core = to_sam9x60_pll_core(hw); + struct sam9x60_frac *frac = to_sam9x60_frac(core); + struct regmap *regmap = core->regmap; + unsigned long irqflags; + unsigned int val, cfrac, cmul; + long ret; + + ret = sam9x60_frac_pll_compute_mul_frac(core, rate, parent_rate, true); + if (ret <= 0) + return ret; + + spin_lock_irqsave(core->lock, irqflags); + + regmap_update_bits(regmap, AT91_PMC_PLL_UPDT, AT91_PMC_PLL_UPDT_ID_MSK, + core->id); + regmap_read(regmap, AT91_PMC_PLL_CTRL1, &val); + cmul = (val & core->layout->mul_mask) >> core->layout->mul_shift; + cfrac = (val & core->layout->frac_mask) >> core->layout->frac_shift; + + if (cmul == frac->mul && cfrac == frac->frac) + goto unlock; + + regmap_write(regmap, AT91_PMC_PLL_CTRL1, + (frac->mul << core->layout->mul_shift) | + (frac->frac << core->layout->frac_shift)); + + regmap_update_bits(regmap, AT91_PMC_PLL_UPDT, + AT91_PMC_PLL_UPDT_UPDATE | AT91_PMC_PLL_UPDT_ID_MSK, + AT91_PMC_PLL_UPDT_UPDATE | core->id); + + regmap_update_bits(regmap, AT91_PMC_PLL_CTRL0, + AT91_PMC_PLL_CTRL0_ENLOCK | AT91_PMC_PLL_CTRL0_ENPLL, + AT91_PMC_PLL_CTRL0_ENLOCK | + AT91_PMC_PLL_CTRL0_ENPLL); + + regmap_update_bits(regmap, AT91_PMC_PLL_UPDT, + AT91_PMC_PLL_UPDT_UPDATE | AT91_PMC_PLL_UPDT_ID_MSK, + AT91_PMC_PLL_UPDT_UPDATE | core->id); + + while (!sam9x60_pll_ready(regmap, core->id)) + cpu_relax(); + +unlock: + spin_unlock_irqrestore(core->lock, irqflags); + + return ret; +} + static const struct clk_ops sam9x60_frac_pll_ops = { .prepare = sam9x60_frac_pll_prepare, .unprepare = sam9x60_frac_pll_unprepare, @@ -238,6 +289,15 @@ static const struct clk_ops sam9x60_frac_pll_ops = { .set_rate = sam9x60_frac_pll_set_rate, }; +static const struct clk_ops sam9x60_frac_pll_ops_chg = { + .prepare = sam9x60_frac_pll_prepare, + .unprepare = sam9x60_frac_pll_unprepare, + .is_prepared = sam9x60_frac_pll_is_prepared, + .recalc_rate = sam9x60_frac_pll_recalc_rate, + .round_rate = sam9x60_frac_pll_round_rate, + .set_rate = sam9x60_frac_pll_set_rate_chg, +}; + static int sam9x60_div_pll_prepare(struct clk_hw *hw) { struct sam9x60_pll_core *core = to_sam9x60_pll_core(hw); @@ -384,6 +444,44 @@ static int sam9x60_div_pll_set_rate(struct clk_hw *hw, unsigned long rate, return 0; } +static int sam9x60_div_pll_set_rate_chg(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + struct sam9x60_pll_core *core = to_sam9x60_pll_core(hw); + struct sam9x60_div *div = to_sam9x60_div(core); + struct regmap *regmap = core->regmap; + unsigned long irqflags; + unsigned int val, cdiv; + + div->div = DIV_ROUND_CLOSEST(parent_rate, rate) - 1; + + spin_lock_irqsave(core->lock, irqflags); + regmap_update_bits(regmap, AT91_PMC_PLL_UPDT, AT91_PMC_PLL_UPDT_ID_MSK, + core->id); + regmap_read(regmap, AT91_PMC_PLL_CTRL0, &val); + cdiv = (val & core->layout->div_mask) >> core->layout->div_shift; + + /* Stop if nothing changed. */ + if (cdiv == div->div) + goto unlock; + + regmap_update_bits(regmap, AT91_PMC_PLL_CTRL0, + core->layout->div_mask, + (div->div << core->layout->div_shift)); + + regmap_update_bits(regmap, AT91_PMC_PLL_UPDT, + AT91_PMC_PLL_UPDT_UPDATE | AT91_PMC_PLL_UPDT_ID_MSK, + AT91_PMC_PLL_UPDT_UPDATE | core->id); + + while (!sam9x60_pll_ready(regmap, core->id)) + cpu_relax(); + +unlock: + spin_unlock_irqrestore(core->lock, irqflags); + + return 0; +} + static const struct clk_ops sam9x60_div_pll_ops = { .prepare = sam9x60_div_pll_prepare, .unprepare = sam9x60_div_pll_unprepare, @@ -393,17 +491,26 @@ static const struct clk_ops sam9x60_div_pll_ops = { .set_rate = sam9x60_div_pll_set_rate, }; +static const struct clk_ops sam9x60_div_pll_ops_chg = { + .prepare = sam9x60_div_pll_prepare, + .unprepare = sam9x60_div_pll_unprepare, + .is_prepared = sam9x60_div_pll_is_prepared, + .recalc_rate = sam9x60_div_pll_recalc_rate, + .round_rate = sam9x60_div_pll_round_rate, + .set_rate = sam9x60_div_pll_set_rate_chg, +}; + struct clk_hw * __init sam9x60_clk_register_frac_pll(struct regmap *regmap, spinlock_t *lock, const char *name, const char *parent_name, struct clk_hw *parent_hw, u8 id, const struct clk_pll_characteristics *characteristics, - const struct clk_pll_layout *layout, bool critical) + const struct clk_pll_layout *layout, u32 flags) { struct sam9x60_frac *frac; struct clk_hw *hw; struct clk_init_data init; - unsigned long parent_rate, flags; + unsigned long parent_rate, irqflags; unsigned int val; int ret; @@ -417,10 +524,12 @@ sam9x60_clk_register_frac_pll(struct regmap *regmap, spinlock_t *lock, init.name = name; init.parent_names = &parent_name; init.num_parents = 1; - init.ops = &sam9x60_frac_pll_ops; - init.flags = CLK_SET_RATE_GATE; - if (critical) - init.flags |= CLK_IS_CRITICAL; + if (flags & CLK_SET_RATE_GATE) + init.ops = &sam9x60_frac_pll_ops; + else + init.ops = &sam9x60_frac_pll_ops_chg; + + init.flags = flags; frac->core.id = id; frac->core.hw.init = &init; @@ -429,7 +538,7 @@ sam9x60_clk_register_frac_pll(struct regmap *regmap, spinlock_t *lock, frac->core.regmap = regmap; frac->core.lock = lock; - spin_lock_irqsave(frac->core.lock, flags); + spin_lock_irqsave(frac->core.lock, irqflags); if (sam9x60_pll_ready(regmap, id)) { regmap_update_bits(regmap, AT91_PMC_PLL_UPDT, AT91_PMC_PLL_UPDT_ID_MSK, id); @@ -457,7 +566,7 @@ sam9x60_clk_register_frac_pll(struct regmap *regmap, spinlock_t *lock, goto free; } } - spin_unlock_irqrestore(frac->core.lock, flags); + spin_unlock_irqrestore(frac->core.lock, irqflags); hw = &frac->core.hw; ret = clk_hw_register(NULL, hw); @@ -469,7 +578,7 @@ sam9x60_clk_register_frac_pll(struct regmap *regmap, spinlock_t *lock, return hw; free: - spin_unlock_irqrestore(frac->core.lock, flags); + spin_unlock_irqrestore(frac->core.lock, irqflags); kfree(frac); return hw; } @@ -478,12 +587,12 @@ struct clk_hw * __init sam9x60_clk_register_div_pll(struct regmap *regmap, spinlock_t *lock, const char *name, const char *parent_name, u8 id, const struct clk_pll_characteristics *characteristics, - const struct clk_pll_layout *layout, bool critical) + const struct clk_pll_layout *layout, u32 flags) { struct sam9x60_div *div; struct clk_hw *hw; struct clk_init_data init; - unsigned long flags; + unsigned long irqflags; unsigned int val; int ret; @@ -497,11 +606,11 @@ sam9x60_clk_register_div_pll(struct regmap *regmap, spinlock_t *lock, init.name = name; init.parent_names = &parent_name; init.num_parents = 1; - init.ops = &sam9x60_div_pll_ops; - init.flags = CLK_SET_RATE_GATE | CLK_SET_PARENT_GATE | - CLK_SET_RATE_PARENT; - if (critical) - init.flags |= CLK_IS_CRITICAL; + if (flags & CLK_SET_RATE_GATE) + init.ops = &sam9x60_div_pll_ops; + else + init.ops = &sam9x60_div_pll_ops_chg; + init.flags = flags; div->core.id = id; div->core.hw.init = &init; @@ -510,14 +619,14 @@ sam9x60_clk_register_div_pll(struct regmap *regmap, spinlock_t *lock, div->core.regmap = regmap; div->core.lock = lock; - spin_lock_irqsave(div->core.lock, flags); + spin_lock_irqsave(div->core.lock, irqflags); regmap_update_bits(regmap, AT91_PMC_PLL_UPDT, AT91_PMC_PLL_UPDT_ID_MSK, id); regmap_read(regmap, AT91_PMC_PLL_CTRL0, &val); div->div = FIELD_GET(PMC_PLL_CTRL0_DIV_MSK, val); - spin_unlock_irqrestore(div->core.lock, flags); + spin_unlock_irqrestore(div->core.lock, irqflags); hw = &div->core.hw; ret = clk_hw_register(NULL, hw); diff --git a/drivers/clk/at91/pmc.h b/drivers/clk/at91/pmc.h index 0a9364bde339c..bedcd85ad7509 100644 --- a/drivers/clk/at91/pmc.h +++ b/drivers/clk/at91/pmc.h @@ -190,14 +190,14 @@ struct clk_hw * __init sam9x60_clk_register_div_pll(struct regmap *regmap, spinlock_t *lock, const char *name, const char *parent_name, u8 id, const struct clk_pll_characteristics *characteristics, - const struct clk_pll_layout *layout, bool critical); + const struct clk_pll_layout *layout, u32 flags); struct clk_hw * __init sam9x60_clk_register_frac_pll(struct regmap *regmap, spinlock_t *lock, const char *name, const char *parent_name, struct clk_hw *parent_hw, u8 id, const struct clk_pll_characteristics *characteristics, - const struct clk_pll_layout *layout, bool critical); + const struct clk_pll_layout *layout, u32 flags); struct clk_hw * __init at91_clk_register_programmable(struct regmap *regmap, const char *name, diff --git a/drivers/clk/at91/sam9x60.c b/drivers/clk/at91/sam9x60.c index 3c4c956035954..dd62bb2880cf0 100644 --- a/drivers/clk/at91/sam9x60.c +++ b/drivers/clk/at91/sam9x60.c @@ -228,13 +228,24 @@ static void __init sam9x60_pmc_setup(struct device_node *np) hw = sam9x60_clk_register_frac_pll(regmap, &pmc_pll_lock, "pllack_fracck", "mainck", sam9x60_pmc->chws[PMC_MAIN], 0, &plla_characteristics, - &pll_frac_layout, true); + &pll_frac_layout, + /* + * This feeds pllack_divck which + * feeds CPU. It should not be + * disabled. + */ + CLK_IS_CRITICAL | CLK_SET_RATE_GATE); if (IS_ERR(hw)) goto err_free; hw = sam9x60_clk_register_div_pll(regmap, &pmc_pll_lock, "pllack_divck", "pllack_fracck", 0, &plla_characteristics, - &pll_div_layout, true); + &pll_div_layout, + /* + * This feeds CPU. It should not + * be disabled. + */ + CLK_IS_CRITICAL | CLK_SET_RATE_GATE); if (IS_ERR(hw)) goto err_free; @@ -243,13 +254,16 @@ static void __init sam9x60_pmc_setup(struct device_node *np) hw = sam9x60_clk_register_frac_pll(regmap, &pmc_pll_lock, "upllck_fracck", "main_osc", main_osc_hw, 1, &upll_characteristics, - &pll_frac_layout, false); + &pll_frac_layout, CLK_SET_RATE_GATE); if (IS_ERR(hw)) goto err_free; hw = sam9x60_clk_register_div_pll(regmap, &pmc_pll_lock, "upllck_divck", "upllck_fracck", 1, &upll_characteristics, - &pll_div_layout, false); + &pll_div_layout, + CLK_SET_RATE_GATE | + CLK_SET_PARENT_GATE | + CLK_SET_RATE_PARENT); if (IS_ERR(hw)) goto err_free; diff --git a/drivers/clk/at91/sama7g5.c b/drivers/clk/at91/sama7g5.c index d685e22b20146..d7c2b731ad207 100644 --- a/drivers/clk/at91/sama7g5.c +++ b/drivers/clk/at91/sama7g5.c @@ -95,15 +95,15 @@ static const struct clk_pll_layout pll_layout_divio = { * @p: clock parent * @l: clock layout * @t: clock type - * @f: true if clock is critical and cannot be disabled + * @f: clock flags * @eid: export index in sama7g5->chws[] array */ static const struct { const char *n; const char *p; const struct clk_pll_layout *l; + unsigned long f; u8 t; - u8 c; u8 eid; } sama7g5_plls[][PLL_ID_MAX] = { [PLL_ID_CPU] = { @@ -111,13 +111,18 @@ static const struct { .p = "mainck", .l = &pll_layout_frac, .t = PLL_TYPE_FRAC, - .c = 1, }, + /* + * This feeds cpupll_divpmcck which feeds CPU. It should + * not be disabled. + */ + .f = CLK_IS_CRITICAL, }, { .n = "cpupll_divpmcck", .p = "cpupll_fracck", .l = &pll_layout_divpmc, .t = PLL_TYPE_DIV, - .c = 1, + /* This feeds CPU. It should not be disabled. */ + .f = CLK_IS_CRITICAL | CLK_SET_RATE_PARENT, .eid = PMC_CPUPLL, }, }, @@ -126,13 +131,22 @@ static const struct { .p = "mainck", .l = &pll_layout_frac, .t = PLL_TYPE_FRAC, - .c = 1, }, + /* + * This feeds syspll_divpmcck which may feed critial parts + * of the systems like timers. Therefore it should not be + * disabled. + */ + .f = CLK_IS_CRITICAL | CLK_SET_RATE_GATE, }, { .n = "syspll_divpmcck", .p = "syspll_fracck", .l = &pll_layout_divpmc, .t = PLL_TYPE_DIV, - .c = 1, + /* + * This may feed critial parts of the systems like timers. + * Therefore it should not be disabled. + */ + .f = CLK_IS_CRITICAL | CLK_SET_RATE_GATE, .eid = PMC_SYSPLL, }, }, @@ -141,55 +155,71 @@ static const struct { .p = "mainck", .l = &pll_layout_frac, .t = PLL_TYPE_FRAC, - .c = 1, }, + /* + * This feeds ddrpll_divpmcck which feeds DDR. It should not + * be disabled. + */ + .f = CLK_IS_CRITICAL | CLK_SET_RATE_GATE, }, { .n = "ddrpll_divpmcck", .p = "ddrpll_fracck", .l = &pll_layout_divpmc, .t = PLL_TYPE_DIV, - .c = 1, }, + /* This feeds DDR. It should not be disabled. */ + .f = CLK_IS_CRITICAL | CLK_SET_RATE_GATE, }, }, [PLL_ID_IMG] = { { .n = "imgpll_fracck", .p = "mainck", .l = &pll_layout_frac, - .t = PLL_TYPE_FRAC, }, + .t = PLL_TYPE_FRAC, + .f = CLK_SET_RATE_GATE, }, { .n = "imgpll_divpmcck", .p = "imgpll_fracck", .l = &pll_layout_divpmc, - .t = PLL_TYPE_DIV, }, + .t = PLL_TYPE_DIV, + .f = CLK_SET_RATE_GATE | CLK_SET_PARENT_GATE | + CLK_SET_RATE_PARENT, }, }, [PLL_ID_BAUD] = { { .n = "baudpll_fracck", .p = "mainck", .l = &pll_layout_frac, - .t = PLL_TYPE_FRAC, }, + .t = PLL_TYPE_FRAC, + .f = CLK_SET_RATE_GATE, }, { .n = "baudpll_divpmcck", .p = "baudpll_fracck", .l = &pll_layout_divpmc, - .t = PLL_TYPE_DIV, }, + .t = PLL_TYPE_DIV, + .f = CLK_SET_RATE_GATE | CLK_SET_PARENT_GATE | + CLK_SET_RATE_PARENT, }, }, [PLL_ID_AUDIO] = { { .n = "audiopll_fracck", .p = "main_xtal", .l = &pll_layout_frac, - .t = PLL_TYPE_FRAC, }, + .t = PLL_TYPE_FRAC, + .f = CLK_SET_RATE_GATE, }, { .n = "audiopll_divpmcck", .p = "audiopll_fracck", .l = &pll_layout_divpmc, .t = PLL_TYPE_DIV, + .f = CLK_SET_RATE_GATE | CLK_SET_PARENT_GATE | + CLK_SET_RATE_PARENT, .eid = PMC_AUDIOPMCPLL, }, { .n = "audiopll_diviock", .p = "audiopll_fracck", .l = &pll_layout_divio, .t = PLL_TYPE_DIV, + .f = CLK_SET_RATE_GATE | CLK_SET_PARENT_GATE | + CLK_SET_RATE_PARENT, .eid = PMC_AUDIOIOPLL, }, }, @@ -197,12 +227,15 @@ static const struct { { .n = "ethpll_fracck", .p = "main_xtal", .l = &pll_layout_frac, - .t = PLL_TYPE_FRAC, }, + .t = PLL_TYPE_FRAC, + .f = CLK_SET_RATE_GATE, }, { .n = "ethpll_divpmcck", .p = "ethpll_fracck", .l = &pll_layout_divpmc, - .t = PLL_TYPE_DIV, }, + .t = PLL_TYPE_DIV, + .f = CLK_SET_RATE_GATE | CLK_SET_PARENT_GATE | + CLK_SET_RATE_PARENT, }, }, }; @@ -890,7 +923,7 @@ static void __init sama7g5_pmc_setup(struct device_node *np) sama7g5_plls[i][j].p, parent_hw, i, &pll_characteristics, sama7g5_plls[i][j].l, - sama7g5_plls[i][j].c); + sama7g5_plls[i][j].f); break; case PLL_TYPE_DIV: @@ -899,7 +932,7 @@ static void __init sama7g5_pmc_setup(struct device_node *np) sama7g5_plls[i][j].p, i, &pll_characteristics, sama7g5_plls[i][j].l, - sama7g5_plls[i][j].c); + sama7g5_plls[i][j].f); break; default: -- GitLab From 4011f03ee4756df3091ad0c2cfb0593bee8ecdf1 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Thu, 19 Nov 2020 17:43:13 +0200 Subject: [PATCH 0908/1894] clk: at91: sama7g5: remove mck0 from parent list of other clocks MCK0 is changed at runtime by DVFS. Due to this, since not all IPs are glitch free aware at MCK0 changes, remove MCK0 from parent list of other clocks (e.g. generic clock, programmable/system clock, MCKX). Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/1605800597-16720-8-git-send-email-claudiu.beznea@microchip.com Signed-off-by: Stephen Boyd --- drivers/clk/at91/sama7g5.c | 55 ++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 29 deletions(-) diff --git a/drivers/clk/at91/sama7g5.c b/drivers/clk/at91/sama7g5.c index d7c2b731ad207..335e9c943c651 100644 --- a/drivers/clk/at91/sama7g5.c +++ b/drivers/clk/at91/sama7g5.c @@ -280,7 +280,7 @@ static const struct { .ep = { "syspll_divpmcck", "ddrpll_divpmcck", "imgpll_divpmcck", }, .ep_mux_table = { 5, 6, 7, }, .ep_count = 3, - .ep_chg_id = 6, }, + .ep_chg_id = 5, }, { .n = "mck4", .id = 4, @@ -313,7 +313,7 @@ static const struct { }; /* Mux table for programmable clocks. */ -static u32 sama7g5_prog_mux_table[] = { 0, 1, 2, 3, 5, 6, 7, 8, 9, 10, }; +static u32 sama7g5_prog_mux_table[] = { 0, 1, 2, 5, 6, 7, 8, 9, 10, }; /** * Peripheral clock description @@ -436,7 +436,7 @@ static const struct { .pp = { "audiopll_divpmcck", }, .pp_mux_table = { 9, }, .pp_count = 1, - .pp_chg_id = 4, }, + .pp_chg_id = 3, }, { .n = "csi_gclk", .id = 33, @@ -548,7 +548,7 @@ static const struct { .pp = { "ethpll_divpmcck", }, .pp_mux_table = { 10, }, .pp_count = 1, - .pp_chg_id = 4, }, + .pp_chg_id = 3, }, { .n = "gmac1_gclk", .id = 52, @@ -580,7 +580,7 @@ static const struct { .pp = { "syspll_divpmcck", "audiopll_divpmcck", }, .pp_mux_table = { 5, 9, }, .pp_count = 2, - .pp_chg_id = 5, }, + .pp_chg_id = 4, }, { .n = "i2smcc1_gclk", .id = 58, @@ -588,7 +588,7 @@ static const struct { .pp = { "syspll_divpmcck", "audiopll_divpmcck", }, .pp_mux_table = { 5, 9, }, .pp_count = 2, - .pp_chg_id = 5, }, + .pp_chg_id = 4, }, { .n = "mcan0_gclk", .id = 61, @@ -730,7 +730,7 @@ static const struct { .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, .pp_mux_table = { 5, 8, }, .pp_count = 2, - .pp_chg_id = 5, }, + .pp_chg_id = 4, }, { .n = "sdmmc1_gclk", .id = 81, @@ -738,7 +738,7 @@ static const struct { .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, .pp_mux_table = { 5, 8, }, .pp_count = 2, - .pp_chg_id = 5, }, + .pp_chg_id = 4, }, { .n = "sdmmc2_gclk", .id = 82, @@ -746,7 +746,7 @@ static const struct { .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, .pp_mux_table = { 5, 8, }, .pp_count = 2, - .pp_chg_id = 5, }, + .pp_chg_id = 4, }, { .n = "spdifrx_gclk", .id = 84, @@ -754,7 +754,7 @@ static const struct { .pp = { "syspll_divpmcck", "audiopll_divpmcck", }, .pp_mux_table = { 5, 9, }, .pp_count = 2, - .pp_chg_id = 5, }, + .pp_chg_id = 4, }, { .n = "spdiftx_gclk", .id = 85, @@ -762,7 +762,7 @@ static const struct { .pp = { "syspll_divpmcck", "audiopll_divpmcck", }, .pp_mux_table = { 5, 9, }, .pp_count = 2, - .pp_chg_id = 5, }, + .pp_chg_id = 4, }, { .n = "tcb0_ch0_gclk", .id = 88, @@ -961,9 +961,8 @@ static void __init sama7g5_pmc_setup(struct device_node *np) parent_names[0] = md_slck_name; parent_names[1] = td_slck_name; parent_names[2] = "mainck"; - parent_names[3] = "mck0"; for (i = 0; i < ARRAY_SIZE(sama7g5_mckx); i++) { - u8 num_parents = 4 + sama7g5_mckx[i].ep_count; + u8 num_parents = 3 + sama7g5_mckx[i].ep_count; u32 *mux_table; mux_table = kmalloc_array(num_parents, sizeof(*mux_table), @@ -971,10 +970,10 @@ static void __init sama7g5_pmc_setup(struct device_node *np) if (!mux_table) goto err_free; - SAMA7G5_INIT_TABLE(mux_table, 4); - SAMA7G5_FILL_TABLE(&mux_table[4], sama7g5_mckx[i].ep_mux_table, + SAMA7G5_INIT_TABLE(mux_table, 3); + SAMA7G5_FILL_TABLE(&mux_table[3], sama7g5_mckx[i].ep_mux_table, sama7g5_mckx[i].ep_count); - SAMA7G5_FILL_TABLE(&parent_names[4], sama7g5_mckx[i].ep, + SAMA7G5_FILL_TABLE(&parent_names[3], sama7g5_mckx[i].ep, sama7g5_mckx[i].ep_count); hw = at91_clk_sama7g5_register_master(regmap, sama7g5_mckx[i].n, @@ -997,20 +996,19 @@ static void __init sama7g5_pmc_setup(struct device_node *np) parent_names[0] = md_slck_name; parent_names[1] = td_slck_name; parent_names[2] = "mainck"; - parent_names[3] = "mck0"; - parent_names[4] = "syspll_divpmcck"; - parent_names[5] = "ddrpll_divpmcck"; - parent_names[6] = "imgpll_divpmcck"; - parent_names[7] = "baudpll_divpmcck"; - parent_names[8] = "audiopll_divpmcck"; - parent_names[9] = "ethpll_divpmcck"; + parent_names[3] = "syspll_divpmcck"; + parent_names[4] = "ddrpll_divpmcck"; + parent_names[5] = "imgpll_divpmcck"; + parent_names[6] = "baudpll_divpmcck"; + parent_names[7] = "audiopll_divpmcck"; + parent_names[8] = "ethpll_divpmcck"; for (i = 0; i < 8; i++) { char name[6]; snprintf(name, sizeof(name), "prog%d", i); hw = at91_clk_register_programmable(regmap, name, parent_names, - 10, i, + 9, i, &programmable_layout, sama7g5_prog_mux_table); if (IS_ERR(hw)) @@ -1047,9 +1045,8 @@ static void __init sama7g5_pmc_setup(struct device_node *np) parent_names[0] = md_slck_name; parent_names[1] = td_slck_name; parent_names[2] = "mainck"; - parent_names[3] = "mck0"; for (i = 0; i < ARRAY_SIZE(sama7g5_gck); i++) { - u8 num_parents = 4 + sama7g5_gck[i].pp_count; + u8 num_parents = 3 + sama7g5_gck[i].pp_count; u32 *mux_table; mux_table = kmalloc_array(num_parents, sizeof(*mux_table), @@ -1057,10 +1054,10 @@ static void __init sama7g5_pmc_setup(struct device_node *np) if (!mux_table) goto err_free; - SAMA7G5_INIT_TABLE(mux_table, 4); - SAMA7G5_FILL_TABLE(&mux_table[4], sama7g5_gck[i].pp_mux_table, + SAMA7G5_INIT_TABLE(mux_table, 3); + SAMA7G5_FILL_TABLE(&mux_table[3], sama7g5_gck[i].pp_mux_table, sama7g5_gck[i].pp_count); - SAMA7G5_FILL_TABLE(&parent_names[4], sama7g5_gck[i].pp, + SAMA7G5_FILL_TABLE(&parent_names[3], sama7g5_gck[i].pp, sama7g5_gck[i].pp_count); hw = at91_clk_register_generated(regmap, &pmc_pcr_lock, -- GitLab From f803858af84e1e6916edfbc5ae0fac403c02ee46 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Thu, 19 Nov 2020 17:43:14 +0200 Subject: [PATCH 0909/1894] clk: at91: sama7g5: decrease lower limit for MCK0 rate On SAMA7G5 CPU clock is changed at run-time by DVFS. Since MCK0 and CPU clock shares the same parent clock (CPUPLL clock) the MCK0 is also changed by DVFS to avoid over/under clocking of MCK0 consumers. The lower limit is changed to be able to set MCK0 accordingly by DVFS. Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/1605800597-16720-9-git-send-email-claudiu.beznea@microchip.com Signed-off-by: Stephen Boyd --- drivers/clk/at91/sama7g5.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/at91/sama7g5.c b/drivers/clk/at91/sama7g5.c index 335e9c943c651..29d9781e67122 100644 --- a/drivers/clk/at91/sama7g5.c +++ b/drivers/clk/at91/sama7g5.c @@ -807,7 +807,7 @@ static const struct clk_pll_characteristics pll_characteristics = { /* MCK0 characteristics. */ static const struct clk_master_characteristics mck0_characteristics = { - .output = { .min = 140000000, .max = 200000000 }, + .output = { .min = 50000000, .max = 200000000 }, .divisors = { 1, 2, 4, 3, 5 }, .have_div3_pres = 1, }; -- GitLab From 120d5d8b4614ee26c576b29377a968093948473f Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Thu, 19 Nov 2020 17:43:15 +0200 Subject: [PATCH 0910/1894] clk: at91: sama7g5: do not allow cpu pll to go higher than 1GHz Since CPU PLL feeds both CPU clock and MCK0, MCK0 cannot go higher than 200MHz and MCK0 maximum prescaller is 5 limit the CPU PLL at 1GHz to avoid MCK0 overclocking while CPU PLL is changed by DVFS. Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/1605800597-16720-10-git-send-email-claudiu.beznea@microchip.com Signed-off-by: Stephen Boyd --- drivers/clk/at91/sama7g5.c | 61 +++++++++++++++++++++++++++++--------- 1 file changed, 47 insertions(+), 14 deletions(-) diff --git a/drivers/clk/at91/sama7g5.c b/drivers/clk/at91/sama7g5.c index 29d9781e67122..e0c4d2eb9f597 100644 --- a/drivers/clk/at91/sama7g5.c +++ b/drivers/clk/at91/sama7g5.c @@ -89,11 +89,40 @@ static const struct clk_pll_layout pll_layout_divio = { .endiv_shift = 30, }; +/* + * CPU PLL output range. + * Notice: The upper limit has been setup to 1000000002 due to hardware + * block which cannot output exactly 1GHz. + */ +static const struct clk_range cpu_pll_outputs[] = { + { .min = 2343750, .max = 1000000002 }, +}; + +/* PLL output range. */ +static const struct clk_range pll_outputs[] = { + { .min = 2343750, .max = 1200000000 }, +}; + +/* CPU PLL characteristics. */ +static const struct clk_pll_characteristics cpu_pll_characteristics = { + .input = { .min = 12000000, .max = 50000000 }, + .num_output = ARRAY_SIZE(cpu_pll_outputs), + .output = cpu_pll_outputs, +}; + +/* PLL characteristics. */ +static const struct clk_pll_characteristics pll_characteristics = { + .input = { .min = 12000000, .max = 50000000 }, + .num_output = ARRAY_SIZE(pll_outputs), + .output = pll_outputs, +}; + /** * PLL clocks description * @n: clock name * @p: clock parent * @l: clock layout + * @c: clock characteristics * @t: clock type * @f: clock flags * @eid: export index in sama7g5->chws[] array @@ -102,6 +131,7 @@ static const struct { const char *n; const char *p; const struct clk_pll_layout *l; + const struct clk_pll_characteristics *c; unsigned long f; u8 t; u8 eid; @@ -110,6 +140,7 @@ static const struct { { .n = "cpupll_fracck", .p = "mainck", .l = &pll_layout_frac, + .c = &cpu_pll_characteristics, .t = PLL_TYPE_FRAC, /* * This feeds cpupll_divpmcck which feeds CPU. It should @@ -120,6 +151,7 @@ static const struct { { .n = "cpupll_divpmcck", .p = "cpupll_fracck", .l = &pll_layout_divpmc, + .c = &cpu_pll_characteristics, .t = PLL_TYPE_DIV, /* This feeds CPU. It should not be disabled. */ .f = CLK_IS_CRITICAL | CLK_SET_RATE_PARENT, @@ -130,6 +162,7 @@ static const struct { { .n = "syspll_fracck", .p = "mainck", .l = &pll_layout_frac, + .c = &pll_characteristics, .t = PLL_TYPE_FRAC, /* * This feeds syspll_divpmcck which may feed critial parts @@ -141,6 +174,7 @@ static const struct { { .n = "syspll_divpmcck", .p = "syspll_fracck", .l = &pll_layout_divpmc, + .c = &pll_characteristics, .t = PLL_TYPE_DIV, /* * This may feed critial parts of the systems like timers. @@ -154,6 +188,7 @@ static const struct { { .n = "ddrpll_fracck", .p = "mainck", .l = &pll_layout_frac, + .c = &pll_characteristics, .t = PLL_TYPE_FRAC, /* * This feeds ddrpll_divpmcck which feeds DDR. It should not @@ -164,6 +199,7 @@ static const struct { { .n = "ddrpll_divpmcck", .p = "ddrpll_fracck", .l = &pll_layout_divpmc, + .c = &pll_characteristics, .t = PLL_TYPE_DIV, /* This feeds DDR. It should not be disabled. */ .f = CLK_IS_CRITICAL | CLK_SET_RATE_GATE, }, @@ -173,12 +209,14 @@ static const struct { { .n = "imgpll_fracck", .p = "mainck", .l = &pll_layout_frac, + .c = &pll_characteristics, .t = PLL_TYPE_FRAC, .f = CLK_SET_RATE_GATE, }, { .n = "imgpll_divpmcck", .p = "imgpll_fracck", .l = &pll_layout_divpmc, + .c = &pll_characteristics, .t = PLL_TYPE_DIV, .f = CLK_SET_RATE_GATE | CLK_SET_PARENT_GATE | CLK_SET_RATE_PARENT, }, @@ -188,12 +226,14 @@ static const struct { { .n = "baudpll_fracck", .p = "mainck", .l = &pll_layout_frac, + .c = &pll_characteristics, .t = PLL_TYPE_FRAC, .f = CLK_SET_RATE_GATE, }, { .n = "baudpll_divpmcck", .p = "baudpll_fracck", .l = &pll_layout_divpmc, + .c = &pll_characteristics, .t = PLL_TYPE_DIV, .f = CLK_SET_RATE_GATE | CLK_SET_PARENT_GATE | CLK_SET_RATE_PARENT, }, @@ -203,12 +243,14 @@ static const struct { { .n = "audiopll_fracck", .p = "main_xtal", .l = &pll_layout_frac, + .c = &pll_characteristics, .t = PLL_TYPE_FRAC, .f = CLK_SET_RATE_GATE, }, { .n = "audiopll_divpmcck", .p = "audiopll_fracck", .l = &pll_layout_divpmc, + .c = &pll_characteristics, .t = PLL_TYPE_DIV, .f = CLK_SET_RATE_GATE | CLK_SET_PARENT_GATE | CLK_SET_RATE_PARENT, @@ -217,6 +259,7 @@ static const struct { { .n = "audiopll_diviock", .p = "audiopll_fracck", .l = &pll_layout_divio, + .c = &pll_characteristics, .t = PLL_TYPE_DIV, .f = CLK_SET_RATE_GATE | CLK_SET_PARENT_GATE | CLK_SET_RATE_PARENT, @@ -227,12 +270,14 @@ static const struct { { .n = "ethpll_fracck", .p = "main_xtal", .l = &pll_layout_frac, + .c = &pll_characteristics, .t = PLL_TYPE_FRAC, .f = CLK_SET_RATE_GATE, }, { .n = "ethpll_divpmcck", .p = "ethpll_fracck", .l = &pll_layout_divpmc, + .c = &pll_characteristics, .t = PLL_TYPE_DIV, .f = CLK_SET_RATE_GATE | CLK_SET_PARENT_GATE | CLK_SET_RATE_PARENT, }, @@ -793,18 +838,6 @@ static const struct { .pp_chg_id = INT_MIN, }, }; -/* PLL output range. */ -static const struct clk_range pll_outputs[] = { - { .min = 2343750, .max = 1200000000 }, -}; - -/* PLL characteristics. */ -static const struct clk_pll_characteristics pll_characteristics = { - .input = { .min = 12000000, .max = 50000000 }, - .num_output = ARRAY_SIZE(pll_outputs), - .output = pll_outputs, -}; - /* MCK0 characteristics. */ static const struct clk_master_characteristics mck0_characteristics = { .output = { .min = 50000000, .max = 200000000 }, @@ -921,7 +954,7 @@ static void __init sama7g5_pmc_setup(struct device_node *np) hw = sam9x60_clk_register_frac_pll(regmap, &pmc_pll_lock, sama7g5_plls[i][j].n, sama7g5_plls[i][j].p, parent_hw, i, - &pll_characteristics, + sama7g5_plls[i][j].c, sama7g5_plls[i][j].l, sama7g5_plls[i][j].f); break; @@ -930,7 +963,7 @@ static void __init sama7g5_pmc_setup(struct device_node *np) hw = sam9x60_clk_register_div_pll(regmap, &pmc_pll_lock, sama7g5_plls[i][j].n, sama7g5_plls[i][j].p, i, - &pll_characteristics, + sama7g5_plls[i][j].c, sama7g5_plls[i][j].l, sama7g5_plls[i][j].f); break; -- GitLab From 7a110b9107ed8fe27277988cdb4d18e7043b7252 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Thu, 19 Nov 2020 17:43:16 +0200 Subject: [PATCH 0911/1894] clk: at91: clk-master: re-factor master clock Re-factor master clock driver by splitting it into 2 clocks: prescaller and divider clocks. Based on registered clock flags the prescaler's rate could be changed at runtime. This is necessary for platforms supporting DVFS (e.g. SAMA7G5) where master clock could be changed at run-time. Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/1605800597-16720-11-git-send-email-claudiu.beznea@microchip.com Signed-off-by: Stephen Boyd --- drivers/clk/at91/at91rm9200.c | 21 ++- drivers/clk/at91/at91sam9260.c | 26 ++- drivers/clk/at91/at91sam9g45.c | 32 +++- drivers/clk/at91/at91sam9n12.c | 36 ++-- drivers/clk/at91/at91sam9rl.c | 23 ++- drivers/clk/at91/at91sam9x5.c | 28 ++- drivers/clk/at91/clk-master.c | 335 ++++++++++++++++++++++++++++----- drivers/clk/at91/dt-compat.c | 15 +- drivers/clk/at91/pmc.h | 16 +- drivers/clk/at91/sam9x60.c | 23 ++- drivers/clk/at91/sama5d2.c | 42 +++-- drivers/clk/at91/sama5d3.c | 38 ++-- drivers/clk/at91/sama5d4.c | 40 ++-- drivers/clk/at91/sama7g5.c | 13 +- 14 files changed, 542 insertions(+), 146 deletions(-) diff --git a/drivers/clk/at91/at91rm9200.c b/drivers/clk/at91/at91rm9200.c index 2c3d8e6ca63ca..0fad1009f3152 100644 --- a/drivers/clk/at91/at91rm9200.c +++ b/drivers/clk/at91/at91rm9200.c @@ -7,6 +7,8 @@ #include "pmc.h" +static DEFINE_SPINLOCK(rm9200_mck_lock); + struct sck { char *n; char *p; @@ -137,9 +139,20 @@ static void __init at91rm9200_pmc_setup(struct device_node *np) parent_names[1] = "mainck"; parent_names[2] = "pllack"; parent_names[3] = "pllbck"; - hw = at91_clk_register_master(regmap, "masterck", 4, parent_names, - &at91rm9200_master_layout, - &rm9200_mck_characteristics); + hw = at91_clk_register_master_pres(regmap, "masterck_pres", 4, + parent_names, + &at91rm9200_master_layout, + &rm9200_mck_characteristics, + &rm9200_mck_lock, CLK_SET_RATE_GATE, + INT_MIN); + if (IS_ERR(hw)) + goto err_free; + + hw = at91_clk_register_master_div(regmap, "masterck_div", + "masterck_pres", + &at91rm9200_master_layout, + &rm9200_mck_characteristics, + &rm9200_mck_lock, CLK_SET_RATE_GATE); if (IS_ERR(hw)) goto err_free; @@ -181,7 +194,7 @@ static void __init at91rm9200_pmc_setup(struct device_node *np) for (i = 0; i < ARRAY_SIZE(at91rm9200_periphck); i++) { hw = at91_clk_register_peripheral(regmap, at91rm9200_periphck[i].n, - "masterck", + "masterck_div", at91rm9200_periphck[i].id); if (IS_ERR(hw)) goto err_free; diff --git a/drivers/clk/at91/at91sam9260.c b/drivers/clk/at91/at91sam9260.c index bb81ff731ad83..ceb5495f723af 100644 --- a/drivers/clk/at91/at91sam9260.c +++ b/drivers/clk/at91/at91sam9260.c @@ -32,6 +32,8 @@ struct at91sam926x_data { bool has_slck; }; +static DEFINE_SPINLOCK(at91sam9260_mck_lock); + static const struct clk_master_characteristics sam9260_mck_characteristics = { .output = { .min = 0, .max = 105000000 }, .divisors = { 1, 2, 4, 0 }, @@ -218,8 +220,8 @@ static const struct sck at91sam9261_systemck[] = { { .n = "pck1", .p = "prog1", .id = 9 }, { .n = "pck2", .p = "prog2", .id = 10 }, { .n = "pck3", .p = "prog3", .id = 11 }, - { .n = "hclk0", .p = "masterck", .id = 16 }, - { .n = "hclk1", .p = "masterck", .id = 17 }, + { .n = "hclk0", .p = "masterck_div", .id = 16 }, + { .n = "hclk1", .p = "masterck_div", .id = 17 }, }; static const struct pck at91sam9261_periphck[] = { @@ -413,9 +415,21 @@ static void __init at91sam926x_pmc_setup(struct device_node *np, parent_names[1] = "mainck"; parent_names[2] = "pllack"; parent_names[3] = "pllbck"; - hw = at91_clk_register_master(regmap, "masterck", 4, parent_names, - &at91rm9200_master_layout, - data->mck_characteristics); + hw = at91_clk_register_master_pres(regmap, "masterck_pres", 4, + parent_names, + &at91rm9200_master_layout, + data->mck_characteristics, + &at91sam9260_mck_lock, + CLK_SET_RATE_GATE, INT_MIN); + if (IS_ERR(hw)) + goto err_free; + + hw = at91_clk_register_master_div(regmap, "masterck_div", + "masterck_pres", + &at91rm9200_master_layout, + data->mck_characteristics, + &at91sam9260_mck_lock, + CLK_SET_RATE_GATE); if (IS_ERR(hw)) goto err_free; @@ -457,7 +471,7 @@ static void __init at91sam926x_pmc_setup(struct device_node *np, for (i = 0; i < data->num_pck; i++) { hw = at91_clk_register_peripheral(regmap, data->pck[i].n, - "masterck", + "masterck_div", data->pck[i].id); if (IS_ERR(hw)) goto err_free; diff --git a/drivers/clk/at91/at91sam9g45.c b/drivers/clk/at91/at91sam9g45.c index cb4a406ed15dd..0214333dedd33 100644 --- a/drivers/clk/at91/at91sam9g45.c +++ b/drivers/clk/at91/at91sam9g45.c @@ -7,6 +7,8 @@ #include "pmc.h" +static DEFINE_SPINLOCK(at91sam9g45_mck_lock); + static const struct clk_master_characteristics mck_characteristics = { .output = { .min = 0, .max = 133333333 }, .divisors = { 1, 2, 4, 3 }, @@ -40,10 +42,10 @@ static const struct { char *p; u8 id; } at91sam9g45_systemck[] = { - { .n = "ddrck", .p = "masterck", .id = 2 }, - { .n = "uhpck", .p = "usbck", .id = 6 }, - { .n = "pck0", .p = "prog0", .id = 8 }, - { .n = "pck1", .p = "prog1", .id = 9 }, + { .n = "ddrck", .p = "masterck_div", .id = 2 }, + { .n = "uhpck", .p = "usbck", .id = 6 }, + { .n = "pck0", .p = "prog0", .id = 8 }, + { .n = "pck1", .p = "prog1", .id = 9 }, }; struct pck { @@ -148,9 +150,21 @@ static void __init at91sam9g45_pmc_setup(struct device_node *np) parent_names[1] = "mainck"; parent_names[2] = "plladivck"; parent_names[3] = "utmick"; - hw = at91_clk_register_master(regmap, "masterck", 4, parent_names, - &at91rm9200_master_layout, - &mck_characteristics); + hw = at91_clk_register_master_pres(regmap, "masterck_pres", 4, + parent_names, + &at91rm9200_master_layout, + &mck_characteristics, + &at91sam9g45_mck_lock, + CLK_SET_RATE_GATE, INT_MIN); + if (IS_ERR(hw)) + goto err_free; + + hw = at91_clk_register_master_div(regmap, "masterck_div", + "masterck_pres", + &at91rm9200_master_layout, + &mck_characteristics, + &at91sam9g45_mck_lock, + CLK_SET_RATE_GATE); if (IS_ERR(hw)) goto err_free; @@ -166,7 +180,7 @@ static void __init at91sam9g45_pmc_setup(struct device_node *np) parent_names[1] = "mainck"; parent_names[2] = "plladivck"; parent_names[3] = "utmick"; - parent_names[4] = "masterck"; + parent_names[4] = "masterck_div"; for (i = 0; i < 2; i++) { char name[6]; @@ -195,7 +209,7 @@ static void __init at91sam9g45_pmc_setup(struct device_node *np) for (i = 0; i < ARRAY_SIZE(at91sam9g45_periphck); i++) { hw = at91_clk_register_peripheral(regmap, at91sam9g45_periphck[i].n, - "masterck", + "masterck_div", at91sam9g45_periphck[i].id); if (IS_ERR(hw)) goto err_free; diff --git a/drivers/clk/at91/at91sam9n12.c b/drivers/clk/at91/at91sam9n12.c index 93f7eb216122a..f9db5316a7f1b 100644 --- a/drivers/clk/at91/at91sam9n12.c +++ b/drivers/clk/at91/at91sam9n12.c @@ -7,6 +7,8 @@ #include "pmc.h" +static DEFINE_SPINLOCK(at91sam9n12_mck_lock); + static const struct clk_master_characteristics mck_characteristics = { .output = { .min = 0, .max = 133333333 }, .divisors = { 1, 2, 4, 3 }, @@ -54,12 +56,12 @@ static const struct { char *p; u8 id; } at91sam9n12_systemck[] = { - { .n = "ddrck", .p = "masterck", .id = 2 }, - { .n = "lcdck", .p = "masterck", .id = 3 }, - { .n = "uhpck", .p = "usbck", .id = 6 }, - { .n = "udpck", .p = "usbck", .id = 7 }, - { .n = "pck0", .p = "prog0", .id = 8 }, - { .n = "pck1", .p = "prog1", .id = 9 }, + { .n = "ddrck", .p = "masterck_div", .id = 2 }, + { .n = "lcdck", .p = "masterck_div", .id = 3 }, + { .n = "uhpck", .p = "usbck", .id = 6 }, + { .n = "udpck", .p = "usbck", .id = 7 }, + { .n = "pck0", .p = "prog0", .id = 8 }, + { .n = "pck1", .p = "prog1", .id = 9 }, }; static const struct clk_pcr_layout at91sam9n12_pcr_layout = { @@ -175,9 +177,21 @@ static void __init at91sam9n12_pmc_setup(struct device_node *np) parent_names[1] = "mainck"; parent_names[2] = "plladivck"; parent_names[3] = "pllbck"; - hw = at91_clk_register_master(regmap, "masterck", 4, parent_names, - &at91sam9x5_master_layout, - &mck_characteristics); + hw = at91_clk_register_master_pres(regmap, "masterck_pres", 4, + parent_names, + &at91sam9x5_master_layout, + &mck_characteristics, + &at91sam9n12_mck_lock, + CLK_SET_RATE_GATE, INT_MIN); + if (IS_ERR(hw)) + goto err_free; + + hw = at91_clk_register_master_div(regmap, "masterck_div", + "masterck_pres", + &at91sam9x5_master_layout, + &mck_characteristics, + &at91sam9n12_mck_lock, + CLK_SET_RATE_GATE); if (IS_ERR(hw)) goto err_free; @@ -191,7 +205,7 @@ static void __init at91sam9n12_pmc_setup(struct device_node *np) parent_names[1] = "mainck"; parent_names[2] = "plladivck"; parent_names[3] = "pllbck"; - parent_names[4] = "masterck"; + parent_names[4] = "masterck_div"; for (i = 0; i < 2; i++) { char name[6]; @@ -221,7 +235,7 @@ static void __init at91sam9n12_pmc_setup(struct device_node *np) hw = at91_clk_register_sam9x5_peripheral(regmap, &pmc_pcr_lock, &at91sam9n12_pcr_layout, at91sam9n12_periphck[i].n, - "masterck", + "masterck_div", at91sam9n12_periphck[i].id, &range, INT_MIN); if (IS_ERR(hw)) diff --git a/drivers/clk/at91/at91sam9rl.c b/drivers/clk/at91/at91sam9rl.c index a343eb69bb353..66736e03cfef4 100644 --- a/drivers/clk/at91/at91sam9rl.c +++ b/drivers/clk/at91/at91sam9rl.c @@ -7,6 +7,8 @@ #include "pmc.h" +static DEFINE_SPINLOCK(sam9rl_mck_lock); + static const struct clk_master_characteristics sam9rl_mck_characteristics = { .output = { .min = 0, .max = 94000000 }, .divisors = { 1, 2, 4, 0 }, @@ -117,9 +119,20 @@ static void __init at91sam9rl_pmc_setup(struct device_node *np) parent_names[1] = "mainck"; parent_names[2] = "pllack"; parent_names[3] = "utmick"; - hw = at91_clk_register_master(regmap, "masterck", 4, parent_names, - &at91rm9200_master_layout, - &sam9rl_mck_characteristics); + hw = at91_clk_register_master_pres(regmap, "masterck_pres", 4, + parent_names, + &at91rm9200_master_layout, + &sam9rl_mck_characteristics, + &sam9rl_mck_lock, CLK_SET_RATE_GATE, + INT_MIN); + if (IS_ERR(hw)) + goto err_free; + + hw = at91_clk_register_master_div(regmap, "masterck_div", + "masterck_pres", + &at91rm9200_master_layout, + &sam9rl_mck_characteristics, + &sam9rl_mck_lock, CLK_SET_RATE_GATE); if (IS_ERR(hw)) goto err_free; @@ -129,7 +142,7 @@ static void __init at91sam9rl_pmc_setup(struct device_node *np) parent_names[1] = "mainck"; parent_names[2] = "pllack"; parent_names[3] = "utmick"; - parent_names[4] = "masterck"; + parent_names[4] = "masterck_div"; for (i = 0; i < 2; i++) { char name[6]; @@ -158,7 +171,7 @@ static void __init at91sam9rl_pmc_setup(struct device_node *np) for (i = 0; i < ARRAY_SIZE(at91sam9rl_periphck); i++) { hw = at91_clk_register_peripheral(regmap, at91sam9rl_periphck[i].n, - "masterck", + "masterck_div", at91sam9rl_periphck[i].id); if (IS_ERR(hw)) goto err_free; diff --git a/drivers/clk/at91/at91sam9x5.c b/drivers/clk/at91/at91sam9x5.c index 22b9aad9efb8d..79b9d3667228d 100644 --- a/drivers/clk/at91/at91sam9x5.c +++ b/drivers/clk/at91/at91sam9x5.c @@ -7,6 +7,8 @@ #include "pmc.h" +static DEFINE_SPINLOCK(mck_lock); + static const struct clk_master_characteristics mck_characteristics = { .output = { .min = 0, .max = 133333333 }, .divisors = { 1, 2, 4, 3 }, @@ -41,7 +43,7 @@ static const struct { char *p; u8 id; } at91sam9x5_systemck[] = { - { .n = "ddrck", .p = "masterck", .id = 2 }, + { .n = "ddrck", .p = "masterck_div", .id = 2 }, { .n = "smdck", .p = "smdclk", .id = 4 }, { .n = "uhpck", .p = "usbck", .id = 6 }, { .n = "udpck", .p = "usbck", .id = 7 }, @@ -196,9 +198,19 @@ static void __init at91sam9x5_pmc_setup(struct device_node *np, parent_names[1] = "mainck"; parent_names[2] = "plladivck"; parent_names[3] = "utmick"; - hw = at91_clk_register_master(regmap, "masterck", 4, parent_names, - &at91sam9x5_master_layout, - &mck_characteristics); + hw = at91_clk_register_master_pres(regmap, "masterck_pres", 4, + parent_names, + &at91sam9x5_master_layout, + &mck_characteristics, &mck_lock, + CLK_SET_RATE_GATE, INT_MIN); + if (IS_ERR(hw)) + goto err_free; + + hw = at91_clk_register_master_div(regmap, "masterck_div", + "masterck_pres", + &at91sam9x5_master_layout, + &mck_characteristics, &mck_lock, + CLK_SET_RATE_GATE); if (IS_ERR(hw)) goto err_free; @@ -218,7 +230,7 @@ static void __init at91sam9x5_pmc_setup(struct device_node *np, parent_names[1] = "mainck"; parent_names[2] = "plladivck"; parent_names[3] = "utmick"; - parent_names[4] = "masterck"; + parent_names[4] = "masterck_div"; for (i = 0; i < 2; i++) { char name[6]; @@ -245,7 +257,7 @@ static void __init at91sam9x5_pmc_setup(struct device_node *np, } if (has_lcdck) { - hw = at91_clk_register_system(regmap, "lcdck", "masterck", 3); + hw = at91_clk_register_system(regmap, "lcdck", "masterck_div", 3); if (IS_ERR(hw)) goto err_free; @@ -256,7 +268,7 @@ static void __init at91sam9x5_pmc_setup(struct device_node *np, hw = at91_clk_register_sam9x5_peripheral(regmap, &pmc_pcr_lock, &at91sam9x5_pcr_layout, at91sam9x5_periphck[i].n, - "masterck", + "masterck_div", at91sam9x5_periphck[i].id, &range, INT_MIN); if (IS_ERR(hw)) @@ -269,7 +281,7 @@ static void __init at91sam9x5_pmc_setup(struct device_node *np, hw = at91_clk_register_sam9x5_peripheral(regmap, &pmc_pcr_lock, &at91sam9x5_pcr_layout, extra_pcks[i].n, - "masterck", + "masterck_div", extra_pcks[i].id, &range, INT_MIN); if (IS_ERR(hw)) diff --git a/drivers/clk/at91/clk-master.c b/drivers/clk/at91/clk-master.c index aafd003b30cfb..a80427980bf73 100644 --- a/drivers/clk/at91/clk-master.c +++ b/drivers/clk/at91/clk-master.c @@ -58,83 +58,309 @@ static inline bool clk_master_ready(struct clk_master *master) static int clk_master_prepare(struct clk_hw *hw) { struct clk_master *master = to_clk_master(hw); + unsigned long flags; + + spin_lock_irqsave(master->lock, flags); while (!clk_master_ready(master)) cpu_relax(); + spin_unlock_irqrestore(master->lock, flags); + return 0; } static int clk_master_is_prepared(struct clk_hw *hw) { struct clk_master *master = to_clk_master(hw); + unsigned long flags; + bool status; - return clk_master_ready(master); + spin_lock_irqsave(master->lock, flags); + status = clk_master_ready(master); + spin_unlock_irqrestore(master->lock, flags); + + return status; } -static unsigned long clk_master_recalc_rate(struct clk_hw *hw, - unsigned long parent_rate) +static unsigned long clk_master_div_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) { - u8 pres; u8 div; - unsigned long rate = parent_rate; + unsigned long flags, rate = parent_rate; struct clk_master *master = to_clk_master(hw); const struct clk_master_layout *layout = master->layout; const struct clk_master_characteristics *characteristics = master->characteristics; unsigned int mckr; + spin_lock_irqsave(master->lock, flags); regmap_read(master->regmap, master->layout->offset, &mckr); + spin_unlock_irqrestore(master->lock, flags); + mckr &= layout->mask; - pres = (mckr >> layout->pres_shift) & MASTER_PRES_MASK; div = (mckr >> MASTER_DIV_SHIFT) & MASTER_DIV_MASK; - if (characteristics->have_div3_pres && pres == MASTER_PRES_MAX) - rate /= 3; - else - rate >>= pres; - rate /= characteristics->divisors[div]; if (rate < characteristics->output.min) - pr_warn("master clk is underclocked"); + pr_warn("master clk div is underclocked"); else if (rate > characteristics->output.max) - pr_warn("master clk is overclocked"); + pr_warn("master clk div is overclocked"); return rate; } -static u8 clk_master_get_parent(struct clk_hw *hw) +static const struct clk_ops master_div_ops = { + .prepare = clk_master_prepare, + .is_prepared = clk_master_is_prepared, + .recalc_rate = clk_master_div_recalc_rate, +}; + +static int clk_master_div_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + struct clk_master *master = to_clk_master(hw); + const struct clk_master_characteristics *characteristics = + master->characteristics; + unsigned long flags; + int div, i; + + div = DIV_ROUND_CLOSEST(parent_rate, rate); + if (div > ARRAY_SIZE(characteristics->divisors)) + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(characteristics->divisors); i++) { + if (!characteristics->divisors[i]) + break; + + if (div == characteristics->divisors[i]) { + div = i; + break; + } + } + + if (i == ARRAY_SIZE(characteristics->divisors)) + return -EINVAL; + + spin_lock_irqsave(master->lock, flags); + regmap_update_bits(master->regmap, master->layout->offset, + (MASTER_DIV_MASK << MASTER_DIV_SHIFT), + (div << MASTER_DIV_SHIFT)); + while (!clk_master_ready(master)) + cpu_relax(); + spin_unlock_irqrestore(master->lock, flags); + + return 0; +} + +static int clk_master_div_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) +{ + struct clk_master *master = to_clk_master(hw); + const struct clk_master_characteristics *characteristics = + master->characteristics; + struct clk_hw *parent; + unsigned long parent_rate, tmp_rate, best_rate = 0; + int i, best_diff = INT_MIN, tmp_diff; + + parent = clk_hw_get_parent(hw); + if (!parent) + return -EINVAL; + + parent_rate = clk_hw_get_rate(parent); + if (!parent_rate) + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(characteristics->divisors); i++) { + if (!characteristics->divisors[i]) + break; + + tmp_rate = DIV_ROUND_CLOSEST_ULL(parent_rate, + characteristics->divisors[i]); + tmp_diff = abs(tmp_rate - req->rate); + + if (!best_rate || best_diff > tmp_diff) { + best_diff = tmp_diff; + best_rate = tmp_rate; + } + + if (!best_diff) + break; + } + + req->best_parent_rate = best_rate; + req->best_parent_hw = parent; + req->rate = best_rate; + + return 0; +} + +static const struct clk_ops master_div_ops_chg = { + .prepare = clk_master_prepare, + .is_prepared = clk_master_is_prepared, + .recalc_rate = clk_master_div_recalc_rate, + .determine_rate = clk_master_div_determine_rate, + .set_rate = clk_master_div_set_rate, +}; + +static void clk_sama7g5_master_best_diff(struct clk_rate_request *req, + struct clk_hw *parent, + unsigned long parent_rate, + long *best_rate, + long *best_diff, + u32 div) +{ + unsigned long tmp_rate, tmp_diff; + + if (div == MASTER_PRES_MAX) + tmp_rate = parent_rate / 3; + else + tmp_rate = parent_rate >> div; + + tmp_diff = abs(req->rate - tmp_rate); + + if (*best_diff < 0 || *best_diff >= tmp_diff) { + *best_rate = tmp_rate; + *best_diff = tmp_diff; + req->best_parent_rate = parent_rate; + req->best_parent_hw = parent; + } +} + +static int clk_master_pres_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { struct clk_master *master = to_clk_master(hw); + struct clk_rate_request req_parent = *req; + const struct clk_master_characteristics *characteristics = + master->characteristics; + struct clk_hw *parent; + long best_rate = LONG_MIN, best_diff = LONG_MIN; + u32 pres; + int i; + + if (master->chg_pid < 0) + return -EOPNOTSUPP; + + parent = clk_hw_get_parent_by_index(hw, master->chg_pid); + if (!parent) + return -EOPNOTSUPP; + + for (i = 0; i <= MASTER_PRES_MAX; i++) { + if (characteristics->have_div3_pres && i == MASTER_PRES_MAX) + pres = 3; + else + pres = 1 << i; + + req_parent.rate = req->rate * pres; + if (__clk_determine_rate(parent, &req_parent)) + continue; + + clk_sama7g5_master_best_diff(req, parent, req_parent.rate, + &best_diff, &best_rate, pres); + if (!best_diff) + break; + } + + return 0; +} + +static int clk_master_pres_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + struct clk_master *master = to_clk_master(hw); + unsigned long flags; + unsigned int pres; + + pres = DIV_ROUND_CLOSEST(parent_rate, rate); + if (pres > MASTER_PRES_MAX) + return -EINVAL; + + else if (pres == 3) + pres = MASTER_PRES_MAX; + else + pres = ffs(pres) - 1; + + spin_lock_irqsave(master->lock, flags); + regmap_update_bits(master->regmap, master->layout->offset, + (MASTER_PRES_MASK << master->layout->pres_shift), + (pres << master->layout->pres_shift)); + + while (!clk_master_ready(master)) + cpu_relax(); + spin_unlock_irqrestore(master->lock, flags); + + return 0; +} + +static unsigned long clk_master_pres_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct clk_master *master = to_clk_master(hw); + const struct clk_master_characteristics *characteristics = + master->characteristics; + unsigned long flags; + unsigned int val, pres; + + spin_lock_irqsave(master->lock, flags); + regmap_read(master->regmap, master->layout->offset, &val); + spin_unlock_irqrestore(master->lock, flags); + + pres = (val >> master->layout->pres_shift) & MASTER_PRES_MASK; + if (pres == 3 && characteristics->have_div3_pres) + pres = 3; + else + pres = (1 << pres); + + return DIV_ROUND_CLOSEST_ULL(parent_rate, pres); +} + +static u8 clk_master_pres_get_parent(struct clk_hw *hw) +{ + struct clk_master *master = to_clk_master(hw); + unsigned long flags; unsigned int mckr; + spin_lock_irqsave(master->lock, flags); regmap_read(master->regmap, master->layout->offset, &mckr); + spin_unlock_irqrestore(master->lock, flags); return mckr & AT91_PMC_CSS; } -static const struct clk_ops master_ops = { +static const struct clk_ops master_pres_ops = { .prepare = clk_master_prepare, .is_prepared = clk_master_is_prepared, - .recalc_rate = clk_master_recalc_rate, - .get_parent = clk_master_get_parent, + .recalc_rate = clk_master_pres_recalc_rate, + .get_parent = clk_master_pres_get_parent, }; -struct clk_hw * __init -at91_clk_register_master(struct regmap *regmap, +static const struct clk_ops master_pres_ops_chg = { + .prepare = clk_master_prepare, + .is_prepared = clk_master_is_prepared, + .determine_rate = clk_master_pres_determine_rate, + .recalc_rate = clk_master_pres_recalc_rate, + .get_parent = clk_master_pres_get_parent, + .set_rate = clk_master_pres_set_rate, +}; + +static struct clk_hw * __init +at91_clk_register_master_internal(struct regmap *regmap, const char *name, int num_parents, const char **parent_names, const struct clk_master_layout *layout, - const struct clk_master_characteristics *characteristics) + const struct clk_master_characteristics *characteristics, + const struct clk_ops *ops, spinlock_t *lock, u32 flags, + int chg_pid) { struct clk_master *master; struct clk_init_data init; struct clk_hw *hw; int ret; - if (!name || !num_parents || !parent_names) + if (!name || !num_parents || !parent_names || !lock) return ERR_PTR(-EINVAL); master = kzalloc(sizeof(*master), GFP_KERNEL); @@ -142,15 +368,17 @@ at91_clk_register_master(struct regmap *regmap, return ERR_PTR(-ENOMEM); init.name = name; - init.ops = &master_ops; + init.ops = ops; init.parent_names = parent_names; init.num_parents = num_parents; - init.flags = 0; + init.flags = flags; master->hw.init = &init; master->layout = layout; master->characteristics = characteristics; master->regmap = regmap; + master->chg_pid = chg_pid; + master->lock = lock; hw = &master->hw; ret = clk_hw_register(NULL, &master->hw); @@ -162,37 +390,54 @@ at91_clk_register_master(struct regmap *regmap, return hw; } -static unsigned long -clk_sama7g5_master_recalc_rate(struct clk_hw *hw, - unsigned long parent_rate) +struct clk_hw * __init +at91_clk_register_master_pres(struct regmap *regmap, + const char *name, int num_parents, + const char **parent_names, + const struct clk_master_layout *layout, + const struct clk_master_characteristics *characteristics, + spinlock_t *lock, u32 flags, int chg_pid) { - struct clk_master *master = to_clk_master(hw); + const struct clk_ops *ops; - return DIV_ROUND_CLOSEST_ULL(parent_rate, (1 << master->div)); + if (flags & CLK_SET_RATE_GATE) + ops = &master_pres_ops; + else + ops = &master_pres_ops_chg; + + return at91_clk_register_master_internal(regmap, name, num_parents, + parent_names, layout, + characteristics, ops, + lock, flags, chg_pid); } -static void clk_sama7g5_master_best_diff(struct clk_rate_request *req, - struct clk_hw *parent, - unsigned long parent_rate, - long *best_rate, - long *best_diff, - u32 div) +struct clk_hw * __init +at91_clk_register_master_div(struct regmap *regmap, + const char *name, const char *parent_name, + const struct clk_master_layout *layout, + const struct clk_master_characteristics *characteristics, + spinlock_t *lock, u32 flags) { - unsigned long tmp_rate, tmp_diff; + const struct clk_ops *ops; - if (div == MASTER_PRES_MAX) - tmp_rate = parent_rate / 3; + if (flags & CLK_SET_RATE_GATE) + ops = &master_div_ops; else - tmp_rate = parent_rate >> div; + ops = &master_div_ops_chg; - tmp_diff = abs(req->rate - tmp_rate); + return at91_clk_register_master_internal(regmap, name, 1, + &parent_name, layout, + characteristics, ops, + lock, flags, -EINVAL); +} - if (*best_diff < 0 || *best_diff >= tmp_diff) { - *best_rate = tmp_rate; - *best_diff = tmp_diff; - req->best_parent_rate = parent_rate; - req->best_parent_hw = parent; - } +static unsigned long +clk_sama7g5_master_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct clk_master *master = to_clk_master(hw); + + return DIV_ROUND_CLOSEST_ULL(parent_rate, (1 << master->div)); } static int clk_sama7g5_master_determine_rate(struct clk_hw *hw, diff --git a/drivers/clk/at91/dt-compat.c b/drivers/clk/at91/dt-compat.c index a50084de97d41..a97b99c2dc127 100644 --- a/drivers/clk/at91/dt-compat.c +++ b/drivers/clk/at91/dt-compat.c @@ -24,6 +24,8 @@ #define GCK_INDEX_DT_AUDIO_PLL 5 +static DEFINE_SPINLOCK(mck_lock); + #ifdef CONFIG_HAVE_AT91_AUDIO_PLL static void __init of_sama5d2_clk_audio_pll_frac_setup(struct device_node *np) { @@ -388,9 +390,16 @@ of_at91_clk_master_setup(struct device_node *np, if (IS_ERR(regmap)) return; - hw = at91_clk_register_master(regmap, name, num_parents, - parent_names, layout, - characteristics); + hw = at91_clk_register_master_pres(regmap, "masterck_pres", num_parents, + parent_names, layout, + characteristics, &mck_lock, + CLK_SET_RATE_GATE, INT_MIN); + if (IS_ERR(hw)) + goto out_free_characteristics; + + hw = at91_clk_register_master_div(regmap, name, "masterck_pres", + layout, characteristics, + &mck_lock, CLK_SET_RATE_GATE); if (IS_ERR(hw)) goto out_free_characteristics; diff --git a/drivers/clk/at91/pmc.h b/drivers/clk/at91/pmc.h index bedcd85ad7509..a49076c804a9a 100644 --- a/drivers/clk/at91/pmc.h +++ b/drivers/clk/at91/pmc.h @@ -155,10 +155,18 @@ at91_clk_register_sam9x5_main(struct regmap *regmap, const char *name, const char **parent_names, int num_parents); struct clk_hw * __init -at91_clk_register_master(struct regmap *regmap, const char *name, - int num_parents, const char **parent_names, - const struct clk_master_layout *layout, - const struct clk_master_characteristics *characteristics); +at91_clk_register_master_pres(struct regmap *regmap, const char *name, + int num_parents, const char **parent_names, + const struct clk_master_layout *layout, + const struct clk_master_characteristics *characteristics, + spinlock_t *lock, u32 flags, int chg_pid); + +struct clk_hw * __init +at91_clk_register_master_div(struct regmap *regmap, const char *name, + const char *parent_names, + const struct clk_master_layout *layout, + const struct clk_master_characteristics *characteristics, + spinlock_t *lock, u32 flags); struct clk_hw * __init at91_clk_sama7g5_register_master(struct regmap *regmap, diff --git a/drivers/clk/at91/sam9x60.c b/drivers/clk/at91/sam9x60.c index dd62bb2880cf0..240e48149bcdc 100644 --- a/drivers/clk/at91/sam9x60.c +++ b/drivers/clk/at91/sam9x60.c @@ -8,6 +8,7 @@ #include "pmc.h" static DEFINE_SPINLOCK(pmc_pll_lock); +static DEFINE_SPINLOCK(mck_lock); static const struct clk_master_characteristics mck_characteristics = { .output = { .min = 140000000, .max = 200000000 }, @@ -76,11 +77,11 @@ static const struct { char *p; u8 id; } sam9x60_systemck[] = { - { .n = "ddrck", .p = "masterck", .id = 2 }, + { .n = "ddrck", .p = "masterck_div", .id = 2 }, { .n = "uhpck", .p = "usbck", .id = 6 }, { .n = "pck0", .p = "prog0", .id = 8 }, { .n = "pck1", .p = "prog1", .id = 9 }, - { .n = "qspick", .p = "masterck", .id = 19 }, + { .n = "qspick", .p = "masterck_div", .id = 19 }, }; static const struct { @@ -272,9 +273,17 @@ static void __init sam9x60_pmc_setup(struct device_node *np) parent_names[0] = md_slck_name; parent_names[1] = "mainck"; parent_names[2] = "pllack_divck"; - hw = at91_clk_register_master(regmap, "masterck", 3, parent_names, - &sam9x60_master_layout, - &mck_characteristics); + hw = at91_clk_register_master_pres(regmap, "masterck_pres", 3, + parent_names, &sam9x60_master_layout, + &mck_characteristics, &mck_lock, + CLK_SET_RATE_GATE, INT_MIN); + if (IS_ERR(hw)) + goto err_free; + + hw = at91_clk_register_master_div(regmap, "masterck_div", + "masterck_pres", &sam9x60_master_layout, + &mck_characteristics, &mck_lock, + CLK_SET_RATE_GATE); if (IS_ERR(hw)) goto err_free; @@ -290,7 +299,7 @@ static void __init sam9x60_pmc_setup(struct device_node *np) parent_names[0] = md_slck_name; parent_names[1] = td_slck_name; parent_names[2] = "mainck"; - parent_names[3] = "masterck"; + parent_names[3] = "masterck_div"; parent_names[4] = "pllack_divck"; parent_names[5] = "upllck_divck"; for (i = 0; i < 2; i++) { @@ -322,7 +331,7 @@ static void __init sam9x60_pmc_setup(struct device_node *np) hw = at91_clk_register_sam9x5_peripheral(regmap, &pmc_pcr_lock, &sam9x60_pcr_layout, sam9x60_periphck[i].n, - "masterck", + "masterck_div", sam9x60_periphck[i].id, &range, INT_MIN); if (IS_ERR(hw)) diff --git a/drivers/clk/at91/sama5d2.c b/drivers/clk/at91/sama5d2.c index 8b220762941ab..9a5cbc7cd55a4 100644 --- a/drivers/clk/at91/sama5d2.c +++ b/drivers/clk/at91/sama5d2.c @@ -7,6 +7,8 @@ #include "pmc.h" +static DEFINE_SPINLOCK(mck_lock); + static const struct clk_master_characteristics mck_characteristics = { .output = { .min = 124000000, .max = 166000000 }, .divisors = { 1, 2, 4, 3 }, @@ -40,14 +42,14 @@ static const struct { char *p; u8 id; } sama5d2_systemck[] = { - { .n = "ddrck", .p = "masterck", .id = 2 }, - { .n = "lcdck", .p = "masterck", .id = 3 }, - { .n = "uhpck", .p = "usbck", .id = 6 }, - { .n = "udpck", .p = "usbck", .id = 7 }, - { .n = "pck0", .p = "prog0", .id = 8 }, - { .n = "pck1", .p = "prog1", .id = 9 }, - { .n = "pck2", .p = "prog2", .id = 10 }, - { .n = "iscck", .p = "masterck", .id = 18 }, + { .n = "ddrck", .p = "masterck_div", .id = 2 }, + { .n = "lcdck", .p = "masterck_div", .id = 3 }, + { .n = "uhpck", .p = "usbck", .id = 6 }, + { .n = "udpck", .p = "usbck", .id = 7 }, + { .n = "pck0", .p = "prog0", .id = 8 }, + { .n = "pck1", .p = "prog1", .id = 9 }, + { .n = "pck2", .p = "prog2", .id = 10 }, + { .n = "iscck", .p = "masterck_div", .id = 18 }, }; static const struct { @@ -235,15 +237,25 @@ static void __init sama5d2_pmc_setup(struct device_node *np) parent_names[1] = "mainck"; parent_names[2] = "plladivck"; parent_names[3] = "utmick"; - hw = at91_clk_register_master(regmap, "masterck", 4, parent_names, - &at91sam9x5_master_layout, - &mck_characteristics); + hw = at91_clk_register_master_pres(regmap, "masterck_pres", 4, + parent_names, + &at91sam9x5_master_layout, + &mck_characteristics, &mck_lock, + CLK_SET_RATE_GATE, INT_MIN); + if (IS_ERR(hw)) + goto err_free; + + hw = at91_clk_register_master_div(regmap, "masterck_div", + "masterck_pres", + &at91sam9x5_master_layout, + &mck_characteristics, &mck_lock, + CLK_SET_RATE_GATE); if (IS_ERR(hw)) goto err_free; sama5d2_pmc->chws[PMC_MCK] = hw; - hw = at91_clk_register_h32mx(regmap, "h32mxck", "masterck"); + hw = at91_clk_register_h32mx(regmap, "h32mxck", "masterck_div"); if (IS_ERR(hw)) goto err_free; @@ -259,7 +271,7 @@ static void __init sama5d2_pmc_setup(struct device_node *np) parent_names[1] = "mainck"; parent_names[2] = "plladivck"; parent_names[3] = "utmick"; - parent_names[4] = "masterck"; + parent_names[4] = "masterck_div"; parent_names[5] = "audiopll_pmcck"; for (i = 0; i < 3; i++) { char name[6]; @@ -290,7 +302,7 @@ static void __init sama5d2_pmc_setup(struct device_node *np) hw = at91_clk_register_sam9x5_peripheral(regmap, &pmc_pcr_lock, &sama5d2_pcr_layout, sama5d2_periphck[i].n, - "masterck", + "masterck_div", sama5d2_periphck[i].id, &range, INT_MIN); if (IS_ERR(hw)) @@ -317,7 +329,7 @@ static void __init sama5d2_pmc_setup(struct device_node *np) parent_names[1] = "mainck"; parent_names[2] = "plladivck"; parent_names[3] = "utmick"; - parent_names[4] = "masterck"; + parent_names[4] = "masterck_div"; parent_names[5] = "audiopll_pmcck"; for (i = 0; i < ARRAY_SIZE(sama5d2_gck); i++) { hw = at91_clk_register_generated(regmap, &pmc_pcr_lock, diff --git a/drivers/clk/at91/sama5d3.c b/drivers/clk/at91/sama5d3.c index 7c6e0a5b9dc87..87009ee8effc9 100644 --- a/drivers/clk/at91/sama5d3.c +++ b/drivers/clk/at91/sama5d3.c @@ -7,6 +7,8 @@ #include "pmc.h" +static DEFINE_SPINLOCK(mck_lock); + static const struct clk_master_characteristics mck_characteristics = { .output = { .min = 0, .max = 166000000 }, .divisors = { 1, 2, 4, 3 }, @@ -40,14 +42,14 @@ static const struct { char *p; u8 id; } sama5d3_systemck[] = { - { .n = "ddrck", .p = "masterck", .id = 2 }, - { .n = "lcdck", .p = "masterck", .id = 3 }, - { .n = "smdck", .p = "smdclk", .id = 4 }, - { .n = "uhpck", .p = "usbck", .id = 6 }, - { .n = "udpck", .p = "usbck", .id = 7 }, - { .n = "pck0", .p = "prog0", .id = 8 }, - { .n = "pck1", .p = "prog1", .id = 9 }, - { .n = "pck2", .p = "prog2", .id = 10 }, + { .n = "ddrck", .p = "masterck_div", .id = 2 }, + { .n = "lcdck", .p = "masterck_div", .id = 3 }, + { .n = "smdck", .p = "smdclk", .id = 4 }, + { .n = "uhpck", .p = "usbck", .id = 6 }, + { .n = "udpck", .p = "usbck", .id = 7 }, + { .n = "pck0", .p = "prog0", .id = 8 }, + { .n = "pck1", .p = "prog1", .id = 9 }, + { .n = "pck2", .p = "prog2", .id = 10 }, }; static const struct { @@ -170,9 +172,19 @@ static void __init sama5d3_pmc_setup(struct device_node *np) parent_names[1] = "mainck"; parent_names[2] = "plladivck"; parent_names[3] = "utmick"; - hw = at91_clk_register_master(regmap, "masterck", 4, parent_names, - &at91sam9x5_master_layout, - &mck_characteristics); + hw = at91_clk_register_master_pres(regmap, "masterck_pres", 4, + parent_names, + &at91sam9x5_master_layout, + &mck_characteristics, &mck_lock, + CLK_SET_RATE_GATE, INT_MIN); + if (IS_ERR(hw)) + goto err_free; + + hw = at91_clk_register_master_div(regmap, "masterck_div", + "masterck_pres", + &at91sam9x5_master_layout, + &mck_characteristics, &mck_lock, + CLK_SET_RATE_GATE); if (IS_ERR(hw)) goto err_free; @@ -192,7 +204,7 @@ static void __init sama5d3_pmc_setup(struct device_node *np) parent_names[1] = "mainck"; parent_names[2] = "plladivck"; parent_names[3] = "utmick"; - parent_names[4] = "masterck"; + parent_names[4] = "masterck_div"; for (i = 0; i < 3; i++) { char name[6]; @@ -222,7 +234,7 @@ static void __init sama5d3_pmc_setup(struct device_node *np) hw = at91_clk_register_sam9x5_peripheral(regmap, &pmc_pcr_lock, &sama5d3_pcr_layout, sama5d3_periphck[i].n, - "masterck", + "masterck_div", sama5d3_periphck[i].id, &sama5d3_periphck[i].r, INT_MIN); diff --git a/drivers/clk/at91/sama5d4.c b/drivers/clk/at91/sama5d4.c index 92d8d4141b433..57fff790188b6 100644 --- a/drivers/clk/at91/sama5d4.c +++ b/drivers/clk/at91/sama5d4.c @@ -7,6 +7,8 @@ #include "pmc.h" +static DEFINE_SPINLOCK(mck_lock); + static const struct clk_master_characteristics mck_characteristics = { .output = { .min = 125000000, .max = 200000000 }, .divisors = { 1, 2, 4, 3 }, @@ -39,14 +41,14 @@ static const struct { char *p; u8 id; } sama5d4_systemck[] = { - { .n = "ddrck", .p = "masterck", .id = 2 }, - { .n = "lcdck", .p = "masterck", .id = 3 }, - { .n = "smdck", .p = "smdclk", .id = 4 }, - { .n = "uhpck", .p = "usbck", .id = 6 }, - { .n = "udpck", .p = "usbck", .id = 7 }, - { .n = "pck0", .p = "prog0", .id = 8 }, - { .n = "pck1", .p = "prog1", .id = 9 }, - { .n = "pck2", .p = "prog2", .id = 10 }, + { .n = "ddrck", .p = "masterck_div", .id = 2 }, + { .n = "lcdck", .p = "masterck_div", .id = 3 }, + { .n = "smdck", .p = "smdclk", .id = 4 }, + { .n = "uhpck", .p = "usbck", .id = 6 }, + { .n = "udpck", .p = "usbck", .id = 7 }, + { .n = "pck0", .p = "prog0", .id = 8 }, + { .n = "pck1", .p = "prog1", .id = 9 }, + { .n = "pck2", .p = "prog2", .id = 10 }, }; static const struct { @@ -185,15 +187,25 @@ static void __init sama5d4_pmc_setup(struct device_node *np) parent_names[1] = "mainck"; parent_names[2] = "plladivck"; parent_names[3] = "utmick"; - hw = at91_clk_register_master(regmap, "masterck", 4, parent_names, - &at91sam9x5_master_layout, - &mck_characteristics); + hw = at91_clk_register_master_pres(regmap, "masterck_pres", 4, + parent_names, + &at91sam9x5_master_layout, + &mck_characteristics, &mck_lock, + CLK_SET_RATE_GATE, INT_MIN); + if (IS_ERR(hw)) + goto err_free; + + hw = at91_clk_register_master_div(regmap, "masterck_div", + "masterck_pres", + &at91sam9x5_master_layout, + &mck_characteristics, &mck_lock, + CLK_SET_RATE_GATE); if (IS_ERR(hw)) goto err_free; sama5d4_pmc->chws[PMC_MCK] = hw; - hw = at91_clk_register_h32mx(regmap, "h32mxck", "masterck"); + hw = at91_clk_register_h32mx(regmap, "h32mxck", "masterck_div"); if (IS_ERR(hw)) goto err_free; @@ -215,7 +227,7 @@ static void __init sama5d4_pmc_setup(struct device_node *np) parent_names[1] = "mainck"; parent_names[2] = "plladivck"; parent_names[3] = "utmick"; - parent_names[4] = "masterck"; + parent_names[4] = "masterck_div"; for (i = 0; i < 3; i++) { char name[6]; @@ -245,7 +257,7 @@ static void __init sama5d4_pmc_setup(struct device_node *np) hw = at91_clk_register_sam9x5_peripheral(regmap, &pmc_pcr_lock, &sama5d4_pcr_layout, sama5d4_periphck[i].n, - "masterck", + "masterck_div", sama5d4_periphck[i].id, &range, INT_MIN); if (IS_ERR(hw)) diff --git a/drivers/clk/at91/sama7g5.c b/drivers/clk/at91/sama7g5.c index e0c4d2eb9f597..927eb3b2b126d 100644 --- a/drivers/clk/at91/sama7g5.c +++ b/drivers/clk/at91/sama7g5.c @@ -32,6 +32,7 @@ } while (0) static DEFINE_SPINLOCK(pmc_pll_lock); +static DEFINE_SPINLOCK(pmc_mck0_lock); static DEFINE_SPINLOCK(pmc_mckX_lock); /** @@ -984,8 +985,16 @@ static void __init sama7g5_pmc_setup(struct device_node *np) parent_names[1] = "mainck"; parent_names[2] = "cpupll_divpmcck"; parent_names[3] = "syspll_divpmcck"; - hw = at91_clk_register_master(regmap, "mck0", 4, parent_names, - &mck0_layout, &mck0_characteristics); + hw = at91_clk_register_master_pres(regmap, "mck0_pres", 4, parent_names, + &mck0_layout, &mck0_characteristics, + &pmc_mck0_lock, + CLK_SET_RATE_PARENT, 0); + if (IS_ERR(hw)) + goto err_free; + + hw = at91_clk_register_master_div(regmap, "mck0_div", "mck0_pres", + &mck0_layout, &mck0_characteristics, + &pmc_mck0_lock, 0); if (IS_ERR(hw)) goto err_free; -- GitLab From 91f3bf0d5315ea3f139ae440f2b7772ecdcd67ec Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Thu, 19 Nov 2020 17:43:17 +0200 Subject: [PATCH 0912/1894] clk: at91: sama7g5: register cpu clock Register CPU clock as being the master clock prescaler. This would be used by DVFS. The block schema of SAMA7G5's PMC contains also a divider between master clock prescaler and CPU (PMC_CPU_RATIO.RATIO) but the frequencies supported by SAMA7G5 could be directly received from CPUPLL + master clock prescaler and the extra divider would do no work in case it would be enabled. Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/1605800597-16720-12-git-send-email-claudiu.beznea@microchip.com Signed-off-by: Stephen Boyd --- drivers/clk/at91/sama7g5.c | 13 ++++++------- include/dt-bindings/clock/at91.h | 1 + 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/clk/at91/sama7g5.c b/drivers/clk/at91/sama7g5.c index 927eb3b2b126d..a6e20b35960e0 100644 --- a/drivers/clk/at91/sama7g5.c +++ b/drivers/clk/at91/sama7g5.c @@ -904,7 +904,7 @@ static void __init sama7g5_pmc_setup(struct device_node *np) if (IS_ERR(regmap)) return; - sama7g5_pmc = pmc_data_allocate(PMC_ETHPLL + 1, + sama7g5_pmc = pmc_data_allocate(PMC_CPU + 1, nck(sama7g5_systemck), nck(sama7g5_periphck), nck(sama7g5_gck), 8); @@ -981,18 +981,17 @@ static void __init sama7g5_pmc_setup(struct device_node *np) } } - parent_names[0] = md_slck_name; - parent_names[1] = "mainck"; - parent_names[2] = "cpupll_divpmcck"; - parent_names[3] = "syspll_divpmcck"; - hw = at91_clk_register_master_pres(regmap, "mck0_pres", 4, parent_names, + parent_names[0] = "cpupll_divpmcck"; + hw = at91_clk_register_master_pres(regmap, "cpuck", 1, parent_names, &mck0_layout, &mck0_characteristics, &pmc_mck0_lock, CLK_SET_RATE_PARENT, 0); if (IS_ERR(hw)) goto err_free; - hw = at91_clk_register_master_div(regmap, "mck0_div", "mck0_pres", + sama7g5_pmc->chws[PMC_CPU] = hw; + + hw = at91_clk_register_master_div(regmap, "mck0", "cpuck", &mck0_layout, &mck0_characteristics, &pmc_mck0_lock, 0); if (IS_ERR(hw)) diff --git a/include/dt-bindings/clock/at91.h b/include/dt-bindings/clock/at91.h index fab313f62e8f5..98e1b2ab64039 100644 --- a/include/dt-bindings/clock/at91.h +++ b/include/dt-bindings/clock/at91.h @@ -34,6 +34,7 @@ #define PMC_AUDIOPMCPLL (PMC_MAIN + 6) #define PMC_AUDIOIOPLL (PMC_MAIN + 7) #define PMC_ETHPLL (PMC_MAIN + 8) +#define PMC_CPU (PMC_MAIN + 9) #ifndef AT91_PMC_MOSCS #define AT91_PMC_MOSCS 0 /* MOSCS Flag */ -- GitLab From 6b9bae63de4fe24365ad0c2d23e77ae06f8c58e4 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 16 Dec 2020 10:57:57 -0800 Subject: [PATCH 0913/1894] perf script: Support data page size Display the data page size if it is available and asked by the user: Can be configured by the user, for example: perf script --fields comm,event,phys_addr,data_page_size dtlb mem-loads:uP: 3fec82ea8 4K dtlb mem-loads:uP: 3fec82e90 4K dtlb mem-loads:uP: 3e23700a4 4K dtlb mem-loads:uP: 3fec82f20 4K dtlb mem-loads:uP: 3e23700a4 4K dtlb mem-loads:uP: 3b4211bec 4K dtlb mem-loads:uP: 382205dc0 2M dtlb mem-loads:uP: 36fa082c0 2M dtlb mem-loads:uP: 377607340 2M dtlb mem-loads:uP: 330010180 2M dtlb mem-loads:uP: 33200fd80 2M dtlb mem-loads:uP: 31b012b80 2M Signed-off-by: Kan Liang Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Andi Kleen Cc: Mark Rutland Cc: Michael Ellerman Cc: Stephane Eranian Cc: Will Deacon Link: http://lore.kernel.org/lkml/20201216185805.9981-2-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-script.txt | 5 +++-- tools/perf/builtin-script.c | 17 +++++++++++++++-- tools/perf/util/event.h | 3 +++ tools/perf/util/session.c | 13 +++++++++++++ 4 files changed, 34 insertions(+), 4 deletions(-) diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 4f712fb8f175d..44d37210fc8ff 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -116,8 +116,9 @@ OPTIONS --fields:: Comma separated list of fields to print. Options are: comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, - srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, brstackinsn, - brstackoff, callindent, insn, insnlen, synth, phys_addr, metric, misc, srccode, ipc. + srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, + brstackinsn, brstackoff, callindent, insn, insnlen, synth, phys_addr, + metric, misc, srccode, ipc, data_page_size. Field list can be prepended with the type, trace, sw or hw, to indicate to which event type the field list applies. e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 1c322c1291855..edacfa98d073b 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -30,6 +30,7 @@ #include "util/thread-stack.h" #include "util/time-utils.h" #include "util/path.h" +#include "util/event.h" #include "ui/ui.h" #include "print_binary.h" #include "archinsn.h" @@ -115,6 +116,7 @@ enum perf_output_field { PERF_OUTPUT_SRCCODE = 1ULL << 30, PERF_OUTPUT_IPC = 1ULL << 31, PERF_OUTPUT_TOD = 1ULL << 32, + PERF_OUTPUT_DATA_PAGE_SIZE = 1ULL << 33, }; struct perf_script { @@ -179,6 +181,7 @@ struct output_option { {.str = "srccode", .field = PERF_OUTPUT_SRCCODE}, {.str = "ipc", .field = PERF_OUTPUT_IPC}, {.str = "tod", .field = PERF_OUTPUT_TOD}, + {.str = "data_page_size", .field = PERF_OUTPUT_DATA_PAGE_SIZE}, }; enum { @@ -251,7 +254,8 @@ static struct { PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET | PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD | PERF_OUTPUT_ADDR | PERF_OUTPUT_DATA_SRC | - PERF_OUTPUT_WEIGHT | PERF_OUTPUT_PHYS_ADDR, + PERF_OUTPUT_WEIGHT | PERF_OUTPUT_PHYS_ADDR | + PERF_OUTPUT_DATA_PAGE_SIZE, .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, }, @@ -499,6 +503,10 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session) evsel__check_stype(evsel, PERF_SAMPLE_PHYS_ADDR, "PHYS_ADDR", PERF_OUTPUT_PHYS_ADDR)) return -EINVAL; + if (PRINT_FIELD(DATA_PAGE_SIZE) && + evsel__check_stype(evsel, PERF_SAMPLE_DATA_PAGE_SIZE, "DATA_PAGE_SIZE", PERF_OUTPUT_DATA_PAGE_SIZE)) + return -EINVAL; + return 0; } @@ -1920,6 +1928,7 @@ static void process_event(struct perf_script *script, unsigned int type = output_type(attr->type); struct evsel_script *es = evsel->priv; FILE *fp = es->fp; + char str[PAGE_SIZE_NAME_LEN]; if (output[type].fields == 0) return; @@ -2008,6 +2017,9 @@ static void process_event(struct perf_script *script, if (PRINT_FIELD(PHYS_ADDR)) fprintf(fp, "%16" PRIx64, sample->phys_addr); + if (PRINT_FIELD(DATA_PAGE_SIZE)) + fprintf(fp, " %s", get_page_size_name(sample->data_page_size, str)); + perf_sample__fprintf_ipc(sample, attr, fp); fprintf(fp, "\n"); @@ -3506,7 +3518,8 @@ int cmd_script(int argc, const char **argv) "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," "addr,symoff,srcline,period,iregs,uregs,brstack," "brstacksym,flags,bpf-output,brstackinsn,brstackoff," - "callindent,insn,insnlen,synth,phys_addr,metric,misc,ipc,tod", + "callindent,insn,insnlen,synth,phys_addr,metric,misc,ipc,tod," + "data_page_size", parse_output_fields), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 448ac30c2fc45..ff403ea578e11 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -409,4 +409,7 @@ extern int sysctl_perf_event_max_stack; extern int sysctl_perf_event_max_contexts_per_stack; extern unsigned int proc_map_timeout; +#define PAGE_SIZE_NAME_LEN 32 +char *get_page_size_name(u64 size, char *str); + #endif /* __PERF_RECORD_H */ diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 3b3c50b127918..50ff9795a4f11 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -32,6 +32,7 @@ #include "ui/progress.h" #include "../perf.h" #include "arch/common.h" +#include "units.h" #include #ifdef HAVE_ZSTD_SUPPORT @@ -1258,10 +1259,19 @@ static void dump_event(struct evlist *evlist, union perf_event *event, event->header.size, perf_event__name(event->header.type)); } +char *get_page_size_name(u64 size, char *str) +{ + if (!size || !unit_number__scnprintf(str, PAGE_SIZE_NAME_LEN, size)) + snprintf(str, PAGE_SIZE_NAME_LEN, "%s", "N/A"); + + return str; +} + static void dump_sample(struct evsel *evsel, union perf_event *event, struct perf_sample *sample) { u64 sample_type; + char str[PAGE_SIZE_NAME_LEN]; if (!dump_trace) return; @@ -1296,6 +1306,9 @@ static void dump_sample(struct evsel *evsel, union perf_event *event, if (sample_type & PERF_SAMPLE_PHYS_ADDR) printf(" .. phys_addr: 0x%"PRIx64"\n", sample->phys_addr); + if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) + printf(" .. data page size: %s\n", get_page_size_name(sample->data_page_size, str)); + if (sample_type & PERF_SAMPLE_TRANSACTION) printf("... transaction: %" PRIx64 "\n", sample->transaction); -- GitLab From a50d03e3b8b68df13e47dcbde6c5d39b4237c479 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 16 Dec 2020 10:57:58 -0800 Subject: [PATCH 0914/1894] perf sort: Add sort option for data page size Add a new sort option "data_page_size" for --mem-mode sort. With this option applied, perf can sort and report by sample's data page size. Here is an example: perf report --stdio --mem-mode --sort=comm,symbol,phys_daddr,data_page_size # To display the perf.data header info, please use # --header/--header-only options. # # # Total Lost Samples: 0 # # Samples: 9K of event 'mem-loads:uP' # Total weight : 9028 # Sort order : comm,symbol,phys_daddr,data_page_size # # Overhead Command Symbol Data Physical # Address # Data Page Size # ........ ....... ............................ # ...................... ...................... # 11.19% dtlb [.] touch_buffer [.] 0x00000003fec82ea8 4K 8.61% dtlb [.] GetTickCount [.] 0x00000003c4f2c8a8 4K 4.52% dtlb [.] GetTickCount [.] 0x00000003fec82f58 4K 4.33% dtlb [.] __gettimeofday [.] 0x00000003fec82f48 4K 4.32% dtlb [.] GetTickCount [.] 0x00000003fec82f78 4K 4.28% dtlb [.] GetTickCount [.] 0x00000003fec82f50 4K 4.23% dtlb [.] GetTickCount [.] 0x00000003fec82f70 4K 4.11% dtlb [.] GetTickCount [.] 0x00000003fec82f68 4K 4.00% dtlb [.] Calibrate [.] 0x00000003fec82f98 4K 3.91% dtlb [.] Calibrate [.] 0x00000003fec82f90 4K 3.43% dtlb [.] touch_buffer [.] 0x00000003fec82e98 4K 3.42% dtlb [.] touch_buffer [.] 0x00000003fec82e90 4K 0.09% dtlb [.] DoDependentLoads [.] 0x000000036ea084c0 2M 0.08% dtlb [.] DoDependentLoads [.] 0x000000032b010b80 2M Signed-off-by: Kan Liang Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Andi Kleen Cc: Mark Rutland Cc: Michael Ellerman Cc: Stephane Eranian Cc: Will Deacon Link: http://lore.kernel.org/lkml/20201216185805.9981-3-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 1 + tools/perf/util/hist.c | 3 +++ tools/perf/util/hist.h | 1 + tools/perf/util/machine.c | 7 ++++-- tools/perf/util/map_symbol.h | 1 + tools/perf/util/sort.c | 30 ++++++++++++++++++++++++ tools/perf/util/sort.h | 1 + 7 files changed, 42 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index d068103690ccc..8f7f4e9605d89 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -150,6 +150,7 @@ OPTIONS - snoop: type of snoop (if any) for the data at the time of the sample - dcacheline: the cacheline the data address is on at the time of the sample - phys_daddr: physical address of data being executed on at the time of sample + - data_page_size: the data page size of data being executed on at the time of sample And the default sort keys are changed to local_weight, mem, sym, dso, symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'. diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 7feeaa07c7776..a08fb9ea411bd 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -188,6 +188,9 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) hists__new_col_len(hists, HISTC_MEM_PHYS_DADDR, unresolved_col_width + 4 + 2); + hists__new_col_len(hists, HISTC_MEM_DATA_PAGE_SIZE, + unresolved_col_width + 4 + 2); + } else { symlen = unresolved_col_width + 4 + 2; hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL, symlen); diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index df6c6eea09605..14f66330923d6 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -56,6 +56,7 @@ enum hist_column { HISTC_MEM_DADDR_SYMBOL, HISTC_MEM_DADDR_DSO, HISTC_MEM_PHYS_DADDR, + HISTC_MEM_DATA_PAGE_SIZE, HISTC_MEM_LOCKED, HISTC_MEM_TLB, HISTC_MEM_LVL, diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 1ae32a81639c6..f841f3503cae6 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2023,11 +2023,12 @@ static void ip__resolve_ams(struct thread *thread, ams->ms.sym = al.sym; ams->ms.map = al.map; ams->phys_addr = 0; + ams->data_page_size = 0; } static void ip__resolve_data(struct thread *thread, u8 m, struct addr_map_symbol *ams, - u64 addr, u64 phys_addr) + u64 addr, u64 phys_addr, u64 daddr_page_size) { struct addr_location al; @@ -2041,6 +2042,7 @@ static void ip__resolve_data(struct thread *thread, ams->ms.sym = al.sym; ams->ms.map = al.map; ams->phys_addr = phys_addr; + ams->data_page_size = daddr_page_size; } struct mem_info *sample__resolve_mem(struct perf_sample *sample, @@ -2053,7 +2055,8 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample, ip__resolve_ams(al->thread, &mi->iaddr, sample->ip); ip__resolve_data(al->thread, al->cpumode, &mi->daddr, - sample->addr, sample->phys_addr); + sample->addr, sample->phys_addr, + sample->data_page_size); mi->data_src.val = sample->data_src; return mi; diff --git a/tools/perf/util/map_symbol.h b/tools/perf/util/map_symbol.h index 5b8ca93798e9a..7d22ade082c8f 100644 --- a/tools/perf/util/map_symbol.h +++ b/tools/perf/util/map_symbol.h @@ -19,5 +19,6 @@ struct addr_map_symbol { u64 addr; u64 al_addr; u64 phys_addr; + u64 data_page_size; }; #endif // __PERF_MAP_SYMBOL diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 7d87bfcffb3f6..80907bc32683a 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1462,6 +1462,35 @@ struct sort_entry sort_mem_phys_daddr = { .se_width_idx = HISTC_MEM_PHYS_DADDR, }; +static int64_t +sort__data_page_size_cmp(struct hist_entry *left, struct hist_entry *right) +{ + uint64_t l = 0, r = 0; + + if (left->mem_info) + l = left->mem_info->daddr.data_page_size; + if (right->mem_info) + r = right->mem_info->daddr.data_page_size; + + return (int64_t)(r - l); +} + +static int hist_entry__data_page_size_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + char str[PAGE_SIZE_NAME_LEN]; + + return repsep_snprintf(bf, size, "%-*s", width, + get_page_size_name(he->mem_info->daddr.data_page_size, str)); +} + +struct sort_entry sort_mem_data_page_size = { + .se_header = "Data Page Size", + .se_cmp = sort__data_page_size_cmp, + .se_snprintf = hist_entry__data_page_size_snprintf, + .se_width_idx = HISTC_MEM_DATA_PAGE_SIZE, +}; + static int64_t sort__abort_cmp(struct hist_entry *left, struct hist_entry *right) { @@ -1740,6 +1769,7 @@ static struct sort_dimension memory_sort_dimensions[] = { DIM(SORT_MEM_SNOOP, "snoop", sort_mem_snoop), DIM(SORT_MEM_DCACHELINE, "dcacheline", sort_mem_dcacheline), DIM(SORT_MEM_PHYS_DADDR, "phys_daddr", sort_mem_phys_daddr), + DIM(SORT_MEM_DATA_PAGE_SIZE, "data_page_size", sort_mem_data_page_size), }; #undef DIM diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 66d39c4cfe2b3..e50f2b695bc43 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -255,6 +255,7 @@ enum sort_type { SORT_MEM_DCACHELINE, SORT_MEM_IADDR_SYMBOL, SORT_MEM_PHYS_DADDR, + SORT_MEM_DATA_PAGE_SIZE, }; /* -- GitLab From 2e7f545096f954a9726c9415763dd0bfbcac47e0 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 16 Dec 2020 10:57:59 -0800 Subject: [PATCH 0915/1894] perf mem: Factor out a function to generate sort order Now, "--phys-data" is the only option which impacts the sort order. A simple "if else" is enough to handle the option. But there will be more options added, e.g. "--data-page-size", which also impact the sort order. The code will become too complex to be maintained. Divide the sort order string into several small pieces. The first piece is always the default sort string for LOAD/STORE. Appends the specific sort string if related option is applied. No functional change. Signed-off-by: Kan Liang Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Andi Kleen Cc: Mark Rutland Cc: Michael Ellerman Cc: Stephane Eranian Cc: Will Deacon Link: http://lore.kernel.org/lkml/20201216185805.9981-4-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-mem.c | 41 ++++++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index fdfbff7592f43..823742036ddb2 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -298,11 +298,35 @@ out_delete: perf_session__delete(session); return ret; } +static char *get_sort_order(struct perf_mem *mem) +{ + bool has_extra_options = mem->phys_addr ? true : false; + char sort[128]; + + /* + * there is no weight (cost) associated with stores, so don't print + * the column + */ + if (!(mem->operation & MEM_OPERATION_LOAD)) { + strcpy(sort, "--sort=mem,sym,dso,symbol_daddr," + "dso_daddr,tlb,locked"); + } else if (has_extra_options) { + strcpy(sort, "--sort=local_weight,mem,sym,dso,symbol_daddr," + "dso_daddr,snoop,tlb,locked"); + } else + return NULL; + + if (mem->phys_addr) + strcat(sort, ",phys_daddr"); + + return strdup(sort); +} static int report_events(int argc, const char **argv, struct perf_mem *mem) { const char **rep_argv; int ret, i = 0, j, rep_argc; + char *new_sort_order; if (mem->dump_raw) return report_raw_events(mem); @@ -316,20 +340,9 @@ static int report_events(int argc, const char **argv, struct perf_mem *mem) rep_argv[i++] = "--mem-mode"; rep_argv[i++] = "-n"; /* display number of samples */ - /* - * there is no weight (cost) associated with stores, so don't print - * the column - */ - if (!(mem->operation & MEM_OPERATION_LOAD)) { - if (mem->phys_addr) - rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr," - "dso_daddr,tlb,locked,phys_daddr"; - else - rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr," - "dso_daddr,tlb,locked"; - } else if (mem->phys_addr) - rep_argv[i++] = "--sort=local_weight,mem,sym,dso,symbol_daddr," - "dso_daddr,snoop,tlb,locked,phys_daddr"; + new_sort_order = get_sort_order(mem); + if (new_sort_order) + rep_argv[i++] = new_sort_order; for (j = 1; j < argc; j++, i++) rep_argv[i] = argv[j]; -- GitLab From 01324f9e88b5cfc1f4c26eef66bdcb52596c9af8 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Wed, 2 Dec 2020 13:58:15 +0100 Subject: [PATCH 0916/1894] clk: at91: sam9x60: remove atmel,osc-bypass support The sam9x60 doesn't have the MOSCXTBY bit to enable the crystal oscillator bypass. Fixes: 01e2113de9a5 ("clk: at91: add sam9x60 pmc driver") Reported-by: Claudiu Beznea Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201202125816.168618-1-alexandre.belloni@bootlin.com Reviewed-by: Claudiu Beznea Tested-by: Claudiu Beznea Signed-off-by: Stephen Boyd --- drivers/clk/at91/sam9x60.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/clk/at91/sam9x60.c b/drivers/clk/at91/sam9x60.c index 240e48149bcdc..5f6fa89571b74 100644 --- a/drivers/clk/at91/sam9x60.c +++ b/drivers/clk/at91/sam9x60.c @@ -175,7 +175,6 @@ static void __init sam9x60_pmc_setup(struct device_node *np) struct regmap *regmap; struct clk_hw *hw; int i; - bool bypass; i = of_property_match_string(np, "clock-names", "td_slck"); if (i < 0) @@ -210,10 +209,7 @@ static void __init sam9x60_pmc_setup(struct device_node *np) if (IS_ERR(hw)) goto err_free; - bypass = of_property_read_bool(np, "atmel,osc-bypass"); - - hw = at91_clk_register_main_osc(regmap, "main_osc", mainxtal_name, - bypass); + hw = at91_clk_register_main_osc(regmap, "main_osc", mainxtal_name, 0); if (IS_ERR(hw)) goto err_free; main_osc_hw = hw; -- GitLab From 5142cbcea324909be03b176540c0c2f3975922b4 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Mon, 30 Nov 2020 10:10:33 +0100 Subject: [PATCH 0917/1894] clk: si5351: Wait for bit clear after PLL reset Documentation states that SI5351_PLL_RESET_B and SI5351_PLL_RESET_A bits are self clearing bits, so wait until they are cleared before continuing. This fixes a case when the clock doesn't come up properly after a PLL reset. It worked properly when the frequency was below 900MHz, but with 900MHz it only works when we are waiting for the bit to clear. Signed-off-by: Sascha Hauer Link: https://lore.kernel.org/r/20201130091033.1687-1-s.hauer@pengutronix.de Signed-off-by: Stephen Boyd --- drivers/clk/clk-si5351.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/clk/clk-si5351.c b/drivers/clk/clk-si5351.c index 1e1702e609cbb..57e4597cdf4c2 100644 --- a/drivers/clk/clk-si5351.c +++ b/drivers/clk/clk-si5351.c @@ -902,6 +902,10 @@ static int _si5351_clkout_set_disable_state( static void _si5351_clkout_reset_pll(struct si5351_driver_data *drvdata, int num) { u8 val = si5351_reg_read(drvdata, SI5351_CLK0_CTRL + num); + u8 mask = val & SI5351_CLK_PLL_SELECT ? SI5351_PLL_RESET_B : + SI5351_PLL_RESET_A; + unsigned int v; + int err; switch (val & SI5351_CLK_INPUT_MASK) { case SI5351_CLK_INPUT_XTAL: @@ -909,9 +913,12 @@ static void _si5351_clkout_reset_pll(struct si5351_driver_data *drvdata, int num return; /* pll not used, no need to reset */ } - si5351_reg_write(drvdata, SI5351_PLL_RESET, - val & SI5351_CLK_PLL_SELECT ? SI5351_PLL_RESET_B : - SI5351_PLL_RESET_A); + si5351_reg_write(drvdata, SI5351_PLL_RESET, mask); + + err = regmap_read_poll_timeout(drvdata->regmap, SI5351_PLL_RESET, v, + !(v & mask), 0, 20000); + if (err < 0) + dev_err(&drvdata->client->dev, "Reset bit didn't clear\n"); dev_dbg(&drvdata->client->dev, "%s - %s: pll = %d\n", __func__, clk_hw_get_name(&drvdata->clkout[num].hw), -- GitLab From 6f37689cf6b38fff96de52e7f0d3e78f22803ba0 Mon Sep 17 00:00:00 2001 From: Terry Zhou Date: Fri, 6 Nov 2020 11:00:39 +0100 Subject: [PATCH 0918/1894] clk: mvebu: a3700: fix the XTAL MODE pin to MPP1_9 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is an error in the current code that the XTAL MODE pin was set to NB MPP1_31 which should be NB MPP1_9. The latch register of NB MPP1_9 has different offset of 0x8. Signed-off-by: Terry Zhou [pali: Fix pin name in commit message] Signed-off-by: Pali Rohár Fixes: 7ea8250406a6 ("clk: mvebu: Add the xtal clock for Armada 3700 SoC") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20201106100039.11385-1-pali@kernel.org Reviewed-by: Marek Behún Signed-off-by: Stephen Boyd --- drivers/clk/mvebu/armada-37xx-xtal.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/mvebu/armada-37xx-xtal.c b/drivers/clk/mvebu/armada-37xx-xtal.c index e9e306d4e9af9..41271351cf1f4 100644 --- a/drivers/clk/mvebu/armada-37xx-xtal.c +++ b/drivers/clk/mvebu/armada-37xx-xtal.c @@ -13,8 +13,8 @@ #include #include -#define NB_GPIO1_LATCH 0xC -#define XTAL_MODE BIT(31) +#define NB_GPIO1_LATCH 0x8 +#define XTAL_MODE BIT(9) static int armada_3700_xtal_clock_probe(struct platform_device *pdev) { -- GitLab From d2d94fc567624f96187e8b52083795620f93e69f Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 12 Dec 2020 13:28:18 +0100 Subject: [PATCH 0919/1894] clk: s2mps11: Fix a resource leak in error handling paths in the probe function Some resource should be released in the error handling path of the probe function, as already done in the remove function. The remove function was fixed in commit bf416bd45738 ("clk: s2mps11: Add missing of_node_put and of_clk_del_provider") Fixes: 7cc560dea415 ("clk: s2mps11: Add support for s2mps11") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/20201212122818.86195-1-christophe.jaillet@wanadoo.fr Reviewed-by: Krzysztof Kozlowski Signed-off-by: Stephen Boyd --- drivers/clk/clk-s2mps11.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/clk-s2mps11.c b/drivers/clk/clk-s2mps11.c index aa21371f9104c..a3e883a9f4067 100644 --- a/drivers/clk/clk-s2mps11.c +++ b/drivers/clk/clk-s2mps11.c @@ -195,6 +195,7 @@ static int s2mps11_clk_probe(struct platform_device *pdev) return ret; err_reg: + of_node_put(s2mps11_clks[0].clk_np); while (--i >= 0) clkdev_drop(s2mps11_clks[i].lookup); -- GitLab From 48f68de00c1405351fa0e7bc44bca067c49cd0a3 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Wed, 2 Dec 2020 21:38:17 +0100 Subject: [PATCH 0920/1894] clk: sunxi-ng: Make sure divider tables have sentinel Two clock divider tables are missing sentinel at the end. Effect of that is that clock framework reads past the last entry. Fix that with adding sentinel at the end. Issue was discovered with KASan. Fixes: 0577e4853bfb ("clk: sunxi-ng: Add H3 clocks") Fixes: c6a0637460c2 ("clk: sunxi-ng: Add A64 clocks") Signed-off-by: Jernej Skrabec Link: https://lore.kernel.org/r/20201202203817.438713-1-jernej.skrabec@siol.net Acked-by: Maxime Ripard Signed-off-by: Stephen Boyd --- drivers/clk/sunxi-ng/ccu-sun50i-a64.c | 1 + drivers/clk/sunxi-ng/ccu-sun8i-h3.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/clk/sunxi-ng/ccu-sun50i-a64.c b/drivers/clk/sunxi-ng/ccu-sun50i-a64.c index 5f66bf8797723..149cfde817cba 100644 --- a/drivers/clk/sunxi-ng/ccu-sun50i-a64.c +++ b/drivers/clk/sunxi-ng/ccu-sun50i-a64.c @@ -389,6 +389,7 @@ static struct clk_div_table ths_div_table[] = { { .val = 1, .div = 2 }, { .val = 2, .div = 4 }, { .val = 3, .div = 6 }, + { /* Sentinel */ }, }; static const char * const ths_parents[] = { "osc24M" }; static struct ccu_div ths_clk = { diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-h3.c b/drivers/clk/sunxi-ng/ccu-sun8i-h3.c index 6b636362379ee..7e629a4493afd 100644 --- a/drivers/clk/sunxi-ng/ccu-sun8i-h3.c +++ b/drivers/clk/sunxi-ng/ccu-sun8i-h3.c @@ -322,6 +322,7 @@ static struct clk_div_table ths_div_table[] = { { .val = 1, .div = 2 }, { .val = 2, .div = 4 }, { .val = 3, .div = 6 }, + { /* Sentinel */ }, }; static SUNXI_CCU_DIV_TABLE_WITH_GATE(ths_clk, "ths", "osc24M", 0x074, 0, 2, ths_div_table, BIT(31), 0); -- GitLab From 11a163f2c7d6a9f27ce144cd7e367a81c851621a Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Sat, 12 Dec 2020 13:57:33 +0000 Subject: [PATCH 0921/1894] clk: ingenic: Fix divider calculation with div tables The previous code assumed that a higher hardware value always resulted in a bigger divider, which is correct for the regular clocks, but is an invalid assumption when a divider table is provided for the clock. Perfect example of this is the PLL0_HALF clock, which applies a /2 divider with the hardware value 0, and a /1 divider otherwise. Fixes: a9fa2893fcc6 ("clk: ingenic: Add support for divider tables") Cc: # 5.2 Signed-off-by: Paul Cercueil Link: https://lore.kernel.org/r/20201212135733.38050-1-paul@crapouillou.net Signed-off-by: Stephen Boyd --- drivers/clk/ingenic/cgu.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/clk/ingenic/cgu.c b/drivers/clk/ingenic/cgu.c index dac6edc670cce..c8e9cb6c8e39c 100644 --- a/drivers/clk/ingenic/cgu.c +++ b/drivers/clk/ingenic/cgu.c @@ -392,15 +392,21 @@ static unsigned int ingenic_clk_calc_hw_div(const struct ingenic_cgu_clk_info *clk_info, unsigned int div) { - unsigned int i; + unsigned int i, best_i = 0, best = (unsigned int)-1; for (i = 0; i < (1 << clk_info->div.bits) && clk_info->div.div_table[i]; i++) { - if (clk_info->div.div_table[i] >= div) - return i; + if (clk_info->div.div_table[i] >= div && + clk_info->div.div_table[i] < best) { + best = clk_info->div.div_table[i]; + best_i = i; + + if (div == best) + break; + } } - return i - 1; + return best_i; } static unsigned -- GitLab From 4b003f5fcadfa2d0e087e907b0c65d023f6e29fb Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 18 Dec 2020 13:52:53 +0100 Subject: [PATCH 0922/1894] clk: vc5: Use "idt,voltage-microvolt" instead of "idt,voltage-microvolts" Commit 45c940184b501fc6 ("dt-bindings: clk: versaclock5: convert to yaml") accidentally changed "idt,voltage-microvolts" to "idt,voltage-microvolt" in the DT bindings, while the driver still used the former. Update the driver to match the bindings, as Documentation/devicetree/bindings/property-units.txt actually recommends using "microvolt". Fixes: 260249f929e81d3d ("clk: vc5: Enable addition output configurations of the Versaclock") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20201218125253.3815567-1-geert+renesas@glider.be Reviewed-by: Luca Ceresoli Signed-off-by: Stephen Boyd --- drivers/clk/clk-versaclock5.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/clk-versaclock5.c b/drivers/clk/clk-versaclock5.c index c90460e7ef215..43db67337bc06 100644 --- a/drivers/clk/clk-versaclock5.c +++ b/drivers/clk/clk-versaclock5.c @@ -739,8 +739,8 @@ static int vc5_update_power(struct device_node *np_output, { u32 value; - if (!of_property_read_u32(np_output, - "idt,voltage-microvolts", &value)) { + if (!of_property_read_u32(np_output, "idt,voltage-microvolt", + &value)) { clk_out->clk_output_cfg0_mask |= VC5_CLK_OUTPUT_CFG0_PWR_MASK; switch (value) { case 1800000: -- GitLab From 09926202e939fd699650ac0fc0baa5757e069390 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 20 Dec 2020 09:09:43 +0100 Subject: [PATCH 0923/1894] ALSA: hda/realtek: Add quirk for MSI-GP73 MSI-GP73 (with SSID 1462:1229) requires yet again ALC1220_FIXUP_CLEVO_P950 quirk like other MSI models. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=210793 Cc: Link: https://lore.kernel.org/r/20201220080943.24839-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index c203eba8d9ff2..5478a89d94e30 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2516,6 +2516,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x1458, 0xa0ce, "Gigabyte X570 Aorus Xtreme", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0x11f7, "MSI-GE63", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0x1228, "MSI-GP63", ALC1220_FIXUP_CLEVO_P950), + SND_PCI_QUIRK(0x1462, 0x1229, "MSI-GP73", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0x1275, "MSI-GL63", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0x1276, "MSI-GL73", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0x1293, "MSI-GP65", ALC1220_FIXUP_CLEVO_P950), -- GitLab From e16ab3db87b3d5d4118dfb68e955f62c4e09573a Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sat, 5 Dec 2020 11:35:25 +0100 Subject: [PATCH 0924/1894] mt76: usb: remove wake logic in mt76u_status_worker Similar to mmio code path, remove wake logic in mt76u_status_worker handler. Starting from commit 90d494c99a99 ("mt76: improve tx queue stop/wake")', the wake queue logic on the usb status path is no longer necessary since the hw queues are no longer stopped on the mt76 tx path. Signed-off-by: Lorenzo Bianconi Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/00009bf0cfdc9565e4432cad3ed51888c667c25d.1607164041.git.lorenzo@kernel.org --- drivers/net/wireless/mediatek/mt76/usb.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/usb.c b/drivers/net/wireless/mediatek/mt76/usb.c index 8b08cad131c84..b95d093728b9b 100644 --- a/drivers/net/wireless/mediatek/mt76/usb.c +++ b/drivers/net/wireless/mediatek/mt76/usb.c @@ -811,7 +811,6 @@ static void mt76u_status_worker(struct mt76_worker *w) struct mt76_dev *dev = container_of(usb, struct mt76_dev, usb); struct mt76_queue_entry entry; struct mt76_queue *q; - bool wake; int i; for (i = 0; i < IEEE80211_NUM_ACS; i++) { @@ -829,10 +828,6 @@ static void mt76u_status_worker(struct mt76_worker *w) mt76_queue_tx_complete(dev, q, &entry); } - wake = q->stopped && q->queued < q->ndesc - 8; - if (wake) - q->stopped = false; - if (!q->queued) wake_up(&dev->tx_wait); @@ -841,8 +836,6 @@ static void mt76u_status_worker(struct mt76_worker *w) if (dev->drv->tx_status_data && !test_and_set_bit(MT76_READING_STATS, &dev->phy.state)) queue_work(dev->wq, &dev->usb.stat_work); - if (wake) - ieee80211_wake_queue(dev->hw, i); } } -- GitLab From 123bb2b737881127b450e8b3b1bae69a8949498e Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sat, 5 Dec 2020 11:35:26 +0100 Subject: [PATCH 0925/1894] mt76: sdio: remove wake logic in mt76s_process_tx_queue Similar to mmio/usb code path, remove wake logic in mt76s_process_tx_queue routine. Starting from commit 90d494c99a99 ("mt76: improve tx queue stop/wake"), the wake queue logic on the sdio status path is no longer necessary since the hw queues are no longer stopped on the mt76 tx path. Signed-off-by: Lorenzo Bianconi Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/d2d7d9d437f4dec2ef1df0ed070b9cf299f021ad.1607164041.git.lorenzo@kernel.org --- drivers/net/wireless/mediatek/mt76/sdio.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/sdio.c b/drivers/net/wireless/mediatek/mt76/sdio.c index 62b5b912818fa..7cd995118257d 100644 --- a/drivers/net/wireless/mediatek/mt76/sdio.c +++ b/drivers/net/wireless/mediatek/mt76/sdio.c @@ -157,7 +157,7 @@ static void mt76s_net_worker(struct mt76_worker *w) static int mt76s_process_tx_queue(struct mt76_dev *dev, struct mt76_queue *q) { - bool wake, mcu = q == dev->q_mcu[MT_MCUQ_WM]; + bool mcu = q == dev->q_mcu[MT_MCUQ_WM]; struct mt76_queue_entry entry; int nframes = 0; @@ -177,21 +177,12 @@ static int mt76s_process_tx_queue(struct mt76_dev *dev, struct mt76_queue *q) nframes++; } - wake = q->stopped && q->queued < q->ndesc - 8; - if (wake) - q->stopped = false; - if (!q->queued) wake_up(&dev->tx_wait); - if (mcu) - goto out; - - mt76_txq_schedule(&dev->phy, q->qid); + if (!mcu) + mt76_txq_schedule(&dev->phy, q->qid); - if (wake) - ieee80211_wake_queue(dev->hw, q->qid); -out: return nframes; } -- GitLab From f7217f718747641fc80cd062f183107439f2a066 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 8 Dec 2020 10:18:11 +0100 Subject: [PATCH 0926/1894] mt76: mt76s: fix NULL pointer dereference in mt76s_process_tx_queue Fix a possible NULL pointer dereference in mt76s_process_tx_queue that can occur if status thread runs before allocating tx queues Fixes: 6a618acb7e62 ("mt76: sdio: convert {status/net}_work to mt76_worker") Signed-off-by: Lorenzo Bianconi Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/b49c1b4edacd87b2241a9fd0431dd4864c8963f6.1607418933.git.lorenzo@kernel.org --- drivers/net/wireless/mediatek/mt76/sdio.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/sdio.c b/drivers/net/wireless/mediatek/mt76/sdio.c index 7cd995118257d..0b6facb17ff72 100644 --- a/drivers/net/wireless/mediatek/mt76/sdio.c +++ b/drivers/net/wireless/mediatek/mt76/sdio.c @@ -157,10 +157,14 @@ static void mt76s_net_worker(struct mt76_worker *w) static int mt76s_process_tx_queue(struct mt76_dev *dev, struct mt76_queue *q) { - bool mcu = q == dev->q_mcu[MT_MCUQ_WM]; struct mt76_queue_entry entry; int nframes = 0; + bool mcu; + if (!q) + return 0; + + mcu = q == dev->q_mcu[MT_MCUQ_WM]; while (q->queued > 0) { if (!q->entry[q->tail].done) break; -- GitLab From 0bd157fa2aaa2c77d6254321d7751aa9eec68c7b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 18 Dec 2020 09:32:02 -0800 Subject: [PATCH 0927/1894] mt76: mt7915: fix MESH ifdef block Fix a build error when CONFIG_MAC80211_MESH is not enabled: ../drivers/net/wireless/mediatek/mt76/mt7915/init.c:47:2: error: expected expression before '}' token }, { ^ Fixes: af901eb4ab80 ("mt76: mt7915: get rid of dbdc debugfs knob") Signed-off-by: Randy Dunlap Cc: Shayne Chen Cc: Ryder Lee Cc: Lorenzo Bianconi Cc: Felix Fietkau Cc: linux-wireless@vger.kernel.org Cc: Kalle Valo Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20201218173202.23159-1-rdunlap@infradead.org --- drivers/net/wireless/mediatek/mt76/mt7915/init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c index ed4635bd151a2..102a8f14c22d4 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c @@ -40,9 +40,9 @@ static const struct ieee80211_iface_limit if_limits[] = { .types = BIT(NL80211_IFTYPE_ADHOC) }, { .max = 16, - .types = BIT(NL80211_IFTYPE_AP) | + .types = BIT(NL80211_IFTYPE_AP) #ifdef CONFIG_MAC80211_MESH - BIT(NL80211_IFTYPE_MESH_POINT) + | BIT(NL80211_IFTYPE_MESH_POINT) #endif }, { .max = MT7915_MAX_INTERFACES, -- GitLab From 00c18640c2430c4bafaaeede1f9dd6f7ec0e4b25 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 20 Dec 2020 10:45:02 -0700 Subject: [PATCH 0928/1894] io_uring: make ctx cancel on exit targeted to actual ctx Before IORING_SETUP_ATTACH_WQ, we could just cancel everything on the io-wq when exiting. But that's not the case if they are shared, so cancel for the specific ctx instead. Cc: stable@vger.kernel.org Fixes: 24369c2e3bb0 ("io_uring: add io-wq workqueue sharing") Signed-off-by: Jens Axboe --- fs/io_uring.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index f3690dfdd564f..48b9332c23540 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8637,6 +8637,13 @@ static void io_ring_exit_work(struct work_struct *work) io_ring_ctx_free(ctx); } +static bool io_cancel_ctx_cb(struct io_wq_work *work, void *data) +{ + struct io_kiocb *req = container_of(work, struct io_kiocb, work); + + return req->ctx == data; +} + static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) { mutex_lock(&ctx->uring_lock); @@ -8651,7 +8658,7 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) io_poll_remove_all(ctx, NULL, NULL); if (ctx->io_wq) - io_wq_cancel_all(ctx->io_wq); + io_wq_cancel_cb(ctx->io_wq, io_cancel_ctx_cb, ctx, true); /* if we failed setting up the ctx, we might not have any rings */ io_iopoll_try_reap_events(ctx); -- GitLab From 446bc1c207331080d8c711a4456799b7d0b9df26 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 20 Dec 2020 10:47:42 -0700 Subject: [PATCH 0929/1894] io-wq: kill now unused io_wq_cancel_all() io_uring no longer issues full cancelations on the io-wq, so remove any remnants of this code and the IO_WQ_BIT_CANCEL flag. Signed-off-by: Jens Axboe --- fs/io-wq.c | 30 +----------------------------- fs/io-wq.h | 2 -- 2 files changed, 1 insertion(+), 31 deletions(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index f72d53848dcbc..a564f36e260c1 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -36,8 +36,7 @@ enum { enum { IO_WQ_BIT_EXIT = 0, /* wq exiting */ - IO_WQ_BIT_CANCEL = 1, /* cancel work on list */ - IO_WQ_BIT_ERROR = 2, /* error on setup */ + IO_WQ_BIT_ERROR = 1, /* error on setup */ }; enum { @@ -561,12 +560,6 @@ get_next: next_hashed = wq_next_work(work); io_impersonate_work(worker, work); - /* - * OK to set IO_WQ_WORK_CANCEL even for uncancellable - * work, the worker function will do the right thing. - */ - if (test_bit(IO_WQ_BIT_CANCEL, &wq->state)) - work->flags |= IO_WQ_WORK_CANCEL; old_work = work; linked = wq->do_work(work); @@ -732,12 +725,6 @@ static inline bool io_wqe_need_worker(struct io_wqe *wqe, int index) return acct->nr_workers < acct->max_workers; } -static bool io_wqe_worker_send_sig(struct io_worker *worker, void *data) -{ - send_sig(SIGINT, worker->task, 1); - return false; -} - /* * Iterate the passed in list and call the specific function for each * worker that isn't exiting @@ -938,21 +925,6 @@ void io_wq_hash_work(struct io_wq_work *work, void *val) work->flags |= (IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT)); } -void io_wq_cancel_all(struct io_wq *wq) -{ - int node; - - set_bit(IO_WQ_BIT_CANCEL, &wq->state); - - rcu_read_lock(); - for_each_node(node) { - struct io_wqe *wqe = wq->wqes[node]; - - io_wq_for_each_worker(wqe, io_wqe_worker_send_sig, NULL); - } - rcu_read_unlock(); -} - struct io_cb_cancel_data { work_cancel_fn *fn; void *data; diff --git a/fs/io-wq.h b/fs/io-wq.h index 75113bcd5889f..b158f8addcf3e 100644 --- a/fs/io-wq.h +++ b/fs/io-wq.h @@ -129,8 +129,6 @@ static inline bool io_wq_is_hashed(struct io_wq_work *work) return work->flags & IO_WQ_WORK_HASHED; } -void io_wq_cancel_all(struct io_wq *wq); - typedef bool (work_cancel_fn)(struct io_wq_work *, void *); enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel, -- GitLab From 55583d72e2303638d30dd4a7aabef59ffa0a017a Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 20 Dec 2020 13:21:43 +0000 Subject: [PATCH 0930/1894] io_uring: always progress task_work on task cancel Might happen that __io_uring_cancel_task_requests() cancels nothing but there are task_works pending. We need to always run them. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 48b9332c23540..d0ab6b503a9f5 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8801,9 +8801,9 @@ static void __io_uring_cancel_task_requests(struct io_ring_ctx *ctx, ret |= io_poll_remove_all(ctx, task, NULL); ret |= io_kill_timeouts(ctx, task, NULL); + ret |= io_run_task_work(); if (!ret) break; - io_run_task_work(); cond_resched(); } } -- GitLab From f57555eda979ca085d2524db81e14b8a6089e15e Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 20 Dec 2020 13:21:44 +0000 Subject: [PATCH 0931/1894] io_uring: end waiting before task cancel attempts Get rid of TASK_UNINTERRUPTIBLE and waiting with finish_wait before going for next iteration in __io_uring_task_cancel(), because __io_uring_files_cancel() doesn't expect that sheduling is disallowed. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index d0ab6b503a9f5..fbf747803dbc1 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8989,9 +8989,9 @@ void __io_uring_task_cancel(void) if (inflight != tctx_inflight(tctx)) continue; schedule(); + finish_wait(&tctx->wait, &wait); } while (1); - finish_wait(&tctx->wait, &wait); atomic_dec(&tctx->in_idle); } -- GitLab From 6c5c16007a11676eb8d4aeeb090a940b20976747 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Sun, 20 Dec 2020 17:57:24 +0900 Subject: [PATCH 0932/1894] dt-bindings: Add Canaan vendor prefix Update Documentation/devicetree/bindings/vendor-prefixes.yaml to include "canaan" as a vendor prefix for "Canaan Inc.". Canaan is the vendor of the Kendryte K210 RISC-V SoC. Signed-off-by: Damien Le Moal Acked-by: Rob Herring Link: https://lore.kernel.org/r/20201220085725.19545-2-damien.lemoal@wdc.com Signed-off-by: Stephen Boyd --- Documentation/devicetree/bindings/vendor-prefixes.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml index 2735be1a84709..a07231aaf3e7c 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.yaml +++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml @@ -179,6 +179,8 @@ patternProperties: description: CALAO Systems SAS "^calxeda,.*": description: Calxeda + "^canaan,.*": + description: Canaan, Inc. "^caninos,.*": description: Caninos Loucos Program "^capella,.*": -- GitLab From 0c797d2c7e82bfec69e8fceb0d03b1e016eed03b Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Sun, 20 Dec 2020 17:57:25 +0900 Subject: [PATCH 0933/1894] dt-binding: clock: Document canaan,k210-clk bindings Document the device tree bindings of the Canaan Kendryte K210 SoC clock driver in Documentation/devicetree/bindings/clock/canaan,k210-clk.yaml. The header file include/dt-bindings/clock/k210-clk.h is modified to include the complete list of IDs for all clocks of the SoC. Signed-off-by: Damien Le Moal Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20201220085725.19545-3-damien.lemoal@wdc.com Signed-off-by: Stephen Boyd --- .../bindings/clock/canaan,k210-clk.yaml | 54 ++++++++++++++++++ include/dt-bindings/clock/k210-clk.h | 56 +++++++++++++++---- 2 files changed, 99 insertions(+), 11 deletions(-) create mode 100644 Documentation/devicetree/bindings/clock/canaan,k210-clk.yaml diff --git a/Documentation/devicetree/bindings/clock/canaan,k210-clk.yaml b/Documentation/devicetree/bindings/clock/canaan,k210-clk.yaml new file mode 100644 index 0000000000000..565ca468cb448 --- /dev/null +++ b/Documentation/devicetree/bindings/clock/canaan,k210-clk.yaml @@ -0,0 +1,54 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/canaan,k210-clk.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Canaan Kendryte K210 Clock Device Tree Bindings + +maintainers: + - Damien Le Moal + +description: | + Canaan Kendryte K210 SoC clocks driver bindings. The clock + controller node must be defined as a child node of the K210 + system controller node. + + See also: + - dt-bindings/clock/k210-clk.h + +properties: + compatible: + const: canaan,k210-clk + + clocks: + description: + Phandle of the SoC 26MHz fixed-rate oscillator clock. + + '#clock-cells': + const: 1 + +required: + - compatible + - '#clock-cells' + - clocks + +additionalProperties: false + +examples: + - | + #include + clocks { + in0: oscillator { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <26000000>; + }; + }; + + /* ... */ + sysclk: clock-controller { + #clock-cells = <1>; + compatible = "canaan,k210-clk"; + clocks = <&in0>; + }; diff --git a/include/dt-bindings/clock/k210-clk.h b/include/dt-bindings/clock/k210-clk.h index 5a2fd64d1a494..a48176ad3c23a 100644 --- a/include/dt-bindings/clock/k210-clk.h +++ b/include/dt-bindings/clock/k210-clk.h @@ -3,18 +3,52 @@ * Copyright (C) 2019-20 Sean Anderson * Copyright (c) 2020 Western Digital Corporation or its affiliates. */ -#ifndef K210_CLK_H -#define K210_CLK_H +#ifndef CLOCK_K210_CLK_H +#define CLOCK_K210_CLK_H /* - * Arbitrary identifiers for clocks. - * The structure is: in0 -> pll0 -> aclk -> cpu - * - * Since we use the hardware defaults for now, set all these to the same clock. + * Kendryte K210 SoC clock identifiers (arbitrary values). */ -#define K210_CLK_PLL0 0 -#define K210_CLK_PLL1 0 -#define K210_CLK_ACLK 0 -#define K210_CLK_CPU 0 +#define K210_CLK_ACLK 0 +#define K210_CLK_CPU 0 +#define K210_CLK_SRAM0 1 +#define K210_CLK_SRAM1 2 +#define K210_CLK_AI 3 +#define K210_CLK_DMA 4 +#define K210_CLK_FFT 5 +#define K210_CLK_ROM 6 +#define K210_CLK_DVP 7 +#define K210_CLK_APB0 8 +#define K210_CLK_APB1 9 +#define K210_CLK_APB2 10 +#define K210_CLK_I2S0 11 +#define K210_CLK_I2S1 12 +#define K210_CLK_I2S2 13 +#define K210_CLK_I2S0_M 14 +#define K210_CLK_I2S1_M 15 +#define K210_CLK_I2S2_M 16 +#define K210_CLK_WDT0 17 +#define K210_CLK_WDT1 18 +#define K210_CLK_SPI0 19 +#define K210_CLK_SPI1 20 +#define K210_CLK_SPI2 21 +#define K210_CLK_I2C0 22 +#define K210_CLK_I2C1 23 +#define K210_CLK_I2C2 24 +#define K210_CLK_SPI3 25 +#define K210_CLK_TIMER0 26 +#define K210_CLK_TIMER1 27 +#define K210_CLK_TIMER2 28 +#define K210_CLK_GPIO 29 +#define K210_CLK_UART1 30 +#define K210_CLK_UART2 31 +#define K210_CLK_UART3 32 +#define K210_CLK_FPIOA 33 +#define K210_CLK_SHA 34 +#define K210_CLK_AES 35 +#define K210_CLK_OTP 36 +#define K210_CLK_RTC 37 -#endif /* K210_CLK_H */ +#define K210_NUM_CLKS 38 + +#endif /* CLOCK_K210_CLK_H */ -- GitLab From b044a535d9a6873a21d622934228cfcc6ee4ea27 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 28 Nov 2020 20:51:02 +0900 Subject: [PATCH 0934/1894] kbuild: doc: update the description about kbuild Makefiles This line was written in 2003. Now we have much more Makefiles. The number of Makefiles is not important. The point is we have a Makefile in (almost) every directory. Signed-off-by: Masahiro Yamada --- Documentation/kbuild/makefiles.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/kbuild/makefiles.rst b/Documentation/kbuild/makefiles.rst index 0d5dd5413af03..a7b874097a917 100644 --- a/Documentation/kbuild/makefiles.rst +++ b/Documentation/kbuild/makefiles.rst @@ -71,7 +71,7 @@ The Makefiles have five parts:: .config the kernel configuration file. arch/$(ARCH)/Makefile the arch Makefile. scripts/Makefile.* common rules etc. for all kbuild Makefiles. - kbuild Makefiles there are about 500 of these. + kbuild Makefiles exist in every subdirectory The top Makefile reads the .config file, which comes from the kernel configuration process. -- GitLab From 8c4d9b145ba39fa31fd225e9051dd562260b6460 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 28 Nov 2020 20:51:03 +0900 Subject: [PATCH 0935/1894] kbuild: doc: replace arch/$(ARCH)/ with arch/$(SRCARCH)/ Precisely speaking, the arch directory is specified by $(SRCARCH), not $(ARCH). In old days, $(ARCH) actually matched to the arch directory because 32-bit and 64-bit were supported as separate architectures. Most architectures (except arm/arm64) were unified like follows: arch/i386, arch/x86_64 -> arch/x86 arch/sh, arch/sh64 -> arch/sh arch/sparc, arch/sparc64 -> arch/sparc To not break the user interface, commit 6752ed90da03 ("Kbuild: allow arch/xxx to use a different source path") introduced SRCARCH to point to the arch directory, still allowing to pass in the former ARCH=i386 or ARCH=x86_64. Update the documents for preciseness, and add the explanation of SRCARCH. Signed-off-by: Masahiro Yamada Reviewed-by: Randy Dunlap --- Documentation/kbuild/makefiles.rst | 61 +++++++++++++++++------------- Documentation/kbuild/modules.rst | 2 +- 2 files changed, 35 insertions(+), 28 deletions(-) diff --git a/Documentation/kbuild/makefiles.rst b/Documentation/kbuild/makefiles.rst index a7b874097a917..896fe1612d555 100644 --- a/Documentation/kbuild/makefiles.rst +++ b/Documentation/kbuild/makefiles.rst @@ -67,11 +67,11 @@ This document describes the Linux kernel Makefiles. The Makefiles have five parts:: - Makefile the top Makefile. - .config the kernel configuration file. - arch/$(ARCH)/Makefile the arch Makefile. - scripts/Makefile.* common rules etc. for all kbuild Makefiles. - kbuild Makefiles exist in every subdirectory + Makefile the top Makefile. + .config the kernel configuration file. + arch/$(SRCARCH)/Makefile the arch Makefile. + scripts/Makefile.* common rules etc. for all kbuild Makefiles. + kbuild Makefiles exist in every subdirectory The top Makefile reads the .config file, which comes from the kernel configuration process. @@ -82,7 +82,7 @@ It builds these goals by recursively descending into the subdirectories of the kernel source tree. The list of subdirectories which are visited depends upon the kernel configuration. The top Makefile textually includes an arch Makefile -with the name arch/$(ARCH)/Makefile. The arch Makefile supplies +with the name arch/$(SRCARCH)/Makefile. The arch Makefile supplies architecture-specific information to the top Makefile. Each subdirectory has a kbuild Makefile which carries out the commands @@ -933,7 +933,7 @@ When "make clean" is executed, make will descend down in arch/x86/boot, and clean as usual. The Makefile located in arch/x86/boot/ may use the subdir- trick to descend further down. -Note 1: arch/$(ARCH)/Makefile cannot use "subdir-", because that file is +Note 1: arch/$(SRCARCH)/Makefile cannot use "subdir-", because that file is included in the top level makefile, and the kbuild infrastructure is not operational at that point. @@ -946,9 +946,9 @@ be visited during "make clean". The top level Makefile sets up the environment and does the preparation, before starting to descend down in the individual directories. The top level makefile contains the generic part, whereas -arch/$(ARCH)/Makefile contains what is required to set up kbuild +arch/$(SRCARCH)/Makefile contains what is required to set up kbuild for said architecture. -To do so, arch/$(ARCH)/Makefile sets up a number of variables and defines +To do so, arch/$(SRCARCH)/Makefile sets up a number of variables and defines a few targets. When kbuild executes, the following steps are followed (roughly): @@ -956,14 +956,14 @@ When kbuild executes, the following steps are followed (roughly): 1) Configuration of the kernel => produce .config 2) Store kernel version in include/linux/version.h 3) Updating all other prerequisites to the target prepare: - - Additional prerequisites are specified in arch/$(ARCH)/Makefile + - Additional prerequisites are specified in arch/$(SRCARCH)/Makefile 4) Recursively descend down in all directories listed in init-* core* drivers-* net-* libs-* and build all targets. - - The values of the above variables are expanded in arch/$(ARCH)/Makefile. + - The values of the above variables are expanded in arch/$(SRCARCH)/Makefile. 5) All object files are then linked and the resulting file vmlinux is located at the root of the obj tree. The very first objects linked are listed in head-y, assigned by - arch/$(ARCH)/Makefile. + arch/$(SRCARCH)/Makefile. 6) Finally, the architecture-specific part does any required post processing and builds the final bootimage. - This includes building boot records @@ -1169,7 +1169,7 @@ When kbuild executes, the following steps are followed (roughly): $(core-y), $(libs-y), $(drivers-y) and $(net-y). The top level Makefile defines values for all generic directories, - and arch/$(ARCH)/Makefile only adds architecture-specific + and arch/$(SRCARCH)/Makefile only adds architecture-specific directories. Example:: @@ -1189,15 +1189,15 @@ When kbuild executes, the following steps are followed (roughly): The actual goals are not standardized across architectures. It is common to locate any additional processing in a boot/ - directory below arch/$(ARCH)/. + directory below arch/$(SRCARCH)/. Kbuild does not provide any smart way to support building a - target specified in boot/. Therefore arch/$(ARCH)/Makefile shall + target specified in boot/. Therefore arch/$(SRCARCH)/Makefile shall call make manually to build a target in boot/. The recommended approach is to include shortcuts in - arch/$(ARCH)/Makefile, and use the full path when calling down - into the arch/$(ARCH)/boot/Makefile. + arch/$(SRCARCH)/Makefile, and use the full path when calling down + into the arch/$(SRCARCH)/boot/Makefile. Example:: @@ -1217,7 +1217,7 @@ When kbuild executes, the following steps are followed (roughly): #arch/x86/Makefile define archhelp - echo '* bzImage - Image (arch/$(ARCH)/boot/bzImage)' + echo '* bzImage - Compressed kernel image (arch/x86/boot/bzImage)' endif When make is executed without arguments, the first goal encountered @@ -1332,7 +1332,7 @@ When kbuild executes, the following steps are followed (roughly): objcopy Copy binary. Uses OBJCOPYFLAGS usually specified in - arch/$(ARCH)/Makefile. + arch/$(SRCARCH)/Makefile. OBJCOPYFLAGS_$@ may be used to set additional options. gzip @@ -1395,7 +1395,7 @@ When kbuild executes, the following steps are followed (roughly): -------------------------------- When the vmlinux image is built, the linker script - arch/$(ARCH)/kernel/vmlinux.lds is used. + arch/$(SRCARCH)/kernel/vmlinux.lds is used. The script is a preprocessed variant of the file vmlinux.lds.S located in the same directory. kbuild knows .lds files and includes a rule `*lds.S` -> `*lds`. @@ -1405,9 +1405,6 @@ When kbuild executes, the following steps are followed (roughly): #arch/x86/kernel/Makefile extra-y := vmlinux.lds - #Makefile - export CPPFLAGS_vmlinux.lds += -P -C -U$(ARCH) - The assignment to extra-y is used to tell kbuild to build the target vmlinux.lds. The assignment to $(CPPFLAGS_vmlinux.lds) tells kbuild to use the @@ -1481,7 +1478,7 @@ See subsequent chapter for the syntax of the Kbuild file. If an architecture uses a verbatim copy of a header from include/asm-generic then this is listed in the file - arch/$(ARCH)/include/asm/Kbuild like this: + arch/$(SRCARCH)/include/asm/Kbuild like this: Example:: @@ -1492,7 +1489,7 @@ See subsequent chapter for the syntax of the Kbuild file. During the prepare phase of the build a wrapper include file is generated in the directory:: - arch/$(ARCH)/include/generated/asm + arch/$(SRCARCH)/include/generated/asm When a header is exported where the architecture uses the generic header a similar wrapper is generated as part @@ -1527,8 +1524,8 @@ See subsequent chapter for the syntax of the Kbuild file. to define the minimum set of ASM headers that all architectures must have. This works like optional generic-y. If a mandatory header is missing - in arch/$(ARCH)/include/(uapi/)/asm, Kbuild will automatically generate - a wrapper of the asm-generic one. + in arch/$(SRCARCH)/include/(uapi/)/asm, Kbuild will automatically + generate a wrapper of the asm-generic one. 9 Kbuild Variables ================== @@ -1564,6 +1561,16 @@ The top Makefile exports the following variables: make ARCH=m68k ... + SRCARCH + This variable specifies the directory in arch/ to build. + + ARCH and SRCARCH may not necessarily match. A couple of arch + directories are biarch, that is, a single `arch/*/` directory supports + both 32-bit and 64-bit. + + For example, you can pass in ARCH=i386, ARCH=x86_64, or ARCH=x86. + For all of them, SRCARCH=x86 because arch/x86/ supports both i386 and + x86_64. INSTALL_PATH This variable defines a place for the arch Makefiles to install diff --git a/Documentation/kbuild/modules.rst b/Documentation/kbuild/modules.rst index 85ccc878895e7..a1f3eb7a43e23 100644 --- a/Documentation/kbuild/modules.rst +++ b/Documentation/kbuild/modules.rst @@ -332,7 +332,7 @@ according to the following rule: There are two notable exceptions to this rule: larger subsystems have their own directory under include/, such as include/scsi; and architecture specific headers are located - under arch/$(ARCH)/include/. + under arch/$(SRCARCH)/include/. 4.1 Kernel Includes ------------------- -- GitLab From 23b53061ad5dd435d4d35c842cd84047dbbe2919 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 28 Nov 2020 20:51:04 +0900 Subject: [PATCH 0936/1894] kbuild: doc: fix 'List directories to visit when descending' section Fix stale information: - Fix the section number in the reference from 6.4 to 7.4. - Remove init-y and net-y. They were removed by commit 23febe375d94 ("kbuild: merge init-y into core-y") and commit 95fb6317b3ab ("kbuild: merge net-y and virt-y into drivers-y"), respectively. - Update the example because arch/sparc64/Makefile does not exit. Signed-off-by: Masahiro Yamada --- Documentation/kbuild/makefiles.rst | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/Documentation/kbuild/makefiles.rst b/Documentation/kbuild/makefiles.rst index 896fe1612d555..9636e81bed782 100644 --- a/Documentation/kbuild/makefiles.rst +++ b/Documentation/kbuild/makefiles.rst @@ -278,7 +278,7 @@ more details, with real examples. actually recognize that there is a lib.a being built, the directory shall be listed in libs-y. - See also "6.4 List directories to visit when descending". + See also "7.4 List directories to visit when descending". Use of lib-y is normally restricted to `lib/` and `arch/*/lib`. @@ -1154,7 +1154,7 @@ When kbuild executes, the following steps are followed (roughly): machinery is all architecture-independent. - head-y, init-y, core-y, libs-y, drivers-y, net-y + head-y, core-y, libs-y, drivers-y $(head-y) lists objects to be linked first in vmlinux. $(libs-y) lists directories where a lib.a archive can be located. @@ -1162,11 +1162,9 @@ When kbuild executes, the following steps are followed (roughly): The rest list directories where a built-in.a object file can be located. - $(init-y) objects will be located after $(head-y). - Then the rest follows in this order: - $(core-y), $(libs-y), $(drivers-y) and $(net-y). + $(core-y), $(libs-y), $(drivers-y) The top level Makefile defines values for all generic directories, and arch/$(SRCARCH)/Makefile only adds architecture-specific @@ -1174,11 +1172,14 @@ When kbuild executes, the following steps are followed (roughly): Example:: - #arch/sparc64/Makefile - core-y += arch/sparc64/kernel/ - libs-y += arch/sparc64/prom/ arch/sparc64/lib/ - drivers-$(CONFIG_OPROFILE) += arch/sparc64/oprofile/ + # arch/sparc/Makefile + core-y += arch/sparc/ + + libs-y += arch/sparc/prom/ + libs-y += arch/sparc/lib/ + drivers-$(CONFIG_PM) += arch/sparc/power/ + drivers-$(CONFIG_OPROFILE) += arch/sparc/oprofile/ 7.5 Architecture-specific boot images ------------------------------------- -- GitLab From 41cac0834f885fac9b655eaa9214526c0c1d9afe Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 28 Nov 2020 20:51:05 +0900 Subject: [PATCH 0937/1894] kbuild: doc: merge 'Special Rules' and 'Custom kbuild commands' sections The two sections "3.10 Special Rules" and "7.8 Custom kbuild commands" are related because you must understand both of them when you write custom rules. Actually I do not understand the policy about what to go into "3 The kbuild files" and what into "7 Architecture Makefile". This commit reworks the custom rule explanation as follows: - Merged "7.8 Custom kbuild commands" into "3.10 Special Rules". - Reword "Special Rules" to "Custom Rules" for consistency. - Update the example for kecho because the blackfin Makefile does not exist any more. - Replace the example for cmd_ with a simpler one. Signed-off-by: Masahiro Yamada --- Documentation/kbuild/makefiles.rst | 88 ++++++++++++++---------------- 1 file changed, 40 insertions(+), 48 deletions(-) diff --git a/Documentation/kbuild/makefiles.rst b/Documentation/kbuild/makefiles.rst index 9636e81bed782..87ff7c6777491 100644 --- a/Documentation/kbuild/makefiles.rst +++ b/Documentation/kbuild/makefiles.rst @@ -18,7 +18,7 @@ This document describes the Linux kernel Makefiles. --- 3.7 Compilation flags --- 3.8 --- 3.9 Dependency tracking - --- 3.10 Special Rules + --- 3.10 Custom Rules --- 3.11 $(CC) support functions --- 3.12 $(LD) support functions --- 3.13 Script Invocation @@ -46,7 +46,7 @@ This document describes the Linux kernel Makefiles. --- 7.5 Architecture-specific boot images --- 7.6 Building non-kbuild targets --- 7.7 Commands useful for building a boot image - --- 7.8 Custom kbuild commands + --- 7.8 --- 7.9 Preprocessing linker scripts --- 7.10 Generic header files --- 7.11 Post-link pass @@ -422,21 +422,21 @@ more details, with real examples. Thus, if you change an option to $(CC) all affected files will be re-compiled. -3.10 Special Rules +3.10 Custom Rules ------------------ - Special rules are used when the kbuild infrastructure does + Custom rules are used when the kbuild infrastructure does not provide the required support. A typical example is header files generated during the build process. Another example are the architecture-specific Makefiles which - need special rules to prepare boot images etc. + need custom rules to prepare boot images etc. - Special rules are written as normal Make rules. + Custom rules are written as normal Make rules. Kbuild is not executing in the directory where the Makefile is - located, so all special rules shall provide a relative + located, so all custom rules shall use a relative path to prerequisite files and target files. - Two variables are used when defining special rules: + Two variables are used when defining custom rules: $(src) $(src) is a relative path which points to the directory @@ -454,7 +454,7 @@ more details, with real examples. $(obj)/53c8xx_d.h: $(src)/53c7,8xx.scr $(src)/script_asm.pl $(CPP) -DCHIP=810 - < $< | ... $(src)/script_asm.pl - This is a special rule, following the normal syntax + This is a custom rule, following the normal syntax required by make. The target file depends on two prerequisite files. References @@ -471,11 +471,33 @@ more details, with real examples. Example:: - #arch/blackfin/boot/Makefile - $(obj)/vmImage: $(obj)/vmlinux.gz - $(call if_changed,uimage) - @$(kecho) 'Kernel: $@ is ready' + # arch/arm/Makefile + $(BOOT_TARGETS): vmlinux + $(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $(boot)/$@ + @$(kecho) ' Kernel: $(boot)/$@ is ready' + When kbuild is executing with KBUILD_VERBOSE=0, then only a shorthand + of a command is normally displayed. + To enable this behaviour for custom commands kbuild requires + two variables to be set:: + + quiet_cmd_ - what shall be echoed + cmd_ - the command to execute + + Example:: + + # lib/Makefile + quiet_cmd_crc32 = GEN $@ + cmd_crc32 = $< > $@ + + $(obj)/crc32table.h: $(obj)/gen_crc32table + $(call cmd,crc32) + + When updating the $(obj)/crc32table.h target, the line: + + GEN lib/crc32table.h + + will be displayed with "make KBUILD_VERBOSE=0". 3.11 $(CC) support functions ---------------------------- @@ -744,7 +766,7 @@ Both possibilities are described in the following. as a prerequisite. This is possible in two ways: - (1) List the prerequisite explicitly in a special rule. + (1) List the prerequisite explicitly in a custom rule. Example:: @@ -755,11 +777,11 @@ Both possibilities are described in the following. The target $(obj)/devlist.h will not be built before $(obj)/gen-devlist is updated. Note that references to - the host programs in special rules must be prefixed with $(obj). + the host programs in custom rules must be prefixed with $(obj). (2) Use always-y - When there is no suitable special rule, and the host program + When there is no suitable custom rule, and the host program shall be built when a makefile is entered, the always-y variable shall be used. @@ -1281,8 +1303,8 @@ When kbuild executes, the following steps are followed (roughly): otherwise the command line check will fail, and the target will always be built. Assignments to $(targets) are without $(obj)/ prefix. - if_changed may be used in conjunction with custom commands as - defined in 7.8 "Custom kbuild commands". + if_changed may be used in conjunction with custom rules as + defined in "3.10 Custom Rules". Note: It is a typical mistake to forget the FORCE prerequisite. Another common pitfall is that whitespace is sometimes @@ -1362,36 +1384,6 @@ When kbuild executes, the following steps are followed (roughly): targets += $(dtb-y) DTC_FLAGS ?= -p 1024 -7.8 Custom kbuild commands --------------------------- - - When kbuild is executing with KBUILD_VERBOSE=0, then only a shorthand - of a command is normally displayed. - To enable this behaviour for custom commands kbuild requires - two variables to be set:: - - quiet_cmd_ - what shall be echoed - cmd_ - the command to execute - - Example:: - - # - quiet_cmd_image = BUILD $@ - cmd_image = $(obj)/tools/build $(BUILDFLAGS) \ - $(obj)/vmlinux.bin > $@ - - targets += bzImage - $(obj)/bzImage: $(obj)/vmlinux.bin $(obj)/tools/build FORCE - $(call if_changed,image) - @echo 'Kernel: $@ is ready' - - When updating the $(obj)/bzImage target, the line: - - BUILD arch/x86/boot/bzImage - - will be displayed with "make KBUILD_VERBOSE=0". - - 7.9 Preprocessing linker scripts -------------------------------- -- GitLab From 39bb232ae614a6c905f92a535b5b54c4289d1665 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 28 Nov 2020 20:51:06 +0900 Subject: [PATCH 0938/1894] kbuild: doc: split if_changed explanation to a separate section The if_changed macro is currently explained in the section "Commands useful for building a boot image", but the use of if_changed is not limited to the boot image. It is often used together with custom rules. Let's split it as a separate section, and insert it after the "Custom Rules" section. I slightly reworded the explanation, re-numbered to fill the section, and also fixed the broken indentation of the Note: part. Signed-off-by: Masahiro Yamada --- Documentation/kbuild/makefiles.rst | 94 +++++++++++++++++------------- 1 file changed, 52 insertions(+), 42 deletions(-) diff --git a/Documentation/kbuild/makefiles.rst b/Documentation/kbuild/makefiles.rst index 87ff7c6777491..a573ee998cf5e 100644 --- a/Documentation/kbuild/makefiles.rst +++ b/Documentation/kbuild/makefiles.rst @@ -16,9 +16,9 @@ This document describes the Linux kernel Makefiles. --- 3.5 Library file goals - lib-y --- 3.6 Descending down in directories --- 3.7 Compilation flags - --- 3.8 - --- 3.9 Dependency tracking - --- 3.10 Custom Rules + --- 3.8 Dependency tracking + --- 3.9 Custom Rules + --- 3.10 Command change detection --- 3.11 $(CC) support functions --- 3.12 $(LD) support functions --- 3.13 Script Invocation @@ -410,7 +410,7 @@ more details, with real examples. AFLAGS_iwmmxt.o := -Wa,-mcpu=iwmmxt -3.9 Dependency tracking +3.8 Dependency tracking ----------------------- Kbuild tracks dependencies on the following: @@ -422,8 +422,8 @@ more details, with real examples. Thus, if you change an option to $(CC) all affected files will be re-compiled. -3.10 Custom Rules ------------------- +3.9 Custom Rules +---------------- Custom rules are used when the kbuild infrastructure does not provide the required support. A typical example is @@ -499,6 +499,52 @@ more details, with real examples. will be displayed with "make KBUILD_VERBOSE=0". +3.10 Command change detection +----------------------------- + + When the rule is evaluated, timestamps are compared between the target + and its prerequisite files. GNU Make updates the target when any of the + prerequisites is newer than that. + + The target should be rebuilt also when the command line has changed + since the last invocation. This is not supported by Make itself, so + Kbuild achieves this by a kind of meta-programming. + + if_changed is the macro used for this purpose, in the following form:: + + quiet_cmd_ = ... + cmd_ = ... + + : FORCE + $(call if_changed,) + + Any target that utilizes if_changed must be listed in $(targets), + otherwise the command line check will fail, and the target will + always be built. + + If the target is already listed in the recognized syntax such as + obj-y/m, lib-y/m, extra-y/m, always-y/m, hostprogs, userprogs, Kbuild + automatically adds it to $(targets). Otherwise, the target must be + explicitly added to $(targets). + + Assignments to $(targets) are without $(obj)/ prefix. if_changed may be + used in conjunction with custom rules as defined in "3.9 Custom Rules". + + Note: It is a typical mistake to forget the FORCE prerequisite. + Another common pitfall is that whitespace is sometimes significant; for + instance, the below will fail (note the extra space after the comma):: + + target: source(s) FORCE + + **WRONG!** $(call if_changed, objcopy) + + Note: + if_changed should not be used more than once per target. + It stores the executed command in a corresponding .cmd + file and multiple calls would result in overwrites and + unwanted results when the target is up to date and only the + tests on changed commands trigger execution of commands. + 3.11 $(CC) support functions ---------------------------- @@ -1287,42 +1333,6 @@ When kbuild executes, the following steps are followed (roughly): Kbuild provides a few macros that are useful when building a boot image. - if_changed - if_changed is the infrastructure used for the following commands. - - Usage:: - - target: source(s) FORCE - $(call if_changed,ld/objcopy/gzip/...) - - When the rule is evaluated, it is checked to see if any files - need an update, or the command line has changed since the last - invocation. The latter will force a rebuild if any options - to the executable have changed. - Any target that utilises if_changed must be listed in $(targets), - otherwise the command line check will fail, and the target will - always be built. - Assignments to $(targets) are without $(obj)/ prefix. - if_changed may be used in conjunction with custom rules as - defined in "3.10 Custom Rules". - - Note: It is a typical mistake to forget the FORCE prerequisite. - Another common pitfall is that whitespace is sometimes - significant; for instance, the below will fail (note the extra space - after the comma):: - - target: source(s) FORCE - - **WRONG!** $(call if_changed, ld/objcopy/gzip/...) - - Note: - if_changed should not be used more than once per target. - It stores the executed command in a corresponding .cmd - - file and multiple calls would result in overwrites and - unwanted results when the target is up to date and only the - tests on changed commands trigger execution of commands. - ld Link target. Often, LDFLAGS_$@ is used to set specific options to ld. -- GitLab From d0e628cd817f3b67ad80cceaf527c7bb37c27b1c Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 28 Nov 2020 20:51:07 +0900 Subject: [PATCH 0939/1894] kbuild: doc: clarify the difference between extra-y and always-y The difference between extra-y and always-y is obscure. Basically, Kbuild builds targets listed in extra-y and always-y in visited Makefiles without relying on any dependency. The difference is that extra-y is used to list the targets needed for vmlinux whereas always-y is used to list the targets that must be always built irrespective of final targets. Kbuild skips extra-y when it is building only modules (i.e. 'make modules'). This is the long-standing behavior since extra-y was introduced in 2003, and it is explained in that commit log [1]. For clarification, this is the extra-y vs always-y table: extra-y always-y 'make' y y 'make vmlinux' y y 'make modules' n y Kbuild skips extra-y also when building external modules since obviously it never builds vmlinux. Unfortunately, extra-y is wrongly used in many places of upstream code, and even in external modules. Using extra-y in external module Makefiles is wrong. What you should use is probably always-y or 'targets'. The current documentation for extra-y is misleading. I rewrote it, and moved it to the section 3.7. always-y is not documented anywhere. I added. [1]: https://git.kernel.org/pub/scm/linux/kernel/git/history/history.git/commit/?id=f94e5fd7e5d09a56a60670a9bb211a791654bba8 Signed-off-by: Masahiro Yamada Reviewed-by: Randy Dunlap --- Documentation/kbuild/makefiles.rst | 110 +++++++++++++++++++---------- 1 file changed, 71 insertions(+), 39 deletions(-) diff --git a/Documentation/kbuild/makefiles.rst b/Documentation/kbuild/makefiles.rst index a573ee998cf5e..3f1697ce1cd46 100644 --- a/Documentation/kbuild/makefiles.rst +++ b/Documentation/kbuild/makefiles.rst @@ -15,13 +15,15 @@ This document describes the Linux kernel Makefiles. --- 3.4 Objects which export symbols --- 3.5 Library file goals - lib-y --- 3.6 Descending down in directories - --- 3.7 Compilation flags - --- 3.8 Dependency tracking - --- 3.9 Custom Rules - --- 3.10 Command change detection - --- 3.11 $(CC) support functions - --- 3.12 $(LD) support functions - --- 3.13 Script Invocation + --- 3.7 Non-builtin vmlinux targets - extra-y + --- 3.8 Always built goals - always-y + --- 3.9 Compilation flags + --- 3.10 Dependency tracking + --- 3.11 Custom Rules + --- 3.12 Command change detection + --- 3.13 $(CC) support functions + --- 3.14 $(LD) support functions + --- 3.15 Script Invocation === 4 Host Program support --- 4.1 Simple Host Program @@ -321,7 +323,60 @@ more details, with real examples. names. This allows kbuild to totally skip the directory if the corresponding `CONFIG_` option is neither 'y' nor 'm'. -3.7 Compilation flags +3.7 Non-builtin vmlinux targets - extra-y +----------------------------------------- + + extra-y specifies targets which are needed for building vmlinux, + but not combined into built-in.a. + + Examples are: + + 1) head objects + + Some objects must be placed at the head of vmlinux. They are + directly linked to vmlinux without going through built-in.a + A typical use-case is an object that contains the entry point. + + arch/$(SRCARCH)/Makefile should specify such objects as head-y. + + Discussion: + Given that we can control the section order in the linker script, + why do we need head-y? + + 2) vmlinux linker script + + The linker script for vmlinux is located at + arch/$(SRCARCH)/kernel/vmlinux.lds + + Example:: + + # arch/x86/kernel/Makefile + extra-y := head_$(BITS).o + extra-y += head$(BITS).o + extra-y += ebda.o + extra-y += platform-quirks.o + extra-y += vmlinux.lds + + $(extra-y) should only contain targets needed for vmlinux. + + Kbuild skips extra-y when vmlinux is apparently not a final goal. + (e.g. 'make modules', or building external modules) + + If you intend to build targets unconditionally, always-y (explained + in the next section) is the correct syntax to use. + +3.8 Always built goals - always-y +--------------------------------- + + always-y specifies targets which are literally always built when + Kbuild visits the Makefile. + + Example:: + # ./Kbuild + offsets-file := include/generated/asm-offsets.h + always-y += $(offsets-file) + +3.9 Compilation flags --------------------- ccflags-y, asflags-y and ldflags-y @@ -410,8 +465,8 @@ more details, with real examples. AFLAGS_iwmmxt.o := -Wa,-mcpu=iwmmxt -3.8 Dependency tracking ------------------------ +3.10 Dependency tracking +------------------------ Kbuild tracks dependencies on the following: @@ -422,8 +477,8 @@ more details, with real examples. Thus, if you change an option to $(CC) all affected files will be re-compiled. -3.9 Custom Rules ----------------- +3.11 Custom Rules +----------------- Custom rules are used when the kbuild infrastructure does not provide the required support. A typical example is @@ -499,7 +554,7 @@ more details, with real examples. will be displayed with "make KBUILD_VERBOSE=0". -3.10 Command change detection +3.12 Command change detection ----------------------------- When the rule is evaluated, timestamps are compared between the target @@ -545,7 +600,7 @@ more details, with real examples. unwanted results when the target is up to date and only the tests on changed commands trigger execution of commands. -3.11 $(CC) support functions +3.13 $(CC) support functions ---------------------------- The kernel may be built with several different versions of @@ -660,7 +715,7 @@ more details, with real examples. endif endif -3.12 $(LD) support functions +3.14 $(LD) support functions ---------------------------- ld-option @@ -674,7 +729,7 @@ more details, with real examples. #Makefile LDFLAGS_vmlinux += $(call ld-option, -X) -3.13 Script invocation +3.15 Script invocation ---------------------- Make rules may invoke scripts to build the kernel. The rules shall @@ -1304,29 +1359,6 @@ When kbuild executes, the following steps are followed (roughly): When "make" is executed without arguments, bzImage will be built. -7.6 Building non-kbuild targets -------------------------------- - - extra-y - extra-y specifies additional targets created in the current - directory, in addition to any targets specified by `obj-*`. - - Listing all targets in extra-y is required for two purposes: - - 1) Enable kbuild to check changes in command lines - - - When $(call if_changed,xxx) is used - - 2) kbuild knows what files to delete during "make clean" - - Example:: - - #arch/x86/kernel/Makefile - extra-y := head.o init_task.o - - In this example, extra-y is used to list object files that - shall be built, but shall not be linked as part of built-in.a. - 7.7 Commands useful for building a boot image --------------------------------------------- -- GitLab From c0ea806f874eb32894249b4a67c2f2452881b775 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 28 Nov 2020 20:51:08 +0900 Subject: [PATCH 0940/1894] kbuild: doc: document subdir-y syntax There is no explanation about subdir-y. Let's document it. Signed-off-by: Masahiro Yamada Reviewed-by: Randy Dunlap --- Documentation/kbuild/makefiles.rst | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/Documentation/kbuild/makefiles.rst b/Documentation/kbuild/makefiles.rst index 3f1697ce1cd46..d36768cf12506 100644 --- a/Documentation/kbuild/makefiles.rst +++ b/Documentation/kbuild/makefiles.rst @@ -319,6 +319,21 @@ more details, with real examples. that directory specifies obj-y, those objects will be left orphan. It is very likely a bug of the Makefile or of dependencies in Kconfig. + Kbuild also supports dedicated syntax, subdir-y and subdir-m, for + descending into subdirectories. It is a good fit when you know they + do not contain kernel-space objects at all. A typical usage is to let + Kbuild descend into subdirectories to build tools. + + Examples:: + + # scripts/Makefile + subdir-$(CONFIG_GCC_PLUGINS) += gcc-plugins + subdir-$(CONFIG_MODVERSIONS) += genksyms + subdir-$(CONFIG_SECURITY_SELINUX) += selinux + + Unlike obj-y/m, subdir-y/m does not need the trailing slash since this + syntax is always used for directories. + It is good practice to use a `CONFIG_` variable when assigning directory names. This allows kbuild to totally skip the directory if the corresponding `CONFIG_` option is neither 'y' nor 'm'. -- GitLab From 436e980e2ed526832de822cbf13c317a458b78e1 Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Tue, 1 Dec 2020 14:17:30 +0100 Subject: [PATCH 0941/1894] kbuild: don't hardcode depmod path depmod is not guaranteed to be in /sbin, just let make look for it in the path like all the other invoked programs Signed-off-by: Dominique Martinet Signed-off-by: Masahiro Yamada --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9ec53d947628d..f01c247eba583 100644 --- a/Makefile +++ b/Makefile @@ -450,7 +450,7 @@ LEX = flex YACC = bison AWK = awk INSTALLKERNEL := installkernel -DEPMOD = /sbin/depmod +DEPMOD = depmod PERL = perl PYTHON = python PYTHON3 = python3 -- GitLab From bc72d723ec6b75c53e935e819682c3e67b83e9c1 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 1 Dec 2020 19:34:14 +0900 Subject: [PATCH 0942/1894] modpost: rename merror() to error() The log function names, warn(), merror(), fatal() are inconsistent. Commit 2a11665945d5 ("kbuild: distinguish between errors and warnings in modpost") intentionally chose merror() to avoid the conflict with the library function error(). See man page of error(3). But, we are already causing the conflict with warn() because it is also a library function. See man page of warn(3). err() would be a problem for the same reason. The common technique to work around name conflicts is to use macros. For example: /* in a header */ #define error(fmt, ...) __error(fmt, ##__VA_ARGS__) #define warn(fmt, ...) __warn(fmt, ##__VA_ARGS__) /* function definition */ void __error(const char *fmt, ...) { } void __warn(const char *fmt, ...) { } In this way, we can implement our own warn() and error(), still we can include and with no problem. And, commit 93c95e526a4e ("modpost: rework and consolidate logging interface") already did that. Since the log functions are all macros, we can use error() without causing "conflicting types" errors. Signed-off-by: Masahiro Yamada --- scripts/mod/modpost.c | 10 +++++----- scripts/mod/modpost.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index f882ce0d9327f..337f6ca4bda39 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -403,8 +403,8 @@ static void sym_update_namespace(const char *symname, const char *namespace) * actually an assertion. */ if (!s) { - merror("Could not update namespace(%s) for symbol %s\n", - namespace, symname); + error("Could not update namespace(%s) for symbol %s\n", + namespace, symname); return; } @@ -2226,7 +2226,7 @@ static int check_modname_len(struct module *mod) else mod_name++; if (strlen(mod_name) >= MODULE_NAME_LEN) { - merror("module name is too long [%s.ko]\n", mod->name); + error("module name is too long [%s.ko]\n", mod->name); return 1; } @@ -2319,8 +2319,8 @@ static int add_versions(struct buffer *b, struct module *mod) continue; } if (strlen(s->name) >= MODULE_NAME_LEN) { - merror("too long symbol \"%s\" [%s.ko]\n", - s->name, mod->name); + error("too long symbol \"%s\" [%s.ko]\n", + s->name, mod->name); err = 1; break; } diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h index 3aa052722233b..f453504ad4df1 100644 --- a/scripts/mod/modpost.h +++ b/scripts/mod/modpost.h @@ -202,5 +202,5 @@ enum loglevel { void modpost_log(enum loglevel loglevel, const char *fmt, ...); #define warn(fmt, args...) modpost_log(LOG_WARN, fmt, ##args) -#define merror(fmt, args...) modpost_log(LOG_ERROR, fmt, ##args) +#define error(fmt, args...) modpost_log(LOG_ERROR, fmt, ##args) #define fatal(fmt, args...) modpost_log(LOG_FATAL, fmt, ##args) -- GitLab From 0fd3fbadd9a85e391828f3ef63ef1e96e2d2d752 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 1 Dec 2020 19:34:15 +0900 Subject: [PATCH 0943/1894] modpost: refactor error handling and clarify error/fatal difference We have 3 log functions. fatal() is special because it lets modpost bail out immediately. The difference between warn() and error() is the only prefix parts ("WARNING:" vs "ERROR:"). In my understanding, the expected handling of error() is to propagate the return code of the function to the exit code of modpost, as check_exports() etc. already does. This is a good manner in general because we should display as many error messages as possible in a single run of modpost. What is annoying about fatal() is that it kills modpost at the first error. People would need to run Kbuild again and again until they fix all errors. But, unfortunately, people tend to do: "This case should not be allowed. Let's replace warn() with fatal()." One of the reasons is probably it is tedious to manually hoist the error code to the main() function. This commit refactors error() so any single call for it automatically makes modpost return the error code. I also added comments in modpost.h for warn(), error(), and fatal(). Please use fatal() only when you have a strong reason to do so. For example: - Memory shortage (i.e. malloc() etc. has failed) - The ELF file is broken, and there is no point to continue parsing - Something really odd has happened For general coding errors, please use error(). Signed-off-by: Masahiro Yamada Tested-by: Quentin Perret --- scripts/mod/modpost.c | 43 ++++++++++++++----------------------------- scripts/mod/modpost.h | 13 +++++++++++++ 2 files changed, 27 insertions(+), 29 deletions(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 337f6ca4bda39..43e00867623a5 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -40,6 +40,8 @@ static int ignore_missing_files; /* If set to 1, only warn (instead of error) about missing ns imports */ static int allow_missing_ns_imports; +static bool error_occurred; + enum export { export_plain, export_unused, export_gpl, export_unused_gpl, export_gpl_future, export_unknown @@ -78,6 +80,8 @@ modpost_log(enum loglevel loglevel, const char *fmt, ...) if (loglevel == LOG_FATAL) exit(1); + if (loglevel == LOG_ERROR) + error_occurred = true; } static inline bool strends(const char *str, const char *postfix) @@ -2174,22 +2178,18 @@ static void check_for_unused(enum export exp, const char *m, const char *s) } } -static int check_exports(struct module *mod) +static void check_exports(struct module *mod) { struct symbol *s, *exp; - int err = 0; for (s = mod->unres; s; s = s->next) { const char *basename; exp = find_symbol(s->name); if (!exp || exp->module == mod) { - if (have_vmlinux && !s->weak) { + if (have_vmlinux && !s->weak) modpost_log(warn_unresolved ? LOG_WARN : LOG_ERROR, "\"%s\" [%s.ko] undefined!\n", s->name, mod->name); - if (!warn_unresolved) - err = 1; - } continue; } basename = strrchr(mod->name, '/'); @@ -2203,8 +2203,6 @@ static int check_exports(struct module *mod) modpost_log(allow_missing_ns_imports ? LOG_WARN : LOG_ERROR, "module %s uses symbol %s from namespace %s, but does not import it.\n", basename, exp->name, exp->namespace); - if (!allow_missing_ns_imports) - err = 1; add_namespace(&mod->missing_namespaces, exp->namespace); } @@ -2212,11 +2210,9 @@ static int check_exports(struct module *mod) check_for_gpl_usage(exp->export, basename, exp->name); check_for_unused(exp->export, basename, exp->name); } - - return err; } -static int check_modname_len(struct module *mod) +static void check_modname_len(struct module *mod) { const char *mod_name; @@ -2225,12 +2221,8 @@ static int check_modname_len(struct module *mod) mod_name = mod->name; else mod_name++; - if (strlen(mod_name) >= MODULE_NAME_LEN) { + if (strlen(mod_name) >= MODULE_NAME_LEN) error("module name is too long [%s.ko]\n", mod->name); - return 1; - } - - return 0; } /** @@ -2289,10 +2281,9 @@ static void add_staging_flag(struct buffer *b, const char *name) /** * Record CRCs for unresolved symbols **/ -static int add_versions(struct buffer *b, struct module *mod) +static void add_versions(struct buffer *b, struct module *mod) { struct symbol *s, *exp; - int err = 0; for (s = mod->unres; s; s = s->next) { exp = find_symbol(s->name); @@ -2304,7 +2295,7 @@ static int add_versions(struct buffer *b, struct module *mod) } if (!modversions) - return err; + return; buf_printf(b, "\n"); buf_printf(b, "static const struct modversion_info ____versions[]\n"); @@ -2321,7 +2312,6 @@ static int add_versions(struct buffer *b, struct module *mod) if (strlen(s->name) >= MODULE_NAME_LEN) { error("too long symbol \"%s\" [%s.ko]\n", s->name, mod->name); - err = 1; break; } buf_printf(b, "\t{ %#8x, \"%s\" },\n", @@ -2329,8 +2319,6 @@ static int add_versions(struct buffer *b, struct module *mod) } buf_printf(b, "};\n"); - - return err; } static void add_depends(struct buffer *b, struct module *mod) @@ -2554,7 +2542,6 @@ int main(int argc, char **argv) char *missing_namespace_deps = NULL; char *dump_write = NULL, *files_source = NULL; int opt; - int err; int n; struct dump_list *dump_read_start = NULL; struct dump_list **dump_read_iter = &dump_read_start; @@ -2624,8 +2611,6 @@ int main(int argc, char **argv) if (!have_vmlinux) warn("Symbol info of vmlinux is missing. Unresolved symbol check will be entirely skipped.\n"); - err = 0; - for (mod = modules; mod; mod = mod->next) { char fname[PATH_MAX]; @@ -2634,14 +2619,14 @@ int main(int argc, char **argv) buf.pos = 0; - err |= check_modname_len(mod); - err |= check_exports(mod); + check_modname_len(mod); + check_exports(mod); add_header(&buf, mod); add_intree_flag(&buf, !external_module); add_retpoline(&buf); add_staging_flag(&buf, mod->name); - err |= add_versions(&buf, mod); + add_versions(&buf, mod); add_depends(&buf, mod); add_moddevtable(&buf, mod); add_srcversion(&buf, mod); @@ -2671,5 +2656,5 @@ int main(int argc, char **argv) free(buf.p); - return err; + return error_occurred ? 1 : 0; } diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h index f453504ad4df1..e6f46eee0af02 100644 --- a/scripts/mod/modpost.h +++ b/scripts/mod/modpost.h @@ -201,6 +201,19 @@ enum loglevel { void modpost_log(enum loglevel loglevel, const char *fmt, ...); +/* + * warn - show the given message, then let modpost continue running, still + * allowing modpost to exit successfully. This should be used when + * we still allow to generate vmlinux and modules. + * + * error - show the given message, then let modpost continue running, but fail + * in the end. This should be used when we should stop building vmlinux + * or modules, but we can continue running modpost to catch as many + * issues as possible. + * + * fatal - show the given message, and bail out immediately. This should be + * used when there is no point to continue running modpost. + */ #define warn(fmt, args...) modpost_log(LOG_WARN, fmt, ##args) #define error(fmt, args...) modpost_log(LOG_ERROR, fmt, ##args) #define fatal(fmt, args...) modpost_log(LOG_FATAL, fmt, ##args) -- GitLab From 1d6cd39293602e990b016994e51956eded35da7c Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 1 Dec 2020 19:34:16 +0900 Subject: [PATCH 0944/1894] modpost: turn missing MODULE_LICENSE() into error Do not create modules with no license tag. Signed-off-by: Masahiro Yamada --- scripts/mod/modpost.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 43e00867623a5..d55d7e5ef1110 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -2018,7 +2018,7 @@ static void read_symbols(const char *modname) if (!mod->is_vmlinux) { license = get_modinfo(&info, "license"); if (!license) - warn("missing MODULE_LICENSE() in %s\n", modname); + error("missing MODULE_LICENSE() in %s\n", modname); while (license) { if (license_is_gpl_compatible(license)) mod->gpl_compatible = 1; -- GitLab From d6d692fa21d3057edf457a764832077da8aa44d2 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 1 Dec 2020 19:34:17 +0900 Subject: [PATCH 0945/1894] modpost: change license incompatibility to error() from fatal() Change fatal() to error() to continue running to report more possible issues. There is no difference in the fact that modpost will fail anyway. Signed-off-by: Masahiro Yamada --- scripts/mod/modpost.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index d55d7e5ef1110..d907c63b948f3 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -2145,11 +2145,11 @@ static void check_for_gpl_usage(enum export exp, const char *m, const char *s) { switch (exp) { case export_gpl: - fatal("GPL-incompatible module %s.ko uses GPL-only symbol '%s'\n", + error("GPL-incompatible module %s.ko uses GPL-only symbol '%s'\n", m, s); break; case export_unused_gpl: - fatal("GPL-incompatible module %s.ko uses GPL-only symbol marked UNUSED '%s'\n", + error("GPL-incompatible module %s.ko uses GPL-only symbol marked UNUSED '%s'\n", m, s); break; case export_gpl_future: -- GitLab From c7299d98c00afa81c65d9fa13a18ea923f3281ff Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 1 Dec 2020 19:34:18 +0900 Subject: [PATCH 0946/1894] modpost: turn section mismatches to error from fatal() There is code that reports static EXPORT_SYMBOL a few lines below. It is not a good idea to bail out here. I renamed sec_mismatch_fatal to sec_mismatch_warn_only (with logical inversion) to match to CONFIG_SECTION_MISMATCH_WARN_ONLY. Signed-off-by: Masahiro Yamada --- scripts/mod/modpost.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index d907c63b948f3..a750596d5cc23 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -34,7 +34,7 @@ static int external_module = 0; static int warn_unresolved = 0; /* How a symbol is exported */ static int sec_mismatch_count = 0; -static int sec_mismatch_fatal = 0; +static int sec_mismatch_warn_only = true; /* ignore missing files */ static int ignore_missing_files; /* If set to 1, only warn (instead of error) about missing ns imports */ @@ -2576,7 +2576,7 @@ int main(int argc, char **argv) warn_unresolved = 1; break; case 'E': - sec_mismatch_fatal = 1; + sec_mismatch_warn_only = false; break; case 'N': allow_missing_ns_imports = 1; @@ -2640,8 +2640,8 @@ int main(int argc, char **argv) if (dump_write) write_dump(dump_write); - if (sec_mismatch_count && sec_mismatch_fatal) - fatal("Section mismatches detected.\n" + if (sec_mismatch_count && !sec_mismatch_warn_only) + error("Section mismatches detected.\n" "Set CONFIG_SECTION_MISMATCH_WARN_ONLY=y to allow them.\n"); for (n = 0; n < SYMBOL_HASH_SIZE; n++) { struct symbol *s; -- GitLab From b9ed847b5ae69e0f2e685f9d53e2dd94c0db751e Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Tue, 1 Dec 2020 16:52:22 +0000 Subject: [PATCH 0947/1894] modpost: turn static exports into error Using EXPORT_SYMBOL*() on static functions is fundamentally wrong. Modpost currently reports that as a warning, but clearly this is not a pattern we should allow, and all in-tree occurences should have been fixed by now. So, promote the warn() message to error() to make sure this never happens again. Acked-by: Greg Kroah-Hartman Reviewed-by: Matthias Maennich Signed-off-by: Quentin Perret Signed-off-by: Masahiro Yamada --- scripts/mod/modpost.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index a750596d5cc23..d6c81657d6955 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -2648,9 +2648,9 @@ int main(int argc, char **argv) for (s = symbolhash[n]; s; s = s->next) { if (s->is_static) - warn("\"%s\" [%s] is a static %s\n", - s->name, s->module->name, - export_str(s->export)); + error("\"%s\" [%s] is a static %s\n", + s->name, s->module->name, + export_str(s->export)); } } -- GitLab From 9ab55d7f240fb05f84ec3b5e37f0c3ab2ce69053 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Tue, 1 Dec 2020 16:20:18 +0100 Subject: [PATCH 0948/1894] genksyms: Ignore module scoped _Static_assert() The C11 _Static_assert() keyword may be used at module scope, and we need to teach genksyms about it to not abort with an error. We currently have a growing number of static_assert() (but also direct usage of _Static_assert()) users at module scope: git grep -E '^_Static_assert\(|^static_assert\(' | grep -v '^tools' | wc -l 135 More recently, when enabling CONFIG_MODVERSIONS with CONFIG_KCSAN, we observe a number of warnings: WARNING: modpost: EXPORT symbol "<..all kcsan symbols..>" [vmlinux] [...] When running a preprocessed source through 'genksyms -w' a number of syntax errors point at usage of static_assert()s. In the case of kernel/kcsan/encoding.h, new static_assert()s had been introduced which used expressions that appear to cause genksyms to not even be able to recover from the syntax error gracefully (as it appears was the case previously). Therefore, make genksyms ignore all _Static_assert() and the contained expression. With the fix, usage of _Static_assert() no longer cause "syntax error" all over the kernel, and the above modpost warnings for KCSAN are gone, too. Signed-off-by: Marco Elver Acked-by: Nick Desaulniers Signed-off-by: Masahiro Yamada --- scripts/genksyms/keywords.c | 3 +++ scripts/genksyms/lex.l | 27 ++++++++++++++++++++++++++- scripts/genksyms/parse.y | 7 +++++++ 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/scripts/genksyms/keywords.c b/scripts/genksyms/keywords.c index 057c6cabad1d9..b85e0979a00ce 100644 --- a/scripts/genksyms/keywords.c +++ b/scripts/genksyms/keywords.c @@ -32,6 +32,9 @@ static struct resword { { "restrict", RESTRICT_KEYW }, { "asm", ASM_KEYW }, + // c11 keywords that can be used at module scope + { "_Static_assert", STATIC_ASSERT_KEYW }, + // attribute commented out in modutils 2.4.2. People are using 'attribute' as a // field name which breaks the genksyms parser. It is not a gcc keyword anyway. // KAO. }, diff --git a/scripts/genksyms/lex.l b/scripts/genksyms/lex.l index e265c5d968612..ae76472efc43a 100644 --- a/scripts/genksyms/lex.l +++ b/scripts/genksyms/lex.l @@ -118,7 +118,7 @@ yylex(void) { static enum { ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_TYPEOF, ST_TYPEOF_1, - ST_BRACKET, ST_BRACE, ST_EXPRESSION, + ST_BRACKET, ST_BRACE, ST_EXPRESSION, ST_STATIC_ASSERT, ST_TABLE_1, ST_TABLE_2, ST_TABLE_3, ST_TABLE_4, ST_TABLE_5, ST_TABLE_6 } lexstate = ST_NOTSTARTED; @@ -201,6 +201,11 @@ repeat: case EXPORT_SYMBOL_KEYW: goto fini; + + case STATIC_ASSERT_KEYW: + lexstate = ST_STATIC_ASSERT; + count = 0; + goto repeat; } } if (!suppress_type_lookup) @@ -401,6 +406,26 @@ repeat: } break; + case ST_STATIC_ASSERT: + APP; + switch (token) + { + case '(': + ++count; + goto repeat; + case ')': + if (--count == 0) + { + lexstate = ST_NORMAL; + token = STATIC_ASSERT_PHRASE; + break; + } + goto repeat; + default: + goto repeat; + } + break; + case ST_TABLE_1: goto repeat; diff --git a/scripts/genksyms/parse.y b/scripts/genksyms/parse.y index e22b42245bcc2..8e9b5e69e8f01 100644 --- a/scripts/genksyms/parse.y +++ b/scripts/genksyms/parse.y @@ -80,6 +80,7 @@ static void record_compound(struct string_list **keyw, %token SHORT_KEYW %token SIGNED_KEYW %token STATIC_KEYW +%token STATIC_ASSERT_KEYW %token STRUCT_KEYW %token TYPEDEF_KEYW %token UNION_KEYW @@ -97,6 +98,7 @@ static void record_compound(struct string_list **keyw, %token BRACE_PHRASE %token BRACKET_PHRASE %token EXPRESSION_PHRASE +%token STATIC_ASSERT_PHRASE %token CHAR %token DOTS @@ -130,6 +132,7 @@ declaration1: | function_definition | asm_definition | export_definition + | static_assert | error ';' { $$ = $2; } | error '}' { $$ = $2; } ; @@ -493,6 +496,10 @@ export_definition: { export_symbol((*$3)->string); $$ = $5; } ; +/* Ignore any module scoped _Static_assert(...) */ +static_assert: + STATIC_ASSERT_PHRASE ';' { $$ = $2; } + ; %% -- GitLab From c613583b6a87434123fc6714acbec6522859185e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 8 Dec 2020 16:28:56 +0100 Subject: [PATCH 0949/1894] Documentation/kbuild: Document COMPILE_TEST dependencies Document best practises for using COMPILE_TEST dependencies. Signed-off-by: Geert Uytterhoeven Acked-by: Arnd Bergmann Signed-off-by: Masahiro Yamada --- Documentation/kbuild/kconfig-language.rst | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Documentation/kbuild/kconfig-language.rst b/Documentation/kbuild/kconfig-language.rst index 1cf1aebdd6cdf..1b4875f04e133 100644 --- a/Documentation/kbuild/kconfig-language.rst +++ b/Documentation/kbuild/kconfig-language.rst @@ -553,6 +553,17 @@ with "depends on m". E.g.:: limits FOO to module (=m) or disabled (=n). +Compile-testing +~~~~~~~~~~~~~~~ +If a config symbol has a dependency, but the code controlled by the config +symbol can still be compiled if the dependency is not met, it is encouraged to +increase build coverage by adding an "|| COMPILE_TEST" clause to the +dependency. This is especially useful for drivers for more exotic hardware, as +it allows continuous-integration systems to compile-test the code on a more +common system, and detect bugs that way. +Note that compile-tested code should avoid crashing when run on a system where +the dependency is not met. + Kconfig recursive dependency limitations ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -- GitLab From 18084e435ff6e47197542db6bab98bafaa4c7b68 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 8 Dec 2020 16:28:57 +0100 Subject: [PATCH 0950/1894] Documentation/kbuild: Document platform dependency practises Document best practises for using architecture and platform dependencies. Signed-off-by: Geert Uytterhoeven Signed-off-by: Masahiro Yamada --- Documentation/kbuild/kconfig-language.rst | 24 +++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/Documentation/kbuild/kconfig-language.rst b/Documentation/kbuild/kconfig-language.rst index 1b4875f04e133..226ae072da7da 100644 --- a/Documentation/kbuild/kconfig-language.rst +++ b/Documentation/kbuild/kconfig-language.rst @@ -564,6 +564,30 @@ common system, and detect bugs that way. Note that compile-tested code should avoid crashing when run on a system where the dependency is not met. +Architecture and platform dependencies +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Due to the presence of stubs, most drivers can now be compiled on most +architectures. However, this does not mean it makes sense to have all drivers +available everywhere, as the actual hardware may only exist on specific +architectures and platforms. This is especially true for on-SoC IP cores, +which may be limited to a specific vendor or SoC family. + +To prevent asking the user about drivers that cannot be used on the system(s) +the user is compiling a kernel for, and if it makes sense, config symbols +controlling the compilation of a driver should contain proper dependencies, +limiting the visibility of the symbol to (a superset of) the platform(s) the +driver can be used on. The dependency can be an architecture (e.g. ARM) or +platform (e.g. ARCH_OMAP4) dependency. This makes life simpler not only for +distro config owners, but also for every single developer or user who +configures a kernel. + +Such a dependency can be relaxed by combining it with the compile-testing rule +above, leading to: + + config FOO + bool "Support for foo hardware" + depends on ARCH_FOO_VENDOR || COMPILE_TEST + Kconfig recursive dependency limitations ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -- GitLab From 135b4957eac43af2aedf8e2a277b9540f33c2558 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 20 Dec 2020 03:18:42 +0900 Subject: [PATCH 0951/1894] kconfig: fix return value of do_error_if() $(error-if,...) is expanded to an empty string. Currently, it relies on eval_clause() returning xstrdup("") when all attempts for expansion fail, but the correct implementation is to make do_error_if() return xstrdup(""). Fixes: 1d6272e6fe43 ("kconfig: add 'info', 'warning-if', and 'error-if' built-in functions") Signed-off-by: Masahiro Yamada --- scripts/kconfig/preprocess.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/kconfig/preprocess.c b/scripts/kconfig/preprocess.c index 0243086fb1685..0590f86df6e40 100644 --- a/scripts/kconfig/preprocess.c +++ b/scripts/kconfig/preprocess.c @@ -114,7 +114,7 @@ static char *do_error_if(int argc, char *argv[]) if (!strcmp(argv[0], "y")) pperror("%s", argv[1]); - return NULL; + return xstrdup(""); } static char *do_filename(int argc, char *argv[]) -- GitLab From 90d39628ac799e93c0f7a56763eed5029632f1ba Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 21 Dec 2020 16:01:13 +0900 Subject: [PATCH 0952/1894] kconfig: doc: fix $(fileno) to $(filename) This is a typo. Signed-off-by: Masahiro Yamada --- Documentation/kbuild/kconfig-macro-language.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/kbuild/kconfig-macro-language.rst b/Documentation/kbuild/kconfig-macro-language.rst index 8b413ef9603da..6163467f6ae4f 100644 --- a/Documentation/kbuild/kconfig-macro-language.rst +++ b/Documentation/kbuild/kconfig-macro-language.rst @@ -97,7 +97,7 @@ Like Make, Kconfig provides several built-in functions. Every function takes a particular number of arguments. In Make, every built-in function takes at least one argument. Kconfig allows -zero argument for built-in functions, such as $(fileno), $(lineno). You could +zero argument for built-in functions, such as $(filename), $(lineno). You could consider those as "built-in variable", but it is just a matter of how we call it after all. Let's say "built-in function" here to refer to natively supported functionality. -- GitLab From 8b7c764e0644455a5991abea126e7ca6e03ee723 Mon Sep 17 00:00:00 2001 From: YangHui Date: Mon, 21 Dec 2020 14:22:07 +0800 Subject: [PATCH 0953/1894] ALSA: core: Remove redundant comments Remove redundant comments Signed-off-by: YangHui Link: https://lore.kernel.org/r/1608531727-5433-1-git-send-email-yanghui.def@gmail.com Signed-off-by: Takashi Iwai --- sound/core/init.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/sound/core/init.c b/sound/core/init.c index 764dbe673d488..75aec71c48a86 100644 --- a/sound/core/init.c +++ b/sound/core/init.c @@ -149,8 +149,6 @@ static void release_card_device(struct device *dev) * @extra_size: allocate this extra size after the main soundcard structure * @card_ret: the pointer to store the created card instance * - * Creates and initializes a soundcard structure. - * * The function allocates snd_card instance via kzalloc with the given * space for the driver to use freely. The allocated struct is stored * in the given card_ret pointer. -- GitLab From 525d9c57d0eeeb660d9b25e5b2d1c95975e3ba95 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 21 Dec 2020 09:01:59 +0100 Subject: [PATCH 0954/1894] ALSA: usb-audio: Add alias entry for ASUS PRIME TRX40 PRO-S ASUS PRIME TRX40 PRO-S mobo with 0b05:1918 needs the same quirk alias for another ASUS mobo (0b05:1917) for the proper mixer mapping, etc. Add the corresponding entry. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=210783 Cc: Link: https://lore.kernel.org/r/20201221080159.24468-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/card.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/usb/card.c b/sound/usb/card.c index cb0b6582dfee9..d731ca62d5994 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -383,6 +383,9 @@ static const struct usb_audio_device_name usb_audio_names[] = { /* ASUS ROG Strix */ PROFILE_NAME(0x0b05, 0x1917, "Realtek", "ALC1220-VB-DT", "Realtek-ALC1220-VB-Desktop"), + /* ASUS PRIME TRX40 PRO-S */ + PROFILE_NAME(0x0b05, 0x1918, + "Realtek", "ALC1220-VB-DT", "Realtek-ALC1220-VB-Desktop"), /* Dell WD15 Dock */ PROFILE_NAME(0x0bda, 0x4014, "Dell", "WD15 Dock", "Dell-WD15-Dock"), -- GitLab From e40ad84c26b4deeee46666492ec66b9a534b8e59 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 17 Dec 2020 20:17:49 +0100 Subject: [PATCH 0955/1894] cpufreq: intel_pstate: Use most recent guaranteed performance values When turbo has been disabled by the BIOS, but HWP_CAP.GUARANTEED is changed later, user space may want to take advantage of this increased guaranteed performance. HWP_CAP.GUARANTEED is not a static value. It can be adjusted by an out-of-band agent or during an Intel Speed Select performance level change. The HWP_CAP.MAX is still the maximum achievable performance with turbo disabled by the BIOS, so HWP_CAP.GUARANTEED can still change as long as it remains less than or equal to HWP_CAP.MAX. When HWP_CAP.GUARANTEED is changed, the sysfs base_frequency attribute shows the most recent guaranteed frequency value. This attribute can be used by user space software to update the scaling min/max limits of the CPU. Currently, the ->setpolicy() callback already uses the latest HWP_CAP values when setting HWP_REQ, but the ->verify() callback will restrict the user settings to the to old guaranteed performance value which prevents user space from making use of the extra CPU capacity theoretically available to it after increasing HWP_CAP.GUARANTEED. To address this, read HWP_CAP in intel_pstate_verify_cpu_policy() to obtain the maximum P-state that can be used and use that to confine the policy max limit instead of using the cached and possibly stale pstate.max_freq value for this purpose. For consistency, update intel_pstate_update_perf_limits() to use the maximum available P-state returned by intel_pstate_get_hwp_max() to compute the maximum frequency instead of using the return value of intel_pstate_get_max_freq() which, again, may be stale. This issue is a side-effect of fixing the scaling frequency limits in commit eacc9c5a927e ("cpufreq: intel_pstate: Fix intel_pstate_get_hwp_max() for turbo disabled") which corrected the setting of the reduced scaling frequency values, but caused stale HWP_CAP.GUARANTEED to be used in the case at hand. Fixes: eacc9c5a927e ("cpufreq: intel_pstate: Fix intel_pstate_get_hwp_max() for turbo disabled") Reported-by: Srinivas Pandruvada Tested-by: Srinivas Pandruvada Cc: 5.8+ # 5.8+ Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index d5ec0c962ec52..6e23376548ced 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2207,9 +2207,9 @@ static void intel_pstate_update_perf_limits(struct cpudata *cpu, unsigned int policy_min, unsigned int policy_max) { - int max_freq = intel_pstate_get_max_freq(cpu); int32_t max_policy_perf, min_policy_perf; int max_state, turbo_max; + int max_freq; /* * HWP needs some special consideration, because on BDX the @@ -2223,6 +2223,7 @@ static void intel_pstate_update_perf_limits(struct cpudata *cpu, cpu->pstate.max_pstate : cpu->pstate.turbo_pstate; turbo_max = cpu->pstate.turbo_pstate; } + max_freq = max_state * cpu->pstate.scaling; max_policy_perf = max_state * policy_max / max_freq; if (policy_max == policy_min) { @@ -2325,9 +2326,18 @@ static void intel_pstate_adjust_policy_max(struct cpudata *cpu, static void intel_pstate_verify_cpu_policy(struct cpudata *cpu, struct cpufreq_policy_data *policy) { + int max_freq; + update_turbo_state(); - cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, - intel_pstate_get_max_freq(cpu)); + if (hwp_active) { + int max_state, turbo_max; + + intel_pstate_get_hwp_max(cpu->cpu, &turbo_max, &max_state); + max_freq = max_state * cpu->pstate.scaling; + } else { + max_freq = intel_pstate_get_max_freq(cpu); + } + cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, max_freq); intel_pstate_adjust_policy_max(cpu, policy); } -- GitLab From b36f835b636908e4122f2e17310b1dbc380a3b19 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 15 Dec 2020 14:29:06 +1100 Subject: [PATCH 0956/1894] powerpc/boot: Fix build of dts/fsl The lkp robot reported that some configs fail to build, for example mpc85xx_smp_defconfig, with: cc1: fatal error: opening output file arch/powerpc/boot/dts/fsl/.mpc8540ads.dtb.dts.tmp: No such file or directory This bisects to: cc8a51ca6f05 ("kbuild: always create directories of targets") Although that commit claims to be about in-tree builds, it somehow breaks out-of-tree builds. But presumably it's just exposing a latent bug in our Makefiles. We can fix it by adding to targets for dts/fsl in the same way that we do for dts. Fixes: cc8a51ca6f05 ("kbuild: always create directories of targets") Reported-by: kernel test robot Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201215032906.473460-1-mpe@ellerman.id.au --- arch/powerpc/boot/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index ec0b2186e41c1..2b8da923ceca2 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -373,6 +373,8 @@ initrd-y := $(filter-out $(image-y), $(initrd-y)) targets += $(image-y) $(initrd-y) targets += $(foreach x, dtbImage uImage cuImage simpleImage treeImage, \ $(patsubst $(x).%, dts/%.dtb, $(filter $(x).%, $(image-y)))) +targets += $(foreach x, dtbImage uImage cuImage simpleImage treeImage, \ + $(patsubst $(x).%, dts/fsl/%.dtb, $(filter $(x).%, $(image-y)))) $(addprefix $(obj)/, $(initrd-y)): $(obj)/ramdisk.image.gz -- GitLab From 0faa22f09caadc11af2aa7570870ebd2ac5b8170 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sun, 20 Dec 2020 18:18:26 +0000 Subject: [PATCH 0957/1894] powerpc/time: Force inlining of get_tb() Force inlining of get_tb() in order to avoid getting following function in vdso32, leading to suboptimal performance in clock_gettime() 00000688 <.get_tb>: 688: 7c 6d 42 a6 mftbu r3 68c: 7c 8c 42 a6 mftb r4 690: 7d 2d 42 a6 mftbu r9 694: 7c 03 48 40 cmplw r3,r9 698: 40 e2 ff f0 bne+ 688 <.get_tb> 69c: 4e 80 00 20 blr Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/df05d53eed1210cf1aa76d1fb44aa0fab29c018e.1608488286.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/vdso/timebase.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/vdso/timebase.h b/arch/powerpc/include/asm/vdso/timebase.h index b558b07959ceb..881f655caa0a9 100644 --- a/arch/powerpc/include/asm/vdso/timebase.h +++ b/arch/powerpc/include/asm/vdso/timebase.h @@ -49,7 +49,7 @@ static inline unsigned long get_tbl(void) return mftb(); } -static inline u64 get_tb(void) +static __always_inline u64 get_tb(void) { unsigned int tbhi, tblo, tbhi2; -- GitLab From 9014eab6a38c60fd185bc92ed60f46cf99a462ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Mon, 21 Dec 2020 08:41:54 +0100 Subject: [PATCH 0958/1894] powerpc/smp: Add __init to init_big_cores() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It fixes this link warning: WARNING: modpost: vmlinux.o(.text.unlikely+0x2d98): Section mismatch in reference from the function init_big_cores.isra.0() to the function .init.text:init_thread_group_cache_map() The function init_big_cores.isra.0() references the function __init init_thread_group_cache_map(). This is often because init_big_cores.isra.0 lacks a __init annotation or the annotation of init_thread_group_cache_map is wrong. Fixes: 425752c63b6f ("powerpc: Detect the presence of big-cores via "ibm, thread-groups"") Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201221074154.403779-1-clg@kaod.org --- arch/powerpc/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 2b9b1bb4c5f28..9e2246e80efd6 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -990,7 +990,7 @@ static struct sched_domain_topology_level powerpc_topology[] = { { NULL, }, }; -static int init_big_cores(void) +static int __init init_big_cores(void) { int cpu; -- GitLab From 42ed6d56ade21f367f27aa5915cc397510cfdef5 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 18 Dec 2020 22:16:17 +1100 Subject: [PATCH 0959/1894] powerpc/vdso: Block R_PPC_REL24 relocations Add R_PPC_REL24 relocations to the list of relocations we do NOT support in the VDSO. These are generated in some cases and we do not support relocating them at runtime, so if they appear then the VDSO will not work at runtime, therefore it's preferable to break the build if we see them. Fixes: ab037dd87a2f ("powerpc/vdso: Switch VDSO to generic C implementation.") Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201218111619.1206391-1-mpe@ellerman.id.au --- arch/powerpc/kernel/vdso32/Makefile | 2 +- arch/powerpc/kernel/vdso64/Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile index 59aa2944ecaee..6616f4e794d0e 100644 --- a/arch/powerpc/kernel/vdso32/Makefile +++ b/arch/powerpc/kernel/vdso32/Makefile @@ -2,7 +2,7 @@ # List of files in the vdso, has to be asm only for now -ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_PPC_ADDR16|R_PPC_ADDR16_LO|R_PPC_ADDR16_HI|R_PPC_ADDR16_HA|R_PPC_ADDR14|R_PPC_ADDR14_BRTAKEN|R_PPC_ADDR14_BRNTAKEN +ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_PPC_ADDR16|R_PPC_ADDR16_LO|R_PPC_ADDR16_HI|R_PPC_ADDR16_HA|R_PPC_ADDR14|R_PPC_ADDR14_BRTAKEN|R_PPC_ADDR14_BRNTAKEN|R_PPC_REL24 include $(srctree)/lib/vdso/Makefile obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile index d365810a689a0..bf363ff371521 100644 --- a/arch/powerpc/kernel/vdso64/Makefile +++ b/arch/powerpc/kernel/vdso64/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 # List of files in the vdso, has to be asm only for now -ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_PPC_ADDR16|R_PPC_ADDR16_LO|R_PPC_ADDR16_HI|R_PPC_ADDR16_HA|R_PPC_ADDR14|R_PPC_ADDR14_BRTAKEN|R_PPC_ADDR14_BRNTAKEN +ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_PPC_ADDR16|R_PPC_ADDR16_LO|R_PPC_ADDR16_HI|R_PPC_ADDR16_HA|R_PPC_ADDR14|R_PPC_ADDR14_BRTAKEN|R_PPC_ADDR14_BRNTAKEN|R_PPC_REL24 include $(srctree)/lib/vdso/Makefile obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o -- GitLab From 107521e8039688f7a9548f17919dfde670b911c1 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 18 Dec 2020 22:16:18 +1100 Subject: [PATCH 0960/1894] powerpc/vdso: Don't pass 64-bit ABI cflags to 32-bit VDSO When building the 32-bit VDSO, we are building 32-bit code as part of a 64-bit kernel build. That requires us to tweak the cflags to trick the compiler into building 32-bit code for us. The main way we do that is by passing -m32, but there are other options that affect code generation and ABI selection. In particular when building vgettimeofday.c, we end up passing -mcall-aixdesc because it's in KBUILD_CFLAGS, which causes the compiler to generate function descriptors, and dot symbols, eg: $ nm arch/powerpc/kernel/vdso32/vgettimeofday.o 000005d0 T .__c_kernel_clock_getres 00000024 D __c_kernel_clock_getres ... We get away with that at the moment because we also use the DOTSYM macro, and that is also incorrectly prepending a '.' in 32-bit VDSO code due to a separate bug. But we shouldn't be generating function descriptors for this file, there's no 32-bit ABI that includes function descriptors, so the resulting object file is some frankenstein and it's surprising that it even links. So filter out all the ABI-related options we add to CFLAGS for 64-bit builds, so that they're not used when building 32-bit code. With that we only see regular text symbols: $ nm arch/powerpc/kernel/vdso32/vgettimeofday.o michael@alpine1-p1 000005d0 T __c_kernel_clock_getres 00000000 T __c_kernel_clock_gettime 00000200 T __c_kernel_clock_gettime64 00000410 T __c_kernel_gettimeofday 00000650 T __c_kernel_time Fixes: ab037dd87a2f ("powerpc/vdso: Switch VDSO to generic C implementation.") Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201218111619.1206391-2-mpe@ellerman.id.au --- arch/powerpc/kernel/vdso32/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile index 6616f4e794d0e..9cb6f524854b9 100644 --- a/arch/powerpc/kernel/vdso32/Makefile +++ b/arch/powerpc/kernel/vdso32/Makefile @@ -27,7 +27,7 @@ endif CC32FLAGS := ifdef CONFIG_PPC64 CC32FLAGS += -m32 -KBUILD_CFLAGS := $(filter-out -mcmodel=medium,$(KBUILD_CFLAGS)) +KBUILD_CFLAGS := $(filter-out -mcmodel=medium -mabi=elfv1 -mabi=elfv2 -mcall-aixdesc,$(KBUILD_CFLAGS)) endif targets := $(obj-vdso32) vdso32.so.dbg -- GitLab From 2eda7f11000646909a10298951c9defb2321b240 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 18 Dec 2020 22:16:19 +1100 Subject: [PATCH 0961/1894] powerpc/vdso: Fix DOTSYM for 32-bit LE VDSO Skirmisher reported on IRC that the 32-bit LE VDSO was hanging. This turned out to be due to a branch to self in eg. __kernel_gettimeofday. Looking at the disassembly with objdump -dR shows why: 00000528 <__kernel_gettimeofday>: 528: f0 ff 21 94 stwu r1,-16(r1) 52c: a6 02 08 7c mflr r0 530: f0 ff 21 94 stwu r1,-16(r1) 534: 14 00 01 90 stw r0,20(r1) 538: 05 00 9f 42 bcl 20,4*cr7+so,53c <__kernel_gettimeofday+0x14> 53c: a6 02 a8 7c mflr r5 540: ff ff a5 3c addis r5,r5,-1 544: c4 fa a5 38 addi r5,r5,-1340 548: f0 00 a5 38 addi r5,r5,240 54c: 01 00 00 48 bl 54c <__kernel_gettimeofday+0x24> 54c: R_PPC_REL24 .__c_kernel_gettimeofday Because we don't process relocations for the VDSO, this branch remains a branch from 0x54c to 0x54c. With the preceding patch to prohibit R_PPC_REL24 relocations, we instead get a build failure: 0000054c R_PPC_REL24 .__c_kernel_gettimeofday 00000598 R_PPC_REL24 .__c_kernel_clock_gettime 000005e4 R_PPC_REL24 .__c_kernel_clock_gettime64 00000630 R_PPC_REL24 .__c_kernel_clock_getres 0000067c R_PPC_REL24 .__c_kernel_time arch/powerpc/kernel/vdso32/vdso32.so.dbg: dynamic relocations are not supported The root cause is that we're branching to `.__c_kernel_gettimeofday`. But this is 32-bit LE code, which doesn't use function descriptors, so there are no dot symbols. The reason we're trying to branch to a dot symbol is because we're using the DOTSYM macro, but the ifdefs we use to define the DOTSYM macro do not currently work for 32-bit LE. So like previous commits we need to differentiate if the current compilation unit is 64-bit, rather than the kernel as a whole. ie. switch from CONFIG_PPC64 to __powerpc64__. With that fixed 32-bit LE code gets the empty version of DOTSYM, which just resolves to the original symbol name, leading to a direct branch and no relocations: 000003f8 <__kernel_gettimeofday>: 3f8: f0 ff 21 94 stwu r1,-16(r1) 3fc: a6 02 08 7c mflr r0 400: f0 ff 21 94 stwu r1,-16(r1) 404: 14 00 01 90 stw r0,20(r1) 408: 05 00 9f 42 bcl 20,4*cr7+so,40c <__kernel_gettimeofday+0x14> 40c: a6 02 a8 7c mflr r5 410: ff ff a5 3c addis r5,r5,-1 414: f4 fb a5 38 addi r5,r5,-1036 418: f0 00 a5 38 addi r5,r5,240 41c: 85 06 00 48 bl aa0 <__c_kernel_gettimeofday> Fixes: ab037dd87a2f ("powerpc/vdso: Switch VDSO to generic C implementation.") Reported-by: "Will Springer " Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201218111619.1206391-3-mpe@ellerman.id.au --- arch/powerpc/include/asm/ppc_asm.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index cfa814824285f..cc1bca5713327 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -180,7 +180,12 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) #define VCPU_GPR(n) __VCPU_GPR(__REG_##n) #ifdef __KERNEL__ -#ifdef CONFIG_PPC64 + +/* + * We use __powerpc64__ here because we want the compat VDSO to use the 32-bit + * version below in the else case of the ifdef. + */ +#ifdef __powerpc64__ #define STACKFRAMESIZE 256 #define __STK_REG(i) (112 + ((i)-14)*8) -- GitLab From 3cce9d44321e460e7c88cdec4e4537a6e9ad7c0d Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 19 Dec 2020 16:29:58 +0100 Subject: [PATCH 0962/1894] ARM: 9044/1: vfp: use undef hook for VFP support detection Commit f77ac2e378be9dd6 ("ARM: 9030/1: entry: omit FP emulation for UND exceptions taken in kernel mode") failed to take into account that there is in fact a case where we relied on this code path: during boot, the VFP detection code issues a read of FPSID, which will trigger an undef exception on cores that lack VFP support. So let's reinstate this logic using an undef hook which is registered only for the duration of the initcall to vpf_init(), and which sets VFP_arch to a non-zero value - as before - if no VFP support is present. Fixes: f77ac2e378be9dd6 ("ARM: 9030/1: entry: omit FP emulation for UND ...") Reported-by: "kernelci.org bot" Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King --- arch/arm/vfp/entry.S | 17 ----------------- arch/arm/vfp/vfpmodule.c | 25 ++++++++++++++++++++----- 2 files changed, 20 insertions(+), 22 deletions(-) diff --git a/arch/arm/vfp/entry.S b/arch/arm/vfp/entry.S index 0186cf9da890b..27b0a1f27fbdf 100644 --- a/arch/arm/vfp/entry.S +++ b/arch/arm/vfp/entry.S @@ -37,20 +37,3 @@ ENDPROC(vfp_null_entry) .align 2 .LCvfp: .word vfp_vector - -@ This code is called if the VFP does not exist. It needs to flag the -@ failure to the VFP initialisation code. - - __INIT -ENTRY(vfp_testing_entry) - dec_preempt_count_ti r10, r4 - ldr r0, VFP_arch_address - str r0, [r0] @ set to non-zero value - ret r9 @ we have handled the fault -ENDPROC(vfp_testing_entry) - - .align 2 -VFP_arch_address: - .word VFP_arch - - __FINIT diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index c3b6451c18bda..2cb355c1b5b71 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -32,7 +32,6 @@ /* * Our undef handlers (in entry.S) */ -asmlinkage void vfp_testing_entry(void); asmlinkage void vfp_support_entry(void); asmlinkage void vfp_null_entry(void); @@ -43,7 +42,7 @@ asmlinkage void (*vfp_vector)(void) = vfp_null_entry; * Used in startup: set to non-zero if VFP checks fail * After startup, holds VFP architecture */ -unsigned int VFP_arch; +static unsigned int __initdata VFP_arch; /* * The pointer to the vfpstate structure of the thread which currently @@ -437,7 +436,7 @@ static void vfp_enable(void *unused) * present on all CPUs within a SMP complex. Needs to be called prior to * vfp_init(). */ -void vfp_disable(void) +void __init vfp_disable(void) { if (VFP_arch) { pr_debug("%s: should be called prior to vfp_init\n", __func__); @@ -707,7 +706,7 @@ static int __init vfp_kmode_exception_hook_init(void) register_undef_hook(&vfp_kmode_exception_hook[i]); return 0; } -core_initcall(vfp_kmode_exception_hook_init); +subsys_initcall(vfp_kmode_exception_hook_init); /* * Kernel-side NEON support functions @@ -753,6 +752,21 @@ EXPORT_SYMBOL(kernel_neon_end); #endif /* CONFIG_KERNEL_MODE_NEON */ +static int __init vfp_detect(struct pt_regs *regs, unsigned int instr) +{ + VFP_arch = UINT_MAX; /* mark as not present */ + regs->ARM_pc += 4; + return 0; +} + +static struct undef_hook vfp_detect_hook __initdata = { + .instr_mask = 0x0c000e00, + .instr_val = 0x0c000a00, + .cpsr_mask = MODE_MASK, + .cpsr_val = SVC_MODE, + .fn = vfp_detect, +}; + /* * VFP support code initialisation. */ @@ -773,10 +787,11 @@ static int __init vfp_init(void) * The handler is already setup to just log calls, so * we just need to read the VFPSID register. */ - vfp_vector = vfp_testing_entry; + register_undef_hook(&vfp_detect_hook); barrier(); vfpsid = fmrx(FPSID); barrier(); + unregister_undef_hook(&vfp_detect_hook); vfp_vector = vfp_null_entry; pr_info("VFP support v0.3: "); -- GitLab From 6c7a6d22fcef9181239ea7248c6b0c4117b9325e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 27 Nov 2020 13:07:22 +0100 Subject: [PATCH 0963/1894] ARM: 9031/1: hyp-stub: remove unused .L__boot_cpu_mode_offset symbol Commit aaac3733171fca94 ("ARM: kvm: replace open coded VA->PA calculations with adr_l call") removed all uses of .L__boot_cpu_mode_offset, so there is no longer a need to define it. Signed-off-by: Ard Biesheuvel Reviewed-by: Linus Walleij Signed-off-by: Russell King --- arch/arm/kernel/hyp-stub.S | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S index 26d8e03b1dd33..327f63ec42c6e 100644 --- a/arch/arm/kernel/hyp-stub.S +++ b/arch/arm/kernel/hyp-stub.S @@ -228,12 +228,6 @@ ENTRY(__hyp_soft_restart) ret lr ENDPROC(__hyp_soft_restart) -#ifndef ZIMAGE -.align 2 -.L__boot_cpu_mode_offset: - .long __boot_cpu_mode - . -#endif - .align 5 ENTRY(__hyp_stub_vectors) __hyp_stub_reset: W(b) . -- GitLab From 76460d613d9b4096f3567bd444e3fc275db1b96b Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Mon, 30 Nov 2020 10:28:20 +0100 Subject: [PATCH 0964/1894] ARM: 9032/1: arm/mm: Convert PUD level pgtable helper macros into functions Macros used as functions can be problematic from the compiler perspective. There was a build failure report caused primarily because of non reference of an argument variable. Hence convert PUD level pgtable helper macros into functions in order to avoid such problems in the future. In the process, it fixes the argument variables sequence in set_pud() which probably remained hidden for being a macro. https://lore.kernel.org/linux-mm/202011020749.5XQ3Hfzc-lkp@intel.com/ https://lore.kernel.org/linux-mm/5fa49698.Vu2O3r+dU20UoEJ+%25lkp@intel.com/ Cc: Andrew Morton Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Linus Walleij Signed-off-by: Anshuman Khandual Signed-off-by: Russell King --- arch/arm/include/asm/pgtable-2level.h | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h index 3502c2f746ca7..9d4f5eef410b9 100644 --- a/arch/arm/include/asm/pgtable-2level.h +++ b/arch/arm/include/asm/pgtable-2level.h @@ -177,11 +177,28 @@ * the pud: the pud entry is never bad, always exists, and can't be set or * cleared. */ -#define pud_none(pud) (0) -#define pud_bad(pud) (0) -#define pud_present(pud) (1) -#define pud_clear(pudp) do { } while (0) -#define set_pud(pud,pudp) do { } while (0) +static inline int pud_none(pud_t pud) +{ + return 0; +} + +static inline int pud_bad(pud_t pud) +{ + return 0; +} + +static inline int pud_present(pud_t pud) +{ + return 1; +} + +static inline void pud_clear(pud_t *pudp) +{ +} + +static inline void set_pud(pud_t *pudp, pud_t pud) +{ +} static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) { -- GitLab From 27bde183b0d3b0e8e84c80db1864a5c7bd20b5e7 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Mon, 30 Nov 2020 11:24:09 +0100 Subject: [PATCH 0965/1894] ARM: 9033/1: arm/smp: Drop the macro S(x,s) Mapping between IPI type index and its string is direct without requiring an additional offset. Hence the existing macro S(x, s) is now redundant and can just be dropped. This also makes the code clean and simple. Cc: Marc Zyngier Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Signed-off-by: Russell King --- arch/arm/kernel/smp.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 48099c6e1e4a6..6ab2b0ad5f400 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -524,14 +524,13 @@ void __init smp_prepare_cpus(unsigned int max_cpus) } static const char *ipi_types[NR_IPI] __tracepoint_string = { -#define S(x,s) [x] = s - S(IPI_WAKEUP, "CPU wakeup interrupts"), - S(IPI_TIMER, "Timer broadcast interrupts"), - S(IPI_RESCHEDULE, "Rescheduling interrupts"), - S(IPI_CALL_FUNC, "Function call interrupts"), - S(IPI_CPU_STOP, "CPU stop interrupts"), - S(IPI_IRQ_WORK, "IRQ work interrupts"), - S(IPI_COMPLETION, "completion interrupts"), + [IPI_WAKEUP] = "CPU wakeup interrupts", + [IPI_TIMER] = "Timer broadcast interrupts", + [IPI_RESCHEDULE] = "Rescheduling interrupts", + [IPI_CALL_FUNC] = "Function call interrupts", + [IPI_CPU_STOP] = "CPU stop interrupts", + [IPI_IRQ_WORK] = "IRQ work interrupts", + [IPI_COMPLETION] = "completion interrupts", }; static void smp_cross_call(const struct cpumask *target, unsigned int ipinr); -- GitLab From 0557ac83fd1a0a7cd6909665bad50006507115a0 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 4 Dec 2020 10:37:47 +0100 Subject: [PATCH 0966/1894] ARM: 9035/1: uncompress: Add be32tocpu macro DTB stores all values as 32-bit big-endian integers. Add a macro to convert such values to native CPU endianness, to reduce duplication. Signed-off-by: Geert Uytterhoeven Reviewed-by: Ard Biesheuvel Reviewed-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/boot/compressed/head.S | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 2e04ec5b54469..14d1995106c28 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -165,6 +165,16 @@ orr \res, \res, \tmp1, lsl #24 .endm + .macro be32tocpu, val, tmp +#ifndef __ARMEB__ + /* convert to little endian */ + eor \tmp, \val, \val, ror #16 + bic \tmp, \tmp, #0x00ff0000 + mov \val, \val, ror #8 + eor \val, \val, \tmp, lsr #8 +#endif + .endm + .section ".start", "ax" /* * sort out different calling conventions @@ -345,13 +355,7 @@ restart: adr r0, LC1 /* Get the initial DTB size */ ldr r5, [r6, #4] -#ifndef __ARMEB__ - /* convert to little endian */ - eor r1, r5, r5, ror #16 - bic r1, r1, #0x00ff0000 - mov r5, r5, ror #8 - eor r5, r5, r1, lsr #8 -#endif + be32tocpu r5, r1 dbgadtb r6, r5 /* 50% DTB growth should be good enough */ add r5, r5, r5, lsr #1 @@ -403,13 +407,7 @@ restart: adr r0, LC1 /* Get the current DTB size */ ldr r5, [r6, #4] -#ifndef __ARMEB__ - /* convert r5 (dtb size) to little endian */ - eor r1, r5, r5, ror #16 - bic r1, r1, #0x00ff0000 - mov r5, r5, ror #8 - eor r5, r5, r1, lsr #8 -#endif + be32tocpu r5, r1 /* preserve 64-bit alignment */ add r5, r5, #7 -- GitLab From 1ecec38547d415054fdb63a231234f44396b6d06 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 4 Dec 2020 10:37:14 +0100 Subject: [PATCH 0967/1894] ARM: 9036/1: uncompress: Fix dbgadtb size parameter name The dbgadtb macro is passed the size of the appended DTB, not the end address. Fixes: c03e41470e901123 ("ARM: 9010/1: uncompress: Print the location of appended DTB") Signed-off-by: Geert Uytterhoeven Reviewed-by: Linus Walleij Signed-off-by: Russell King --- arch/arm/boot/compressed/head.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 14d1995106c28..4fb699769e6a6 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -116,7 +116,7 @@ /* * Debug print of the final appended DTB location */ - .macro dbgadtb, begin, end + .macro dbgadtb, begin, size #ifdef DEBUG kputc #'D' kputc #'T' @@ -129,7 +129,7 @@ kputc #'(' kputc #'0' kputc #'x' - kphex \end, 8 /* End of appended DTB */ + kphex \size, 8 /* Size of appended DTB */ kputc #')' kputc #'\n' #endif -- GitLab From 551b39efc6ffdc7a881122fbac0caa2a27a464d8 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 4 Dec 2020 10:39:10 +0100 Subject: [PATCH 0968/1894] ARM: 9037/1: uncompress: Add OF_DT_MAGIC macro The DTB magic marker is stored as a 32-bit big-endian value, and thus depends on the CPU's endianness. Add a macro to define this value in native endianness, to reduce #ifdef clutter and (future) duplication. Signed-off-by: Geert Uytterhoeven Reviewed-by: Ard Biesheuvel Reviewed-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/boot/compressed/head.S | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 4fb699769e6a6..8d546cb109213 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -11,6 +11,12 @@ #include "efi-header.S" +#ifdef __ARMEB__ +#define OF_DT_MAGIC 0xd00dfeed +#else +#define OF_DT_MAGIC 0xedfe0dd0 +#endif + AR_CLASS( .arch armv7-a ) M_CLASS( .arch armv7-m ) @@ -335,11 +341,7 @@ restart: adr r0, LC1 */ ldr lr, [r6, #0] -#ifndef __ARMEB__ - ldr r1, =0xedfe0dd0 @ sig is 0xd00dfeed big endian -#else - ldr r1, =0xd00dfeed -#endif + ldr r1, =OF_DT_MAGIC cmp lr, r1 bne dtb_check_done @ not found -- GitLab From 0cda9bc15dfc459bd178d6ba93389df52dd57957 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 8 Dec 2020 01:34:16 +0100 Subject: [PATCH 0969/1894] ARM: 9038/1: Link with '-z norelro' When linking a multi_v7_defconfig + CONFIG_KASAN=y kernel with LD=ld.lld, the following error occurs: $ make ARCH=arm CROSS_COMPILE=arm-linux-gnueabi- LLVM=1 zImage ld.lld: error: section: .exit.data is not contiguous with other relro sections LLD defaults to '-z relro', which is unneeded for the kernel because program headers are not used nor is there any position independent code generation or linking for ARM. Add '-z norelro' to LDFLAGS_vmlinux to avoid this error. Link: https://github.com/ClangBuiltLinux/linux/issues/1189 Reviewed-by: Nick Desaulniers Tested-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Signed-off-by: Russell King --- arch/arm/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 4d76eab2b22d7..3c0a64cefe52f 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -10,7 +10,7 @@ # # Copyright (C) 1995-2001 by Russell King -LDFLAGS_vmlinux := --no-undefined -X --pic-veneer +LDFLAGS_vmlinux := --no-undefined -X --pic-veneer -z norelro ifeq ($(CONFIG_CPU_ENDIAN_BE8),y) LDFLAGS_vmlinux += --be8 KBUILD_LDFLAGS_MODULE += --be8 -- GitLab From 10fce53c0ef8f6e79115c3d9e0d7ea1338c3fa37 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 17 Nov 2020 08:41:01 +0100 Subject: [PATCH 0970/1894] ARM: 9027/1: head.S: explicitly map DT even if it lives in the first physical section The early ATAGS/DT mapping code uses SECTION_SHIFT to mask low order bits of R2, and decides that no ATAGS/DTB were provided if the resulting value is 0x0. This means that on systems where DRAM starts at 0x0 (such as Raspberry Pi), no explicit mapping of the DT will be created if R2 points into the first 1 MB section of memory. This was not a problem before, because the decompressed kernel is loaded at the base of DRAM and mapped using sections as well, and so as long as the DT is referenced via a virtual address that uses the same translation (the linear map, in this case), things work fine. However, commit 7a1be318f579 ("9012/1: move device tree mapping out of linear region") changes this, and now the DT is referenced via a virtual address that is disjoint from the linear mapping of DRAM, and so we need the early code to create the DT mapping unconditionally. So let's create the early DT mapping for any value of R2 != 0x0. Reported-by: "kernelci.org bot" Reviewed-by: Linus Walleij Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King --- arch/arm/kernel/head.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 9b18d8c661295..26ffd462a9755 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -274,10 +274,10 @@ __create_page_tables: * We map 2 sections in case the ATAGs/DTB crosses a section boundary. */ mov r0, r2, lsr #SECTION_SHIFT - movs r0, r0, lsl #SECTION_SHIFT + cmp r2, #0 ldrne r3, =FDT_FIXED_BASE >> (SECTION_SHIFT - PMD_ORDER) addne r3, r3, r4 - orrne r6, r7, r0 + orrne r6, r7, r0, lsl #SECTION_SHIFT strne r6, [r3], #1 << PMD_ORDER addne r6, r6, #1 << SECTION_SHIFT strne r6, [r3] -- GitLab From d5c243989fb0cb03c74d7340daca3b819f706ee7 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 21 Dec 2020 06:18:03 +0000 Subject: [PATCH 0971/1894] powerpc/32: Fix vmap stack - Properly set r1 before activating MMU on syscall too We need r1 to be properly set before activating MMU, otherwise any new exception taken while saving registers into the stack in syscall prologs will use the user stack, which is wrong and will even lockup or crash when KUAP is selected. Do that by switching the meaning of r11 and r1 until we have saved r1 to the stack: copy r1 into r11 and setup the new stack pointer in r1. To avoid complicating and impacting all generic and specific prolog code (and more), copy back r1 into r11 once r11 is save onto the stack. We could get rid of copying r1 back and forth at the cost of rewriting everything to use r1 instead of r11 all the way when CONFIG_VMAP_STACK is set, but the effort is probably not worth it for now. Fixes: da7bb43ab9da ("powerpc/32: Fix vmap stack - Properly set r1 before activating MMU") Cc: stable@vger.kernel.org # v5.10+ Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/a3d819d5c348cee9783a311d5d3f3ba9b48fd219.1608531452.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_32.h | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 541664d957024..a2f72c966bafb 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -121,18 +121,28 @@ #ifdef CONFIG_VMAP_STACK mfspr r11, SPRN_SRR0 mtctr r11 -#endif andi. r11, r9, MSR_PR - lwz r11,TASK_STACK-THREAD(r12) + mr r11, r1 + lwz r1,TASK_STACK-THREAD(r12) beq- 99f - addi r11, r11, THREAD_SIZE - INT_FRAME_SIZE -#ifdef CONFIG_VMAP_STACK + addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE li r10, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */ mtmsr r10 isync + tovirt(r12, r12) + stw r11,GPR1(r1) + stw r11,0(r1) + mr r11, r1 +#else + andi. r11, r9, MSR_PR + lwz r11,TASK_STACK-THREAD(r12) + beq- 99f + addi r11, r11, THREAD_SIZE - INT_FRAME_SIZE + tophys(r11, r11) + stw r1,GPR1(r11) + stw r1,0(r11) + tovirt(r1, r11) /* set new kernel sp */ #endif - tovirt_vmstack r12, r12 - tophys_novmstack r11, r11 mflr r10 stw r10, _LINK(r11) #ifdef CONFIG_VMAP_STACK @@ -140,9 +150,6 @@ #else mfspr r10,SPRN_SRR0 #endif - stw r1,GPR1(r11) - stw r1,0(r11) - tovirt_novmstack r1, r11 /* set new kernel sp */ stw r10,_NIP(r11) mfcr r10 rlwinm r10,r10,0,4,2 /* Clear SO bit in CR */ -- GitLab From 7e90285716518d810857a1d362983d99da9bbf66 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 16 Dec 2020 13:46:54 +0000 Subject: [PATCH 0972/1894] docs: submitting-patches: Trivial - fix grammatical error "it is a used" does not make sense. Should be "it is used". Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20201216134654.271508-1-lee.jones@linaro.org Signed-off-by: Jonathan Corbet --- Documentation/process/submitting-patches.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst index fb8261a4be308..2c48a00a436e7 100644 --- a/Documentation/process/submitting-patches.rst +++ b/Documentation/process/submitting-patches.rst @@ -446,7 +446,7 @@ patch. This tag documents that potentially interested parties have been included in the discussion. Co-developed-by: states that the patch was co-created by multiple developers; -it is a used to give attribution to co-authors (in addition to the author +it is used to give attribution to co-authors (in addition to the author attributed by the From: tag) when several people work on a single patch. Since Co-developed-by: denotes authorship, every Co-developed-by: must be immediately followed by a Signed-off-by: of the associated co-author. Standard sign-off -- GitLab From 27ab873e0ca640cbe1375aa5a0cdd0607cb6bbdc Mon Sep 17 00:00:00 2001 From: Milan Lakhani Date: Tue, 15 Dec 2020 20:42:36 +0000 Subject: [PATCH 0973/1894] Documentation: process: Correct numbering Renumber the steps in submit-checklist.rst as some numbers were skipped. Signed-off-by: Milan Lakhani Link: https://lore.kernel.org/r/1608064956-5512-1-git-send-email-milan.lakhani@codethink.co.uk Signed-off-by: Jonathan Corbet --- Documentation/process/submit-checklist.rst | 24 +++++++++++----------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/Documentation/process/submit-checklist.rst b/Documentation/process/submit-checklist.rst index 1879f881c300d..230ee42f872f1 100644 --- a/Documentation/process/submit-checklist.rst +++ b/Documentation/process/submit-checklist.rst @@ -75,44 +75,44 @@ and elsewhere regarding submitting Linux kernel patches. 13) Has been build- and runtime tested with and without ``CONFIG_SMP`` and ``CONFIG_PREEMPT.`` -16) All codepaths have been exercised with all lockdep features enabled. +14) All codepaths have been exercised with all lockdep features enabled. -17) All new ``/proc`` entries are documented under ``Documentation/`` +15) All new ``/proc`` entries are documented under ``Documentation/`` -18) All new kernel boot parameters are documented in +16) All new kernel boot parameters are documented in ``Documentation/admin-guide/kernel-parameters.rst``. -19) All new module parameters are documented with ``MODULE_PARM_DESC()`` +17) All new module parameters are documented with ``MODULE_PARM_DESC()`` -20) All new userspace interfaces are documented in ``Documentation/ABI/``. +18) All new userspace interfaces are documented in ``Documentation/ABI/``. See ``Documentation/ABI/README`` for more information. Patches that change userspace interfaces should be CCed to linux-api@vger.kernel.org. -21) Check that it all passes ``make headers_check``. +19) Check that it all passes ``make headers_check``. -22) Has been checked with injection of at least slab and page-allocation +20) Has been checked with injection of at least slab and page-allocation failures. See ``Documentation/fault-injection/``. If the new code is substantial, addition of subsystem-specific fault injection might be appropriate. -23) Newly-added code has been compiled with ``gcc -W`` (use +21) Newly-added code has been compiled with ``gcc -W`` (use ``make EXTRA_CFLAGS=-W``). This will generate lots of noise, but is good for finding bugs like "warning: comparison between signed and unsigned". -24) Tested after it has been merged into the -mm patchset to make sure +22) Tested after it has been merged into the -mm patchset to make sure that it still works with all of the other queued patches and various changes in the VM, VFS, and other subsystems. -25) All memory barriers {e.g., ``barrier()``, ``rmb()``, ``wmb()``} need a +23) All memory barriers {e.g., ``barrier()``, ``rmb()``, ``wmb()``} need a comment in the source code that explains the logic of what they are doing and why. -26) If any ioctl's are added by the patch, then also update +24) If any ioctl's are added by the patch, then also update ``Documentation/userspace-api/ioctl/ioctl-number.rst``. -27) If your modified source code depends on or uses any of the kernel +25) If your modified source code depends on or uses any of the kernel APIs or features that are related to the following ``Kconfig`` symbols, then test multiple builds with the related ``Kconfig`` symbols disabled and/or ``=m`` (if that option is available) [not all of these at the -- GitLab From 9bf19b78a203b6ed20ed7b5d7222f5ef7a49aed4 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 17 Dec 2020 19:37:56 +0100 Subject: [PATCH 0974/1894] Documentation/submitting-patches: Document the SoB chain Document what a chain of Signed-off-by's in a patch commit message should mean, explicitly. This has been carved out from a tip subsystem handbook patchset by Thomas Gleixner: https://lkml.kernel.org/r/20181107171010.421878737@linutronix.de and incorporates follow-on comments. Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20201217183756.GE23634@zn.tnic Signed-off-by: Jonathan Corbet --- Documentation/process/submitting-patches.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst index 2c48a00a436e7..5ba54120bef7e 100644 --- a/Documentation/process/submitting-patches.rst +++ b/Documentation/process/submitting-patches.rst @@ -411,6 +411,12 @@ Some people also put extra tags at the end. They'll just be ignored for now, but you can do this to mark internal company procedures or just point out some special detail about the sign-off. +Any further SoBs (Signed-off-by:'s) following the author's SoB are from +people handling and transporting the patch, but were not involved in its +development. SoB chains should reflect the **real** route a patch took +as it was propagated to the maintainers and ultimately to Linus, with +the first SoB entry signalling primary authorship of a single author. + When to use Acked-by:, Cc:, and Co-developed-by: ------------------------------------------------ -- GitLab From c635b0cea6b812898563809a13e65278989b2c72 Mon Sep 17 00:00:00 2001 From: Fengfei Xi Date: Thu, 10 Dec 2020 16:21:34 +0800 Subject: [PATCH 0975/1894] docs: admin-guide: Fix default value of max_map_count in sysctl/vm.rst Since the default value of sysctl_max_map_count is defined as DEFAULT_MAX_MAP_COUNT from mm/util.c int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT; DEFAULT_MAX_MAP_COUNT is defined as 65530 (65535-5) in include/linux/mm.h #define MAPCOUNT_ELF_CORE_MARGIN (5) #define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN) Signed-off-by: Fengfei Xi Link: https://lore.kernel.org/r/20201210082134.36957-1-xi.fengfei@h3c.com Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/sysctl/vm.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst index e0cf17ad2ae55..82b5e7ad3e913 100644 --- a/Documentation/admin-guide/sysctl/vm.rst +++ b/Documentation/admin-guide/sysctl/vm.rst @@ -428,7 +428,7 @@ While most applications need less than a thousand maps, certain programs, particularly malloc debuggers, may consume lots of them, e.g., up to one or two maps per allocation. -The default value is 65536. +The default value is 65530. memory_failure_early_kill: -- GitLab From bfe55584713b4d4d518ffe9cf2dab1129eba6321 Mon Sep 17 00:00:00 2001 From: Arend van Spriel Date: Sun, 20 Dec 2020 15:18:07 +0100 Subject: [PATCH 0976/1894] MAINTAINERS: switch to different email address Switching to private mail account as work email is polluted with a legal disclaimer. Just making it extra clear by changing the email address in the MAINTAINERS file as well. Signed-off-by: Arend van Spriel Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20201220141807.17278-1-aspriel@gmail.com --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 5c1a6ba5ef26e..85a7c3a2ed63a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3545,7 +3545,7 @@ S: Supported F: drivers/net/ethernet/broadcom/bnxt/ BROADCOM BRCM80211 IEEE802.11n WIRELESS DRIVER -M: Arend van Spriel +M: Arend van Spriel M: Franky Lin M: Hante Meuleman M: Chi-hsien Lin -- GitLab From a590370d918fc66c62df6620445791fbe840344a Mon Sep 17 00:00:00 2001 From: Roman Guskov Date: Mon, 21 Dec 2020 13:35:32 +0100 Subject: [PATCH 0977/1894] spi: stm32: FIFO threshold level - fix align packet size if cur_bpw <= 8 and xfer_len < 4 then the value of fthlv will be 1 and SPI registers content may have been lost. * If SPI data register is accessed as a 16-bit register and DSIZE <= 8bit, better to select FTHLV = 2, 4, 6 etc * If SPI data register is accessed as a 32-bit register and DSIZE > 8bit, better to select FTHLV = 2, 4, 6 etc, while if DSIZE <= 8bit, better to select FTHLV = 4, 8, 12 etc Signed-off-by: Roman Guskov Fixes: dcbe0d84dfa5 ("spi: add driver for STM32 SPI controller") Reviewed-by: Marek Vasut Link: https://lore.kernel.org/r/20201221123532.27272-1-rguskov@dh-electronics.com Signed-off-by: Mark Brown --- drivers/spi/spi-stm32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c index 471dedf3d3392..6017209c6d2f7 100644 --- a/drivers/spi/spi-stm32.c +++ b/drivers/spi/spi-stm32.c @@ -493,9 +493,9 @@ static u32 stm32h7_spi_prepare_fthlv(struct stm32_spi *spi, u32 xfer_len) /* align packet size with data registers access */ if (spi->cur_bpw > 8) - fthlv -= (fthlv % 2); /* multiple of 2 */ + fthlv += (fthlv % 2) ? 1 : 0; else - fthlv -= (fthlv % 4); /* multiple of 4 */ + fthlv += (fthlv % 4) ? (4 - (fthlv % 4)) : 0; if (!fthlv) fthlv = 1; -- GitLab From 252bd1256396cebc6fc3526127fdb0b317601318 Mon Sep 17 00:00:00 2001 From: Hyeongseok Kim Date: Thu, 3 Dec 2020 09:46:59 +0900 Subject: [PATCH 0978/1894] dm verity: skip verity work if I/O error when system is shutting down If emergency system shutdown is called, like by thermal shutdown, a dm device could be alive when the block device couldn't process I/O requests anymore. In this state, the handling of I/O errors by new dm I/O requests or by those already in-flight can lead to a verity corruption state, which is a misjudgment. So, skip verity work in response to I/O error when system is shutting down. Signed-off-by: Hyeongseok Kim Reviewed-by: Sami Tolvanen Signed-off-by: Mike Snitzer --- drivers/md/dm-verity-target.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index f74982dcbea0d..6b8e5bdd8526d 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -537,6 +537,15 @@ static int verity_verify_io(struct dm_verity_io *io) return 0; } +/* + * Skip verity work in response to I/O error when system is shutting down. + */ +static inline bool verity_is_system_shutting_down(void) +{ + return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF + || system_state == SYSTEM_RESTART; +} + /* * End one "io" structure with a given error. */ @@ -564,7 +573,8 @@ static void verity_end_io(struct bio *bio) { struct dm_verity_io *io = bio->bi_private; - if (bio->bi_status && !verity_fec_is_enabled(io->v)) { + if (bio->bi_status && + (!verity_fec_is_enabled(io->v) || verity_is_system_shutting_down())) { verity_finish_io(io, bio->bi_status); return; } -- GitLab From 3b66e4a8e58a85af3212c7117d7a29c9ef6679a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guido=20G=C3=BCnther?= Date: Fri, 18 Dec 2020 19:38:07 +0100 Subject: [PATCH 0979/1894] regulator: bd718x7: Add enable times MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the typical startup times from the data sheet so boards get a reasonable default. Not setting any enable time can lead to board hangs when e.g. clocks are enabled too soon afterwards. This fixes gpu power domain resume on the Librem 5. [Moved #defines into driver, seems to be general agreement and avoids any cross tree issues -- broonie] Signed-off-by: Guido Günther Reviewed-by: Matti Vaittinen Link: https://lore.kernel.org/r/41fb2ed19f584f138336344e2297ae7301f72b75.1608316658.git.agx@sigxcpu.org Signed-off-by: Mark Brown --- drivers/regulator/bd718x7-regulator.c | 57 +++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/drivers/regulator/bd718x7-regulator.c b/drivers/regulator/bd718x7-regulator.c index e6d5d98c3ceab..9309765d0450e 100644 --- a/drivers/regulator/bd718x7-regulator.c +++ b/drivers/regulator/bd718x7-regulator.c @@ -15,6 +15,36 @@ #include #include +/* Typical regulator startup times as per data sheet in uS */ +#define BD71847_BUCK1_STARTUP_TIME 144 +#define BD71847_BUCK2_STARTUP_TIME 162 +#define BD71847_BUCK3_STARTUP_TIME 162 +#define BD71847_BUCK4_STARTUP_TIME 240 +#define BD71847_BUCK5_STARTUP_TIME 270 +#define BD71847_BUCK6_STARTUP_TIME 200 +#define BD71847_LDO1_STARTUP_TIME 440 +#define BD71847_LDO2_STARTUP_TIME 370 +#define BD71847_LDO3_STARTUP_TIME 310 +#define BD71847_LDO4_STARTUP_TIME 400 +#define BD71847_LDO5_STARTUP_TIME 530 +#define BD71847_LDO6_STARTUP_TIME 400 + +#define BD71837_BUCK1_STARTUP_TIME 160 +#define BD71837_BUCK2_STARTUP_TIME 180 +#define BD71837_BUCK3_STARTUP_TIME 180 +#define BD71837_BUCK4_STARTUP_TIME 180 +#define BD71837_BUCK5_STARTUP_TIME 160 +#define BD71837_BUCK6_STARTUP_TIME 240 +#define BD71837_BUCK7_STARTUP_TIME 220 +#define BD71837_BUCK8_STARTUP_TIME 200 +#define BD71837_LDO1_STARTUP_TIME 440 +#define BD71837_LDO2_STARTUP_TIME 370 +#define BD71837_LDO3_STARTUP_TIME 310 +#define BD71837_LDO4_STARTUP_TIME 400 +#define BD71837_LDO5_STARTUP_TIME 310 +#define BD71837_LDO6_STARTUP_TIME 400 +#define BD71837_LDO7_STARTUP_TIME 530 + /* * BD718(37/47/50) have two "enable control modes". ON/OFF can either be * controlled by software - or by PMIC internal HW state machine. Whether @@ -613,6 +643,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = { .vsel_mask = DVS_BUCK_RUN_MASK, .enable_reg = BD718XX_REG_BUCK1_CTRL, .enable_mask = BD718XX_BUCK_EN, + .enable_time = BD71847_BUCK1_STARTUP_TIME, .owner = THIS_MODULE, .of_parse_cb = buck_set_hw_dvs_levels, }, @@ -646,6 +677,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = { .vsel_mask = DVS_BUCK_RUN_MASK, .enable_reg = BD718XX_REG_BUCK2_CTRL, .enable_mask = BD718XX_BUCK_EN, + .enable_time = BD71847_BUCK2_STARTUP_TIME, .owner = THIS_MODULE, .of_parse_cb = buck_set_hw_dvs_levels, }, @@ -680,6 +712,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = { .linear_range_selectors = bd71847_buck3_volt_range_sel, .enable_reg = BD718XX_REG_1ST_NODVS_BUCK_CTRL, .enable_mask = BD718XX_BUCK_EN, + .enable_time = BD71847_BUCK3_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -706,6 +739,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = { .vsel_range_mask = BD71847_BUCK4_RANGE_MASK, .linear_range_selectors = bd71847_buck4_volt_range_sel, .enable_mask = BD718XX_BUCK_EN, + .enable_time = BD71847_BUCK4_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -727,6 +761,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = { .vsel_mask = BD718XX_3RD_NODVS_BUCK_MASK, .enable_reg = BD718XX_REG_3RD_NODVS_BUCK_CTRL, .enable_mask = BD718XX_BUCK_EN, + .enable_time = BD71847_BUCK5_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -750,6 +785,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = { .vsel_mask = BD718XX_4TH_NODVS_BUCK_MASK, .enable_reg = BD718XX_REG_4TH_NODVS_BUCK_CTRL, .enable_mask = BD718XX_BUCK_EN, + .enable_time = BD71847_BUCK6_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -775,6 +811,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = { .linear_range_selectors = bd718xx_ldo1_volt_range_sel, .enable_reg = BD718XX_REG_LDO1_VOLT, .enable_mask = BD718XX_LDO_EN, + .enable_time = BD71847_LDO1_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -796,6 +833,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = { .n_voltages = ARRAY_SIZE(ldo_2_volts), .enable_reg = BD718XX_REG_LDO2_VOLT, .enable_mask = BD718XX_LDO_EN, + .enable_time = BD71847_LDO2_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -818,6 +856,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = { .vsel_mask = BD718XX_LDO3_MASK, .enable_reg = BD718XX_REG_LDO3_VOLT, .enable_mask = BD718XX_LDO_EN, + .enable_time = BD71847_LDO3_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -840,6 +879,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = { .vsel_mask = BD718XX_LDO4_MASK, .enable_reg = BD718XX_REG_LDO4_VOLT, .enable_mask = BD718XX_LDO_EN, + .enable_time = BD71847_LDO4_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -865,6 +905,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = { .linear_range_selectors = bd71847_ldo5_volt_range_sel, .enable_reg = BD718XX_REG_LDO5_VOLT, .enable_mask = BD718XX_LDO_EN, + .enable_time = BD71847_LDO5_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -889,6 +930,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = { .vsel_mask = BD718XX_LDO6_MASK, .enable_reg = BD718XX_REG_LDO6_VOLT, .enable_mask = BD718XX_LDO_EN, + .enable_time = BD71847_LDO6_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -942,6 +984,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = { .vsel_mask = DVS_BUCK_RUN_MASK, .enable_reg = BD718XX_REG_BUCK1_CTRL, .enable_mask = BD718XX_BUCK_EN, + .enable_time = BD71837_BUCK1_STARTUP_TIME, .owner = THIS_MODULE, .of_parse_cb = buck_set_hw_dvs_levels, }, @@ -975,6 +1018,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = { .vsel_mask = DVS_BUCK_RUN_MASK, .enable_reg = BD718XX_REG_BUCK2_CTRL, .enable_mask = BD718XX_BUCK_EN, + .enable_time = BD71837_BUCK2_STARTUP_TIME, .owner = THIS_MODULE, .of_parse_cb = buck_set_hw_dvs_levels, }, @@ -1005,6 +1049,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = { .vsel_mask = DVS_BUCK_RUN_MASK, .enable_reg = BD71837_REG_BUCK3_CTRL, .enable_mask = BD718XX_BUCK_EN, + .enable_time = BD71837_BUCK3_STARTUP_TIME, .owner = THIS_MODULE, .of_parse_cb = buck_set_hw_dvs_levels, }, @@ -1033,6 +1078,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = { .vsel_mask = DVS_BUCK_RUN_MASK, .enable_reg = BD71837_REG_BUCK4_CTRL, .enable_mask = BD718XX_BUCK_EN, + .enable_time = BD71837_BUCK4_STARTUP_TIME, .owner = THIS_MODULE, .of_parse_cb = buck_set_hw_dvs_levels, }, @@ -1065,6 +1111,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = { .linear_range_selectors = bd71837_buck5_volt_range_sel, .enable_reg = BD718XX_REG_1ST_NODVS_BUCK_CTRL, .enable_mask = BD718XX_BUCK_EN, + .enable_time = BD71837_BUCK5_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -1088,6 +1135,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = { .vsel_mask = BD71837_BUCK6_MASK, .enable_reg = BD718XX_REG_2ND_NODVS_BUCK_CTRL, .enable_mask = BD718XX_BUCK_EN, + .enable_time = BD71837_BUCK6_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -1109,6 +1157,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = { .vsel_mask = BD718XX_3RD_NODVS_BUCK_MASK, .enable_reg = BD718XX_REG_3RD_NODVS_BUCK_CTRL, .enable_mask = BD718XX_BUCK_EN, + .enable_time = BD71837_BUCK7_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -1132,6 +1181,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = { .vsel_mask = BD718XX_4TH_NODVS_BUCK_MASK, .enable_reg = BD718XX_REG_4TH_NODVS_BUCK_CTRL, .enable_mask = BD718XX_BUCK_EN, + .enable_time = BD71837_BUCK8_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -1157,6 +1207,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = { .linear_range_selectors = bd718xx_ldo1_volt_range_sel, .enable_reg = BD718XX_REG_LDO1_VOLT, .enable_mask = BD718XX_LDO_EN, + .enable_time = BD71837_LDO1_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -1178,6 +1229,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = { .n_voltages = ARRAY_SIZE(ldo_2_volts), .enable_reg = BD718XX_REG_LDO2_VOLT, .enable_mask = BD718XX_LDO_EN, + .enable_time = BD71837_LDO2_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -1200,6 +1252,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = { .vsel_mask = BD718XX_LDO3_MASK, .enable_reg = BD718XX_REG_LDO3_VOLT, .enable_mask = BD718XX_LDO_EN, + .enable_time = BD71837_LDO3_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -1222,6 +1275,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = { .vsel_mask = BD718XX_LDO4_MASK, .enable_reg = BD718XX_REG_LDO4_VOLT, .enable_mask = BD718XX_LDO_EN, + .enable_time = BD71837_LDO4_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -1246,6 +1300,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = { .vsel_mask = BD71837_LDO5_MASK, .enable_reg = BD718XX_REG_LDO5_VOLT, .enable_mask = BD718XX_LDO_EN, + .enable_time = BD71837_LDO5_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -1272,6 +1327,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = { .vsel_mask = BD718XX_LDO6_MASK, .enable_reg = BD718XX_REG_LDO6_VOLT, .enable_mask = BD718XX_LDO_EN, + .enable_time = BD71837_LDO6_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -1296,6 +1352,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = { .vsel_mask = BD71837_LDO7_MASK, .enable_reg = BD71837_REG_LDO7_VOLT, .enable_mask = BD718XX_LDO_EN, + .enable_time = BD71837_LDO7_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { -- GitLab From 52252adede912890b81e0a05503a482062e17c6e Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Thu, 10 Dec 2020 09:50:49 +0100 Subject: [PATCH 0980/1894] dm ebs: avoid double unlikely() notation when using IS_ERR() The definition of IS_ERR() already applies the unlikely() notation when checking the error status of the passed pointer. For this reason there is no need to have the same notation outside of IS_ERR() itself. Clean up code by removing redundant notation. Signed-off-by: Antonio Quartulli Signed-off-by: Mike Snitzer --- drivers/md/dm-ebs-target.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-ebs-target.c b/drivers/md/dm-ebs-target.c index cb85610527c2c..55bcfb74f51fa 100644 --- a/drivers/md/dm-ebs-target.c +++ b/drivers/md/dm-ebs-target.c @@ -86,7 +86,7 @@ static int __ebs_rw_bvec(struct ebs_c *ec, int rw, struct bio_vec *bv, struct bv else ba = dm_bufio_new(ec->bufio, block, &b); - if (unlikely(IS_ERR(ba))) { + if (IS_ERR(ba)) { /* * Carry on with next buffer, if any, to issue all possible * data but return error. -- GitLab From a528b04ea40690ff40501f50d618a62a02b19620 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 21 Dec 2020 18:34:04 +0000 Subject: [PATCH 0981/1894] io_uring: fix ignoring xa_store errors xa_store() may fail, check the result. Cc: stable@vger.kernel.org # 5.10 Fixes: 0f2122045b946 ("io_uring: don't rely on weak ->files references") Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index fbf747803dbc1..846c635d06200 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8852,10 +8852,9 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx, static int io_uring_add_task_file(struct io_ring_ctx *ctx, struct file *file) { struct io_uring_task *tctx = current->io_uring; + int ret; if (unlikely(!tctx)) { - int ret; - ret = io_uring_alloc_task_context(current); if (unlikely(ret)) return ret; @@ -8866,7 +8865,12 @@ static int io_uring_add_task_file(struct io_ring_ctx *ctx, struct file *file) if (!old) { get_file(file); - xa_store(&tctx->xa, (unsigned long)file, file, GFP_KERNEL); + ret = xa_err(xa_store(&tctx->xa, (unsigned long)file, + file, GFP_KERNEL)); + if (ret) { + fput(file); + return ret; + } } tctx->last = file; } -- GitLab From e7e518053c267bb6be3799520d9f4a34c7264a2e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 21 Dec 2020 11:25:06 -0800 Subject: [PATCH 0982/1894] bpf: Add schedule point in htab_init_buckets() We noticed that with a LOCKDEP enabled kernel, allocating a hash table with 65536 buckets would use more than 60ms. htab_init_buckets() runs from process context, it is safe to schedule to avoid latency spikes. Fixes: c50eb518e262 ("bpf: Use separate lockdep class for each hashtab") Reported-by: John Sperbeck Signed-off-by: Eric Dumazet Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20201221192506.707584-1-eric.dumazet@gmail.com --- kernel/bpf/hashtab.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 7e848200cd268..c1ac7f964bc99 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -152,6 +152,7 @@ static void htab_init_buckets(struct bpf_htab *htab) lockdep_set_class(&htab->buckets[i].lock, &htab->lockdep_key); } + cond_resched(); } } -- GitLab From 54ddbdb024882e226055cc4c3c246592ddde2ee5 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 18 Dec 2020 09:38:43 -0800 Subject: [PATCH 0983/1894] net: systemport: set dev->max_mtu to UMAC_MAX_MTU_SIZE The driver is already allocating receive buffers of 2KiB and the Ethernet MAC is configured to accept frames up to UMAC_MAX_MTU_SIZE. Fixes: bfcb813203e6 ("net: dsa: configure the MTU for switch ports") Signed-off-by: Florian Fainelli Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/20201218173843.141046-1-f.fainelli@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bcmsysport.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 0fdd19d99d99f..b1ae9eb8f2479 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -2577,6 +2577,7 @@ static int bcm_sysport_probe(struct platform_device *pdev) NETIF_F_HW_VLAN_CTAG_TX; dev->hw_features |= dev->features; dev->vlan_features |= dev->features; + dev->max_mtu = UMAC_MAX_MTU_SIZE; /* Request the WOL interrupt and advertise suspend if available */ priv->wol_irq_disabled = 1; -- GitLab From 1385ae5c30f238f81bc6528d897c6d7a0816783f Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 18 Dec 2020 11:55:36 +0100 Subject: [PATCH 0984/1894] ethernet: ucc_geth: set dev->max_mtu to 1518 All the buffers and registers are already set up appropriately for an MTU slightly above 1500, so we just need to expose this to the networking stack. AFAICT, there's no need to implement .ndo_change_mtu when the receive buffers are always set up to support the max_mtu. This fixes several warnings during boot on our mpc8309-board with an embedded mv88e6250 switch: mv88e6085 mdio@e0102120:10: nonfatal error -34 setting MTU 1500 on port 0 ... mv88e6085 mdio@e0102120:10: nonfatal error -34 setting MTU 1500 on port 4 ucc_geth e0102000.ethernet eth1: error -22 setting MTU to 1504 to include DSA overhead The last line explains what the DSA stack tries to do: achieving an MTU of 1500 on-the-wire requires that the master netdevice connected to the CPU port supports an MTU of 1500+the tagging overhead. Fixes: bfcb813203e6 ("net: dsa: configure the MTU for switch ports") Reviewed-by: Andrew Lunn Signed-off-by: Rasmus Villemoes Reviewed-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/ucc_geth.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index ba8869c3d891c..d6dbae4807161 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -3889,6 +3889,7 @@ static int ucc_geth_probe(struct platform_device* ofdev) INIT_WORK(&ugeth->timeout_work, ucc_geth_timeout_work); netif_napi_add(dev, &ugeth->napi, ucc_geth_poll, 64); dev->mtu = 1500; + dev->max_mtu = 1518; ugeth->msg_enable = netif_msg_init(debug.msg_enable, UGETH_MSG_DEFAULT); ugeth->phy_interface = phy_interface; -- GitLab From 887078de2a23689e29d6fa1b75d7cbc544c280be Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 18 Dec 2020 11:55:37 +0100 Subject: [PATCH 0985/1894] ethernet: ucc_geth: fix definition and size of ucc_geth_tx_global_pram Table 8-53 in the QUICC Engine Reference manual shows definitions of fields up to a size of 192 bytes, not just 128. But in table 8-111, one does find the text Base Address of the Global Transmitter Parameter RAM Page. [...] The user needs to allocate 128 bytes for this page. The address must be aligned to the page size. I've checked both rev. 7 (11/2015) and rev. 9 (05/2018) of the manual; they both have this inconsistency (and the table numbers are the same). Adding a bit of debug printing, on my board the struct ucc_geth_tx_global_pram is allocated at offset 0x880, while the (opaque) ucc_geth_thread_data_tx gets allocated immediately afterwards, at 0x900. So whatever the engine writes into the thread data overlaps with the tail of the global tx pram (and devmem says that something does get written during a simple ping). I haven't observed any failure that could be attributed to this, but it seems to be the kind of thing that would be extremely hard to debug. So extend the struct definition so that we do allocate 192 bytes. Signed-off-by: Rasmus Villemoes Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/ucc_geth.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/ucc_geth.h b/drivers/net/ethernet/freescale/ucc_geth.h index 1a9bdf66a7d81..11d4bf5dc21f7 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.h +++ b/drivers/net/ethernet/freescale/ucc_geth.h @@ -575,7 +575,14 @@ struct ucc_geth_tx_global_pram { u32 vtagtable[0x8]; /* 8 4-byte VLAN tags */ u32 tqptr; /* a base pointer to the Tx Queues Memory Region */ - u8 res2[0x80 - 0x74]; + u8 res2[0x78 - 0x74]; + u64 snums_en; + u32 l2l3baseptr; /* top byte consists of a few other bit fields */ + + u16 mtu[8]; + u8 res3[0xa8 - 0x94]; + u32 wrrtablebase; /* top byte is reserved */ + u8 res4[0xc0 - 0xac]; } __packed; /* structure representing Extended Filtering Global Parameters in PRAM */ -- GitLab From e925e0cd2a705aaacb0b907bb3691fcac3a973a4 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 18 Dec 2020 11:55:38 +0100 Subject: [PATCH 0986/1894] ethernet: ucc_geth: fix use-after-free in ucc_geth_remove() ugeth is the netdiv_priv() part of the netdevice. Accessing the memory pointed to by ugeth (such as done by ucc_geth_memclean() and the two of_node_puts) after free_netdev() is thus use-after-free. Fixes: 80a9fad8e89a ("ucc_geth: fix module removal") Signed-off-by: Rasmus Villemoes Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/ucc_geth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index d6dbae4807161..6d853f018d531 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -3935,12 +3935,12 @@ static int ucc_geth_remove(struct platform_device* ofdev) struct device_node *np = ofdev->dev.of_node; unregister_netdev(dev); - free_netdev(dev); ucc_geth_memclean(ugeth); if (of_phy_is_fixed_link(np)) of_phy_deregister_fixed_link(np); of_node_put(ugeth->ug_info->tbi_node); of_node_put(ugeth->ug_info->phy_node); + free_netdev(dev); return 0; } -- GitLab From 83469893204281ecf65d572bddf02de29a19787c Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Fri, 18 Dec 2020 13:50:01 -0800 Subject: [PATCH 0987/1894] ionic: account for vlan tag len in rx buffer len Let the FW know we have enough receive buffer space for the vlan tag if it isn't stripped. Fixes: 0f3154e6bcb3 ("ionic: Add Tx and Rx handling") Signed-off-by: Shannon Nelson Link: https://lore.kernel.org/r/20201218215001.64696-1-snelson@pensando.io Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/pensando/ionic/ionic_txrx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index 9156c9825a16f..ac4cd5d82e696 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -337,7 +337,7 @@ void ionic_rx_fill(struct ionic_queue *q) unsigned int i, j; unsigned int len; - len = netdev->mtu + ETH_HLEN; + len = netdev->mtu + ETH_HLEN + VLAN_HLEN; nfrags = round_up(len, PAGE_SIZE) / PAGE_SIZE; for (i = ionic_q_space_avail(q); i; i--) { -- GitLab From 8df66af5c1e5f80562fe728db5ec069b21810144 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 19 Dec 2020 14:01:44 +0300 Subject: [PATCH 0988/1894] atm: idt77252: call pci_disable_device() on error path This error path needs to disable the pci device before returning. Fixes: ede58ef28e10 ("atm: remove deprecated use of pci api") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/X93dmC4NX0vbTpGp@mwanda Signed-off-by: Jakub Kicinski --- drivers/atm/idt77252.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c index 65a3886f68c9e..5f0472c18bcbd 100644 --- a/drivers/atm/idt77252.c +++ b/drivers/atm/idt77252.c @@ -3607,7 +3607,7 @@ static int idt77252_init_one(struct pci_dev *pcidev, if ((err = dma_set_mask_and_coherent(&pcidev->dev, DMA_BIT_MASK(32)))) { printk("idt77252: can't enable DMA for PCI device at %s\n", pci_name(pcidev)); - return err; + goto err_out_disable_pdev; } card = kzalloc(sizeof(struct idt77252_dev), GFP_KERNEL); -- GitLab From bcce55f556e824d43f352d76b94509185585e38d Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Sat, 19 Dec 2020 13:19:24 +0100 Subject: [PATCH 0989/1894] ppp: Fix PPPIOCUNBRIDGECHAN request number PPPIOCGL2TPSTATS already uses 54. This shouldn't be a problem in practice, but let's keep the logical decreasing assignment scheme. Fixes: 4cf476ced45d ("ppp: add PPPIOCBRIDGECHAN and PPPIOCUNBRIDGECHAN ioctls") Signed-off-by: Guillaume Nault Link: https://lore.kernel.org/r/e3a4c355e3820331d8e1fffef8522739aae58b57.1608380117.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/ppp-ioctl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/ppp-ioctl.h b/include/uapi/linux/ppp-ioctl.h index 8dbecb3ad0368..1cc5ce0ae062e 100644 --- a/include/uapi/linux/ppp-ioctl.h +++ b/include/uapi/linux/ppp-ioctl.h @@ -116,7 +116,7 @@ struct pppol2tp_ioc_stats { #define PPPIOCGCHAN _IOR('t', 55, int) /* get ppp channel number */ #define PPPIOCGL2TPSTATS _IOR('t', 54, struct pppol2tp_ioc_stats) #define PPPIOCBRIDGECHAN _IOW('t', 53, int) /* bridge one channel to another */ -#define PPPIOCUNBRIDGECHAN _IO('t', 54) /* unbridge channel */ +#define PPPIOCUNBRIDGECHAN _IO('t', 52) /* unbridge channel */ #define SIOCGPPPSTATS (SIOCDEVPRIVATE + 0) #define SIOCGPPPVER (SIOCDEVPRIVATE + 1) /* NEVER change this!! */ -- GitLab From 2575bc1aa9d52a62342b57a0b7d0a12146cf6aed Mon Sep 17 00:00:00 2001 From: Stefan Chulski Date: Sun, 20 Dec 2020 13:02:29 +0200 Subject: [PATCH 0990/1894] net: mvpp2: Fix GoP port 3 Networking Complex Control configurations During GoP port 2 Networking Complex Control mode of operation configurations, also GoP port 3 mode of operation was wrongly set. Patch removes these configurations. Fixes: f84bf386f395 ("net: mvpp2: initialize the GoP") Acked-by: Marcin Wojtas Signed-off-by: Stefan Chulski Link: https://lore.kernel.org/r/1608462149-1702-1-git-send-email-stefanc@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 0f18d034ce3e8..f20b313270270 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -1231,7 +1231,7 @@ static void mvpp22_gop_init_rgmii(struct mvpp2_port *port) regmap_read(priv->sysctrl_base, GENCONF_CTRL0, &val); if (port->gop_id == 2) - val |= GENCONF_CTRL0_PORT0_RGMII | GENCONF_CTRL0_PORT1_RGMII; + val |= GENCONF_CTRL0_PORT0_RGMII; else if (port->gop_id == 3) val |= GENCONF_CTRL0_PORT1_RGMII_MII; regmap_write(priv->sysctrl_base, GENCONF_CTRL0, val); -- GitLab From 2e2cbaf920d14de9a96180ddefd6861bcc46f07d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 9 Dec 2020 18:38:24 -0500 Subject: [PATCH 0991/1894] fix hostfs_open() use of ->f_path.dentry this is one of the cases where we need to use d_real() - we are using more than the name of dentry here. ->d_sb is used as well, so in case of hostfs being used as a layer we get the wrong superblock. Reported-by: Johannes Berg Tested-by: Johannes Berg Signed-off-by: Al Viro --- fs/hostfs/hostfs_kern.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index c070c0d8e3e97..aea35459d3903 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -315,7 +315,7 @@ retry: if (mode & FMODE_WRITE) r = w = 1; - name = dentry_name(file->f_path.dentry); + name = dentry_name(d_real(file->f_path.dentry, file->f_inode)); if (name == NULL) return -ENOMEM; -- GitLab From 74a2921948ed8c0e7f079a98442ec3493168cc85 Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 2 Dec 2020 18:36:57 +0800 Subject: [PATCH 0992/1894] scsi: hisi_sas: Expose HW queues for v2 hw As a performance enhancement, make the completion queue interrupts managed. In addition, in commit bf0beec0607d ("blk-mq: drain I/O when all CPUs in a hctx are offline"), CPU hotplug for MQ devices using managed interrupts is made safe. So expose HW queues to blk-mq to take advantage of this. Flag Scsi_host.host_tagset is also set to ensure that the HBA is not sent more commands than it can handle. However the driver still does not use request tag for IPTT as there are many HW bugs means that special rules apply for IPTT allocation. Link: https://lore.kernel.org/r/1606905417-183214-6-git-send-email-john.garry@huawei.com Signed-off-by: John Garry Signed-off-by: Martin K. Petersen --- drivers/scsi/hisi_sas/hisi_sas.h | 4 ++ drivers/scsi/hisi_sas/hisi_sas_main.c | 11 +++++ drivers/scsi/hisi_sas/hisi_sas_v2_hw.c | 66 +++++++++++++++++++++----- 3 files changed, 68 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas.h b/drivers/scsi/hisi_sas/hisi_sas.h index a25cfc11c96de..aa67807c56938 100644 --- a/drivers/scsi/hisi_sas/hisi_sas.h +++ b/drivers/scsi/hisi_sas/hisi_sas.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -312,6 +313,7 @@ enum { struct hisi_sas_hw { int (*hw_init)(struct hisi_hba *hisi_hba); + int (*interrupt_preinit)(struct hisi_hba *hisi_hba); void (*setup_itct)(struct hisi_hba *hisi_hba, struct hisi_sas_device *device); int (*slot_index_alloc)(struct hisi_hba *hisi_hba, @@ -418,6 +420,8 @@ struct hisi_hba { u32 refclk_frequency_mhz; u8 sas_addr[SAS_ADDR_SIZE]; + int *irq_map; /* v2 hw */ + int n_phy; spinlock_t lock; struct semaphore sem; diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 274ccf18ce2db..061d65b8a2878 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -2620,6 +2620,13 @@ err_out: return NULL; } +static int hisi_sas_interrupt_preinit(struct hisi_hba *hisi_hba) +{ + if (hisi_hba->hw->interrupt_preinit) + return hisi_hba->hw->interrupt_preinit(hisi_hba); + return 0; +} + int hisi_sas_probe(struct platform_device *pdev, const struct hisi_sas_hw *hw) { @@ -2677,6 +2684,10 @@ int hisi_sas_probe(struct platform_device *pdev, sha->sas_port[i] = &hisi_hba->port[i].sas_port; } + rc = hisi_sas_interrupt_preinit(hisi_hba); + if (rc) + goto err_out_ha; + rc = scsi_add_host(shost, &pdev->dev); if (rc) goto err_out_ha; diff --git a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c index b57177b52facc..9adfdefef9cad 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c @@ -3302,6 +3302,28 @@ static irq_handler_t fatal_interrupts[HISI_SAS_FATAL_INT_NR] = { fatal_axi_int_v2_hw }; +#define CQ0_IRQ_INDEX (96) + +static int hisi_sas_v2_interrupt_preinit(struct hisi_hba *hisi_hba) +{ + struct platform_device *pdev = hisi_hba->platform_dev; + struct Scsi_Host *shost = hisi_hba->shost; + struct irq_affinity desc = { + .pre_vectors = CQ0_IRQ_INDEX, + .post_vectors = 16, + }; + int resv = desc.pre_vectors + desc.post_vectors, minvec = resv + 1, nvec; + + nvec = devm_platform_get_irqs_affinity(pdev, &desc, minvec, 128, + &hisi_hba->irq_map); + if (nvec < 0) + return nvec; + + shost->nr_hw_queues = hisi_hba->cq_nvecs = nvec - resv; + + return 0; +} + /* * There is a limitation in the hip06 chipset that we need * to map in all mbigen interrupts, even if they are not used. @@ -3310,14 +3332,11 @@ static int interrupt_init_v2_hw(struct hisi_hba *hisi_hba) { struct platform_device *pdev = hisi_hba->platform_dev; struct device *dev = &pdev->dev; - int irq, rc = 0, irq_map[128]; + int irq, rc = 0; int i, phy_no, fatal_no, queue_no; - for (i = 0; i < 128; i++) - irq_map[i] = platform_get_irq(pdev, i); - for (i = 0; i < HISI_SAS_PHY_INT_NR; i++) { - irq = irq_map[i + 1]; /* Phy up/down is irq1 */ + irq = hisi_hba->irq_map[i + 1]; /* Phy up/down is irq1 */ rc = devm_request_irq(dev, irq, phy_interrupts[i], 0, DRV_NAME " phy", hisi_hba); if (rc) { @@ -3331,7 +3350,7 @@ static int interrupt_init_v2_hw(struct hisi_hba *hisi_hba) for (phy_no = 0; phy_no < hisi_hba->n_phy; phy_no++) { struct hisi_sas_phy *phy = &hisi_hba->phy[phy_no]; - irq = irq_map[phy_no + 72]; + irq = hisi_hba->irq_map[phy_no + 72]; rc = devm_request_irq(dev, irq, sata_int_v2_hw, 0, DRV_NAME " sata", phy); if (rc) { @@ -3343,7 +3362,7 @@ static int interrupt_init_v2_hw(struct hisi_hba *hisi_hba) } for (fatal_no = 0; fatal_no < HISI_SAS_FATAL_INT_NR; fatal_no++) { - irq = irq_map[fatal_no + 81]; + irq = hisi_hba->irq_map[fatal_no + 81]; rc = devm_request_irq(dev, irq, fatal_interrupts[fatal_no], 0, DRV_NAME " fatal", hisi_hba); if (rc) { @@ -3354,24 +3373,22 @@ static int interrupt_init_v2_hw(struct hisi_hba *hisi_hba) } } - for (queue_no = 0; queue_no < hisi_hba->queue_count; queue_no++) { + for (queue_no = 0; queue_no < hisi_hba->cq_nvecs; queue_no++) { struct hisi_sas_cq *cq = &hisi_hba->cq[queue_no]; - cq->irq_no = irq_map[queue_no + 96]; + cq->irq_no = hisi_hba->irq_map[queue_no + 96]; rc = devm_request_threaded_irq(dev, cq->irq_no, cq_interrupt_v2_hw, cq_thread_v2_hw, IRQF_ONESHOT, DRV_NAME " cq", cq); if (rc) { dev_err(dev, "irq init: could not request cq interrupt %d, rc=%d\n", - irq, rc); + cq->irq_no, rc); rc = -ENOENT; goto err_out; } + cq->irq_mask = irq_get_affinity_mask(cq->irq_no); } - - hisi_hba->cq_nvecs = hisi_hba->queue_count; - err_out: return rc; } @@ -3529,6 +3546,26 @@ static struct device_attribute *host_attrs_v2_hw[] = { NULL }; +static int map_queues_v2_hw(struct Scsi_Host *shost) +{ + struct hisi_hba *hisi_hba = shost_priv(shost); + struct blk_mq_queue_map *qmap = &shost->tag_set.map[HCTX_TYPE_DEFAULT]; + const struct cpumask *mask; + unsigned int queue, cpu; + + for (queue = 0; queue < qmap->nr_queues; queue++) { + mask = irq_get_affinity_mask(hisi_hba->irq_map[96 + queue]); + if (!mask) + continue; + + for_each_cpu(cpu, mask) + qmap->mq_map[cpu] = qmap->queue_offset + queue; + } + + return 0; + +} + static struct scsi_host_template sht_v2_hw = { .name = DRV_NAME, .proc_name = DRV_NAME, @@ -3553,10 +3590,13 @@ static struct scsi_host_template sht_v2_hw = { #endif .shost_attrs = host_attrs_v2_hw, .host_reset = hisi_sas_host_reset, + .map_queues = map_queues_v2_hw, + .host_tagset = 1, }; static const struct hisi_sas_hw hisi_sas_v2_hw = { .hw_init = hisi_sas_v2_init, + .interrupt_preinit = hisi_sas_v2_interrupt_preinit, .setup_itct = setup_itct_v2_hw, .slot_index_alloc = slot_index_alloc_quirk_v2_hw, .alloc_dev = alloc_dev_quirk_v2_hw, -- GitLab From 9eb78c25327548b905598975aa3ded4ef244b94a Mon Sep 17 00:00:00 2001 From: Artem Labazov <123321artyom@gmail.com> Date: Mon, 7 Dec 2020 09:04:36 +0900 Subject: [PATCH 0993/1894] exfat: Avoid allocating upcase table using kcalloc() The table for Unicode upcase conversion requires an order-5 allocation, which may fail on a highly-fragmented system: pool-udisksd: page allocation failure: order:5, mode:0x40dc0(GFP_KERNEL|__GFP_COMP|__GFP_ZERO), nodemask=(null), cpuset=/,mems_allowed=0 CPU: 4 PID: 3756880 Comm: pool-udisksd Tainted: G U 5.8.10-200.fc32.x86_64 #1 Hardware name: Dell Inc. XPS 13 9360/0PVG6D, BIOS 2.13.0 11/14/2019 Call Trace: dump_stack+0x6b/0x88 warn_alloc.cold+0x75/0xd9 ? _cond_resched+0x16/0x40 ? __alloc_pages_direct_compact+0x144/0x150 __alloc_pages_slowpath.constprop.0+0xcfa/0xd30 ? __schedule+0x28a/0x840 ? __wait_on_bit_lock+0x92/0xa0 __alloc_pages_nodemask+0x2df/0x320 kmalloc_order+0x1b/0x80 kmalloc_order_trace+0x1d/0xa0 exfat_create_upcase_table+0x115/0x390 [exfat] exfat_fill_super+0x3ef/0x7f0 [exfat] ? sget_fc+0x1d0/0x240 ? exfat_init_fs_context+0x120/0x120 [exfat] get_tree_bdev+0x15c/0x250 vfs_get_tree+0x25/0xb0 do_mount+0x7c3/0xaf0 ? copy_mount_options+0xab/0x180 __x64_sys_mount+0x8e/0xd0 do_syscall_64+0x4d/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Make the driver use kvcalloc() to eliminate the issue. Fixes: 370e812b3ec1 ("exfat: add nls operations") Cc: stable@vger.kernel.org #v5.7+ Signed-off-by: Artem Labazov <123321artyom@gmail.com> Signed-off-by: Namjae Jeon --- fs/exfat/nls.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/exfat/nls.c b/fs/exfat/nls.c index 675d0e7058c5a..314d5407a1be5 100644 --- a/fs/exfat/nls.c +++ b/fs/exfat/nls.c @@ -659,7 +659,7 @@ static int exfat_load_upcase_table(struct super_block *sb, unsigned char skip = false; unsigned short *upcase_table; - upcase_table = kcalloc(UTBL_COUNT, sizeof(unsigned short), GFP_KERNEL); + upcase_table = kvcalloc(UTBL_COUNT, sizeof(unsigned short), GFP_KERNEL); if (!upcase_table) return -ENOMEM; @@ -715,7 +715,7 @@ static int exfat_load_default_upcase_table(struct super_block *sb) unsigned short uni = 0, *upcase_table; unsigned int index = 0; - upcase_table = kcalloc(UTBL_COUNT, sizeof(unsigned short), GFP_KERNEL); + upcase_table = kvcalloc(UTBL_COUNT, sizeof(unsigned short), GFP_KERNEL); if (!upcase_table) return -ENOMEM; @@ -803,5 +803,5 @@ load_default: void exfat_free_upcase_table(struct exfat_sb_info *sbi) { - kfree(sbi->vol_utbl); + kvfree(sbi->vol_utbl); } -- GitLab From de043da0b9e71147ca610ed542d34858aadfc61c Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Fri, 18 Dec 2020 16:13:56 -0800 Subject: [PATCH 0994/1894] RISC-V: Fix usage of memblock_enforce_memory_limit memblock_enforce_memory_limit accepts the maximum memory size not the maximum address that can be handled by kernel. Fix the function invocation accordingly. Fixes: 1bd14a66ee52 ("RISC-V: Remove any memblock representing unusable memory area") Cc: stable@vger.kernel.org Reported-by: Bin Meng Tested-by: Bin Meng Acked-by: Mike Rapoport Signed-off-by: Atish Patra Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 13ba533f462b4..bf5379135e39b 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -176,7 +176,7 @@ void __init setup_bootmem(void) * Make sure that any memory beyond mem_start + (-PAGE_OFFSET) is removed * as it is unusable by kernel. */ - memblock_enforce_memory_limit(mem_start - PAGE_OFFSET); + memblock_enforce_memory_limit(-PAGE_OFFSET); /* Reserve from the start of the kernel to the end of the kernel */ memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start); -- GitLab From 43877226829eda91856b055d217b3033805fd76f Mon Sep 17 00:00:00 2001 From: Mike Oliphant Date: Mon, 21 Dec 2020 13:55:33 -0800 Subject: [PATCH 0995/1894] ALSA: usb-audio: Add implicit feeback support for the BOSS GT-1 The BOSS GT-1 (USB ID 0582:01d6) requires implicit feedback like other similar BOSS devices. This patch adds this support. [ rearranged the table entry in the ID order -- tiwai ] Signed-off-by: Mike Oliphant Link: https://lore.kernel.org/r/20201221215533.2511-1-oliphant@nostatic.org Signed-off-by: Takashi Iwai --- sound/usb/implicit.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/implicit.c b/sound/usb/implicit.c index 4e911d200562d..eb3a4c433c3e9 100644 --- a/sound/usb/implicit.c +++ b/sound/usb/implicit.c @@ -75,6 +75,7 @@ static const struct snd_usb_implicit_fb_match playback_implicit_fb_quirks[] = { /* No quirk for playback but with capture quirk (see below) */ IMPLICIT_FB_SKIP_DEV(0x0582, 0x0130), /* BOSS BR-80 */ IMPLICIT_FB_SKIP_DEV(0x0582, 0x0189), /* BOSS GT-100v2 */ + IMPLICIT_FB_SKIP_DEV(0x0582, 0x01d6), /* BOSS GT-1 */ IMPLICIT_FB_SKIP_DEV(0x0582, 0x01d8), /* BOSS Katana */ IMPLICIT_FB_SKIP_DEV(0x0582, 0x01e5), /* BOSS GT-001 */ @@ -85,6 +86,7 @@ static const struct snd_usb_implicit_fb_match playback_implicit_fb_quirks[] = { static const struct snd_usb_implicit_fb_match capture_implicit_fb_quirks[] = { IMPLICIT_FB_FIXED_DEV(0x0582, 0x0130, 0x0d, 0x01), /* BOSS BR-80 */ IMPLICIT_FB_FIXED_DEV(0x0582, 0x0189, 0x0d, 0x01), /* BOSS GT-100v2 */ + IMPLICIT_FB_FIXED_DEV(0x0582, 0x01d6, 0x0d, 0x01), /* BOSS GT-1 */ IMPLICIT_FB_FIXED_DEV(0x0582, 0x01d8, 0x0d, 0x01), /* BOSS Katana */ IMPLICIT_FB_FIXED_DEV(0x0582, 0x01e5, 0x0d, 0x01), /* BOSS GT-001 */ -- GitLab From 37309f47e2f5674f3e86cb765312ace42cfcedf5 Mon Sep 17 00:00:00 2001 From: Ping Cheng Date: Wed, 9 Dec 2020 20:52:30 -0800 Subject: [PATCH 0996/1894] HID: wacom: Fix memory leakage caused by kfifo_alloc As reported by syzbot below, kfifo_alloc'd memory would not be freed if a non-zero return value is triggered in wacom_probe. This patch creates and uses devm_kfifo_alloc to allocate and free itself. BUG: memory leak unreferenced object 0xffff88810dc44a00 (size 512): comm "kworker/1:2", pid 3674, jiffies 4294943617 (age 14.100s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<0000000023e1afac>] kmalloc_array include/linux/slab.h:592 [inline] [<0000000023e1afac>] __kfifo_alloc+0xad/0x100 lib/kfifo.c:43 [<00000000c477f737>] wacom_probe+0x1a1/0x3b0 drivers/hid/wacom_sys.c:2727 [<00000000b3109aca>] hid_device_probe+0x16b/0x210 drivers/hid/hid-core.c:2281 [<00000000aff7c640>] really_probe+0x159/0x480 drivers/base/dd.c:554 [<00000000778d0bc3>] driver_probe_device+0x84/0x100 drivers/base/dd.c:738 [<000000005108dbb5>] __device_attach_driver+0xee/0x110 drivers/base/dd.c:844 [<00000000efb7c59e>] bus_for_each_drv+0xb7/0x100 drivers/base/bus.c:431 [<0000000024ab1590>] __device_attach+0x122/0x250 drivers/base/dd.c:912 [<000000004c7ac048>] bus_probe_device+0xc6/0xe0 drivers/base/bus.c:491 [<00000000b93050a3>] device_add+0x5ac/0xc30 drivers/base/core.c:2936 [<00000000e5b46ea5>] hid_add_device+0x151/0x390 drivers/hid/hid-core.c:2437 [<00000000c6add147>] usbhid_probe+0x412/0x560 drivers/hid/usbhid/hid-core.c:1407 [<00000000c33acdb4>] usb_probe_interface+0x177/0x370 drivers/usb/core/driver.c:396 [<00000000aff7c640>] really_probe+0x159/0x480 drivers/base/dd.c:554 [<00000000778d0bc3>] driver_probe_device+0x84/0x100 drivers/base/dd.c:738 [<000000005108dbb5>] __device_attach_driver+0xee/0x110 drivers/base/dd.c:844 https://syzkaller.appspot.com/bug?extid=5b49c9695968d7250a26 Reported-by: syzbot+5b49c9695968d7250a26@syzkaller.appspotmail.com Signed-off-by: Ping Cheng Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/wacom_sys.c | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index 045c464228d91..e8acd235db2a9 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -1270,6 +1270,37 @@ static int wacom_devm_sysfs_create_group(struct wacom *wacom, group); } +static void wacom_devm_kfifo_release(struct device *dev, void *res) +{ + struct kfifo_rec_ptr_2 *devres = res; + + kfifo_free(devres); +} + +static int wacom_devm_kfifo_alloc(struct wacom *wacom) +{ + struct wacom_wac *wacom_wac = &wacom->wacom_wac; + struct kfifo_rec_ptr_2 *pen_fifo = &wacom_wac->pen_fifo; + int error; + + pen_fifo = devres_alloc(wacom_devm_kfifo_release, + sizeof(struct kfifo_rec_ptr_2), + GFP_KERNEL); + + if (!pen_fifo) + return -ENOMEM; + + error = kfifo_alloc(pen_fifo, WACOM_PKGLEN_MAX, GFP_KERNEL); + if (error) { + devres_free(pen_fifo); + return error; + } + + devres_add(&wacom->hdev->dev, pen_fifo); + + return 0; +} + enum led_brightness wacom_leds_brightness_get(struct wacom_led *led) { struct wacom *wacom = led->wacom; @@ -2724,7 +2755,7 @@ static int wacom_probe(struct hid_device *hdev, if (features->check_for_hid_type && features->hid_type != hdev->type) return -ENODEV; - error = kfifo_alloc(&wacom_wac->pen_fifo, WACOM_PKGLEN_MAX, GFP_KERNEL); + error = wacom_devm_kfifo_alloc(wacom); if (error) return error; @@ -2786,8 +2817,6 @@ static void wacom_remove(struct hid_device *hdev) if (wacom->wacom_wac.features.type != REMOTE) wacom_release_resources(wacom); - - kfifo_free(&wacom_wac->pen_fifo); } #ifdef CONFIG_PM -- GitLab From 2a5f1b67ec577fb1544b563086e0377f095f88e2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 10 Dec 2020 08:30:59 +0000 Subject: [PATCH 0997/1894] KVM: arm64: Don't access PMCR_EL0 when no PMU is available We reset the guest's view of PMCR_EL0 unconditionally, based on the host's view of this register. It is however legal for an implementation not to provide any PMU, resulting in an UNDEF. The obvious fix is to skip the reset of this shadow register when no PMU is available, sidestepping the issue entirely. If no PMU is available, the guest is not able to request a virtual PMU anyway, so not doing nothing is the right thing to do! It is unlikely that this bug can hit any HW implementation though, as they all provide a PMU. It has been found using nested virt with the host KVM not implementing the PMU itself. Fixes: ab9468340d2bc ("arm64: KVM: Add access handler for PMCR register") Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201210083059.1277162-1-maz@kernel.org --- arch/arm64/kvm/sys_regs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 3313dedfa5053..d46e7f706cb06 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -594,6 +594,10 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { u64 pmcr, val; + /* No PMU available, PMCR_EL0 may UNDEF... */ + if (!kvm_arm_support_pmu_v3()) + return; + pmcr = read_sysreg(pmcr_el0); /* * Writable bits of PMCR_EL0 (ARMV8_PMU_PMCR_MASK) are reset to UNKNOWN -- GitLab From ff367fe473a9857160c17827931375a899076394 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Tue, 8 Dec 2020 14:24:47 +0000 Subject: [PATCH 0998/1894] KVM: arm64: Prevent use of invalid PSCI v0.1 function IDs PSCI driver exposes a struct containing the PSCI v0.1 function IDs configured in the DT. However, the struct does not convey the information whether these were set from DT or contain the default value zero. This could be a problem for PSCI proxy in KVM protected mode. Extend config passed to KVM with a bit mask with individual bits set depending on whether the corresponding function pointer in psci_ops is set, eg. set bit for PSCI_CPU_SUSPEND if psci_ops.cpu_suspend != NULL. Previously config was split into multiple global variables. Put everything into a single struct for convenience. Reported-by: Mark Rutland Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201208142452.87237-2-dbrazdil@google.com --- arch/arm64/include/asm/kvm_host.h | 20 +++++++++++ arch/arm64/kvm/arm.c | 14 +++++--- arch/arm64/kvm/hyp/nvhe/psci-relay.c | 53 +++++++++++++++++++++------- 3 files changed, 70 insertions(+), 17 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 11beda85ee7e5..828d50d40dc2f 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -240,6 +241,25 @@ struct kvm_host_data { struct kvm_pmu_events pmu_events; }; +#define KVM_HOST_PSCI_0_1_CPU_SUSPEND BIT(0) +#define KVM_HOST_PSCI_0_1_CPU_ON BIT(1) +#define KVM_HOST_PSCI_0_1_CPU_OFF BIT(2) +#define KVM_HOST_PSCI_0_1_MIGRATE BIT(3) + +struct kvm_host_psci_config { + /* PSCI version used by host. */ + u32 version; + + /* Function IDs used by host if version is v0.1. */ + struct psci_0_1_function_ids function_ids_0_1; + + /* Bitmask of functions enabled for v0.1, bits KVM_HOST_PSCI_0_1_*. */ + unsigned int enabled_functions_0_1; +}; + +extern struct kvm_host_psci_config kvm_nvhe_sym(kvm_host_psci_config); +#define kvm_host_psci_config CHOOSE_NVHE_SYM(kvm_host_psci_config) + struct vcpu_reset_state { unsigned long pc; unsigned long r0; diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 6e637d2b4cfb7..6a2f4e01b04f3 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -66,8 +66,6 @@ static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled); DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use); extern u64 kvm_nvhe_sym(__cpu_logical_map)[NR_CPUS]; -extern u32 kvm_nvhe_sym(kvm_host_psci_version); -extern struct psci_0_1_function_ids kvm_nvhe_sym(kvm_host_psci_0_1_function_ids); int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) { @@ -1618,8 +1616,16 @@ static bool init_psci_relay(void) return false; } - kvm_nvhe_sym(kvm_host_psci_version) = psci_ops.get_version(); - kvm_nvhe_sym(kvm_host_psci_0_1_function_ids) = get_psci_0_1_function_ids(); + kvm_host_psci_config.version = psci_ops.get_version(); + + if (kvm_host_psci_config.version == PSCI_VERSION(0, 1)) { + kvm_host_psci_config.function_ids_0_1 = get_psci_0_1_function_ids(); + kvm_host_psci_config.enabled_functions_0_1 = + (psci_ops.cpu_suspend ? KVM_HOST_PSCI_0_1_CPU_SUSPEND : 0) | + (psci_ops.cpu_off ? KVM_HOST_PSCI_0_1_CPU_OFF : 0) | + (psci_ops.cpu_on ? KVM_HOST_PSCI_0_1_CPU_ON : 0) | + (psci_ops.migrate ? KVM_HOST_PSCI_0_1_MIGRATE : 0); + } return true; } diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c index 08dc9de693147..0d6f4aa396217 100644 --- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c +++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c @@ -22,9 +22,8 @@ void kvm_hyp_cpu_resume(unsigned long r0); void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt); /* Config options set by the host. */ -__ro_after_init u32 kvm_host_psci_version; -__ro_after_init struct psci_0_1_function_ids kvm_host_psci_0_1_function_ids; -__ro_after_init s64 hyp_physvirt_offset; +struct kvm_host_psci_config __ro_after_init kvm_host_psci_config; +s64 __ro_after_init hyp_physvirt_offset; #define __hyp_pa(x) ((phys_addr_t)((x)) + hyp_physvirt_offset) @@ -54,12 +53,41 @@ static u64 get_psci_func_id(struct kvm_cpu_context *host_ctxt) return func_id; } +static inline bool is_psci_0_1_function_enabled(unsigned int fn_bit) +{ + return kvm_host_psci_config.enabled_functions_0_1 & fn_bit; +} + +static inline bool is_psci_0_1_cpu_suspend(u64 func_id) +{ + return is_psci_0_1_function_enabled(KVM_HOST_PSCI_0_1_CPU_SUSPEND) && + (func_id == kvm_host_psci_config.function_ids_0_1.cpu_suspend); +} + +static inline bool is_psci_0_1_cpu_on(u64 func_id) +{ + return is_psci_0_1_function_enabled(KVM_HOST_PSCI_0_1_CPU_ON) && + (func_id == kvm_host_psci_config.function_ids_0_1.cpu_on); +} + +static inline bool is_psci_0_1_cpu_off(u64 func_id) +{ + return is_psci_0_1_function_enabled(KVM_HOST_PSCI_0_1_CPU_OFF) && + (func_id == kvm_host_psci_config.function_ids_0_1.cpu_off); +} + +static inline bool is_psci_0_1_migrate(u64 func_id) +{ + return is_psci_0_1_function_enabled(KVM_HOST_PSCI_0_1_MIGRATE) && + (func_id == kvm_host_psci_config.function_ids_0_1.migrate); +} + static bool is_psci_0_1_call(u64 func_id) { - return (func_id == kvm_host_psci_0_1_function_ids.cpu_suspend) || - (func_id == kvm_host_psci_0_1_function_ids.cpu_on) || - (func_id == kvm_host_psci_0_1_function_ids.cpu_off) || - (func_id == kvm_host_psci_0_1_function_ids.migrate); + return is_psci_0_1_cpu_suspend(func_id) || + is_psci_0_1_cpu_on(func_id) || + is_psci_0_1_cpu_off(func_id) || + is_psci_0_1_migrate(func_id); } static bool is_psci_0_2_call(u64 func_id) @@ -71,7 +99,7 @@ static bool is_psci_0_2_call(u64 func_id) static bool is_psci_call(u64 func_id) { - switch (kvm_host_psci_version) { + switch (kvm_host_psci_config.version) { case PSCI_VERSION(0, 1): return is_psci_0_1_call(func_id); default: @@ -248,12 +276,11 @@ asmlinkage void __noreturn kvm_host_psci_cpu_entry(bool is_cpu_on) static unsigned long psci_0_1_handler(u64 func_id, struct kvm_cpu_context *host_ctxt) { - if ((func_id == kvm_host_psci_0_1_function_ids.cpu_off) || - (func_id == kvm_host_psci_0_1_function_ids.migrate)) + if (is_psci_0_1_cpu_off(func_id) || is_psci_0_1_migrate(func_id)) return psci_forward(host_ctxt); - else if (func_id == kvm_host_psci_0_1_function_ids.cpu_on) + else if (is_psci_0_1_cpu_on(func_id)) return psci_cpu_on(func_id, host_ctxt); - else if (func_id == kvm_host_psci_0_1_function_ids.cpu_suspend) + else if (is_psci_0_1_cpu_suspend(func_id)) return psci_cpu_suspend(func_id, host_ctxt); else return PSCI_RET_NOT_SUPPORTED; @@ -304,7 +331,7 @@ bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt) if (!is_psci_call(func_id)) return false; - switch (kvm_host_psci_version) { + switch (kvm_host_psci_config.version) { case PSCI_VERSION(0, 1): ret = psci_0_1_handler(func_id, host_ctxt); break; -- GitLab From 7a96a0687b80a1870c689418d7b72012c8bdd53d Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Tue, 8 Dec 2020 14:24:48 +0000 Subject: [PATCH 0999/1894] KVM: arm64: Use lm_alias in nVHE-only VA conversion init_hyp_physvirt_offset() computes PA from a kernel VA. Conversion to kernel linear-map is required first but the code used kvm_ksym_ref() for this purpose. Under VHE that is a NOP and resulted in a runtime warning. Replace kvm_ksym_ref with lm_alias. Reported-by: Qian Cai Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201208142452.87237-3-dbrazdil@google.com --- arch/arm64/kvm/va_layout.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c index d8cc51bd60bf2..914732b88c69f 100644 --- a/arch/arm64/kvm/va_layout.c +++ b/arch/arm64/kvm/va_layout.c @@ -42,7 +42,7 @@ static void init_hyp_physvirt_offset(void) u64 kern_va, hyp_va; /* Compute the offset from the hyp VA and PA of a random symbol. */ - kern_va = (u64)kvm_ksym_ref(__hyp_text_start); + kern_va = (u64)lm_alias(__hyp_text_start); hyp_va = __early_kern_hyp_va(kern_va); CHOOSE_NVHE_SYM(hyp_physvirt_offset) = (s64)__pa(kern_va) - (s64)hyp_va; } -- GitLab From c3e181aec96f6ada84df1cb72a72be8970f8b284 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Tue, 8 Dec 2020 14:24:49 +0000 Subject: [PATCH 1000/1894] KVM: arm64: Skip computing hyp VA layout for VHE Computing the hyp VA layout is redundant when the kernel runs in EL2 and hyp shares its VA mappings. Make calling kvm_compute_layout() conditional on not just CONFIG_KVM but also !is_kernel_in_hyp_mode(). Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201208142452.87237-4-dbrazdil@google.com --- arch/arm64/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 18e9727d3f645..4e585cc892e82 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -434,7 +434,7 @@ static void __init hyp_mode_check(void) "CPU: CPUs started in inconsistent modes"); else pr_info("CPU: All CPU(s) started at EL1\n"); - if (IS_ENABLED(CONFIG_KVM)) + if (IS_ENABLED(CONFIG_KVM) && !is_kernel_in_hyp_mode()) kvm_compute_layout(); } -- GitLab From 61fe0c37af57ac35472a870581a7d0bb5ac2f63a Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Tue, 8 Dec 2020 14:24:50 +0000 Subject: [PATCH 1001/1894] KVM: arm64: Minor cleanup of hyp variables used in host Small cleanup moving declarations of hyp-exported variables to kvm_host.h and using macros to avoid having to refer to them with kvm_nvhe_sym() in host. No functional change intended. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201208142452.87237-5-dbrazdil@google.com --- arch/arm64/include/asm/kvm_host.h | 6 ++++++ arch/arm64/kvm/arm.c | 4 +--- arch/arm64/kvm/hyp/nvhe/hyp-smp.c | 6 +++--- arch/arm64/kvm/va_layout.c | 5 ++--- 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 828d50d40dc2f..bce2452b305c0 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -260,6 +260,12 @@ struct kvm_host_psci_config { extern struct kvm_host_psci_config kvm_nvhe_sym(kvm_host_psci_config); #define kvm_host_psci_config CHOOSE_NVHE_SYM(kvm_host_psci_config) +extern s64 kvm_nvhe_sym(hyp_physvirt_offset); +#define hyp_physvirt_offset CHOOSE_NVHE_SYM(hyp_physvirt_offset) + +extern u64 kvm_nvhe_sym(hyp_cpu_logical_map)[NR_CPUS]; +#define hyp_cpu_logical_map CHOOSE_NVHE_SYM(hyp_cpu_logical_map) + struct vcpu_reset_state { unsigned long pc; unsigned long r0; diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 6a2f4e01b04f3..836ca763b91d7 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -65,8 +65,6 @@ static bool vgic_present; static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled); DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use); -extern u64 kvm_nvhe_sym(__cpu_logical_map)[NR_CPUS]; - int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) { return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; @@ -1602,7 +1600,7 @@ static void init_cpu_logical_map(void) * allow any other CPUs from the `possible` set to boot. */ for_each_online_cpu(cpu) - kvm_nvhe_sym(__cpu_logical_map)[cpu] = cpu_logical_map(cpu); + hyp_cpu_logical_map[cpu] = cpu_logical_map(cpu); } static bool init_psci_relay(void) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-smp.c b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c index cbab0c6246e20..2997aa156d8e5 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-smp.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c @@ -14,14 +14,14 @@ * Other CPUs should not be allowed to boot because their features were * not checked against the finalized system capabilities. */ -u64 __ro_after_init __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID }; +u64 __ro_after_init hyp_cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID }; u64 cpu_logical_map(unsigned int cpu) { - if (cpu >= ARRAY_SIZE(__cpu_logical_map)) + if (cpu >= ARRAY_SIZE(hyp_cpu_logical_map)) hyp_panic(); - return __cpu_logical_map[cpu]; + return hyp_cpu_logical_map[cpu]; } unsigned long __hyp_per_cpu_offset(unsigned int cpu) diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c index 914732b88c69f..70fcd6a12fe1f 100644 --- a/arch/arm64/kvm/va_layout.c +++ b/arch/arm64/kvm/va_layout.c @@ -34,17 +34,16 @@ static u64 __early_kern_hyp_va(u64 addr) } /* - * Store a hyp VA <-> PA offset into a hyp-owned variable. + * Store a hyp VA <-> PA offset into a EL2-owned variable. */ static void init_hyp_physvirt_offset(void) { - extern s64 kvm_nvhe_sym(hyp_physvirt_offset); u64 kern_va, hyp_va; /* Compute the offset from the hyp VA and PA of a random symbol. */ kern_va = (u64)lm_alias(__hyp_text_start); hyp_va = __early_kern_hyp_va(kern_va); - CHOOSE_NVHE_SYM(hyp_physvirt_offset) = (s64)__pa(kern_va) - (s64)hyp_va; + hyp_physvirt_offset = (s64)__pa(kern_va) - (s64)hyp_va; } /* -- GitLab From e6829e0384a49efe68537298132230bebd8bd1b3 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Tue, 8 Dec 2020 14:24:51 +0000 Subject: [PATCH 1002/1894] KVM: arm64: Remove unused includes in psci-relay.c Minor cleanup removing unused includes. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201208142452.87237-6-dbrazdil@google.com --- arch/arm64/kvm/hyp/nvhe/psci-relay.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c index 0d6f4aa396217..1f7237e451482 100644 --- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c +++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c @@ -7,11 +7,8 @@ #include #include #include -#include #include #include -#include -#include #include #include -- GitLab From 860a4c3d1e04a3c3e62bacbbba64417bf49768e2 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Tue, 8 Dec 2020 14:24:52 +0000 Subject: [PATCH 1003/1894] KVM: arm64: Move skip_host_instruction to adjust_pc.h Move function for skipping host instruction in the host trap handler to a header file containing analogical helpers for guests. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201208142452.87237-7-dbrazdil@google.com --- arch/arm64/kvm/hyp/include/hyp/adjust_pc.h | 9 +++++++++ arch/arm64/kvm/hyp/nvhe/hyp-main.c | 12 ++---------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h b/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h index b1f60923a8feb..61716359035d6 100644 --- a/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h +++ b/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h @@ -59,4 +59,13 @@ static inline void __adjust_pc(struct kvm_vcpu *vcpu) } } +/* + * Skip an instruction while host sysregs are live. + * Assumes host is always 64-bit. + */ +static inline void kvm_skip_host_instr(void) +{ + write_sysreg_el2(read_sysreg_el2(SYS_ELR) + 4, SYS_ELR); +} + #endif diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index bde658d51404b..a906f9e2ff34f 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -157,11 +157,6 @@ static void default_host_smc_handler(struct kvm_cpu_context *host_ctxt) __kvm_hyp_host_forward_smc(host_ctxt); } -static void skip_host_instruction(void) -{ - write_sysreg_el2(read_sysreg_el2(SYS_ELR) + 4, SYS_ELR); -} - static void handle_host_smc(struct kvm_cpu_context *host_ctxt) { bool handled; @@ -170,11 +165,8 @@ static void handle_host_smc(struct kvm_cpu_context *host_ctxt) if (!handled) default_host_smc_handler(host_ctxt); - /* - * Unlike HVC, the return address of an SMC is the instruction's PC. - * Move the return address past the instruction. - */ - skip_host_instruction(); + /* SMC was trapped, move ELR past the current PC. */ + kvm_skip_host_instr(); } void handle_trap(struct kvm_cpu_context *host_ctxt) -- GitLab From 767c973f2e4a9264a4f159c9fad5ca8acdb9915e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 22 Dec 2020 12:46:41 +0000 Subject: [PATCH 1004/1894] KVM: arm64: Declutter host PSCI 0.1 handling Although there is nothing wrong with the current host PSCI relay implementation, we can clean it up and remove some of the helpers that do not improve the overall readability of the legacy PSCI 0.1 handling. Opportunity is taken to turn the bitmap into a set of booleans, and creative use of preprocessor macros make init and check more concise/readable. Suggested-by: Mark Rutland Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 11 ++-- arch/arm64/kvm/arm.c | 12 +++-- arch/arm64/kvm/hyp/nvhe/psci-relay.c | 77 +++++++--------------------- 3 files changed, 30 insertions(+), 70 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index bce2452b305c0..8fcfab0c25672 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -241,11 +241,6 @@ struct kvm_host_data { struct kvm_pmu_events pmu_events; }; -#define KVM_HOST_PSCI_0_1_CPU_SUSPEND BIT(0) -#define KVM_HOST_PSCI_0_1_CPU_ON BIT(1) -#define KVM_HOST_PSCI_0_1_CPU_OFF BIT(2) -#define KVM_HOST_PSCI_0_1_MIGRATE BIT(3) - struct kvm_host_psci_config { /* PSCI version used by host. */ u32 version; @@ -253,8 +248,10 @@ struct kvm_host_psci_config { /* Function IDs used by host if version is v0.1. */ struct psci_0_1_function_ids function_ids_0_1; - /* Bitmask of functions enabled for v0.1, bits KVM_HOST_PSCI_0_1_*. */ - unsigned int enabled_functions_0_1; + bool psci_0_1_cpu_suspend_implemented; + bool psci_0_1_cpu_on_implemented; + bool psci_0_1_cpu_off_implemented; + bool psci_0_1_migrate_implemented; }; extern struct kvm_host_psci_config kvm_nvhe_sym(kvm_host_psci_config); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 836ca763b91d7..e207e4541f552 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1603,6 +1603,9 @@ static void init_cpu_logical_map(void) hyp_cpu_logical_map[cpu] = cpu_logical_map(cpu); } +#define init_psci_0_1_impl_state(config, what) \ + config.psci_0_1_ ## what ## _implemented = psci_ops.what + static bool init_psci_relay(void) { /* @@ -1618,11 +1621,10 @@ static bool init_psci_relay(void) if (kvm_host_psci_config.version == PSCI_VERSION(0, 1)) { kvm_host_psci_config.function_ids_0_1 = get_psci_0_1_function_ids(); - kvm_host_psci_config.enabled_functions_0_1 = - (psci_ops.cpu_suspend ? KVM_HOST_PSCI_0_1_CPU_SUSPEND : 0) | - (psci_ops.cpu_off ? KVM_HOST_PSCI_0_1_CPU_OFF : 0) | - (psci_ops.cpu_on ? KVM_HOST_PSCI_0_1_CPU_ON : 0) | - (psci_ops.migrate ? KVM_HOST_PSCI_0_1_MIGRATE : 0); + init_psci_0_1_impl_state(kvm_host_psci_config, cpu_suspend); + init_psci_0_1_impl_state(kvm_host_psci_config, cpu_on); + init_psci_0_1_impl_state(kvm_host_psci_config, cpu_off); + init_psci_0_1_impl_state(kvm_host_psci_config, migrate); } return true; } diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c index 1f7237e451482..e3947846ffcb9 100644 --- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c +++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c @@ -43,48 +43,16 @@ struct psci_boot_args { static DEFINE_PER_CPU(struct psci_boot_args, cpu_on_args) = PSCI_BOOT_ARGS_INIT; static DEFINE_PER_CPU(struct psci_boot_args, suspend_args) = PSCI_BOOT_ARGS_INIT; -static u64 get_psci_func_id(struct kvm_cpu_context *host_ctxt) -{ - DECLARE_REG(u64, func_id, host_ctxt, 0); - - return func_id; -} - -static inline bool is_psci_0_1_function_enabled(unsigned int fn_bit) -{ - return kvm_host_psci_config.enabled_functions_0_1 & fn_bit; -} - -static inline bool is_psci_0_1_cpu_suspend(u64 func_id) -{ - return is_psci_0_1_function_enabled(KVM_HOST_PSCI_0_1_CPU_SUSPEND) && - (func_id == kvm_host_psci_config.function_ids_0_1.cpu_suspend); -} - -static inline bool is_psci_0_1_cpu_on(u64 func_id) -{ - return is_psci_0_1_function_enabled(KVM_HOST_PSCI_0_1_CPU_ON) && - (func_id == kvm_host_psci_config.function_ids_0_1.cpu_on); -} - -static inline bool is_psci_0_1_cpu_off(u64 func_id) -{ - return is_psci_0_1_function_enabled(KVM_HOST_PSCI_0_1_CPU_OFF) && - (func_id == kvm_host_psci_config.function_ids_0_1.cpu_off); -} - -static inline bool is_psci_0_1_migrate(u64 func_id) -{ - return is_psci_0_1_function_enabled(KVM_HOST_PSCI_0_1_MIGRATE) && - (func_id == kvm_host_psci_config.function_ids_0_1.migrate); -} +#define is_psci_0_1(what, func_id) \ + (kvm_host_psci_config.psci_0_1_ ## what ## _implemented && \ + (func_id) == kvm_host_psci_config.function_ids_0_1.what) static bool is_psci_0_1_call(u64 func_id) { - return is_psci_0_1_cpu_suspend(func_id) || - is_psci_0_1_cpu_on(func_id) || - is_psci_0_1_cpu_off(func_id) || - is_psci_0_1_migrate(func_id); + return (is_psci_0_1(cpu_suspend, func_id) || + is_psci_0_1(cpu_on, func_id) || + is_psci_0_1(cpu_off, func_id) || + is_psci_0_1(migrate, func_id)); } static bool is_psci_0_2_call(u64 func_id) @@ -94,16 +62,6 @@ static bool is_psci_0_2_call(u64 func_id) (PSCI_0_2_FN64(0) <= func_id && func_id <= PSCI_0_2_FN64(31)); } -static bool is_psci_call(u64 func_id) -{ - switch (kvm_host_psci_config.version) { - case PSCI_VERSION(0, 1): - return is_psci_0_1_call(func_id); - default: - return is_psci_0_2_call(func_id); - } -} - static unsigned long psci_call(unsigned long fn, unsigned long arg0, unsigned long arg1, unsigned long arg2) { @@ -273,14 +231,14 @@ asmlinkage void __noreturn kvm_host_psci_cpu_entry(bool is_cpu_on) static unsigned long psci_0_1_handler(u64 func_id, struct kvm_cpu_context *host_ctxt) { - if (is_psci_0_1_cpu_off(func_id) || is_psci_0_1_migrate(func_id)) + if (is_psci_0_1(cpu_off, func_id) || is_psci_0_1(migrate, func_id)) return psci_forward(host_ctxt); - else if (is_psci_0_1_cpu_on(func_id)) + if (is_psci_0_1(cpu_on, func_id)) return psci_cpu_on(func_id, host_ctxt); - else if (is_psci_0_1_cpu_suspend(func_id)) + if (is_psci_0_1(cpu_suspend, func_id)) return psci_cpu_suspend(func_id, host_ctxt); - else - return PSCI_RET_NOT_SUPPORTED; + + return PSCI_RET_NOT_SUPPORTED; } static unsigned long psci_0_2_handler(u64 func_id, struct kvm_cpu_context *host_ctxt) @@ -322,20 +280,23 @@ static unsigned long psci_1_0_handler(u64 func_id, struct kvm_cpu_context *host_ bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt) { - u64 func_id = get_psci_func_id(host_ctxt); + DECLARE_REG(u64, func_id, host_ctxt, 0); unsigned long ret; - if (!is_psci_call(func_id)) - return false; - switch (kvm_host_psci_config.version) { case PSCI_VERSION(0, 1): + if (!is_psci_0_1_call(func_id)) + return false; ret = psci_0_1_handler(func_id, host_ctxt); break; case PSCI_VERSION(0, 2): + if (!is_psci_0_2_call(func_id)) + return false; ret = psci_0_2_handler(func_id, host_ctxt); break; default: + if (!is_psci_0_2_call(func_id)) + return false; ret = psci_1_0_handler(func_id, host_ctxt); break; } -- GitLab From b77709237e72d6467fb27bfbad163f7221ecd648 Mon Sep 17 00:00:00 2001 From: Zheng Yongjun Date: Thu, 10 Dec 2020 22:01:03 +0800 Subject: [PATCH 1005/1894] dm cache: simplify the return expression of load_mapping() Simplify the return expression. Signed-off-by: Zheng Yongjun Signed-off-by: Mike Snitzer --- drivers/md/dm-cache-target.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 4bc453f5bbaa6..541c45027cc85 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -2840,7 +2840,6 @@ static void cache_postsuspend(struct dm_target *ti) static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock, bool dirty, uint32_t hint, bool hint_valid) { - int r; struct cache *cache = context; if (dirty) { @@ -2849,11 +2848,7 @@ static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock, } else clear_bit(from_cblock(cblock), cache->dirty_bitset); - r = policy_load_mapping(cache->policy, oblock, cblock, dirty, hint, hint_valid); - if (r) - return r; - - return 0; + return policy_load_mapping(cache->policy, oblock, cblock, dirty, hint, hint_valid); } /* -- GitLab From 6ca653e3f73a1af0f30dbf9c2c79d2897074989f Mon Sep 17 00:00:00 2001 From: Chris Chiu Date: Tue, 22 Dec 2020 23:04:58 +0800 Subject: [PATCH 1006/1894] ALSA: hda/realtek: Apply jack fixup for Quanta NL3 The Quanta NL3 laptop has both a headphone output jack and a headset jack, on the right edge of the chassis. The pin information suggests that both of these are at the Front. The PulseAudio is confused to differentiate them so one of the jack can neither get the jack sense working nor the audio output. The ALC269_FIXUP_LIFEBOOK chained with ALC269_FIXUP_QUANTA_MUTE can help to differentiate 2 jacks and get the 'Auto-Mute Mode' working correctly. Signed-off-by: Chris Chiu Cc: Link: https://lore.kernel.org/r/20201222150459.9545-1-chiu@endlessos.org Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 5478a89d94e30..e9ea2e3fc811e 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8030,6 +8030,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1462, 0xb171, "Cubi N 8GL (MS-B171)", ALC283_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x152d, 0x1082, "Quanta NL3", ALC269_FIXUP_LIFEBOOK), SND_PCI_QUIRK(0x1558, 0x1323, "Clevo N130ZU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x1325, "System76 Darter Pro (darp5)", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x1401, "Clevo L140[CZ]U", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), -- GitLab From 13be30f156fda725b168ac89fc91f78651575307 Mon Sep 17 00:00:00 2001 From: Chris Chiu Date: Tue, 22 Dec 2020 23:04:59 +0800 Subject: [PATCH 1007/1894] ALSA/hda: apply jack fixup for the Acer Veriton N4640G/N6640G/N2510G This Acer Veriton N4640G/N6640G/N2510G desktops have 2 headphone jacks(front and rear), and a separate Mic In jack. The rear headphone jack is actually a line out jack but always silent while playing audio. The front 'Mic In' also fails the jack sensing. Apply the ALC269_FIXUP_LIFEBOOK to have all audio jacks to work as expected. Signed-off-by: Chris Chiu Signed-off-by: Jian-Hong Pan Cc: Link: https://lore.kernel.org/r/20201222150459.9545-2-chiu@endlessos.org Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index e9ea2e3fc811e..dde5ba2095415 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7817,11 +7817,14 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x0762, "Acer Aspire E1-472", ALC271_FIXUP_HP_GATE_MIC_JACK_E1_572), SND_PCI_QUIRK(0x1025, 0x0775, "Acer Aspire E1-572", ALC271_FIXUP_HP_GATE_MIC_JACK_E1_572), SND_PCI_QUIRK(0x1025, 0x079b, "Acer Aspire V5-573G", ALC282_FIXUP_ASPIRE_V5_PINS), + SND_PCI_QUIRK(0x1025, 0x101c, "Acer Veriton N2510G", ALC269_FIXUP_LIFEBOOK), SND_PCI_QUIRK(0x1025, 0x102b, "Acer Aspire C24-860", ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x1065, "Acer Aspire C20-820", ALC269VC_FIXUP_ACER_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x106d, "Acer Cloudbook 14", ALC283_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0x1025, 0x1099, "Acer Aspire E5-523G", ALC255_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x110e, "Acer Aspire ES1-432", ALC255_FIXUP_ACER_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1025, 0x1166, "Acer Veriton N4640G", ALC269_FIXUP_LIFEBOOK), + SND_PCI_QUIRK(0x1025, 0x1167, "Acer Veriton N6640G", ALC269_FIXUP_LIFEBOOK), SND_PCI_QUIRK(0x1025, 0x1246, "Acer Predator Helios 500", ALC299_FIXUP_PREDATOR_SPK), SND_PCI_QUIRK(0x1025, 0x1247, "Acer vCopperbox", ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS), SND_PCI_QUIRK(0x1025, 0x1248, "Acer Veriton N4660G", ALC269VC_FIXUP_ACER_MIC_NO_PRESENCE), -- GitLab From adf4c01aba575c02ae7335255f9f9a379e594c5d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 10 Dec 2020 08:55:42 +0100 Subject: [PATCH 1008/1894] MAINTAINERS: add fs/block_dev.c to the block section fs/block_dev.c is a pretty integral part of the block layer, so make sure it is mentioned in MAINTAINERS. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 8c1c5f9830c38..ce9c7ab54b5cb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3187,6 +3187,7 @@ S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git F: block/ F: drivers/block/ +F: fs/block_dev.c F: include/linux/blk* F: kernel/trace/blktrace.c F: lib/sbitmap.c -- GitLab From ca2e270aa1aa214d77d06c705d1f19524cde3faf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 10 Dec 2020 08:55:43 +0100 Subject: [PATCH 1009/1894] block: remove a pointless self-reference in block_dev.c There is no point in duplicating the file name in the top of the file comment. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- fs/block_dev.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 9e56ee1f26523..d2fa5009d5a48 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * linux/fs/block_dev.c - * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 2001 Andrea Arcangeli SuSE */ -- GitLab From 7b51e703a89b824dbbf65de96e77d10d4915dbe0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 10 Dec 2020 08:55:44 +0100 Subject: [PATCH 1010/1894] block: update some copyrights Update copyrights for files that have gotten some major rewrites lately. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- block/genhd.c | 2 ++ block/partitions/core.c | 1 + fs/block_dev.c | 1 + 3 files changed, 4 insertions(+) diff --git a/block/genhd.c b/block/genhd.c index b84b8671e6270..73faec438e49a 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 /* * gendisk handling + * + * Portions Copyright (C) 2020 Christoph Hellwig */ #include diff --git a/block/partitions/core.c b/block/partitions/core.c index deca253583bd3..e7d776db803b1 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -2,6 +2,7 @@ /* * Copyright (C) 1991-1998 Linus Torvalds * Re-organised Feb 1998 Russell King + * Copyright (C) 2020 Christoph Hellwig */ #include #include diff --git a/fs/block_dev.c b/fs/block_dev.c index d2fa5009d5a48..9293045e128cd 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -2,6 +2,7 @@ /* * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 2001 Andrea Arcangeli SuSE + * Copyright (C) 2016 - 2020 Christoph Hellwig */ #include -- GitLab From 0a4e668b5d52eed8026f5d717196b02b55fb2dc6 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 14 Dec 2020 10:26:22 -0800 Subject: [PATCH 1011/1894] hwmon: (k10temp) Remove support for displaying voltage and current on Zen CPUs Voltages and current are reported by Zen CPUs. However, the means to do so is undocumented, changes from CPU to CPU, and the raw data is not calibrated. Calibration information is available, but again not documented. This results in less than perfect user experience, up to concerns that loading the driver might possibly damage the hardware (by reporting out-of range voltages). Effectively support for reporting voltages and current is not maintainable. Drop it. Cc: Artem S. Tashkinov Cc: Wei Huang Tested-by: Wei Huang Signed-off-by: Guenter Roeck --- drivers/hwmon/k10temp.c | 98 ----------------------------------------- 1 file changed, 98 deletions(-) diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c index a250481b5a97f..3bc2551577a30 100644 --- a/drivers/hwmon/k10temp.c +++ b/drivers/hwmon/k10temp.c @@ -11,13 +11,6 @@ * convert raw register values is from https://github.com/ocerman/zenpower. * The information is not confirmed from chip datasheets, but experiments * suggest that it provides reasonable temperature values. - * - Register addresses to read chip voltage and current are also from - * https://github.com/ocerman/zenpower, and not confirmed from chip - * datasheets. Current calibration is board specific and not typically - * shared by board vendors. For this reason, current values are - * normalized to report 1A/LSB for core current and and 0.25A/LSB for SoC - * current. Reported values can be adjusted using the sensors configuration - * file. */ #include @@ -109,10 +102,7 @@ struct k10temp_data { int temp_offset; u32 temp_adjust_mask; u32 show_temp; - u32 svi_addr[2]; bool is_zen; - bool show_current; - int cfactor[2]; }; #define TCTL_BIT 0 @@ -137,16 +127,6 @@ static const struct tctl_offset tctl_offset_table[] = { { 0x17, "AMD Ryzen Threadripper 29", 27000 }, /* 29{20,50,70,90}[W]X */ }; -static bool is_threadripper(void) -{ - return strstr(boot_cpu_data.x86_model_id, "Threadripper"); -} - -static bool is_epyc(void) -{ - return strstr(boot_cpu_data.x86_model_id, "EPYC"); -} - static void read_htcreg_pci(struct pci_dev *pdev, u32 *regval) { pci_read_config_dword(pdev, REG_HARDWARE_THERMAL_CONTROL, regval); @@ -211,16 +191,6 @@ static const char *k10temp_temp_label[] = { "Tccd8", }; -static const char *k10temp_in_label[] = { - "Vcore", - "Vsoc", -}; - -static const char *k10temp_curr_label[] = { - "Icore", - "Isoc", -}; - static int k10temp_read_labels(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, const char **str) @@ -229,50 +199,6 @@ static int k10temp_read_labels(struct device *dev, case hwmon_temp: *str = k10temp_temp_label[channel]; break; - case hwmon_in: - *str = k10temp_in_label[channel]; - break; - case hwmon_curr: - *str = k10temp_curr_label[channel]; - break; - default: - return -EOPNOTSUPP; - } - return 0; -} - -static int k10temp_read_curr(struct device *dev, u32 attr, int channel, - long *val) -{ - struct k10temp_data *data = dev_get_drvdata(dev); - u32 regval; - - switch (attr) { - case hwmon_curr_input: - amd_smn_read(amd_pci_dev_to_node_id(data->pdev), - data->svi_addr[channel], ®val); - *val = DIV_ROUND_CLOSEST(data->cfactor[channel] * - (regval & 0xff), - 1000); - break; - default: - return -EOPNOTSUPP; - } - return 0; -} - -static int k10temp_read_in(struct device *dev, u32 attr, int channel, long *val) -{ - struct k10temp_data *data = dev_get_drvdata(dev); - u32 regval; - - switch (attr) { - case hwmon_in_input: - amd_smn_read(amd_pci_dev_to_node_id(data->pdev), - data->svi_addr[channel], ®val); - regval = (regval >> 16) & 0xff; - *val = DIV_ROUND_CLOSEST(155000 - regval * 625, 100); - break; default: return -EOPNOTSUPP; } @@ -331,10 +257,6 @@ static int k10temp_read(struct device *dev, enum hwmon_sensor_types type, switch (type) { case hwmon_temp: return k10temp_read_temp(dev, attr, channel, val); - case hwmon_in: - return k10temp_read_in(dev, attr, channel, val); - case hwmon_curr: - return k10temp_read_curr(dev, attr, channel, val); default: return -EOPNOTSUPP; } @@ -383,11 +305,6 @@ static umode_t k10temp_is_visible(const void *_data, return 0; } break; - case hwmon_in: - case hwmon_curr: - if (!data->show_current) - return 0; - break; default: return 0; } @@ -517,20 +434,10 @@ static int k10temp_probe(struct pci_dev *pdev, const struct pci_device_id *id) case 0x8: /* Zen+ */ case 0x11: /* Zen APU */ case 0x18: /* Zen+ APU */ - data->show_current = !is_threadripper() && !is_epyc(); - data->svi_addr[0] = F17H_M01H_SVI_TEL_PLANE0; - data->svi_addr[1] = F17H_M01H_SVI_TEL_PLANE1; - data->cfactor[0] = F17H_M01H_CFACTOR_ICORE; - data->cfactor[1] = F17H_M01H_CFACTOR_ISOC; k10temp_get_ccd_support(pdev, data, 4); break; case 0x31: /* Zen2 Threadripper */ case 0x71: /* Zen2 */ - data->show_current = !is_threadripper() && !is_epyc(); - data->cfactor[0] = F17H_M31H_CFACTOR_ICORE; - data->cfactor[1] = F17H_M31H_CFACTOR_ISOC; - data->svi_addr[0] = F17H_M31H_SVI_TEL_PLANE0; - data->svi_addr[1] = F17H_M31H_SVI_TEL_PLANE1; k10temp_get_ccd_support(pdev, data, 8); break; } @@ -542,11 +449,6 @@ static int k10temp_probe(struct pci_dev *pdev, const struct pci_device_id *id) switch (boot_cpu_data.x86_model) { case 0x0 ... 0x1: /* Zen3 */ - data->show_current = true; - data->svi_addr[0] = F19H_M01_SVI_TEL_PLANE0; - data->svi_addr[1] = F19H_M01_SVI_TEL_PLANE1; - data->cfactor[0] = F19H_M01H_CFACTOR_ICORE; - data->cfactor[1] = F19H_M01H_CFACTOR_ISOC; k10temp_get_ccd_support(pdev, data, 8); break; } -- GitLab From c92dc856848f32781e37b88c1b7f875e274f5efb Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 27 Nov 2020 12:34:00 +0100 Subject: [PATCH 1012/1894] ext4: defer saving error info from atomic context When filesystem inconsistency is detected with group locked, we currently try to modify superblock to store error there without blocking. However this can cause superblock checksum failures (or DIF/DIX failure) when the superblock is just being written out. Make error handling code just store error information in ext4_sb_info structure and copy it to on-disk superblock only in ext4_commit_super(). In case of error happening with group locked, we just postpone the superblock flushing to a workqueue. [ Added fixup so that s_first_error_* does not get updated after the file system is remounted. Also added fix for syzbot failure. - Ted ] Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20201127113405.26867-8-jack@suse.cz Signed-off-by: Theodore Ts'o Cc: Hillf Danton Reported-by: syzbot+9043030c040ce1849a60@syzkaller.appspotmail.com --- fs/ext4/ext4.h | 21 +++++++++ fs/ext4/super.c | 120 +++++++++++++++++++++++++++++++++--------------- 2 files changed, 104 insertions(+), 37 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 84c63b9b83106..ac356ed33a22f 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1629,6 +1629,27 @@ struct ext4_sb_info { errseq_t s_bdev_wb_err; spinlock_t s_bdev_wb_lock; + /* Information about errors that happened during this mount */ + spinlock_t s_error_lock; + int s_add_error_count; + int s_first_error_code; + __u32 s_first_error_line; + __u32 s_first_error_ino; + __u64 s_first_error_block; + const char *s_first_error_func; + time64_t s_first_error_time; + int s_last_error_code; + __u32 s_last_error_line; + __u32 s_last_error_ino; + __u64 s_last_error_block; + const char *s_last_error_func; + time64_t s_last_error_time; + /* + * If we are in a context where we cannot update error information in + * the on-disk superblock, we queue this work to do it. + */ + struct work_struct s_error_work; + /* Ext4 fast commit stuff */ atomic_t s_fc_subtid; atomic_t s_fc_ineligible_updates; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 0e6d9a4bd72fd..4bbfb05aae581 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -404,10 +404,8 @@ void ext4_itable_unused_set(struct super_block *sb, bg->bg_itable_unused_hi = cpu_to_le16(count >> 16); } -static void __ext4_update_tstamp(__le32 *lo, __u8 *hi) +static void __ext4_update_tstamp(__le32 *lo, __u8 *hi, time64_t now) { - time64_t now = ktime_get_real_seconds(); - now = clamp_val(now, 0, (1ull << 40) - 1); *lo = cpu_to_le32(lower_32_bits(now)); @@ -419,7 +417,8 @@ static time64_t __ext4_get_tstamp(__le32 *lo, __u8 *hi) return ((time64_t)(*hi) << 32) + le32_to_cpu(*lo); } #define ext4_update_tstamp(es, tstamp) \ - __ext4_update_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi) + __ext4_update_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi, \ + ktime_get_real_seconds()) #define ext4_get_tstamp(es, tstamp) \ __ext4_get_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi) @@ -591,7 +590,7 @@ static void __save_error_info(struct super_block *sb, int error, __u32 ino, __u64 block, const char *func, unsigned int line) { - struct ext4_super_block *es = EXT4_SB(sb)->s_es; + struct ext4_sb_info *sbi = EXT4_SB(sb); EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; if (bdev_read_only(sb->s_bdev)) @@ -599,30 +598,24 @@ static void __save_error_info(struct super_block *sb, int error, /* We default to EFSCORRUPTED error... */ if (error == 0) error = EFSCORRUPTED; - es->s_state |= cpu_to_le16(EXT4_ERROR_FS); - ext4_update_tstamp(es, s_last_error_time); - strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func)); - es->s_last_error_line = cpu_to_le32(line); - es->s_last_error_ino = cpu_to_le32(ino); - es->s_last_error_block = cpu_to_le64(block); - es->s_last_error_errcode = ext4_errno_to_code(error); - if (!es->s_first_error_time) { - es->s_first_error_time = es->s_last_error_time; - es->s_first_error_time_hi = es->s_last_error_time_hi; - strncpy(es->s_first_error_func, func, - sizeof(es->s_first_error_func)); - es->s_first_error_line = cpu_to_le32(line); - es->s_first_error_ino = es->s_last_error_ino; - es->s_first_error_block = es->s_last_error_block; - es->s_first_error_errcode = es->s_last_error_errcode; - } - /* - * Start the daily error reporting function if it hasn't been - * started already - */ - if (!es->s_error_count) - mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ); - le32_add_cpu(&es->s_error_count, 1); + + spin_lock(&sbi->s_error_lock); + sbi->s_add_error_count++; + sbi->s_last_error_code = error; + sbi->s_last_error_line = line; + sbi->s_last_error_ino = ino; + sbi->s_last_error_block = block; + sbi->s_last_error_func = func; + sbi->s_last_error_time = ktime_get_real_seconds(); + if (!sbi->s_first_error_time) { + sbi->s_first_error_code = error; + sbi->s_first_error_line = line; + sbi->s_first_error_ino = ino; + sbi->s_first_error_block = block; + sbi->s_first_error_func = func; + sbi->s_first_error_time = sbi->s_last_error_time; + } + spin_unlock(&sbi->s_error_lock); } static void save_error_info(struct super_block *sb, int error, @@ -685,6 +678,14 @@ static void ext4_handle_error(struct super_block *sb, bool force_ro) sb->s_flags |= SB_RDONLY; } +static void flush_stashed_error_work(struct work_struct *work) +{ + struct ext4_sb_info *sbi = container_of(work, struct ext4_sb_info, + s_error_work); + + ext4_commit_super(sbi->s_sb, 1); +} + #define ext4_error_ratelimit(sb) \ ___ratelimit(&(EXT4_SB(sb)->s_err_ratelimit_state), \ "EXT4-fs error") @@ -925,8 +926,6 @@ __acquires(bitlock) return; trace_ext4_error(sb, function, line); - __save_error_info(sb, EFSCORRUPTED, ino, block, function, line); - if (ext4_error_ratelimit(sb)) { va_start(args, fmt); vaf.fmt = fmt; @@ -942,16 +941,15 @@ __acquires(bitlock) va_end(args); } - if (test_opt(sb, WARN_ON_ERROR)) - WARN_ON_ONCE(1); - if (test_opt(sb, ERRORS_CONT)) { - ext4_commit_super(sb, 0); + if (test_opt(sb, WARN_ON_ERROR)) + WARN_ON_ONCE(1); + __save_error_info(sb, EFSCORRUPTED, ino, block, function, line); + schedule_work(&EXT4_SB(sb)->s_error_work); return; } - ext4_unlock_group(sb, grp); - ext4_commit_super(sb, 1); + save_error_info(sb, EFSCORRUPTED, ino, block, function, line); ext4_handle_error(sb, false); /* * We only get here in the ERRORS_RO case; relocking the group @@ -1124,6 +1122,7 @@ static void ext4_put_super(struct super_block *sb) ext4_unregister_li_request(sb); ext4_quota_off_umount(sb); + flush_work(&sbi->s_error_work); destroy_workqueue(sbi->rsv_conversion_wq); /* @@ -4666,6 +4665,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } timer_setup(&sbi->s_err_report, print_daily_error_info, 0); + spin_lock_init(&sbi->s_error_lock); + INIT_WORK(&sbi->s_error_work, flush_stashed_error_work); /* Register extent status tree shrinker */ if (ext4_es_register_shrinker(sbi)) @@ -5108,6 +5109,7 @@ failed_mount3a: ext4_es_unregister_shrinker(sbi); failed_mount3: del_timer_sync(&sbi->s_err_report); + flush_work(&sbi->s_error_work); if (sbi->s_mmp_tsk) kthread_stop(sbi->s_mmp_tsk); failed_mount2: @@ -5434,6 +5436,7 @@ err_out: static int ext4_commit_super(struct super_block *sb, int sync) { + struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = EXT4_SB(sb)->s_es; struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; int error = 0; @@ -5470,6 +5473,46 @@ static int ext4_commit_super(struct super_block *sb, int sync) es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive( &EXT4_SB(sb)->s_freeinodes_counter)); + /* Copy error information to the on-disk superblock */ + spin_lock(&sbi->s_error_lock); + if (sbi->s_add_error_count > 0) { + es->s_state |= cpu_to_le16(EXT4_ERROR_FS); + if (!es->s_first_error_time && !es->s_first_error_time_hi) { + __ext4_update_tstamp(&es->s_first_error_time, + &es->s_first_error_time_hi, + sbi->s_first_error_time); + strncpy(es->s_first_error_func, sbi->s_first_error_func, + sizeof(es->s_first_error_func)); + es->s_first_error_line = + cpu_to_le32(sbi->s_first_error_line); + es->s_first_error_ino = + cpu_to_le32(sbi->s_first_error_ino); + es->s_first_error_block = + cpu_to_le64(sbi->s_first_error_block); + es->s_first_error_errcode = + ext4_errno_to_code(sbi->s_first_error_code); + } + __ext4_update_tstamp(&es->s_last_error_time, + &es->s_last_error_time_hi, + sbi->s_last_error_time); + strncpy(es->s_last_error_func, sbi->s_last_error_func, + sizeof(es->s_last_error_func)); + es->s_last_error_line = cpu_to_le32(sbi->s_last_error_line); + es->s_last_error_ino = cpu_to_le32(sbi->s_last_error_ino); + es->s_last_error_block = cpu_to_le64(sbi->s_last_error_block); + es->s_last_error_errcode = + ext4_errno_to_code(sbi->s_last_error_code); + /* + * Start the daily error reporting function if it hasn't been + * started already + */ + if (!es->s_error_count) + mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); + le32_add_cpu(&es->s_error_count, sbi->s_add_error_count); + sbi->s_add_error_count = 0; + } + spin_unlock(&sbi->s_error_lock); + BUFFER_TRACE(sbh, "marking dirty"); ext4_superblock_csum_set(sb); if (sync) @@ -5823,6 +5866,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); } + /* Flush outstanding errors before changing fs state */ + flush_work(&sbi->s_error_work); + if ((bool)(*flags & SB_RDONLY) != sb_rdonly(sb)) { if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) { err = -EROFS; -- GitLab From 82ef1370b0c1757ab4ce29f34c52b4e93839b0aa Mon Sep 17 00:00:00 2001 From: Chunguang Xu Date: Fri, 4 Dec 2020 11:05:43 +0800 Subject: [PATCH 1013/1894] ext4: avoid s_mb_prefetch to be zero in individual scenarios Commit cfd732377221 ("ext4: add prefetching for block allocation bitmaps") introduced block bitmap prefetch, and expects to read block bitmaps of flex_bg through an IO. However, it seems to ignore the value range of s_log_groups_per_flex. In the scenario where the value of s_log_groups_per_flex is greater than 27, s_mb_prefetch or s_mb_prefetch_limit will overflow, cause a divide zero exception. In addition, the logic of calculating nr is also flawed, because the size of flexbg is fixed during a single mount, but s_mb_prefetch can be modified, which causes nr to fail to meet the value condition of [1, flexbg_size]. To solve this problem, we need to set the upper limit of s_mb_prefetch. Since we expect to load block bitmaps of a flex_bg through an IO, we can consider determining a reasonable upper limit among the IO limit parameters. After consideration, we chose BLK_MAX_SEGMENT_SIZE. This is a good choice to solve divide zero problem and avoiding performance degradation. [ Some minor code simplifications to make the changes easy to follow -- TYT ] Reported-by: Tosk Robot Signed-off-by: Chunguang Xu Reviewed-by: Samuel Liao Reviewed-by: Andreas Dilger Link: https://lore.kernel.org/r/1607051143-24508-1-git-send-email-brookxu@tencent.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 77815cd110b2e..99bf091fee10e 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2372,9 +2372,9 @@ repeat: nr = sbi->s_mb_prefetch; if (ext4_has_feature_flex_bg(sb)) { - nr = (group / sbi->s_mb_prefetch) * - sbi->s_mb_prefetch; - nr = nr + sbi->s_mb_prefetch - group; + nr = 1 << sbi->s_log_groups_per_flex; + nr -= group & (nr - 1); + nr = min(nr, sbi->s_mb_prefetch); } prefetch_grp = ext4_mb_prefetch(sb, group, nr, &prefetch_ios); @@ -2710,7 +2710,8 @@ static int ext4_mb_init_backend(struct super_block *sb) if (ext4_has_feature_flex_bg(sb)) { /* a single flex group is supposed to be read by a single IO */ - sbi->s_mb_prefetch = 1 << sbi->s_es->s_log_groups_per_flex; + sbi->s_mb_prefetch = min(1 << sbi->s_es->s_log_groups_per_flex, + BLK_MAX_SEGMENT_SIZE >> (sb->s_blocksize_bits - 9)); sbi->s_mb_prefetch *= 8; /* 8 prefetch IOs in flight at most */ } else { sbi->s_mb_prefetch = 32; -- GitLab From e789ca0cc1d51296832b8424fa4008ce6e9d1703 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 16 Dec 2020 11:18:37 +0100 Subject: [PATCH 1014/1894] ext4: combine ext4_handle_error() and save_error_info() save_error_info() is always called together with ext4_handle_error(). Combine them into a single call and move unconditional bits out of save_error_info() into ext4_handle_error(). Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20201216101844.22917-2-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 4bbfb05aae581..cdf2a377d884f 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -592,9 +592,6 @@ static void __save_error_info(struct super_block *sb, int error, { struct ext4_sb_info *sbi = EXT4_SB(sb); - EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; - if (bdev_read_only(sb->s_bdev)) - return; /* We default to EFSCORRUPTED error... */ if (error == 0) error = EFSCORRUPTED; @@ -647,13 +644,19 @@ static void save_error_info(struct super_block *sb, int error, * used to deal with unrecoverable failures such as journal IO errors or ENOMEM * at a critical moment in log management. */ -static void ext4_handle_error(struct super_block *sb, bool force_ro) +static void ext4_handle_error(struct super_block *sb, bool force_ro, int error, + __u32 ino, __u64 block, + const char *func, unsigned int line) { journal_t *journal = EXT4_SB(sb)->s_journal; + EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; if (test_opt(sb, WARN_ON_ERROR)) WARN_ON_ONCE(1); + if (!bdev_read_only(sb->s_bdev)) + save_error_info(sb, error, ino, block, func, line); + if (sb_rdonly(sb) || (!force_ro && test_opt(sb, ERRORS_CONT))) return; @@ -710,8 +713,7 @@ void __ext4_error(struct super_block *sb, const char *function, sb->s_id, function, line, current->comm, &vaf); va_end(args); } - save_error_info(sb, error, 0, block, function, line); - ext4_handle_error(sb, force_ro); + ext4_handle_error(sb, force_ro, error, 0, block, function, line); } void __ext4_error_inode(struct inode *inode, const char *function, @@ -741,9 +743,8 @@ void __ext4_error_inode(struct inode *inode, const char *function, current->comm, &vaf); va_end(args); } - save_error_info(inode->i_sb, error, inode->i_ino, block, - function, line); - ext4_handle_error(inode->i_sb, false); + ext4_handle_error(inode->i_sb, false, error, inode->i_ino, block, + function, line); } void __ext4_error_file(struct file *file, const char *function, @@ -780,9 +781,8 @@ void __ext4_error_file(struct file *file, const char *function, current->comm, path, &vaf); va_end(args); } - save_error_info(inode->i_sb, EFSCORRUPTED, inode->i_ino, block, - function, line); - ext4_handle_error(inode->i_sb, false); + ext4_handle_error(inode->i_sb, false, EFSCORRUPTED, inode->i_ino, block, + function, line); } const char *ext4_decode_error(struct super_block *sb, int errno, @@ -849,8 +849,7 @@ void __ext4_std_error(struct super_block *sb, const char *function, sb->s_id, function, line, errstr); } - save_error_info(sb, -errno, 0, 0, function, line); - ext4_handle_error(sb, false); + ext4_handle_error(sb, false, -errno, 0, 0, function, line); } void __ext4_msg(struct super_block *sb, @@ -944,13 +943,14 @@ __acquires(bitlock) if (test_opt(sb, ERRORS_CONT)) { if (test_opt(sb, WARN_ON_ERROR)) WARN_ON_ONCE(1); + EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; __save_error_info(sb, EFSCORRUPTED, ino, block, function, line); - schedule_work(&EXT4_SB(sb)->s_error_work); + if (!bdev_read_only(sb->s_bdev)) + schedule_work(&EXT4_SB(sb)->s_error_work); return; } ext4_unlock_group(sb, grp); - save_error_info(sb, EFSCORRUPTED, ino, block, function, line); - ext4_handle_error(sb, false); + ext4_handle_error(sb, false, EFSCORRUPTED, ino, block, function, line); /* * We only get here in the ERRORS_RO case; relocking the group * may be dangerous, but nothing bad will happen since the -- GitLab From be993933d2e997fdb72b8b1418d2a84df79b8962 Mon Sep 17 00:00:00 2001 From: Lei Chen Date: Fri, 11 Dec 2020 14:54:24 +0800 Subject: [PATCH 1015/1894] ext4: remove unnecessary wbc parameter from ext4_bio_write_page ext4_bio_write_page does not need wbc parameter, since its parameter io contains the io_wbc field. The io::io_wbc is initialized by ext4_io_submit_init which is called in ext4_writepages and ext4_writepage functions prior to ext4_bio_write_page. Therefor, when ext4_bio_write_page is called, wbc info has already been included in io parameter. Signed-off-by: Lei Chen Link: https://lore.kernel.org/r/1607669664-25656-1-git-send-email-lennychen@tencent.com Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 1 - fs/ext4/inode.c | 4 ++-- fs/ext4/page-io.c | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index ac356ed33a22f..64f25ea2fa7a2 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -3653,7 +3653,6 @@ extern void ext4_io_submit(struct ext4_io_submit *io); extern int ext4_bio_write_page(struct ext4_io_submit *io, struct page *page, int len, - struct writeback_control *wbc, bool keep_towrite); extern struct ext4_io_end_vec *ext4_alloc_io_end_vec(ext4_io_end_t *io_end); extern struct ext4_io_end_vec *ext4_last_io_end_vec(ext4_io_end_t *io_end); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index ab786123a022e..27946882d4ce4 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2064,7 +2064,7 @@ static int ext4_writepage(struct page *page, unlock_page(page); return -ENOMEM; } - ret = ext4_bio_write_page(&io_submit, page, len, wbc, keep_towrite); + ret = ext4_bio_write_page(&io_submit, page, len, keep_towrite); ext4_io_submit(&io_submit); /* Drop io_end reference we got from init */ ext4_put_io_end_defer(io_submit.io_end); @@ -2098,7 +2098,7 @@ static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page) len = size & ~PAGE_MASK; else len = PAGE_SIZE; - err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc, false); + err = ext4_bio_write_page(&mpd->io_submit, page, len, false); if (!err) mpd->wbc->nr_to_write--; mpd->first_page++; diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index cb135a94482dd..03a44a0de86ad 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -435,7 +435,6 @@ submit_and_retry: int ext4_bio_write_page(struct ext4_io_submit *io, struct page *page, int len, - struct writeback_control *wbc, bool keep_towrite) { struct page *bounce_page = NULL; @@ -445,6 +444,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, int ret = 0; int nr_submitted = 0; int nr_to_submit = 0; + struct writeback_control *wbc = io->io_wbc; BUG_ON(!PageLocked(page)); BUG_ON(PageWriteback(page)); -- GitLab From 4392fbc4bab57db3760f0fb61258cb7089b37665 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 16 Dec 2020 11:18:38 +0100 Subject: [PATCH 1016/1894] ext4: drop sync argument of ext4_commit_super() Everybody passes 1 as sync argument of ext4_commit_super(). Just drop it. Reviewed-by: Harshad Shirwadkar Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20201216101844.22917-3-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 47 ++++++++++++++++++++++------------------------- 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index cdf2a377d884f..8bf31003416a9 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -65,7 +65,7 @@ static struct ratelimit_state ext4_mount_msg_ratelimit; static int ext4_load_journal(struct super_block *, struct ext4_super_block *, unsigned long journal_devnum); static int ext4_show_options(struct seq_file *seq, struct dentry *root); -static int ext4_commit_super(struct super_block *sb, int sync); +static int ext4_commit_super(struct super_block *sb); static int ext4_mark_recovery_complete(struct super_block *sb, struct ext4_super_block *es); static int ext4_clear_journal_err(struct super_block *sb, @@ -621,7 +621,7 @@ static void save_error_info(struct super_block *sb, int error, { __save_error_info(sb, error, ino, block, func, line); if (!bdev_read_only(sb->s_bdev)) - ext4_commit_super(sb, 1); + ext4_commit_super(sb); } /* Deal with the reporting of failure conditions on a filesystem such as @@ -686,7 +686,7 @@ static void flush_stashed_error_work(struct work_struct *work) struct ext4_sb_info *sbi = container_of(work, struct ext4_sb_info, s_error_work); - ext4_commit_super(sbi->s_sb, 1); + ext4_commit_super(sbi->s_sb); } #define ext4_error_ratelimit(sb) \ @@ -1152,7 +1152,7 @@ static void ext4_put_super(struct super_block *sb) es->s_state = cpu_to_le16(sbi->s_mount_state); } if (!sb_rdonly(sb)) - ext4_commit_super(sb, 1); + ext4_commit_super(sb); rcu_read_lock(); group_desc = rcu_dereference(sbi->s_group_desc); @@ -2642,7 +2642,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, if (sbi->s_journal) ext4_set_feature_journal_needs_recovery(sb); - err = ext4_commit_super(sb, 1); + err = ext4_commit_super(sb); done: if (test_opt(sb, DEBUG)) printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, " @@ -4869,7 +4869,7 @@ no_journal: if (DUMMY_ENCRYPTION_ENABLED(sbi) && !sb_rdonly(sb) && !ext4_has_feature_encrypt(sb)) { ext4_set_feature_encrypt(sb); - ext4_commit_super(sb, 1); + ext4_commit_super(sb); } /* @@ -5424,7 +5424,7 @@ static int ext4_load_journal(struct super_block *sb, es->s_journal_dev = cpu_to_le32(journal_devnum); /* Make sure we flush the recovery flag to disk. */ - ext4_commit_super(sb, 1); + ext4_commit_super(sb); } return 0; @@ -5434,7 +5434,7 @@ err_out: return err; } -static int ext4_commit_super(struct super_block *sb, int sync) +static int ext4_commit_super(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = EXT4_SB(sb)->s_es; @@ -5515,8 +5515,7 @@ static int ext4_commit_super(struct super_block *sb, int sync) BUFFER_TRACE(sbh, "marking dirty"); ext4_superblock_csum_set(sb); - if (sync) - lock_buffer(sbh); + lock_buffer(sbh); if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) { /* * Oh, dear. A previous attempt to write the @@ -5532,16 +5531,14 @@ static int ext4_commit_super(struct super_block *sb, int sync) set_buffer_uptodate(sbh); } mark_buffer_dirty(sbh); - if (sync) { - unlock_buffer(sbh); - error = __sync_dirty_buffer(sbh, - REQ_SYNC | (test_opt(sb, BARRIER) ? REQ_FUA : 0)); - if (buffer_write_io_error(sbh)) { - ext4_msg(sb, KERN_ERR, "I/O error while writing " - "superblock"); - clear_buffer_write_io_error(sbh); - set_buffer_uptodate(sbh); - } + unlock_buffer(sbh); + error = __sync_dirty_buffer(sbh, + REQ_SYNC | (test_opt(sb, BARRIER) ? REQ_FUA : 0)); + if (buffer_write_io_error(sbh)) { + ext4_msg(sb, KERN_ERR, "I/O error while writing " + "superblock"); + clear_buffer_write_io_error(sbh); + set_buffer_uptodate(sbh); } return error; } @@ -5572,7 +5569,7 @@ static int ext4_mark_recovery_complete(struct super_block *sb, if (ext4_has_feature_journal_needs_recovery(sb) && sb_rdonly(sb)) { ext4_clear_feature_journal_needs_recovery(sb); - ext4_commit_super(sb, 1); + ext4_commit_super(sb); } out: jbd2_journal_unlock_updates(journal); @@ -5614,7 +5611,7 @@ static int ext4_clear_journal_err(struct super_block *sb, EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; es->s_state |= cpu_to_le16(EXT4_ERROR_FS); - ext4_commit_super(sb, 1); + ext4_commit_super(sb); jbd2_journal_clear_err(journal); jbd2_journal_update_sb_errno(journal); @@ -5716,7 +5713,7 @@ static int ext4_freeze(struct super_block *sb) ext4_clear_feature_journal_needs_recovery(sb); } - error = ext4_commit_super(sb, 1); + error = ext4_commit_super(sb); out: if (journal) /* we rely on upper layer to stop further updates */ @@ -5738,7 +5735,7 @@ static int ext4_unfreeze(struct super_block *sb) ext4_set_feature_journal_needs_recovery(sb); } - ext4_commit_super(sb, 1); + ext4_commit_super(sb); return 0; } @@ -5998,7 +5995,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) } if (sbi->s_journal == NULL && !(old_sb_flags & SB_RDONLY)) { - err = ext4_commit_super(sb, 1); + err = ext4_commit_super(sb); if (err) goto restore_opts; } -- GitLab From 05c2c00f3769abb9e323fcaca70d2de0b48af7ba Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 16 Dec 2020 11:18:39 +0100 Subject: [PATCH 1017/1894] ext4: protect superblock modifications with a buffer lock Protect all superblock modifications (including checksum computation) with a superblock buffer lock. That way we are sure computed checksum matches current superblock contents (a mismatch could cause checksum failures in nojournal mode or if an unjournalled superblock update races with a journalled one). Also we avoid modifying superblock contents while it is being written out (which can cause DIF/DIX failures if we are running in nojournal mode). Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20201216101844.22917-4-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/ext4_jbd2.c | 1 - fs/ext4/file.c | 3 +++ fs/ext4/inode.c | 3 +++ fs/ext4/namei.c | 6 ++++++ fs/ext4/resize.c | 12 ++++++++++++ fs/ext4/super.c | 2 +- fs/ext4/xattr.c | 3 +++ 7 files changed, 28 insertions(+), 2 deletions(-) diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 1a0a827a7f345..c7e410c4ab7d7 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -379,7 +379,6 @@ int __ext4_handle_dirty_super(const char *where, unsigned int line, struct buffer_head *bh = EXT4_SB(sb)->s_sbh; int err = 0; - ext4_superblock_csum_set(sb); if (ext4_handle_valid(handle)) { err = jbd2_journal_dirty_metadata(handle, bh); if (err) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 3ed8c048fb12c..26907d5835d00 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -809,8 +809,11 @@ static int ext4_sample_last_mounted(struct super_block *sb, err = ext4_journal_get_write_access(handle, sbi->s_sbh); if (err) goto out_journal; + lock_buffer(sbi->s_sbh); strlcpy(sbi->s_es->s_last_mounted, cp, sizeof(sbi->s_es->s_last_mounted)); + ext4_superblock_csum_set(sb); + unlock_buffer(sbi->s_sbh); ext4_handle_dirty_super(handle, sb); out_journal: ext4_journal_stop(handle); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 27946882d4ce4..27de6f0a33c8a 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5150,7 +5150,10 @@ static int ext4_do_update_inode(handle_t *handle, err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); if (err) goto out_brelse; + lock_buffer(EXT4_SB(sb)->s_sbh); ext4_set_feature_large_file(sb); + ext4_superblock_csum_set(sb); + unlock_buffer(EXT4_SB(sb)->s_sbh); ext4_handle_sync(handle); err = ext4_handle_dirty_super(handle, sb); } diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 7a890ff214f1a..40970a509dccc 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2978,7 +2978,10 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) (le32_to_cpu(sbi->s_es->s_inodes_count))) { /* Insert this inode at the head of the on-disk orphan list */ NEXT_ORPHAN(inode) = le32_to_cpu(sbi->s_es->s_last_orphan); + lock_buffer(sbi->s_sbh); sbi->s_es->s_last_orphan = cpu_to_le32(inode->i_ino); + ext4_superblock_csum_set(sb); + unlock_buffer(sbi->s_sbh); dirty = true; } list_add(&EXT4_I(inode)->i_orphan, &sbi->s_orphan); @@ -3061,7 +3064,10 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) mutex_unlock(&sbi->s_orphan_lock); goto out_brelse; } + lock_buffer(sbi->s_sbh); sbi->s_es->s_last_orphan = cpu_to_le32(ino_next); + ext4_superblock_csum_set(inode->i_sb); + unlock_buffer(sbi->s_sbh); mutex_unlock(&sbi->s_orphan_lock); err = ext4_handle_dirty_super(handle, inode->i_sb); } else { diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 928700d57eb67..6155f2b9538ca 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -899,7 +899,10 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, EXT4_SB(sb)->s_gdb_count++; ext4_kvfree_array_rcu(o_group_desc); + lock_buffer(EXT4_SB(sb)->s_sbh); le16_add_cpu(&es->s_reserved_gdt_blocks, -1); + ext4_superblock_csum_set(sb); + unlock_buffer(EXT4_SB(sb)->s_sbh); err = ext4_handle_dirty_super(handle, sb); if (err) ext4_std_error(sb, err); @@ -1384,6 +1387,7 @@ static void ext4_update_super(struct super_block *sb, reserved_blocks *= blocks_count; do_div(reserved_blocks, 100); + lock_buffer(sbi->s_sbh); ext4_blocks_count_set(es, ext4_blocks_count(es) + blocks_count); ext4_free_blocks_count_set(es, ext4_free_blocks_count(es) + free_blocks); le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb) * @@ -1421,6 +1425,8 @@ static void ext4_update_super(struct super_block *sb, * active. */ ext4_r_blocks_count_set(es, ext4_r_blocks_count(es) + reserved_blocks); + ext4_superblock_csum_set(sb); + unlock_buffer(sbi->s_sbh); /* Update the free space counts */ percpu_counter_add(&sbi->s_freeclusters_counter, @@ -1717,8 +1723,11 @@ static int ext4_group_extend_no_check(struct super_block *sb, goto errout; } + lock_buffer(EXT4_SB(sb)->s_sbh); ext4_blocks_count_set(es, o_blocks_count + add); ext4_free_blocks_count_set(es, ext4_free_blocks_count(es) + add); + ext4_superblock_csum_set(sb); + unlock_buffer(EXT4_SB(sb)->s_sbh); ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, o_blocks_count + add); /* We add the blocks to the bitmap and set the group need init bit */ @@ -1874,10 +1883,13 @@ static int ext4_convert_meta_bg(struct super_block *sb, struct inode *inode) if (err) goto errout; + lock_buffer(sbi->s_sbh); ext4_clear_feature_resize_inode(sb); ext4_set_feature_meta_bg(sb); sbi->s_es->s_first_meta_bg = cpu_to_le32(num_desc_blocks(sb, sbi->s_groups_count)); + ext4_superblock_csum_set(sb); + unlock_buffer(sbi->s_sbh); err = ext4_handle_dirty_super(handle, sb); if (err) { diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 8bf31003416a9..2d653ad142005 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5444,6 +5444,7 @@ static int ext4_commit_super(struct super_block *sb) if (!sbh || block_device_ejected(sb)) return error; + lock_buffer(sbh); /* * If the file system is mounted read-only, don't update the * superblock write time. This avoids updating the superblock @@ -5515,7 +5516,6 @@ static int ext4_commit_super(struct super_block *sb) BUFFER_TRACE(sbh, "marking dirty"); ext4_superblock_csum_set(sb); - lock_buffer(sbh); if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) { /* * Oh, dear. A previous attempt to write the diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 4e3b1f8c2e81e..1db7ca778d69e 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -792,7 +792,10 @@ static void ext4_xattr_update_super_block(handle_t *handle, BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access"); if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) { + lock_buffer(EXT4_SB(sb)->s_sbh); ext4_set_feature_xattr(sb); + ext4_superblock_csum_set(sb); + unlock_buffer(EXT4_SB(sb)->s_sbh); ext4_handle_dirty_super(handle, sb); } } -- GitLab From 2d01ddc86606564fb08c56e3bc93a0693895f710 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 16 Dec 2020 11:18:40 +0100 Subject: [PATCH 1018/1894] ext4: save error info to sb through journal if available If journalling is still working at the moment we get to writing error information to the superblock we cannot write directly to the superblock as such write could race with journalled update of the superblock and cause journal checksum failures, writing inconsistent information to the journal or other problems. We cannot journal the superblock directly from the error handling functions as we are running in uncertain context and could deadlock so just punt journalled superblock update to a workqueue. Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20201216101844.22917-5-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 101 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 75 insertions(+), 26 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 2d653ad142005..9a256c558e044 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -65,6 +65,7 @@ static struct ratelimit_state ext4_mount_msg_ratelimit; static int ext4_load_journal(struct super_block *, struct ext4_super_block *, unsigned long journal_devnum); static int ext4_show_options(struct seq_file *seq, struct dentry *root); +static void ext4_update_super(struct super_block *sb); static int ext4_commit_super(struct super_block *sb); static int ext4_mark_recovery_complete(struct super_block *sb, struct ext4_super_block *es); @@ -586,9 +587,9 @@ static int ext4_errno_to_code(int errno) return EXT4_ERR_UNKNOWN; } -static void __save_error_info(struct super_block *sb, int error, - __u32 ino, __u64 block, - const char *func, unsigned int line) +static void save_error_info(struct super_block *sb, int error, + __u32 ino, __u64 block, + const char *func, unsigned int line) { struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -615,15 +616,6 @@ static void __save_error_info(struct super_block *sb, int error, spin_unlock(&sbi->s_error_lock); } -static void save_error_info(struct super_block *sb, int error, - __u32 ino, __u64 block, - const char *func, unsigned int line) -{ - __save_error_info(sb, error, ino, block, func, line); - if (!bdev_read_only(sb->s_bdev)) - ext4_commit_super(sb); -} - /* Deal with the reporting of failure conditions on a filesystem such as * inconsistencies detected or read IO failures. * @@ -649,20 +641,35 @@ static void ext4_handle_error(struct super_block *sb, bool force_ro, int error, const char *func, unsigned int line) { journal_t *journal = EXT4_SB(sb)->s_journal; + bool continue_fs = !force_ro && test_opt(sb, ERRORS_CONT); EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; if (test_opt(sb, WARN_ON_ERROR)) WARN_ON_ONCE(1); - if (!bdev_read_only(sb->s_bdev)) + if (!continue_fs && !sb_rdonly(sb)) { + ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED); + if (journal) + jbd2_journal_abort(journal, -EIO); + } + + if (!bdev_read_only(sb->s_bdev)) { save_error_info(sb, error, ino, block, func, line); + /* + * In case the fs should keep running, we need to writeout + * superblock through the journal. Due to lock ordering + * constraints, it may not be safe to do it right here so we + * defer superblock flushing to a workqueue. + */ + if (continue_fs) + schedule_work(&EXT4_SB(sb)->s_error_work); + else + ext4_commit_super(sb); + } - if (sb_rdonly(sb) || (!force_ro && test_opt(sb, ERRORS_CONT))) + if (sb_rdonly(sb) || continue_fs) return; - ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED); - if (journal) - jbd2_journal_abort(journal, -EIO); /* * We force ERRORS_RO behavior when system is rebooting. Otherwise we * could panic during 'reboot -f' as the underlying device got already @@ -685,7 +692,38 @@ static void flush_stashed_error_work(struct work_struct *work) { struct ext4_sb_info *sbi = container_of(work, struct ext4_sb_info, s_error_work); + journal_t *journal = sbi->s_journal; + handle_t *handle; + /* + * If the journal is still running, we have to write out superblock + * through the journal to avoid collisions of other journalled sb + * updates. + * + * We use directly jbd2 functions here to avoid recursing back into + * ext4 error handling code during handling of previous errors. + */ + if (!sb_rdonly(sbi->s_sb) && journal) { + handle = jbd2_journal_start(journal, 1); + if (IS_ERR(handle)) + goto write_directly; + if (jbd2_journal_get_write_access(handle, sbi->s_sbh)) { + jbd2_journal_stop(handle); + goto write_directly; + } + ext4_update_super(sbi->s_sb); + if (jbd2_journal_dirty_metadata(handle, sbi->s_sbh)) { + jbd2_journal_stop(handle); + goto write_directly; + } + jbd2_journal_stop(handle); + return; + } +write_directly: + /* + * Write through journal failed. Write sb directly to get error info + * out and hope for the best. + */ ext4_commit_super(sbi->s_sb); } @@ -944,9 +982,11 @@ __acquires(bitlock) if (test_opt(sb, WARN_ON_ERROR)) WARN_ON_ONCE(1); EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; - __save_error_info(sb, EFSCORRUPTED, ino, block, function, line); - if (!bdev_read_only(sb->s_bdev)) + if (!bdev_read_only(sb->s_bdev)) { + save_error_info(sb, EFSCORRUPTED, ino, block, function, + line); schedule_work(&EXT4_SB(sb)->s_error_work); + } return; } ext4_unlock_group(sb, grp); @@ -5434,15 +5474,12 @@ err_out: return err; } -static int ext4_commit_super(struct super_block *sb) +/* Copy state of EXT4_SB(sb) into buffer for on-disk superblock */ +static void ext4_update_super(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = EXT4_SB(sb)->s_es; struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; - int error = 0; - - if (!sbh || block_device_ejected(sb)) - return error; lock_buffer(sbh); /* @@ -5514,8 +5551,20 @@ static int ext4_commit_super(struct super_block *sb) } spin_unlock(&sbi->s_error_lock); - BUFFER_TRACE(sbh, "marking dirty"); ext4_superblock_csum_set(sb); + unlock_buffer(sbh); +} + +static int ext4_commit_super(struct super_block *sb) +{ + struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; + int error = 0; + + if (!sbh || block_device_ejected(sb)) + return error; + + ext4_update_super(sb); + if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) { /* * Oh, dear. A previous attempt to write the @@ -5530,8 +5579,8 @@ static int ext4_commit_super(struct super_block *sb) clear_buffer_write_io_error(sbh); set_buffer_uptodate(sbh); } + BUFFER_TRACE(sbh, "marking dirty"); mark_buffer_dirty(sbh); - unlock_buffer(sbh); error = __sync_dirty_buffer(sbh, REQ_SYNC | (test_opt(sb, BARRIER) ? REQ_FUA : 0)); if (buffer_write_io_error(sbh)) { -- GitLab From e92ad03fa53498f12b3f5ecb8822adc3bf815b28 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 16 Dec 2020 11:18:41 +0100 Subject: [PATCH 1019/1894] ext4: use sbi instead of EXT4_SB(sb) in ext4_update_super() No behavioral change. Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20201216101844.22917-6-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9a256c558e044..0f0db49031dc9 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5478,8 +5478,8 @@ err_out: static void ext4_update_super(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); - struct ext4_super_block *es = EXT4_SB(sb)->s_es; - struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; + struct ext4_super_block *es = sbi->s_es; + struct buffer_head *sbh = sbi->s_sbh; lock_buffer(sbh); /* @@ -5496,21 +5496,20 @@ static void ext4_update_super(struct super_block *sb) ext4_update_tstamp(es, s_wtime); if (sb->s_bdev->bd_part) es->s_kbytes_written = - cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + + cpu_to_le64(sbi->s_kbytes_written + ((part_stat_read(sb->s_bdev->bd_part, sectors[STAT_WRITE]) - - EXT4_SB(sb)->s_sectors_written_start) >> 1)); + sbi->s_sectors_written_start) >> 1)); else - es->s_kbytes_written = - cpu_to_le64(EXT4_SB(sb)->s_kbytes_written); - if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeclusters_counter)) + es->s_kbytes_written = cpu_to_le64(sbi->s_kbytes_written); + if (percpu_counter_initialized(&sbi->s_freeclusters_counter)) ext4_free_blocks_count_set(es, - EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive( - &EXT4_SB(sb)->s_freeclusters_counter))); - if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter)) + EXT4_C2B(sbi, percpu_counter_sum_positive( + &sbi->s_freeclusters_counter))); + if (percpu_counter_initialized(&sbi->s_freeinodes_counter)) es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive( - &EXT4_SB(sb)->s_freeinodes_counter)); + &sbi->s_freeinodes_counter)); /* Copy error information to the on-disk superblock */ spin_lock(&sbi->s_error_lock); if (sbi->s_add_error_count > 0) { -- GitLab From dfd56c2c0c0dbb11be939b804ddc8d5395ab3432 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 16 Dec 2020 11:18:43 +0100 Subject: [PATCH 1020/1894] ext4: fix superblock checksum failure when setting password salt When setting password salt in the superblock, we forget to recompute the superblock checksum so it will not match until the next superblock modification which recomputes the checksum. Fix it. CC: Michael Halcrow Reported-by: Andreas Dilger Fixes: 9bd8212f981e ("ext4 crypto: add encryption policy and password salt support") Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20201216101844.22917-8-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/ioctl.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index f0381876a7e5b..106bf149e8ca8 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -1157,7 +1157,10 @@ resizefs_out: err = ext4_journal_get_write_access(handle, sbi->s_sbh); if (err) goto pwsalt_err_journal; + lock_buffer(sbi->s_sbh); generate_random_uuid(sbi->s_es->s_encrypt_pw_salt); + ext4_superblock_csum_set(sb); + unlock_buffer(sbi->s_sbh); err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh); pwsalt_err_journal: -- GitLab From a3f5cf14ff917d46a4d491cf86210fd639d1ff38 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 16 Dec 2020 11:18:44 +0100 Subject: [PATCH 1021/1894] ext4: drop ext4_handle_dirty_super() The wrapper is now useless since it does what ext4_handle_dirty_metadata() does. Just remove it. Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20201216101844.22917-9-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/ext4_jbd2.c | 16 ---------------- fs/ext4/ext4_jbd2.h | 5 ----- fs/ext4/file.c | 2 +- fs/ext4/inode.c | 3 ++- fs/ext4/namei.c | 4 ++-- fs/ext4/resize.c | 8 ++++---- fs/ext4/xattr.c | 2 +- 7 files changed, 10 insertions(+), 30 deletions(-) diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index c7e410c4ab7d7..be799040a4154 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -372,19 +372,3 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line, } return err; } - -int __ext4_handle_dirty_super(const char *where, unsigned int line, - handle_t *handle, struct super_block *sb) -{ - struct buffer_head *bh = EXT4_SB(sb)->s_sbh; - int err = 0; - - if (ext4_handle_valid(handle)) { - err = jbd2_journal_dirty_metadata(handle, bh); - if (err) - ext4_journal_abort_handle(where, line, __func__, - bh, handle, err); - } else - mark_buffer_dirty(bh); - return err; -} diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index a124c68b0c75e..0d2fa423b7adb 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -244,9 +244,6 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line, handle_t *handle, struct inode *inode, struct buffer_head *bh); -int __ext4_handle_dirty_super(const char *where, unsigned int line, - handle_t *handle, struct super_block *sb); - #define ext4_journal_get_write_access(handle, bh) \ __ext4_journal_get_write_access(__func__, __LINE__, (handle), (bh)) #define ext4_forget(handle, is_metadata, inode, bh, block_nr) \ @@ -257,8 +254,6 @@ int __ext4_handle_dirty_super(const char *where, unsigned int line, #define ext4_handle_dirty_metadata(handle, inode, bh) \ __ext4_handle_dirty_metadata(__func__, __LINE__, (handle), (inode), \ (bh)) -#define ext4_handle_dirty_super(handle, sb) \ - __ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb)) handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line, int type, int blocks, int rsv_blocks, diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 26907d5835d00..1cd3d26e3217e 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -814,7 +814,7 @@ static int ext4_sample_last_mounted(struct super_block *sb, sizeof(sbi->s_es->s_last_mounted)); ext4_superblock_csum_set(sb); unlock_buffer(sbi->s_sbh); - ext4_handle_dirty_super(handle, sb); + ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh); out_journal: ext4_journal_stop(handle); out: diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 27de6f0a33c8a..c173c84058561 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5155,7 +5155,8 @@ static int ext4_do_update_inode(handle_t *handle, ext4_superblock_csum_set(sb); unlock_buffer(EXT4_SB(sb)->s_sbh); ext4_handle_sync(handle); - err = ext4_handle_dirty_super(handle, sb); + err = ext4_handle_dirty_metadata(handle, NULL, + EXT4_SB(sb)->s_sbh); } ext4_update_inode_fsync_trans(handle, inode, need_datasync); out_brelse: diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 40970a509dccc..a3b28ef2455a8 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2988,7 +2988,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) mutex_unlock(&sbi->s_orphan_lock); if (dirty) { - err = ext4_handle_dirty_super(handle, sb); + err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh); rc = ext4_mark_iloc_dirty(handle, inode, &iloc); if (!err) err = rc; @@ -3069,7 +3069,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) ext4_superblock_csum_set(inode->i_sb); unlock_buffer(sbi->s_sbh); mutex_unlock(&sbi->s_orphan_lock); - err = ext4_handle_dirty_super(handle, inode->i_sb); + err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh); } else { struct ext4_iloc iloc2; struct inode *i_prev = diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 6155f2b9538ca..bd0d185654f33 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -903,7 +903,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, le16_add_cpu(&es->s_reserved_gdt_blocks, -1); ext4_superblock_csum_set(sb); unlock_buffer(EXT4_SB(sb)->s_sbh); - err = ext4_handle_dirty_super(handle, sb); + err = ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); if (err) ext4_std_error(sb, err); return err; @@ -1521,7 +1521,7 @@ static int ext4_flex_group_add(struct super_block *sb, ext4_update_super(sb, flex_gd); - err = ext4_handle_dirty_super(handle, sb); + err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh); exit_journal: err2 = ext4_journal_stop(handle); @@ -1734,7 +1734,7 @@ static int ext4_group_extend_no_check(struct super_block *sb, err = ext4_group_add_blocks(handle, sb, o_blocks_count, add); if (err) goto errout; - ext4_handle_dirty_super(handle, sb); + ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); ext4_debug("freed blocks %llu through %llu\n", o_blocks_count, o_blocks_count + add); errout: @@ -1891,7 +1891,7 @@ static int ext4_convert_meta_bg(struct super_block *sb, struct inode *inode) ext4_superblock_csum_set(sb); unlock_buffer(sbi->s_sbh); - err = ext4_handle_dirty_super(handle, sb); + err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh); if (err) { ext4_std_error(sb, err); goto errout; diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 1db7ca778d69e..372208500f4e7 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -796,7 +796,7 @@ static void ext4_xattr_update_super_block(handle_t *handle, ext4_set_feature_xattr(sb); ext4_superblock_csum_set(sb); unlock_buffer(EXT4_SB(sb)->s_sbh); - ext4_handle_dirty_super(handle, sb); + ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); } } -- GitLab From 5a3b590d4b2db187faa6f06adc9a53d6199fb1f9 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 17 Dec 2020 13:24:15 -0500 Subject: [PATCH 1022/1894] ext4: don't leak old mountpoint samples When the first file is opened, ext4 samples the mountpoint of the filesystem in 64 bytes of the super block. It does so using strlcpy(), this means that the remaining bytes in the super block string buffer are untouched. If the mount point before had a longer path than the current one, it can be reconstructed. Consider the case where the fs was mounted to "/media/johnjdeveloper" and later to "/". The super block buffer then contains "/\x00edia/johnjdeveloper". This case was seen in the wild and caused confusion how the name of a developer ands up on the super block of a filesystem used in production... Fix this by using strncpy() instead of strlcpy(). The superblock field is defined to be a fixed-size char array, and it is already marked using __nonstring in fs/ext4/ext4.h. The consumer of the field in e2fsprogs already assumes that in the case of a 64+ byte mount path, that s_last_mounted will not be NUL terminated. Link: https://lore.kernel.org/r/X9ujIOJG/HqMr88R@mit.edu Reported-by: Richard Weinberger Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 1cd3d26e3217e..349b27f0dda0c 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -810,7 +810,7 @@ static int ext4_sample_last_mounted(struct super_block *sb, if (err) goto out_journal; lock_buffer(sbi->s_sbh); - strlcpy(sbi->s_es->s_last_mounted, cp, + strncpy(sbi->s_es->s_last_mounted, cp, sizeof(sbi->s_es->s_last_mounted)); ext4_superblock_csum_set(sb); unlock_buffer(sbi->s_sbh); -- GitLab From 8b3fd902391fdee526f6ba46899a3f8005983ae1 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Thu, 17 Dec 2020 08:15:01 +0100 Subject: [PATCH 1023/1894] MAINTAINERS: include governors into CPU IDLE TIME MANAGEMENT FRAMEWORK The current pattern in the file entry does not make the files in the governors subdirectory to be a part of the CPU IDLE TIME MANAGEMENT FRAMEWORK. Adjust the file pattern to include files in governors. Signed-off-by: Lukas Bulwahn Signed-off-by: Rafael J. Wysocki --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 7e76ae7ee8a2d..59be280069d15 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4583,7 +4583,7 @@ B: https://bugzilla.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git F: Documentation/admin-guide/pm/cpuidle.rst F: Documentation/driver-api/pm/cpuidle.rst -F: drivers/cpuidle/* +F: drivers/cpuidle/ F: include/linux/cpuidle.h CPU POWER MONITORING SUBSYSTEM -- GitLab From 7887cc89d5851cbdec49219e9614beec776af150 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 14 Dec 2020 23:34:13 +0100 Subject: [PATCH 1024/1894] ARM: dts: ux500/golden: Set display max brightness A too high brightness by default (default is max) makes the screen go blank. Set this to 15 as in the Vendor tree. Signed-off-by: Linus Walleij Cc: Stephan Gerhold Link: https://lore.kernel.org/r/20201214223413.253893-1-linus.walleij@linaro.org' Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/ste-ux500-samsung-golden.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/ste-ux500-samsung-golden.dts b/arch/arm/boot/dts/ste-ux500-samsung-golden.dts index a1093cb37dc7a..aed1f2d5f2467 100644 --- a/arch/arm/boot/dts/ste-ux500-samsung-golden.dts +++ b/arch/arm/boot/dts/ste-ux500-samsung-golden.dts @@ -326,6 +326,7 @@ panel@0 { compatible = "samsung,s6e63m0"; reg = <0>; + max-brightness = <15>; vdd3-supply = <&panel_reg_3v0>; vci-supply = <&panel_reg_1v8>; reset-gpios = <&gpio4 11 GPIO_ACTIVE_LOW>; -- GitLab From 11f094e312ae834531672aee711079c00ca39ff8 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:00:07 -0800 Subject: [PATCH 1025/1894] kasan: drop unnecessary GPL text from comment headers Patch series "kasan: add hardware tag-based mode for arm64", v11. This patchset adds a new hardware tag-based mode to KASAN [1]. The new mode is similar to the existing software tag-based KASAN, but relies on arm64 Memory Tagging Extension (MTE) [2] to perform memory and pointer tagging (instead of shadow memory and compiler instrumentation). This patchset is co-developed and tested by Vincenzo Frascino . This patchset is available here: https://github.com/xairy/linux/tree/up-kasan-mte-v11 For testing in QEMU hardware tag-based KASAN requires: 1. QEMU built from master [4] (use "-machine virt,mte=on -cpu max" arguments to run). 2. GCC version 10. [1] https://www.kernel.org/doc/html/latest/dev-tools/kasan.html [2] https://community.arm.com/developer/ip-products/processors/b/processors-ip-blog/posts/enhancing-memory-safety [3] git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux for-next/mte [4] https://github.com/qemu/qemu ====== Overview The underlying ideas of the approach used by hardware tag-based KASAN are: 1. By relying on the Top Byte Ignore (TBI) arm64 CPU feature, pointer tags are stored in the top byte of each kernel pointer. 2. With the Memory Tagging Extension (MTE) arm64 CPU feature, memory tags for kernel memory allocations are stored in a dedicated memory not accessible via normal instuctions. 3. On each memory allocation, a random tag is generated, embedded it into the returned pointer, and the corresponding memory is tagged with the same tag value. 4. With MTE the CPU performs a check on each memory access to make sure that the pointer tag matches the memory tag. 5. On a tag mismatch the CPU generates a tag fault, and a KASAN report is printed. Same as other KASAN modes, hardware tag-based KASAN is intended as a debugging feature at this point. ====== Rationale There are two main reasons for this new hardware tag-based mode: 1. Previously implemented software tag-based KASAN is being successfully used on dogfood testing devices due to its low memory overhead (as initially planned). The new hardware mode keeps the same low memory overhead, and is expected to have significantly lower performance impact, due to the tag checks being performed by the hardware. Therefore the new mode can be used as a better alternative in dogfood testing for hardware that supports MTE. 2. The new mode lays the groundwork for the planned in-kernel MTE-based memory corruption mitigation to be used in production. ====== Technical details Considering the implementation perspective, hardware tag-based KASAN is almost identical to the software mode. The key difference is using MTE for assigning and checking tags. Compared to the software mode, the hardware mode uses 4 bits per tag, as dictated by MTE. Pointer tags are stored in bits [56:60), the top 4 bits have the normal value 0xF. Having less distict tags increases the probablity of false negatives (from ~1/256 to ~1/16) in certain cases. Only synchronous exceptions are set up and used by hardware tag-based KASAN. ====== Benchmarks Note: all measurements have been performed with software emulation of Memory Tagging Extension, performance numbers for hardware tag-based KASAN on the actual hardware are expected to be better. Boot time [1]: * 2.8 sec for clean kernel * 5.7 sec for hardware tag-based KASAN * 11.8 sec for software tag-based KASAN * 11.6 sec for generic KASAN Slab memory usage after boot [2]: * 7.0 kb for clean kernel * 9.7 kb for hardware tag-based KASAN * 9.7 kb for software tag-based KASAN * 41.3 kb for generic KASAN Measurements have been performed with: * defconfig-based configs * Manually built QEMU master * QEMU arguments: -machine virt,mte=on -cpu max * CONFIG_KASAN_STACK_ENABLE disabled * CONFIG_KASAN_INLINE enabled * clang-10 as the compiler and gcc-10 as the assembler [1] Time before the ext4 driver is initialized. [2] Measured as `cat /proc/meminfo | grep Slab`. ====== Notes The cover letter for software tag-based KASAN patchset can be found here: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=0116523cfffa62aeb5aa3b85ce7419f3dae0c1b8 ===== Tags Tested-by: Vincenzo Frascino This patch (of 41): Don't mention "GNU General Public License version 2" text explicitly, as it's already covered by the SPDX-License-Identifier. Link: https://lkml.kernel.org/r/cover.1606161801.git.andreyknvl@google.com Link: https://lkml.kernel.org/r/6ea9f5f4aa9dbbffa0d0c0a780b37699a4531034.1606161801.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Reviewed-by: Marco Elver Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Catalin Marinas Cc: Will Deacon Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Evgenii Stepanov Cc: Branislav Rankov Cc: Kevin Brodsky Cc: Vasily Gorbik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/common.c | 5 ----- mm/kasan/generic.c | 5 ----- mm/kasan/generic_report.c | 5 ----- mm/kasan/init.c | 5 ----- mm/kasan/quarantine.c | 10 ---------- mm/kasan/report.c | 5 ----- mm/kasan/tags.c | 5 ----- mm/kasan/tags_report.c | 5 ----- 8 files changed, 45 deletions(-) diff --git a/mm/kasan/common.c b/mm/kasan/common.c index 950fd372a07ec..33d863f55db19 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -7,11 +7,6 @@ * * Some code borrowed from https://github.com/xairy/kasan-prototype by * Andrey Konovalov - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * */ #include diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c index 30c0a5038b5cd..d341859a1b955 100644 --- a/mm/kasan/generic.c +++ b/mm/kasan/generic.c @@ -7,11 +7,6 @@ * * Some code borrowed from https://github.com/xairy/kasan-prototype by * Andrey Konovalov - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/mm/kasan/generic_report.c b/mm/kasan/generic_report.c index a38c7a9e192a6..6bb3f66992df4 100644 --- a/mm/kasan/generic_report.c +++ b/mm/kasan/generic_report.c @@ -7,11 +7,6 @@ * * Some code borrowed from https://github.com/xairy/kasan-prototype by * Andrey Konovalov - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * */ #include diff --git a/mm/kasan/init.c b/mm/kasan/init.c index fe6be0be1f763..9ce8cc5b8621f 100644 --- a/mm/kasan/init.c +++ b/mm/kasan/init.c @@ -4,11 +4,6 @@ * * Copyright (c) 2015 Samsung Electronics Co., Ltd. * Author: Andrey Ryabinin - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * */ #include diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c index 0e3f8494628f6..693b05e2c2aa7 100644 --- a/mm/kasan/quarantine.c +++ b/mm/kasan/quarantine.c @@ -6,16 +6,6 @@ * Copyright (C) 2016 Google, Inc. * * Based on code by Dmitry Chernenkov. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * */ #include diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 5a0102f371713..0a2c72ddcdebb 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -7,11 +7,6 @@ * * Some code borrowed from https://github.com/xairy/kasan-prototype by * Andrey Konovalov - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * */ #include diff --git a/mm/kasan/tags.c b/mm/kasan/tags.c index e02a36a51f429..5c8b08a257157 100644 --- a/mm/kasan/tags.c +++ b/mm/kasan/tags.c @@ -4,11 +4,6 @@ * * Copyright (c) 2018 Google, Inc. * Author: Andrey Konovalov - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/mm/kasan/tags_report.c b/mm/kasan/tags_report.c index bee43717d6f0b..5f183501b8710 100644 --- a/mm/kasan/tags_report.c +++ b/mm/kasan/tags_report.c @@ -7,11 +7,6 @@ * * Some code borrowed from https://github.com/xairy/kasan-prototype by * Andrey Konovalov - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * */ #include -- GitLab From 71f6af6d52ceb96be522819a1b0806325bc690d8 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:00:10 -0800 Subject: [PATCH 1026/1894] kasan: KASAN_VMALLOC depends on KASAN_GENERIC Currently only generic KASAN mode supports vmalloc, reflect that in the config. Link: https://lkml.kernel.org/r/0c493d3a065ad95b04313d00244e884a7e2498ff.1606161801.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Reviewed-by: Marco Elver Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.kasan | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 8fb097057fec8..58dd3b86ef846 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -146,7 +146,7 @@ config KASAN_SW_TAGS_IDENTIFY config KASAN_VMALLOC bool "Back mappings in vmalloc space with real shadow memory" - depends on HAVE_ARCH_KASAN_VMALLOC + depends on KASAN_GENERIC && HAVE_ARCH_KASAN_VMALLOC help By default, the shadow region for vmalloc space is the read-only zero page. This means that KASAN cannot detect errors involving -- GitLab From 3b1a4a8640876a966ab68ab4f561642e19674671 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:00:14 -0800 Subject: [PATCH 1027/1894] kasan: group vmalloc code This is a preparatory commit for the upcoming addition of a new hardware tag-based (MTE-based) KASAN mode. Group all vmalloc-related function declarations in include/linux/kasan.h, and their implementations in mm/kasan/common.c. No functional changes. Link: https://lkml.kernel.org/r/80a6fdd29b039962843bd6cf22ce2643a7c8904e.1606161801.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Reviewed-by: Marco Elver Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 41 +++++++++++++---------- mm/kasan/common.c | 78 ++++++++++++++++++++++--------------------- 2 files changed, 63 insertions(+), 56 deletions(-) diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 30d343b4a40a5..59538e795df4b 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -75,19 +75,6 @@ struct kasan_cache { int free_meta_offset; }; -/* - * These functions provide a special case to support backing module - * allocations with real shadow memory. With KASAN vmalloc, the special - * case is unnecessary, as the work is handled in the generic case. - */ -#ifndef CONFIG_KASAN_VMALLOC -int kasan_module_alloc(void *addr, size_t size); -void kasan_free_shadow(const struct vm_struct *vm); -#else -static inline int kasan_module_alloc(void *addr, size_t size) { return 0; } -static inline void kasan_free_shadow(const struct vm_struct *vm) {} -#endif - int kasan_add_zero_shadow(void *start, unsigned long size); void kasan_remove_zero_shadow(void *start, unsigned long size); @@ -156,9 +143,6 @@ static inline bool kasan_slab_free(struct kmem_cache *s, void *object, return false; } -static inline int kasan_module_alloc(void *addr, size_t size) { return 0; } -static inline void kasan_free_shadow(const struct vm_struct *vm) {} - static inline int kasan_add_zero_shadow(void *start, unsigned long size) { return 0; @@ -211,13 +195,16 @@ static inline void *kasan_reset_tag(const void *addr) #endif /* CONFIG_KASAN_SW_TAGS */ #ifdef CONFIG_KASAN_VMALLOC + int kasan_populate_vmalloc(unsigned long addr, unsigned long size); void kasan_poison_vmalloc(const void *start, unsigned long size); void kasan_unpoison_vmalloc(const void *start, unsigned long size); void kasan_release_vmalloc(unsigned long start, unsigned long end, unsigned long free_region_start, unsigned long free_region_end); -#else + +#else /* CONFIG_KASAN_VMALLOC */ + static inline int kasan_populate_vmalloc(unsigned long start, unsigned long size) { @@ -232,7 +219,25 @@ static inline void kasan_release_vmalloc(unsigned long start, unsigned long end, unsigned long free_region_start, unsigned long free_region_end) {} -#endif + +#endif /* CONFIG_KASAN_VMALLOC */ + +#if defined(CONFIG_KASAN) && !defined(CONFIG_KASAN_VMALLOC) + +/* + * These functions provide a special case to support backing module + * allocations with real shadow memory. With KASAN vmalloc, the special + * case is unnecessary, as the work is handled in the generic case. + */ +int kasan_module_alloc(void *addr, size_t size); +void kasan_free_shadow(const struct vm_struct *vm); + +#else /* CONFIG_KASAN && !CONFIG_KASAN_VMALLOC */ + +static inline int kasan_module_alloc(void *addr, size_t size) { return 0; } +static inline void kasan_free_shadow(const struct vm_struct *vm) {} + +#endif /* CONFIG_KASAN && !CONFIG_KASAN_VMALLOC */ #ifdef CONFIG_KASAN_INLINE void kasan_non_canonical_hook(unsigned long addr); diff --git a/mm/kasan/common.c b/mm/kasan/common.c index 33d863f55db19..89e5ef9417a79 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -536,44 +536,6 @@ void kasan_kfree_large(void *ptr, unsigned long ip) /* The object will be poisoned by page_alloc. */ } -#ifndef CONFIG_KASAN_VMALLOC -int kasan_module_alloc(void *addr, size_t size) -{ - void *ret; - size_t scaled_size; - size_t shadow_size; - unsigned long shadow_start; - - shadow_start = (unsigned long)kasan_mem_to_shadow(addr); - scaled_size = (size + KASAN_SHADOW_MASK) >> KASAN_SHADOW_SCALE_SHIFT; - shadow_size = round_up(scaled_size, PAGE_SIZE); - - if (WARN_ON(!PAGE_ALIGNED(shadow_start))) - return -EINVAL; - - ret = __vmalloc_node_range(shadow_size, 1, shadow_start, - shadow_start + shadow_size, - GFP_KERNEL, - PAGE_KERNEL, VM_NO_GUARD, NUMA_NO_NODE, - __builtin_return_address(0)); - - if (ret) { - __memset(ret, KASAN_SHADOW_INIT, shadow_size); - find_vm_area(addr)->flags |= VM_KASAN; - kmemleak_ignore(ret); - return 0; - } - - return -ENOMEM; -} - -void kasan_free_shadow(const struct vm_struct *vm) -{ - if (vm->flags & VM_KASAN) - vfree(kasan_mem_to_shadow(vm->addr)); -} -#endif - #ifdef CONFIG_MEMORY_HOTPLUG static bool shadow_mapped(unsigned long addr) { @@ -685,6 +647,7 @@ core_initcall(kasan_memhotplug_init); #endif #ifdef CONFIG_KASAN_VMALLOC + static int kasan_populate_vmalloc_pte(pte_t *ptep, unsigned long addr, void *unused) { @@ -923,4 +886,43 @@ void kasan_release_vmalloc(unsigned long start, unsigned long end, (unsigned long)shadow_end); } } + +#else /* CONFIG_KASAN_VMALLOC */ + +int kasan_module_alloc(void *addr, size_t size) +{ + void *ret; + size_t scaled_size; + size_t shadow_size; + unsigned long shadow_start; + + shadow_start = (unsigned long)kasan_mem_to_shadow(addr); + scaled_size = (size + KASAN_SHADOW_MASK) >> KASAN_SHADOW_SCALE_SHIFT; + shadow_size = round_up(scaled_size, PAGE_SIZE); + + if (WARN_ON(!PAGE_ALIGNED(shadow_start))) + return -EINVAL; + + ret = __vmalloc_node_range(shadow_size, 1, shadow_start, + shadow_start + shadow_size, + GFP_KERNEL, + PAGE_KERNEL, VM_NO_GUARD, NUMA_NO_NODE, + __builtin_return_address(0)); + + if (ret) { + __memset(ret, KASAN_SHADOW_INIT, shadow_size); + find_vm_area(addr)->flags |= VM_KASAN; + kmemleak_ignore(ret); + return 0; + } + + return -ENOMEM; +} + +void kasan_free_shadow(const struct vm_struct *vm) +{ + if (vm->flags & VM_KASAN) + vfree(kasan_mem_to_shadow(vm->addr)); +} + #endif -- GitLab From d5750edf6da759576f91ec2b57d5553985815b40 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:00:17 -0800 Subject: [PATCH 1028/1894] kasan: shadow declarations only for software modes This is a preparatory commit for the upcoming addition of a new hardware tag-based (MTE-based) KASAN mode. Group shadow-related KASAN function declarations and only define them for the two existing software modes. No functional changes for software modes. Link: https://lkml.kernel.org/r/35126.1606402815@turing-police Link: https://lore.kernel.org/linux-arm-kernel/24105.1606397102@turing-police/ Link: https://lkml.kernel.org/r/e88d94eff94db883a65dca52e1736d80d28dd9bc.1606161801.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Signed-off-by: Valdis Kletnieks Reviewed-by: Marco Elver Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon [valdis.kletnieks@vt.edu: fix build issue with asmlinkage] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 48 ++++++++++++++++++++++++++++--------------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 59538e795df4b..7828436a3a999 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -11,7 +11,7 @@ struct task_struct; #ifdef CONFIG_KASAN -#include +#include #include /* kasan_data struct is used in KUnit tests for KASAN expected failures */ @@ -20,6 +20,20 @@ struct kunit_kasan_expectation { bool report_found; }; +#endif + +#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) + +#include + +/* Software KASAN implementations use shadow memory. */ + +#ifdef CONFIG_KASAN_SW_TAGS +#define KASAN_SHADOW_INIT 0xFF +#else +#define KASAN_SHADOW_INIT 0 +#endif + extern unsigned char kasan_early_shadow_page[PAGE_SIZE]; extern pte_t kasan_early_shadow_pte[PTRS_PER_PTE]; extern pmd_t kasan_early_shadow_pmd[PTRS_PER_PMD]; @@ -35,6 +49,23 @@ static inline void *kasan_mem_to_shadow(const void *addr) + KASAN_SHADOW_OFFSET; } +int kasan_add_zero_shadow(void *start, unsigned long size); +void kasan_remove_zero_shadow(void *start, unsigned long size); + +#else /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ + +static inline int kasan_add_zero_shadow(void *start, unsigned long size) +{ + return 0; +} +static inline void kasan_remove_zero_shadow(void *start, + unsigned long size) +{} + +#endif /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ + +#ifdef CONFIG_KASAN + /* Enable reporting bugs after kasan_disable_current() */ extern void kasan_enable_current(void); @@ -75,9 +106,6 @@ struct kasan_cache { int free_meta_offset; }; -int kasan_add_zero_shadow(void *start, unsigned long size); -void kasan_remove_zero_shadow(void *start, unsigned long size); - size_t __ksize(const void *); static inline void kasan_unpoison_slab(const void *ptr) { @@ -143,14 +171,6 @@ static inline bool kasan_slab_free(struct kmem_cache *s, void *object, return false; } -static inline int kasan_add_zero_shadow(void *start, unsigned long size) -{ - return 0; -} -static inline void kasan_remove_zero_shadow(void *start, - unsigned long size) -{} - static inline void kasan_unpoison_slab(const void *ptr) { } static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; } @@ -158,8 +178,6 @@ static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; } #ifdef CONFIG_KASAN_GENERIC -#define KASAN_SHADOW_INIT 0 - void kasan_cache_shrink(struct kmem_cache *cache); void kasan_cache_shutdown(struct kmem_cache *cache); void kasan_record_aux_stack(void *ptr); @@ -174,8 +192,6 @@ static inline void kasan_record_aux_stack(void *ptr) {} #ifdef CONFIG_KASAN_SW_TAGS -#define KASAN_SHADOW_INIT 0xFF - void kasan_init_tags(void); void *kasan_reset_tag(const void *addr); -- GitLab From cebd0eb29acdfc2f5e44e5f356ffcd0c44f16b4a Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:00:21 -0800 Subject: [PATCH 1029/1894] kasan: rename (un)poison_shadow to (un)poison_range This is a preparatory commit for the upcoming addition of a new hardware tag-based (MTE-based) KASAN mode. The new mode won't be using shadow memory. Rename external annotation kasan_unpoison_shadow() to kasan_unpoison_range(), and introduce internal functions (un)poison_range() (without kasan_ prefix). Co-developed-by: Marco Elver Link: https://lkml.kernel.org/r/fccdcaa13dc6b2211bf363d6c6d499279a54fe3a.1606161801.git.andreyknvl@google.com Signed-off-by: Marco Elver Signed-off-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 6 +++--- kernel/fork.c | 4 ++-- mm/kasan/common.c | 49 ++++++++++++++++++++++++------------------- mm/kasan/generic.c | 23 ++++++++++---------- mm/kasan/kasan.h | 3 ++- mm/kasan/tags.c | 2 +- mm/slab_common.c | 2 +- 7 files changed, 47 insertions(+), 42 deletions(-) diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 7828436a3a999..9740c06a04a14 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -72,7 +72,7 @@ extern void kasan_enable_current(void); /* Disable reporting bugs for current task */ extern void kasan_disable_current(void); -void kasan_unpoison_shadow(const void *address, size_t size); +void kasan_unpoison_range(const void *address, size_t size); void kasan_unpoison_task_stack(struct task_struct *task); @@ -109,7 +109,7 @@ struct kasan_cache { size_t __ksize(const void *); static inline void kasan_unpoison_slab(const void *ptr) { - kasan_unpoison_shadow(ptr, __ksize(ptr)); + kasan_unpoison_range(ptr, __ksize(ptr)); } size_t kasan_metadata_size(struct kmem_cache *cache); @@ -118,7 +118,7 @@ void kasan_restore_multi_shot(bool enabled); #else /* CONFIG_KASAN */ -static inline void kasan_unpoison_shadow(const void *address, size_t size) {} +static inline void kasan_unpoison_range(const void *address, size_t size) {} static inline void kasan_unpoison_task_stack(struct task_struct *task) {} diff --git a/kernel/fork.c b/kernel/fork.c index 41906a52a7646..37720a6d04eaa 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -225,8 +225,8 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) if (!s) continue; - /* Clear the KASAN shadow of the stack. */ - kasan_unpoison_shadow(s->addr, THREAD_SIZE); + /* Mark stack accessible for KASAN. */ + kasan_unpoison_range(s->addr, THREAD_SIZE); /* Clear stale pointers from reused stack. */ memset(s->addr, 0, THREAD_SIZE); diff --git a/mm/kasan/common.c b/mm/kasan/common.c index 89e5ef9417a79..73e79a34671b4 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -108,7 +108,7 @@ void *memcpy(void *dest, const void *src, size_t len) * Poisons the shadow memory for 'size' bytes starting from 'addr'. * Memory addresses should be aligned to KASAN_SHADOW_SCALE_SIZE. */ -void kasan_poison_shadow(const void *address, size_t size, u8 value) +void poison_range(const void *address, size_t size, u8 value) { void *shadow_start, *shadow_end; @@ -125,7 +125,7 @@ void kasan_poison_shadow(const void *address, size_t size, u8 value) __memset(shadow_start, value, shadow_end - shadow_start); } -void kasan_unpoison_shadow(const void *address, size_t size) +void unpoison_range(const void *address, size_t size) { u8 tag = get_tag(address); @@ -136,7 +136,7 @@ void kasan_unpoison_shadow(const void *address, size_t size) */ address = reset_tag(address); - kasan_poison_shadow(address, size, tag); + poison_range(address, size, tag); if (size & KASAN_SHADOW_MASK) { u8 *shadow = (u8 *)kasan_mem_to_shadow(address + size); @@ -148,12 +148,17 @@ void kasan_unpoison_shadow(const void *address, size_t size) } } +void kasan_unpoison_range(const void *address, size_t size) +{ + unpoison_range(address, size); +} + static void __kasan_unpoison_stack(struct task_struct *task, const void *sp) { void *base = task_stack_page(task); size_t size = sp - base; - kasan_unpoison_shadow(base, size); + unpoison_range(base, size); } /* Unpoison the entire stack for a task. */ @@ -172,7 +177,7 @@ asmlinkage void kasan_unpoison_task_stack_below(const void *watermark) */ void *base = (void *)((unsigned long)watermark & ~(THREAD_SIZE - 1)); - kasan_unpoison_shadow(base, watermark - base); + unpoison_range(base, watermark - base); } void kasan_alloc_pages(struct page *page, unsigned int order) @@ -186,13 +191,13 @@ void kasan_alloc_pages(struct page *page, unsigned int order) tag = random_tag(); for (i = 0; i < (1 << order); i++) page_kasan_tag_set(page + i, tag); - kasan_unpoison_shadow(page_address(page), PAGE_SIZE << order); + unpoison_range(page_address(page), PAGE_SIZE << order); } void kasan_free_pages(struct page *page, unsigned int order) { if (likely(!PageHighMem(page))) - kasan_poison_shadow(page_address(page), + poison_range(page_address(page), PAGE_SIZE << order, KASAN_FREE_PAGE); } @@ -284,18 +289,18 @@ void kasan_poison_slab(struct page *page) for (i = 0; i < compound_nr(page); i++) page_kasan_tag_reset(page + i); - kasan_poison_shadow(page_address(page), page_size(page), - KASAN_KMALLOC_REDZONE); + poison_range(page_address(page), page_size(page), + KASAN_KMALLOC_REDZONE); } void kasan_unpoison_object_data(struct kmem_cache *cache, void *object) { - kasan_unpoison_shadow(object, cache->object_size); + unpoison_range(object, cache->object_size); } void kasan_poison_object_data(struct kmem_cache *cache, void *object) { - kasan_poison_shadow(object, + poison_range(object, round_up(cache->object_size, KASAN_SHADOW_SCALE_SIZE), KASAN_KMALLOC_REDZONE); } @@ -408,7 +413,7 @@ static bool __kasan_slab_free(struct kmem_cache *cache, void *object, } rounded_up_size = round_up(cache->object_size, KASAN_SHADOW_SCALE_SIZE); - kasan_poison_shadow(object, rounded_up_size, KASAN_KMALLOC_FREE); + poison_range(object, rounded_up_size, KASAN_KMALLOC_FREE); if ((IS_ENABLED(CONFIG_KASAN_GENERIC) && !quarantine) || unlikely(!(cache->flags & SLAB_KASAN))) @@ -448,9 +453,9 @@ static void *__kasan_kmalloc(struct kmem_cache *cache, const void *object, tag = assign_tag(cache, object, false, keep_tag); /* Tag is ignored in set_tag without CONFIG_KASAN_SW_TAGS */ - kasan_unpoison_shadow(set_tag(object, tag), size); - kasan_poison_shadow((void *)redzone_start, redzone_end - redzone_start, - KASAN_KMALLOC_REDZONE); + unpoison_range(set_tag(object, tag), size); + poison_range((void *)redzone_start, redzone_end - redzone_start, + KASAN_KMALLOC_REDZONE); if (cache->flags & SLAB_KASAN) kasan_set_track(&get_alloc_info(cache, object)->alloc_track, flags); @@ -489,9 +494,9 @@ void * __must_check kasan_kmalloc_large(const void *ptr, size_t size, KASAN_SHADOW_SCALE_SIZE); redzone_end = (unsigned long)ptr + page_size(page); - kasan_unpoison_shadow(ptr, size); - kasan_poison_shadow((void *)redzone_start, redzone_end - redzone_start, - KASAN_PAGE_REDZONE); + unpoison_range(ptr, size); + poison_range((void *)redzone_start, redzone_end - redzone_start, + KASAN_PAGE_REDZONE); return (void *)ptr; } @@ -523,7 +528,7 @@ void kasan_poison_kfree(void *ptr, unsigned long ip) kasan_report_invalid_free(ptr, ip); return; } - kasan_poison_shadow(ptr, page_size(page), KASAN_FREE_PAGE); + poison_range(ptr, page_size(page), KASAN_FREE_PAGE); } else { __kasan_slab_free(page->slab_cache, ptr, ip, false); } @@ -709,7 +714,7 @@ int kasan_populate_vmalloc(unsigned long addr, unsigned long size) * // vmalloc() allocates memory * // let a = area->addr * // we reach kasan_populate_vmalloc - * // and call kasan_unpoison_shadow: + * // and call unpoison_range: * STORE shadow(a), unpoison_val * ... * STORE shadow(a+99), unpoison_val x = LOAD p @@ -744,7 +749,7 @@ void kasan_poison_vmalloc(const void *start, unsigned long size) return; size = round_up(size, KASAN_SHADOW_SCALE_SIZE); - kasan_poison_shadow(start, size, KASAN_VMALLOC_INVALID); + poison_range(start, size, KASAN_VMALLOC_INVALID); } void kasan_unpoison_vmalloc(const void *start, unsigned long size) @@ -752,7 +757,7 @@ void kasan_unpoison_vmalloc(const void *start, unsigned long size) if (!is_vmalloc_or_module_addr(start)) return; - kasan_unpoison_shadow(start, size); + unpoison_range(start, size); } static int kasan_depopulate_vmalloc_pte(pte_t *ptep, unsigned long addr, diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c index d341859a1b955..9fe44f9b3b30c 100644 --- a/mm/kasan/generic.c +++ b/mm/kasan/generic.c @@ -202,11 +202,11 @@ static void register_global(struct kasan_global *global) { size_t aligned_size = round_up(global->size, KASAN_SHADOW_SCALE_SIZE); - kasan_unpoison_shadow(global->beg, global->size); + unpoison_range(global->beg, global->size); - kasan_poison_shadow(global->beg + aligned_size, - global->size_with_redzone - aligned_size, - KASAN_GLOBAL_REDZONE); + poison_range(global->beg + aligned_size, + global->size_with_redzone - aligned_size, + KASAN_GLOBAL_REDZONE); } void __asan_register_globals(struct kasan_global *globals, size_t size) @@ -285,13 +285,12 @@ void __asan_alloca_poison(unsigned long addr, size_t size) WARN_ON(!IS_ALIGNED(addr, KASAN_ALLOCA_REDZONE_SIZE)); - kasan_unpoison_shadow((const void *)(addr + rounded_down_size), - size - rounded_down_size); - kasan_poison_shadow(left_redzone, KASAN_ALLOCA_REDZONE_SIZE, - KASAN_ALLOCA_LEFT); - kasan_poison_shadow(right_redzone, - padding_size + KASAN_ALLOCA_REDZONE_SIZE, - KASAN_ALLOCA_RIGHT); + unpoison_range((const void *)(addr + rounded_down_size), + size - rounded_down_size); + poison_range(left_redzone, KASAN_ALLOCA_REDZONE_SIZE, + KASAN_ALLOCA_LEFT); + poison_range(right_redzone, padding_size + KASAN_ALLOCA_REDZONE_SIZE, + KASAN_ALLOCA_RIGHT); } EXPORT_SYMBOL(__asan_alloca_poison); @@ -301,7 +300,7 @@ void __asan_allocas_unpoison(const void *stack_top, const void *stack_bottom) if (unlikely(!stack_top || stack_top > stack_bottom)) return; - kasan_unpoison_shadow(stack_top, stack_bottom - stack_top); + unpoison_range(stack_top, stack_bottom - stack_top); } EXPORT_SYMBOL(__asan_allocas_unpoison); diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index ac499456740f1..42ab02c61331d 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -150,7 +150,8 @@ static inline bool addr_has_shadow(const void *addr) return (addr >= kasan_shadow_to_mem((void *)KASAN_SHADOW_START)); } -void kasan_poison_shadow(const void *address, size_t size, u8 value); +void poison_range(const void *address, size_t size, u8 value); +void unpoison_range(const void *address, size_t size); /** * check_memory_region - Check memory region, and report if invalid access. diff --git a/mm/kasan/tags.c b/mm/kasan/tags.c index 5c8b08a257157..c0b3f327812b9 100644 --- a/mm/kasan/tags.c +++ b/mm/kasan/tags.c @@ -153,7 +153,7 @@ EXPORT_SYMBOL(__hwasan_storeN_noabort); void __hwasan_tag_memory(unsigned long addr, u8 tag, unsigned long size) { - kasan_poison_shadow((void *)addr, size, tag); + poison_range((void *)addr, size, tag); } EXPORT_SYMBOL(__hwasan_tag_memory); diff --git a/mm/slab_common.c b/mm/slab_common.c index 2f2b55c2798e7..573fbacd9ef50 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -1176,7 +1176,7 @@ size_t ksize(const void *objp) * We assume that ksize callers could use whole allocated area, * so we need to unpoison this area. */ - kasan_unpoison_shadow(objp, size); + kasan_unpoison_range(objp, size); return size; } EXPORT_SYMBOL(ksize); -- GitLab From 1f600626b3a9b77001b3ef90a79bf68c9f7e4cda Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:00:24 -0800 Subject: [PATCH 1030/1894] kasan: rename KASAN_SHADOW_* to KASAN_GRANULE_* This is a preparatory commit for the upcoming addition of a new hardware tag-based (MTE-based) KASAN mode. The new mode won't be using shadow memory, but will still use the concept of memory granules. Each memory granule maps to a single metadata entry: 8 bytes per one shadow byte for generic mode, 16 bytes per one shadow byte for software tag-based mode, and 16 bytes per one allocation tag for hardware tag-based mode. Rename KASAN_SHADOW_SCALE_SIZE to KASAN_GRANULE_SIZE, and KASAN_SHADOW_MASK to KASAN_GRANULE_MASK. Also use MASK when used as a mask, otherwise use SIZE. No functional changes. Link: https://lkml.kernel.org/r/939b5754e47f528a6e6a6f28ffc5815d8d128033.1606161801.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Reviewed-by: Marco Elver Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/dev-tools/kasan.rst | 2 +- lib/test_kasan.c | 2 +- lib/test_kasan_module.c | 2 +- mm/kasan/common.c | 39 ++++++++++++++++--------------- mm/kasan/generic.c | 14 +++++------ mm/kasan/generic_report.c | 8 +++---- mm/kasan/init.c | 8 +++---- mm/kasan/kasan.h | 4 ++-- mm/kasan/report.c | 10 ++++---- mm/kasan/tags_report.c | 2 +- 10 files changed, 46 insertions(+), 45 deletions(-) diff --git a/Documentation/dev-tools/kasan.rst b/Documentation/dev-tools/kasan.rst index 6b752a45a936b..924d19aa1248b 100644 --- a/Documentation/dev-tools/kasan.rst +++ b/Documentation/dev-tools/kasan.rst @@ -265,7 +265,7 @@ Most mappings in vmalloc space are small, requiring less than a full page of shadow space. Allocating a full shadow page per mapping would therefore be wasteful. Furthermore, to ensure that different mappings use different shadow pages, mappings would have to be aligned to -``KASAN_SHADOW_SCALE_SIZE * PAGE_SIZE``. +``KASAN_GRANULE_SIZE * PAGE_SIZE``. Instead, we share backing space across multiple mappings. We allocate a backing page when a mapping in vmalloc space uses a particular page diff --git a/lib/test_kasan.c b/lib/test_kasan.c index 662f862702fc8..2947274cc2d30 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -25,7 +25,7 @@ #include "../mm/kasan/kasan.h" -#define OOB_TAG_OFF (IS_ENABLED(CONFIG_KASAN_GENERIC) ? 0 : KASAN_SHADOW_SCALE_SIZE) +#define OOB_TAG_OFF (IS_ENABLED(CONFIG_KASAN_GENERIC) ? 0 : KASAN_GRANULE_SIZE) /* * We assign some test results to these globals to make sure the tests diff --git a/lib/test_kasan_module.c b/lib/test_kasan_module.c index 62a87854b1200..3b4cc77992d28 100644 --- a/lib/test_kasan_module.c +++ b/lib/test_kasan_module.c @@ -15,7 +15,7 @@ #include "../mm/kasan/kasan.h" -#define OOB_TAG_OFF (IS_ENABLED(CONFIG_KASAN_GENERIC) ? 0 : KASAN_SHADOW_SCALE_SIZE) +#define OOB_TAG_OFF (IS_ENABLED(CONFIG_KASAN_GENERIC) ? 0 : KASAN_GRANULE_SIZE) static noinline void __init copy_user_test(void) { diff --git a/mm/kasan/common.c b/mm/kasan/common.c index 73e79a34671b4..166e36e0033e5 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -106,7 +106,7 @@ void *memcpy(void *dest, const void *src, size_t len) /* * Poisons the shadow memory for 'size' bytes starting from 'addr'. - * Memory addresses should be aligned to KASAN_SHADOW_SCALE_SIZE. + * Memory addresses should be aligned to KASAN_GRANULE_SIZE. */ void poison_range(const void *address, size_t size, u8 value) { @@ -138,13 +138,13 @@ void unpoison_range(const void *address, size_t size) poison_range(address, size, tag); - if (size & KASAN_SHADOW_MASK) { + if (size & KASAN_GRANULE_MASK) { u8 *shadow = (u8 *)kasan_mem_to_shadow(address + size); if (IS_ENABLED(CONFIG_KASAN_SW_TAGS)) *shadow = tag; else - *shadow = size & KASAN_SHADOW_MASK; + *shadow = size & KASAN_GRANULE_MASK; } } @@ -301,7 +301,7 @@ void kasan_unpoison_object_data(struct kmem_cache *cache, void *object) void kasan_poison_object_data(struct kmem_cache *cache, void *object) { poison_range(object, - round_up(cache->object_size, KASAN_SHADOW_SCALE_SIZE), + round_up(cache->object_size, KASAN_GRANULE_SIZE), KASAN_KMALLOC_REDZONE); } @@ -373,7 +373,7 @@ static inline bool shadow_invalid(u8 tag, s8 shadow_byte) { if (IS_ENABLED(CONFIG_KASAN_GENERIC)) return shadow_byte < 0 || - shadow_byte >= KASAN_SHADOW_SCALE_SIZE; + shadow_byte >= KASAN_GRANULE_SIZE; /* else CONFIG_KASAN_SW_TAGS: */ if ((u8)shadow_byte == KASAN_TAG_INVALID) @@ -412,7 +412,7 @@ static bool __kasan_slab_free(struct kmem_cache *cache, void *object, return true; } - rounded_up_size = round_up(cache->object_size, KASAN_SHADOW_SCALE_SIZE); + rounded_up_size = round_up(cache->object_size, KASAN_GRANULE_SIZE); poison_range(object, rounded_up_size, KASAN_KMALLOC_FREE); if ((IS_ENABLED(CONFIG_KASAN_GENERIC) && !quarantine) || @@ -445,9 +445,9 @@ static void *__kasan_kmalloc(struct kmem_cache *cache, const void *object, return NULL; redzone_start = round_up((unsigned long)(object + size), - KASAN_SHADOW_SCALE_SIZE); + KASAN_GRANULE_SIZE); redzone_end = round_up((unsigned long)object + cache->object_size, - KASAN_SHADOW_SCALE_SIZE); + KASAN_GRANULE_SIZE); if (IS_ENABLED(CONFIG_KASAN_SW_TAGS)) tag = assign_tag(cache, object, false, keep_tag); @@ -491,7 +491,7 @@ void * __must_check kasan_kmalloc_large(const void *ptr, size_t size, page = virt_to_page(ptr); redzone_start = round_up((unsigned long)(ptr + size), - KASAN_SHADOW_SCALE_SIZE); + KASAN_GRANULE_SIZE); redzone_end = (unsigned long)ptr + page_size(page); unpoison_range(ptr, size); @@ -589,8 +589,8 @@ static int __meminit kasan_mem_notifier(struct notifier_block *nb, shadow_size = nr_shadow_pages << PAGE_SHIFT; shadow_end = shadow_start + shadow_size; - if (WARN_ON(mem_data->nr_pages % KASAN_SHADOW_SCALE_SIZE) || - WARN_ON(start_kaddr % (KASAN_SHADOW_SCALE_SIZE << PAGE_SHIFT))) + if (WARN_ON(mem_data->nr_pages % KASAN_GRANULE_SIZE) || + WARN_ON(start_kaddr % (KASAN_GRANULE_SIZE << PAGE_SHIFT))) return NOTIFY_BAD; switch (action) { @@ -748,7 +748,7 @@ void kasan_poison_vmalloc(const void *start, unsigned long size) if (!is_vmalloc_or_module_addr(start)) return; - size = round_up(size, KASAN_SHADOW_SCALE_SIZE); + size = round_up(size, KASAN_GRANULE_SIZE); poison_range(start, size, KASAN_VMALLOC_INVALID); } @@ -861,22 +861,22 @@ void kasan_release_vmalloc(unsigned long start, unsigned long end, unsigned long region_start, region_end; unsigned long size; - region_start = ALIGN(start, PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE); - region_end = ALIGN_DOWN(end, PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE); + region_start = ALIGN(start, PAGE_SIZE * KASAN_GRANULE_SIZE); + region_end = ALIGN_DOWN(end, PAGE_SIZE * KASAN_GRANULE_SIZE); free_region_start = ALIGN(free_region_start, - PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE); + PAGE_SIZE * KASAN_GRANULE_SIZE); if (start != region_start && free_region_start < region_start) - region_start -= PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE; + region_start -= PAGE_SIZE * KASAN_GRANULE_SIZE; free_region_end = ALIGN_DOWN(free_region_end, - PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE); + PAGE_SIZE * KASAN_GRANULE_SIZE); if (end != region_end && free_region_end > region_end) - region_end += PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE; + region_end += PAGE_SIZE * KASAN_GRANULE_SIZE; shadow_start = kasan_mem_to_shadow((void *)region_start); shadow_end = kasan_mem_to_shadow((void *)region_end); @@ -902,7 +902,8 @@ int kasan_module_alloc(void *addr, size_t size) unsigned long shadow_start; shadow_start = (unsigned long)kasan_mem_to_shadow(addr); - scaled_size = (size + KASAN_SHADOW_MASK) >> KASAN_SHADOW_SCALE_SHIFT; + scaled_size = (size + KASAN_GRANULE_SIZE - 1) >> + KASAN_SHADOW_SCALE_SHIFT; shadow_size = round_up(scaled_size, PAGE_SIZE); if (WARN_ON(!PAGE_ALIGNED(shadow_start))) diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c index 9fe44f9b3b30c..71ae070b80c6b 100644 --- a/mm/kasan/generic.c +++ b/mm/kasan/generic.c @@ -46,7 +46,7 @@ static __always_inline bool memory_is_poisoned_1(unsigned long addr) s8 shadow_value = *(s8 *)kasan_mem_to_shadow((void *)addr); if (unlikely(shadow_value)) { - s8 last_accessible_byte = addr & KASAN_SHADOW_MASK; + s8 last_accessible_byte = addr & KASAN_GRANULE_MASK; return unlikely(last_accessible_byte >= shadow_value); } @@ -62,7 +62,7 @@ static __always_inline bool memory_is_poisoned_2_4_8(unsigned long addr, * Access crosses 8(shadow size)-byte boundary. Such access maps * into 2 shadow bytes, so we need to check them both. */ - if (unlikely(((addr + size - 1) & KASAN_SHADOW_MASK) < size - 1)) + if (unlikely(((addr + size - 1) & KASAN_GRANULE_MASK) < size - 1)) return *shadow_addr || memory_is_poisoned_1(addr + size - 1); return memory_is_poisoned_1(addr + size - 1); @@ -73,7 +73,7 @@ static __always_inline bool memory_is_poisoned_16(unsigned long addr) u16 *shadow_addr = (u16 *)kasan_mem_to_shadow((void *)addr); /* Unaligned 16-bytes access maps into 3 shadow bytes. */ - if (unlikely(!IS_ALIGNED(addr, KASAN_SHADOW_SCALE_SIZE))) + if (unlikely(!IS_ALIGNED(addr, KASAN_GRANULE_SIZE))) return *shadow_addr || memory_is_poisoned_1(addr + 15); return *shadow_addr; @@ -134,7 +134,7 @@ static __always_inline bool memory_is_poisoned_n(unsigned long addr, s8 *last_shadow = (s8 *)kasan_mem_to_shadow((void *)last_byte); if (unlikely(ret != (unsigned long)last_shadow || - ((long)(last_byte & KASAN_SHADOW_MASK) >= *last_shadow))) + ((long)(last_byte & KASAN_GRANULE_MASK) >= *last_shadow))) return true; } return false; @@ -200,7 +200,7 @@ void kasan_cache_shutdown(struct kmem_cache *cache) static void register_global(struct kasan_global *global) { - size_t aligned_size = round_up(global->size, KASAN_SHADOW_SCALE_SIZE); + size_t aligned_size = round_up(global->size, KASAN_GRANULE_SIZE); unpoison_range(global->beg, global->size); @@ -274,10 +274,10 @@ EXPORT_SYMBOL(__asan_handle_no_return); /* Emitted by compiler to poison alloca()ed objects. */ void __asan_alloca_poison(unsigned long addr, size_t size) { - size_t rounded_up_size = round_up(size, KASAN_SHADOW_SCALE_SIZE); + size_t rounded_up_size = round_up(size, KASAN_GRANULE_SIZE); size_t padding_size = round_up(size, KASAN_ALLOCA_REDZONE_SIZE) - rounded_up_size; - size_t rounded_down_size = round_down(size, KASAN_SHADOW_SCALE_SIZE); + size_t rounded_down_size = round_down(size, KASAN_GRANULE_SIZE); const void *left_redzone = (const void *)(addr - KASAN_ALLOCA_REDZONE_SIZE); diff --git a/mm/kasan/generic_report.c b/mm/kasan/generic_report.c index 6bb3f66992df4..7d5b9e5c7cfe8 100644 --- a/mm/kasan/generic_report.c +++ b/mm/kasan/generic_report.c @@ -34,7 +34,7 @@ void *find_first_bad_addr(void *addr, size_t size) void *p = addr; while (p < addr + size && !(*(u8 *)kasan_mem_to_shadow(p))) - p += KASAN_SHADOW_SCALE_SIZE; + p += KASAN_GRANULE_SIZE; return p; } @@ -46,14 +46,14 @@ static const char *get_shadow_bug_type(struct kasan_access_info *info) shadow_addr = (u8 *)kasan_mem_to_shadow(info->first_bad_addr); /* - * If shadow byte value is in [0, KASAN_SHADOW_SCALE_SIZE) we can look + * If shadow byte value is in [0, KASAN_GRANULE_SIZE) we can look * at the next shadow byte to determine the type of the bad access. */ - if (*shadow_addr > 0 && *shadow_addr <= KASAN_SHADOW_SCALE_SIZE - 1) + if (*shadow_addr > 0 && *shadow_addr <= KASAN_GRANULE_SIZE - 1) shadow_addr++; switch (*shadow_addr) { - case 0 ... KASAN_SHADOW_SCALE_SIZE - 1: + case 0 ... KASAN_GRANULE_SIZE - 1: /* * In theory it's still possible to see these shadow values * due to a data race in the kernel code. diff --git a/mm/kasan/init.c b/mm/kasan/init.c index 9ce8cc5b8621f..dfddd6c39fe6e 100644 --- a/mm/kasan/init.c +++ b/mm/kasan/init.c @@ -442,8 +442,8 @@ void kasan_remove_zero_shadow(void *start, unsigned long size) end = addr + (size >> KASAN_SHADOW_SCALE_SHIFT); if (WARN_ON((unsigned long)start % - (KASAN_SHADOW_SCALE_SIZE * PAGE_SIZE)) || - WARN_ON(size % (KASAN_SHADOW_SCALE_SIZE * PAGE_SIZE))) + (KASAN_GRANULE_SIZE * PAGE_SIZE)) || + WARN_ON(size % (KASAN_GRANULE_SIZE * PAGE_SIZE))) return; for (; addr < end; addr = next) { @@ -477,8 +477,8 @@ int kasan_add_zero_shadow(void *start, unsigned long size) shadow_end = shadow_start + (size >> KASAN_SHADOW_SCALE_SHIFT); if (WARN_ON((unsigned long)start % - (KASAN_SHADOW_SCALE_SIZE * PAGE_SIZE)) || - WARN_ON(size % (KASAN_SHADOW_SCALE_SIZE * PAGE_SIZE))) + (KASAN_GRANULE_SIZE * PAGE_SIZE)) || + WARN_ON(size % (KASAN_GRANULE_SIZE * PAGE_SIZE))) return -EINVAL; ret = kasan_populate_early_shadow(shadow_start, shadow_end); diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 42ab02c61331d..53b095f56f28e 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -5,8 +5,8 @@ #include #include -#define KASAN_SHADOW_SCALE_SIZE (1UL << KASAN_SHADOW_SCALE_SHIFT) -#define KASAN_SHADOW_MASK (KASAN_SHADOW_SCALE_SIZE - 1) +#define KASAN_GRANULE_SIZE (1UL << KASAN_SHADOW_SCALE_SHIFT) +#define KASAN_GRANULE_MASK (KASAN_GRANULE_SIZE - 1) #define KASAN_TAG_KERNEL 0xFF /* native kernel pointers tag */ #define KASAN_TAG_INVALID 0xFE /* inaccessible memory tag */ diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 0a2c72ddcdebb..d16ccbc7e4b2b 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -314,24 +314,24 @@ static bool __must_check get_address_stack_frame_info(const void *addr, return false; aligned_addr = round_down((unsigned long)addr, sizeof(long)); - mem_ptr = round_down(aligned_addr, KASAN_SHADOW_SCALE_SIZE); + mem_ptr = round_down(aligned_addr, KASAN_GRANULE_SIZE); shadow_ptr = kasan_mem_to_shadow((void *)aligned_addr); shadow_bottom = kasan_mem_to_shadow(end_of_stack(current)); while (shadow_ptr >= shadow_bottom && *shadow_ptr != KASAN_STACK_LEFT) { shadow_ptr--; - mem_ptr -= KASAN_SHADOW_SCALE_SIZE; + mem_ptr -= KASAN_GRANULE_SIZE; } while (shadow_ptr >= shadow_bottom && *shadow_ptr == KASAN_STACK_LEFT) { shadow_ptr--; - mem_ptr -= KASAN_SHADOW_SCALE_SIZE; + mem_ptr -= KASAN_GRANULE_SIZE; } if (shadow_ptr < shadow_bottom) return false; - frame = (const unsigned long *)(mem_ptr + KASAN_SHADOW_SCALE_SIZE); + frame = (const unsigned long *)(mem_ptr + KASAN_GRANULE_SIZE); if (frame[0] != KASAN_CURRENT_STACK_FRAME_MAGIC) { pr_err("KASAN internal error: frame info validation failed; invalid marker: %lu\n", frame[0]); @@ -599,6 +599,6 @@ void kasan_non_canonical_hook(unsigned long addr) else bug_type = "maybe wild-memory-access"; pr_alert("KASAN: %s in range [0x%016lx-0x%016lx]\n", bug_type, - orig_addr, orig_addr + KASAN_SHADOW_MASK); + orig_addr, orig_addr + KASAN_GRANULE_SIZE - 1); } #endif diff --git a/mm/kasan/tags_report.c b/mm/kasan/tags_report.c index 5f183501b8710..c87d5a343b4ef 100644 --- a/mm/kasan/tags_report.c +++ b/mm/kasan/tags_report.c @@ -76,7 +76,7 @@ void *find_first_bad_addr(void *addr, size_t size) void *end = p + size; while (p < end && tag == *(u8 *)kasan_mem_to_shadow(p)) - p += KASAN_SHADOW_SCALE_SIZE; + p += KASAN_GRANULE_SIZE; return p; } -- GitLab From b266e8fee9630d1e5a9144f33222a49c06ad6976 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:00:28 -0800 Subject: [PATCH 1031/1894] kasan: only build init.c for software modes This is a preparatory commit for the upcoming addition of a new hardware tag-based (MTE-based) KASAN mode. The new mode won't be using shadow memory, so only build init.c that contains shadow initialization code for software modes. No functional changes for software modes. Link: https://lkml.kernel.org/r/bae0a6a35b7a9b1a443803c1a55e6e3fecc311c9.1606161801.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Reviewed-by: Marco Elver Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/Makefile | 6 +++--- mm/kasan/init.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/mm/kasan/Makefile b/mm/kasan/Makefile index 370d970e5ab5b..7cf685bb51bd1 100644 --- a/mm/kasan/Makefile +++ b/mm/kasan/Makefile @@ -29,6 +29,6 @@ CFLAGS_report.o := $(CC_FLAGS_KASAN_RUNTIME) CFLAGS_tags.o := $(CC_FLAGS_KASAN_RUNTIME) CFLAGS_tags_report.o := $(CC_FLAGS_KASAN_RUNTIME) -obj-$(CONFIG_KASAN) := common.o init.o report.o -obj-$(CONFIG_KASAN_GENERIC) += generic.o generic_report.o quarantine.o -obj-$(CONFIG_KASAN_SW_TAGS) += tags.o tags_report.o +obj-$(CONFIG_KASAN) := common.o report.o +obj-$(CONFIG_KASAN_GENERIC) += init.o generic.o generic_report.o quarantine.o +obj-$(CONFIG_KASAN_SW_TAGS) += init.o tags.o tags_report.o diff --git a/mm/kasan/init.c b/mm/kasan/init.c index dfddd6c39fe6e..1a71eaa8c5f90 100644 --- a/mm/kasan/init.c +++ b/mm/kasan/init.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * This file contains some kasan initialization code. + * This file contains KASAN shadow initialization code. * * Copyright (c) 2015 Samsung Electronics Co., Ltd. * Author: Andrey Ryabinin -- GitLab From bb359dbcb70085a63e8bdbf14837a900750f0cf7 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:00:32 -0800 Subject: [PATCH 1032/1894] kasan: split out shadow.c from common.c This is a preparatory commit for the upcoming addition of a new hardware tag-based (MTE-based) KASAN mode. The new mode won't be using shadow memory. Move all shadow-related code to shadow.c, which is only enabled for software KASAN modes that use shadow memory. No functional changes for software modes. Link: https://lkml.kernel.org/r/17d95cfa7d5cf9c4fcd9bf415f2a8dea911668df.1606161801.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Reviewed-by: Marco Elver Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/Makefile | 6 +- mm/kasan/common.c | 486 +------------------------------------------- mm/kasan/shadow.c | 505 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 510 insertions(+), 487 deletions(-) create mode 100644 mm/kasan/shadow.c diff --git a/mm/kasan/Makefile b/mm/kasan/Makefile index 7cf685bb51bd1..7cc1031e1ef80 100644 --- a/mm/kasan/Makefile +++ b/mm/kasan/Makefile @@ -10,6 +10,7 @@ CFLAGS_REMOVE_generic_report.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_init.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_quarantine.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_report.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_shadow.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_tags.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_tags_report.o = $(CC_FLAGS_FTRACE) @@ -26,9 +27,10 @@ CFLAGS_generic_report.o := $(CC_FLAGS_KASAN_RUNTIME) CFLAGS_init.o := $(CC_FLAGS_KASAN_RUNTIME) CFLAGS_quarantine.o := $(CC_FLAGS_KASAN_RUNTIME) CFLAGS_report.o := $(CC_FLAGS_KASAN_RUNTIME) +CFLAGS_shadow.o := $(CC_FLAGS_KASAN_RUNTIME) CFLAGS_tags.o := $(CC_FLAGS_KASAN_RUNTIME) CFLAGS_tags_report.o := $(CC_FLAGS_KASAN_RUNTIME) obj-$(CONFIG_KASAN) := common.o report.o -obj-$(CONFIG_KASAN_GENERIC) += init.o generic.o generic_report.o quarantine.o -obj-$(CONFIG_KASAN_SW_TAGS) += init.o tags.o tags_report.o +obj-$(CONFIG_KASAN_GENERIC) += init.o generic.o generic_report.o shadow.o quarantine.o +obj-$(CONFIG_KASAN_SW_TAGS) += init.o shadow.o tags.o tags_report.o diff --git a/mm/kasan/common.c b/mm/kasan/common.c index 166e36e0033e5..88f57346c7bd3 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * This file contains common generic and tag-based KASAN code. + * This file contains common KASAN code. * * Copyright (c) 2014 Samsung Electronics Co., Ltd. * Author: Andrey Ryabinin @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -26,12 +25,8 @@ #include #include #include -#include #include -#include -#include - #include "kasan.h" #include "../slab.h" @@ -61,93 +56,6 @@ void kasan_disable_current(void) current->kasan_depth--; } -bool __kasan_check_read(const volatile void *p, unsigned int size) -{ - return check_memory_region((unsigned long)p, size, false, _RET_IP_); -} -EXPORT_SYMBOL(__kasan_check_read); - -bool __kasan_check_write(const volatile void *p, unsigned int size) -{ - return check_memory_region((unsigned long)p, size, true, _RET_IP_); -} -EXPORT_SYMBOL(__kasan_check_write); - -#undef memset -void *memset(void *addr, int c, size_t len) -{ - if (!check_memory_region((unsigned long)addr, len, true, _RET_IP_)) - return NULL; - - return __memset(addr, c, len); -} - -#ifdef __HAVE_ARCH_MEMMOVE -#undef memmove -void *memmove(void *dest, const void *src, size_t len) -{ - if (!check_memory_region((unsigned long)src, len, false, _RET_IP_) || - !check_memory_region((unsigned long)dest, len, true, _RET_IP_)) - return NULL; - - return __memmove(dest, src, len); -} -#endif - -#undef memcpy -void *memcpy(void *dest, const void *src, size_t len) -{ - if (!check_memory_region((unsigned long)src, len, false, _RET_IP_) || - !check_memory_region((unsigned long)dest, len, true, _RET_IP_)) - return NULL; - - return __memcpy(dest, src, len); -} - -/* - * Poisons the shadow memory for 'size' bytes starting from 'addr'. - * Memory addresses should be aligned to KASAN_GRANULE_SIZE. - */ -void poison_range(const void *address, size_t size, u8 value) -{ - void *shadow_start, *shadow_end; - - /* - * Perform shadow offset calculation based on untagged address, as - * some of the callers (e.g. kasan_poison_object_data) pass tagged - * addresses to this function. - */ - address = reset_tag(address); - - shadow_start = kasan_mem_to_shadow(address); - shadow_end = kasan_mem_to_shadow(address + size); - - __memset(shadow_start, value, shadow_end - shadow_start); -} - -void unpoison_range(const void *address, size_t size) -{ - u8 tag = get_tag(address); - - /* - * Perform shadow offset calculation based on untagged address, as - * some of the callers (e.g. kasan_unpoison_object_data) pass tagged - * addresses to this function. - */ - address = reset_tag(address); - - poison_range(address, size, tag); - - if (size & KASAN_GRANULE_MASK) { - u8 *shadow = (u8 *)kasan_mem_to_shadow(address + size); - - if (IS_ENABLED(CONFIG_KASAN_SW_TAGS)) - *shadow = tag; - else - *shadow = size & KASAN_GRANULE_MASK; - } -} - void kasan_unpoison_range(const void *address, size_t size) { unpoison_range(address, size); @@ -540,395 +448,3 @@ void kasan_kfree_large(void *ptr, unsigned long ip) kasan_report_invalid_free(ptr, ip); /* The object will be poisoned by page_alloc. */ } - -#ifdef CONFIG_MEMORY_HOTPLUG -static bool shadow_mapped(unsigned long addr) -{ - pgd_t *pgd = pgd_offset_k(addr); - p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - - if (pgd_none(*pgd)) - return false; - p4d = p4d_offset(pgd, addr); - if (p4d_none(*p4d)) - return false; - pud = pud_offset(p4d, addr); - if (pud_none(*pud)) - return false; - - /* - * We can't use pud_large() or pud_huge(), the first one is - * arch-specific, the last one depends on HUGETLB_PAGE. So let's abuse - * pud_bad(), if pud is bad then it's bad because it's huge. - */ - if (pud_bad(*pud)) - return true; - pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd)) - return false; - - if (pmd_bad(*pmd)) - return true; - pte = pte_offset_kernel(pmd, addr); - return !pte_none(*pte); -} - -static int __meminit kasan_mem_notifier(struct notifier_block *nb, - unsigned long action, void *data) -{ - struct memory_notify *mem_data = data; - unsigned long nr_shadow_pages, start_kaddr, shadow_start; - unsigned long shadow_end, shadow_size; - - nr_shadow_pages = mem_data->nr_pages >> KASAN_SHADOW_SCALE_SHIFT; - start_kaddr = (unsigned long)pfn_to_kaddr(mem_data->start_pfn); - shadow_start = (unsigned long)kasan_mem_to_shadow((void *)start_kaddr); - shadow_size = nr_shadow_pages << PAGE_SHIFT; - shadow_end = shadow_start + shadow_size; - - if (WARN_ON(mem_data->nr_pages % KASAN_GRANULE_SIZE) || - WARN_ON(start_kaddr % (KASAN_GRANULE_SIZE << PAGE_SHIFT))) - return NOTIFY_BAD; - - switch (action) { - case MEM_GOING_ONLINE: { - void *ret; - - /* - * If shadow is mapped already than it must have been mapped - * during the boot. This could happen if we onlining previously - * offlined memory. - */ - if (shadow_mapped(shadow_start)) - return NOTIFY_OK; - - ret = __vmalloc_node_range(shadow_size, PAGE_SIZE, shadow_start, - shadow_end, GFP_KERNEL, - PAGE_KERNEL, VM_NO_GUARD, - pfn_to_nid(mem_data->start_pfn), - __builtin_return_address(0)); - if (!ret) - return NOTIFY_BAD; - - kmemleak_ignore(ret); - return NOTIFY_OK; - } - case MEM_CANCEL_ONLINE: - case MEM_OFFLINE: { - struct vm_struct *vm; - - /* - * shadow_start was either mapped during boot by kasan_init() - * or during memory online by __vmalloc_node_range(). - * In the latter case we can use vfree() to free shadow. - * Non-NULL result of the find_vm_area() will tell us if - * that was the second case. - * - * Currently it's not possible to free shadow mapped - * during boot by kasan_init(). It's because the code - * to do that hasn't been written yet. So we'll just - * leak the memory. - */ - vm = find_vm_area((void *)shadow_start); - if (vm) - vfree((void *)shadow_start); - } - } - - return NOTIFY_OK; -} - -static int __init kasan_memhotplug_init(void) -{ - hotplug_memory_notifier(kasan_mem_notifier, 0); - - return 0; -} - -core_initcall(kasan_memhotplug_init); -#endif - -#ifdef CONFIG_KASAN_VMALLOC - -static int kasan_populate_vmalloc_pte(pte_t *ptep, unsigned long addr, - void *unused) -{ - unsigned long page; - pte_t pte; - - if (likely(!pte_none(*ptep))) - return 0; - - page = __get_free_page(GFP_KERNEL); - if (!page) - return -ENOMEM; - - memset((void *)page, KASAN_VMALLOC_INVALID, PAGE_SIZE); - pte = pfn_pte(PFN_DOWN(__pa(page)), PAGE_KERNEL); - - spin_lock(&init_mm.page_table_lock); - if (likely(pte_none(*ptep))) { - set_pte_at(&init_mm, addr, ptep, pte); - page = 0; - } - spin_unlock(&init_mm.page_table_lock); - if (page) - free_page(page); - return 0; -} - -int kasan_populate_vmalloc(unsigned long addr, unsigned long size) -{ - unsigned long shadow_start, shadow_end; - int ret; - - if (!is_vmalloc_or_module_addr((void *)addr)) - return 0; - - shadow_start = (unsigned long)kasan_mem_to_shadow((void *)addr); - shadow_start = ALIGN_DOWN(shadow_start, PAGE_SIZE); - shadow_end = (unsigned long)kasan_mem_to_shadow((void *)addr + size); - shadow_end = ALIGN(shadow_end, PAGE_SIZE); - - ret = apply_to_page_range(&init_mm, shadow_start, - shadow_end - shadow_start, - kasan_populate_vmalloc_pte, NULL); - if (ret) - return ret; - - flush_cache_vmap(shadow_start, shadow_end); - - /* - * We need to be careful about inter-cpu effects here. Consider: - * - * CPU#0 CPU#1 - * WRITE_ONCE(p, vmalloc(100)); while (x = READ_ONCE(p)) ; - * p[99] = 1; - * - * With compiler instrumentation, that ends up looking like this: - * - * CPU#0 CPU#1 - * // vmalloc() allocates memory - * // let a = area->addr - * // we reach kasan_populate_vmalloc - * // and call unpoison_range: - * STORE shadow(a), unpoison_val - * ... - * STORE shadow(a+99), unpoison_val x = LOAD p - * // rest of vmalloc process - * STORE p, a LOAD shadow(x+99) - * - * If there is no barrier between the end of unpoisioning the shadow - * and the store of the result to p, the stores could be committed - * in a different order by CPU#0, and CPU#1 could erroneously observe - * poison in the shadow. - * - * We need some sort of barrier between the stores. - * - * In the vmalloc() case, this is provided by a smp_wmb() in - * clear_vm_uninitialized_flag(). In the per-cpu allocator and in - * get_vm_area() and friends, the caller gets shadow allocated but - * doesn't have any pages mapped into the virtual address space that - * has been reserved. Mapping those pages in will involve taking and - * releasing a page-table lock, which will provide the barrier. - */ - - return 0; -} - -/* - * Poison the shadow for a vmalloc region. Called as part of the - * freeing process at the time the region is freed. - */ -void kasan_poison_vmalloc(const void *start, unsigned long size) -{ - if (!is_vmalloc_or_module_addr(start)) - return; - - size = round_up(size, KASAN_GRANULE_SIZE); - poison_range(start, size, KASAN_VMALLOC_INVALID); -} - -void kasan_unpoison_vmalloc(const void *start, unsigned long size) -{ - if (!is_vmalloc_or_module_addr(start)) - return; - - unpoison_range(start, size); -} - -static int kasan_depopulate_vmalloc_pte(pte_t *ptep, unsigned long addr, - void *unused) -{ - unsigned long page; - - page = (unsigned long)__va(pte_pfn(*ptep) << PAGE_SHIFT); - - spin_lock(&init_mm.page_table_lock); - - if (likely(!pte_none(*ptep))) { - pte_clear(&init_mm, addr, ptep); - free_page(page); - } - spin_unlock(&init_mm.page_table_lock); - - return 0; -} - -/* - * Release the backing for the vmalloc region [start, end), which - * lies within the free region [free_region_start, free_region_end). - * - * This can be run lazily, long after the region was freed. It runs - * under vmap_area_lock, so it's not safe to interact with the vmalloc/vmap - * infrastructure. - * - * How does this work? - * ------------------- - * - * We have a region that is page aligned, labelled as A. - * That might not map onto the shadow in a way that is page-aligned: - * - * start end - * v v - * |????????|????????|AAAAAAAA|AA....AA|AAAAAAAA|????????| < vmalloc - * -------- -------- -------- -------- -------- - * | | | | | - * | | | /-------/ | - * \-------\|/------/ |/---------------/ - * ||| || - * |??AAAAAA|AAAAAAAA|AA??????| < shadow - * (1) (2) (3) - * - * First we align the start upwards and the end downwards, so that the - * shadow of the region aligns with shadow page boundaries. In the - * example, this gives us the shadow page (2). This is the shadow entirely - * covered by this allocation. - * - * Then we have the tricky bits. We want to know if we can free the - * partially covered shadow pages - (1) and (3) in the example. For this, - * we are given the start and end of the free region that contains this - * allocation. Extending our previous example, we could have: - * - * free_region_start free_region_end - * | start end | - * v v v v - * |FFFFFFFF|FFFFFFFF|AAAAAAAA|AA....AA|AAAAAAAA|FFFFFFFF| < vmalloc - * -------- -------- -------- -------- -------- - * | | | | | - * | | | /-------/ | - * \-------\|/------/ |/---------------/ - * ||| || - * |FFAAAAAA|AAAAAAAA|AAF?????| < shadow - * (1) (2) (3) - * - * Once again, we align the start of the free region up, and the end of - * the free region down so that the shadow is page aligned. So we can free - * page (1) - we know no allocation currently uses anything in that page, - * because all of it is in the vmalloc free region. But we cannot free - * page (3), because we can't be sure that the rest of it is unused. - * - * We only consider pages that contain part of the original region for - * freeing: we don't try to free other pages from the free region or we'd - * end up trying to free huge chunks of virtual address space. - * - * Concurrency - * ----------- - * - * How do we know that we're not freeing a page that is simultaneously - * being used for a fresh allocation in kasan_populate_vmalloc(_pte)? - * - * We _can_ have kasan_release_vmalloc and kasan_populate_vmalloc running - * at the same time. While we run under free_vmap_area_lock, the population - * code does not. - * - * free_vmap_area_lock instead operates to ensure that the larger range - * [free_region_start, free_region_end) is safe: because __alloc_vmap_area and - * the per-cpu region-finding algorithm both run under free_vmap_area_lock, - * no space identified as free will become used while we are running. This - * means that so long as we are careful with alignment and only free shadow - * pages entirely covered by the free region, we will not run in to any - * trouble - any simultaneous allocations will be for disjoint regions. - */ -void kasan_release_vmalloc(unsigned long start, unsigned long end, - unsigned long free_region_start, - unsigned long free_region_end) -{ - void *shadow_start, *shadow_end; - unsigned long region_start, region_end; - unsigned long size; - - region_start = ALIGN(start, PAGE_SIZE * KASAN_GRANULE_SIZE); - region_end = ALIGN_DOWN(end, PAGE_SIZE * KASAN_GRANULE_SIZE); - - free_region_start = ALIGN(free_region_start, - PAGE_SIZE * KASAN_GRANULE_SIZE); - - if (start != region_start && - free_region_start < region_start) - region_start -= PAGE_SIZE * KASAN_GRANULE_SIZE; - - free_region_end = ALIGN_DOWN(free_region_end, - PAGE_SIZE * KASAN_GRANULE_SIZE); - - if (end != region_end && - free_region_end > region_end) - region_end += PAGE_SIZE * KASAN_GRANULE_SIZE; - - shadow_start = kasan_mem_to_shadow((void *)region_start); - shadow_end = kasan_mem_to_shadow((void *)region_end); - - if (shadow_end > shadow_start) { - size = shadow_end - shadow_start; - apply_to_existing_page_range(&init_mm, - (unsigned long)shadow_start, - size, kasan_depopulate_vmalloc_pte, - NULL); - flush_tlb_kernel_range((unsigned long)shadow_start, - (unsigned long)shadow_end); - } -} - -#else /* CONFIG_KASAN_VMALLOC */ - -int kasan_module_alloc(void *addr, size_t size) -{ - void *ret; - size_t scaled_size; - size_t shadow_size; - unsigned long shadow_start; - - shadow_start = (unsigned long)kasan_mem_to_shadow(addr); - scaled_size = (size + KASAN_GRANULE_SIZE - 1) >> - KASAN_SHADOW_SCALE_SHIFT; - shadow_size = round_up(scaled_size, PAGE_SIZE); - - if (WARN_ON(!PAGE_ALIGNED(shadow_start))) - return -EINVAL; - - ret = __vmalloc_node_range(shadow_size, 1, shadow_start, - shadow_start + shadow_size, - GFP_KERNEL, - PAGE_KERNEL, VM_NO_GUARD, NUMA_NO_NODE, - __builtin_return_address(0)); - - if (ret) { - __memset(ret, KASAN_SHADOW_INIT, shadow_size); - find_vm_area(addr)->flags |= VM_KASAN; - kmemleak_ignore(ret); - return 0; - } - - return -ENOMEM; -} - -void kasan_free_shadow(const struct vm_struct *vm) -{ - if (vm->flags & VM_KASAN) - vfree(kasan_mem_to_shadow(vm->addr)); -} - -#endif diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c new file mode 100644 index 0000000000000..5faba876456f6 --- /dev/null +++ b/mm/kasan/shadow.c @@ -0,0 +1,505 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This file contains KASAN runtime code that manages shadow memory for + * generic and software tag-based KASAN modes. + * + * Copyright (c) 2014 Samsung Electronics Co., Ltd. + * Author: Andrey Ryabinin + * + * Some code borrowed from https://github.com/xairy/kasan-prototype by + * Andrey Konovalov + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "kasan.h" + +bool __kasan_check_read(const volatile void *p, unsigned int size) +{ + return check_memory_region((unsigned long)p, size, false, _RET_IP_); +} +EXPORT_SYMBOL(__kasan_check_read); + +bool __kasan_check_write(const volatile void *p, unsigned int size) +{ + return check_memory_region((unsigned long)p, size, true, _RET_IP_); +} +EXPORT_SYMBOL(__kasan_check_write); + +#undef memset +void *memset(void *addr, int c, size_t len) +{ + if (!check_memory_region((unsigned long)addr, len, true, _RET_IP_)) + return NULL; + + return __memset(addr, c, len); +} + +#ifdef __HAVE_ARCH_MEMMOVE +#undef memmove +void *memmove(void *dest, const void *src, size_t len) +{ + if (!check_memory_region((unsigned long)src, len, false, _RET_IP_) || + !check_memory_region((unsigned long)dest, len, true, _RET_IP_)) + return NULL; + + return __memmove(dest, src, len); +} +#endif + +#undef memcpy +void *memcpy(void *dest, const void *src, size_t len) +{ + if (!check_memory_region((unsigned long)src, len, false, _RET_IP_) || + !check_memory_region((unsigned long)dest, len, true, _RET_IP_)) + return NULL; + + return __memcpy(dest, src, len); +} + +/* + * Poisons the shadow memory for 'size' bytes starting from 'addr'. + * Memory addresses should be aligned to KASAN_GRANULE_SIZE. + */ +void poison_range(const void *address, size_t size, u8 value) +{ + void *shadow_start, *shadow_end; + + /* + * Perform shadow offset calculation based on untagged address, as + * some of the callers (e.g. kasan_poison_object_data) pass tagged + * addresses to this function. + */ + address = reset_tag(address); + + shadow_start = kasan_mem_to_shadow(address); + shadow_end = kasan_mem_to_shadow(address + size); + + __memset(shadow_start, value, shadow_end - shadow_start); +} + +void unpoison_range(const void *address, size_t size) +{ + u8 tag = get_tag(address); + + /* + * Perform shadow offset calculation based on untagged address, as + * some of the callers (e.g. kasan_unpoison_object_data) pass tagged + * addresses to this function. + */ + address = reset_tag(address); + + poison_range(address, size, tag); + + if (size & KASAN_GRANULE_MASK) { + u8 *shadow = (u8 *)kasan_mem_to_shadow(address + size); + + if (IS_ENABLED(CONFIG_KASAN_SW_TAGS)) + *shadow = tag; + else + *shadow = size & KASAN_GRANULE_MASK; + } +} + +#ifdef CONFIG_MEMORY_HOTPLUG +static bool shadow_mapped(unsigned long addr) +{ + pgd_t *pgd = pgd_offset_k(addr); + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + if (pgd_none(*pgd)) + return false; + p4d = p4d_offset(pgd, addr); + if (p4d_none(*p4d)) + return false; + pud = pud_offset(p4d, addr); + if (pud_none(*pud)) + return false; + + /* + * We can't use pud_large() or pud_huge(), the first one is + * arch-specific, the last one depends on HUGETLB_PAGE. So let's abuse + * pud_bad(), if pud is bad then it's bad because it's huge. + */ + if (pud_bad(*pud)) + return true; + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) + return false; + + if (pmd_bad(*pmd)) + return true; + pte = pte_offset_kernel(pmd, addr); + return !pte_none(*pte); +} + +static int __meminit kasan_mem_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct memory_notify *mem_data = data; + unsigned long nr_shadow_pages, start_kaddr, shadow_start; + unsigned long shadow_end, shadow_size; + + nr_shadow_pages = mem_data->nr_pages >> KASAN_SHADOW_SCALE_SHIFT; + start_kaddr = (unsigned long)pfn_to_kaddr(mem_data->start_pfn); + shadow_start = (unsigned long)kasan_mem_to_shadow((void *)start_kaddr); + shadow_size = nr_shadow_pages << PAGE_SHIFT; + shadow_end = shadow_start + shadow_size; + + if (WARN_ON(mem_data->nr_pages % KASAN_GRANULE_SIZE) || + WARN_ON(start_kaddr % (KASAN_GRANULE_SIZE << PAGE_SHIFT))) + return NOTIFY_BAD; + + switch (action) { + case MEM_GOING_ONLINE: { + void *ret; + + /* + * If shadow is mapped already than it must have been mapped + * during the boot. This could happen if we onlining previously + * offlined memory. + */ + if (shadow_mapped(shadow_start)) + return NOTIFY_OK; + + ret = __vmalloc_node_range(shadow_size, PAGE_SIZE, shadow_start, + shadow_end, GFP_KERNEL, + PAGE_KERNEL, VM_NO_GUARD, + pfn_to_nid(mem_data->start_pfn), + __builtin_return_address(0)); + if (!ret) + return NOTIFY_BAD; + + kmemleak_ignore(ret); + return NOTIFY_OK; + } + case MEM_CANCEL_ONLINE: + case MEM_OFFLINE: { + struct vm_struct *vm; + + /* + * shadow_start was either mapped during boot by kasan_init() + * or during memory online by __vmalloc_node_range(). + * In the latter case we can use vfree() to free shadow. + * Non-NULL result of the find_vm_area() will tell us if + * that was the second case. + * + * Currently it's not possible to free shadow mapped + * during boot by kasan_init(). It's because the code + * to do that hasn't been written yet. So we'll just + * leak the memory. + */ + vm = find_vm_area((void *)shadow_start); + if (vm) + vfree((void *)shadow_start); + } + } + + return NOTIFY_OK; +} + +static int __init kasan_memhotplug_init(void) +{ + hotplug_memory_notifier(kasan_mem_notifier, 0); + + return 0; +} + +core_initcall(kasan_memhotplug_init); +#endif + +#ifdef CONFIG_KASAN_VMALLOC + +static int kasan_populate_vmalloc_pte(pte_t *ptep, unsigned long addr, + void *unused) +{ + unsigned long page; + pte_t pte; + + if (likely(!pte_none(*ptep))) + return 0; + + page = __get_free_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + memset((void *)page, KASAN_VMALLOC_INVALID, PAGE_SIZE); + pte = pfn_pte(PFN_DOWN(__pa(page)), PAGE_KERNEL); + + spin_lock(&init_mm.page_table_lock); + if (likely(pte_none(*ptep))) { + set_pte_at(&init_mm, addr, ptep, pte); + page = 0; + } + spin_unlock(&init_mm.page_table_lock); + if (page) + free_page(page); + return 0; +} + +int kasan_populate_vmalloc(unsigned long addr, unsigned long size) +{ + unsigned long shadow_start, shadow_end; + int ret; + + if (!is_vmalloc_or_module_addr((void *)addr)) + return 0; + + shadow_start = (unsigned long)kasan_mem_to_shadow((void *)addr); + shadow_start = ALIGN_DOWN(shadow_start, PAGE_SIZE); + shadow_end = (unsigned long)kasan_mem_to_shadow((void *)addr + size); + shadow_end = ALIGN(shadow_end, PAGE_SIZE); + + ret = apply_to_page_range(&init_mm, shadow_start, + shadow_end - shadow_start, + kasan_populate_vmalloc_pte, NULL); + if (ret) + return ret; + + flush_cache_vmap(shadow_start, shadow_end); + + /* + * We need to be careful about inter-cpu effects here. Consider: + * + * CPU#0 CPU#1 + * WRITE_ONCE(p, vmalloc(100)); while (x = READ_ONCE(p)) ; + * p[99] = 1; + * + * With compiler instrumentation, that ends up looking like this: + * + * CPU#0 CPU#1 + * // vmalloc() allocates memory + * // let a = area->addr + * // we reach kasan_populate_vmalloc + * // and call unpoison_range: + * STORE shadow(a), unpoison_val + * ... + * STORE shadow(a+99), unpoison_val x = LOAD p + * // rest of vmalloc process + * STORE p, a LOAD shadow(x+99) + * + * If there is no barrier between the end of unpoisioning the shadow + * and the store of the result to p, the stores could be committed + * in a different order by CPU#0, and CPU#1 could erroneously observe + * poison in the shadow. + * + * We need some sort of barrier between the stores. + * + * In the vmalloc() case, this is provided by a smp_wmb() in + * clear_vm_uninitialized_flag(). In the per-cpu allocator and in + * get_vm_area() and friends, the caller gets shadow allocated but + * doesn't have any pages mapped into the virtual address space that + * has been reserved. Mapping those pages in will involve taking and + * releasing a page-table lock, which will provide the barrier. + */ + + return 0; +} + +/* + * Poison the shadow for a vmalloc region. Called as part of the + * freeing process at the time the region is freed. + */ +void kasan_poison_vmalloc(const void *start, unsigned long size) +{ + if (!is_vmalloc_or_module_addr(start)) + return; + + size = round_up(size, KASAN_GRANULE_SIZE); + poison_range(start, size, KASAN_VMALLOC_INVALID); +} + +void kasan_unpoison_vmalloc(const void *start, unsigned long size) +{ + if (!is_vmalloc_or_module_addr(start)) + return; + + unpoison_range(start, size); +} + +static int kasan_depopulate_vmalloc_pte(pte_t *ptep, unsigned long addr, + void *unused) +{ + unsigned long page; + + page = (unsigned long)__va(pte_pfn(*ptep) << PAGE_SHIFT); + + spin_lock(&init_mm.page_table_lock); + + if (likely(!pte_none(*ptep))) { + pte_clear(&init_mm, addr, ptep); + free_page(page); + } + spin_unlock(&init_mm.page_table_lock); + + return 0; +} + +/* + * Release the backing for the vmalloc region [start, end), which + * lies within the free region [free_region_start, free_region_end). + * + * This can be run lazily, long after the region was freed. It runs + * under vmap_area_lock, so it's not safe to interact with the vmalloc/vmap + * infrastructure. + * + * How does this work? + * ------------------- + * + * We have a region that is page aligned, labelled as A. + * That might not map onto the shadow in a way that is page-aligned: + * + * start end + * v v + * |????????|????????|AAAAAAAA|AA....AA|AAAAAAAA|????????| < vmalloc + * -------- -------- -------- -------- -------- + * | | | | | + * | | | /-------/ | + * \-------\|/------/ |/---------------/ + * ||| || + * |??AAAAAA|AAAAAAAA|AA??????| < shadow + * (1) (2) (3) + * + * First we align the start upwards and the end downwards, so that the + * shadow of the region aligns with shadow page boundaries. In the + * example, this gives us the shadow page (2). This is the shadow entirely + * covered by this allocation. + * + * Then we have the tricky bits. We want to know if we can free the + * partially covered shadow pages - (1) and (3) in the example. For this, + * we are given the start and end of the free region that contains this + * allocation. Extending our previous example, we could have: + * + * free_region_start free_region_end + * | start end | + * v v v v + * |FFFFFFFF|FFFFFFFF|AAAAAAAA|AA....AA|AAAAAAAA|FFFFFFFF| < vmalloc + * -------- -------- -------- -------- -------- + * | | | | | + * | | | /-------/ | + * \-------\|/------/ |/---------------/ + * ||| || + * |FFAAAAAA|AAAAAAAA|AAF?????| < shadow + * (1) (2) (3) + * + * Once again, we align the start of the free region up, and the end of + * the free region down so that the shadow is page aligned. So we can free + * page (1) - we know no allocation currently uses anything in that page, + * because all of it is in the vmalloc free region. But we cannot free + * page (3), because we can't be sure that the rest of it is unused. + * + * We only consider pages that contain part of the original region for + * freeing: we don't try to free other pages from the free region or we'd + * end up trying to free huge chunks of virtual address space. + * + * Concurrency + * ----------- + * + * How do we know that we're not freeing a page that is simultaneously + * being used for a fresh allocation in kasan_populate_vmalloc(_pte)? + * + * We _can_ have kasan_release_vmalloc and kasan_populate_vmalloc running + * at the same time. While we run under free_vmap_area_lock, the population + * code does not. + * + * free_vmap_area_lock instead operates to ensure that the larger range + * [free_region_start, free_region_end) is safe: because __alloc_vmap_area and + * the per-cpu region-finding algorithm both run under free_vmap_area_lock, + * no space identified as free will become used while we are running. This + * means that so long as we are careful with alignment and only free shadow + * pages entirely covered by the free region, we will not run in to any + * trouble - any simultaneous allocations will be for disjoint regions. + */ +void kasan_release_vmalloc(unsigned long start, unsigned long end, + unsigned long free_region_start, + unsigned long free_region_end) +{ + void *shadow_start, *shadow_end; + unsigned long region_start, region_end; + unsigned long size; + + region_start = ALIGN(start, PAGE_SIZE * KASAN_GRANULE_SIZE); + region_end = ALIGN_DOWN(end, PAGE_SIZE * KASAN_GRANULE_SIZE); + + free_region_start = ALIGN(free_region_start, + PAGE_SIZE * KASAN_GRANULE_SIZE); + + if (start != region_start && + free_region_start < region_start) + region_start -= PAGE_SIZE * KASAN_GRANULE_SIZE; + + free_region_end = ALIGN_DOWN(free_region_end, + PAGE_SIZE * KASAN_GRANULE_SIZE); + + if (end != region_end && + free_region_end > region_end) + region_end += PAGE_SIZE * KASAN_GRANULE_SIZE; + + shadow_start = kasan_mem_to_shadow((void *)region_start); + shadow_end = kasan_mem_to_shadow((void *)region_end); + + if (shadow_end > shadow_start) { + size = shadow_end - shadow_start; + apply_to_existing_page_range(&init_mm, + (unsigned long)shadow_start, + size, kasan_depopulate_vmalloc_pte, + NULL); + flush_tlb_kernel_range((unsigned long)shadow_start, + (unsigned long)shadow_end); + } +} + +#else /* CONFIG_KASAN_VMALLOC */ + +int kasan_module_alloc(void *addr, size_t size) +{ + void *ret; + size_t scaled_size; + size_t shadow_size; + unsigned long shadow_start; + + shadow_start = (unsigned long)kasan_mem_to_shadow(addr); + scaled_size = (size + KASAN_GRANULE_SIZE - 1) >> + KASAN_SHADOW_SCALE_SHIFT; + shadow_size = round_up(scaled_size, PAGE_SIZE); + + if (WARN_ON(!PAGE_ALIGNED(shadow_start))) + return -EINVAL; + + ret = __vmalloc_node_range(shadow_size, 1, shadow_start, + shadow_start + shadow_size, + GFP_KERNEL, + PAGE_KERNEL, VM_NO_GUARD, NUMA_NO_NODE, + __builtin_return_address(0)); + + if (ret) { + __memset(ret, KASAN_SHADOW_INIT, shadow_size); + find_vm_area(addr)->flags |= VM_KASAN; + kmemleak_ignore(ret); + return 0; + } + + return -ENOMEM; +} + +void kasan_free_shadow(const struct vm_struct *vm) +{ + if (vm->flags & VM_KASAN) + vfree(kasan_mem_to_shadow(vm->addr)); +} + +#endif -- GitLab From affc3f07759cfdcb1ffd87f2847b1c27d8781d65 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:00:35 -0800 Subject: [PATCH 1033/1894] kasan: define KASAN_MEMORY_PER_SHADOW_PAGE Define KASAN_MEMORY_PER_SHADOW_PAGE as (KASAN_GRANULE_SIZE << PAGE_SHIFT), which is the same as (KASAN_GRANULE_SIZE * PAGE_SIZE) for software modes that use shadow memory, and use it across KASAN code to simplify it. Link: https://lkml.kernel.org/r/8329391cfe14b5cffd3decf3b5c535b6ce21eef6.1606161801.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Reviewed-by: Marco Elver Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/init.c | 10 ++++------ mm/kasan/kasan.h | 2 ++ mm/kasan/shadow.c | 16 +++++++--------- 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/mm/kasan/init.c b/mm/kasan/init.c index 1a71eaa8c5f90..bc0ad208b3a7a 100644 --- a/mm/kasan/init.c +++ b/mm/kasan/init.c @@ -441,9 +441,8 @@ void kasan_remove_zero_shadow(void *start, unsigned long size) addr = (unsigned long)kasan_mem_to_shadow(start); end = addr + (size >> KASAN_SHADOW_SCALE_SHIFT); - if (WARN_ON((unsigned long)start % - (KASAN_GRANULE_SIZE * PAGE_SIZE)) || - WARN_ON(size % (KASAN_GRANULE_SIZE * PAGE_SIZE))) + if (WARN_ON((unsigned long)start % KASAN_MEMORY_PER_SHADOW_PAGE) || + WARN_ON(size % KASAN_MEMORY_PER_SHADOW_PAGE)) return; for (; addr < end; addr = next) { @@ -476,9 +475,8 @@ int kasan_add_zero_shadow(void *start, unsigned long size) shadow_start = kasan_mem_to_shadow(start); shadow_end = shadow_start + (size >> KASAN_SHADOW_SCALE_SHIFT); - if (WARN_ON((unsigned long)start % - (KASAN_GRANULE_SIZE * PAGE_SIZE)) || - WARN_ON(size % (KASAN_GRANULE_SIZE * PAGE_SIZE))) + if (WARN_ON((unsigned long)start % KASAN_MEMORY_PER_SHADOW_PAGE) || + WARN_ON(size % KASAN_MEMORY_PER_SHADOW_PAGE)) return -EINVAL; ret = kasan_populate_early_shadow(shadow_start, shadow_end); diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 53b095f56f28e..eec88bf28c641 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -8,6 +8,8 @@ #define KASAN_GRANULE_SIZE (1UL << KASAN_SHADOW_SCALE_SHIFT) #define KASAN_GRANULE_MASK (KASAN_GRANULE_SIZE - 1) +#define KASAN_MEMORY_PER_SHADOW_PAGE (KASAN_GRANULE_SIZE << PAGE_SHIFT) + #define KASAN_TAG_KERNEL 0xFF /* native kernel pointers tag */ #define KASAN_TAG_INVALID 0xFE /* inaccessible memory tag */ #define KASAN_TAG_MAX 0xFD /* maximum value for random tags */ diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c index 5faba876456f6..ba84e51065852 100644 --- a/mm/kasan/shadow.c +++ b/mm/kasan/shadow.c @@ -161,7 +161,7 @@ static int __meminit kasan_mem_notifier(struct notifier_block *nb, shadow_end = shadow_start + shadow_size; if (WARN_ON(mem_data->nr_pages % KASAN_GRANULE_SIZE) || - WARN_ON(start_kaddr % (KASAN_GRANULE_SIZE << PAGE_SHIFT))) + WARN_ON(start_kaddr % KASAN_MEMORY_PER_SHADOW_PAGE)) return NOTIFY_BAD; switch (action) { @@ -432,22 +432,20 @@ void kasan_release_vmalloc(unsigned long start, unsigned long end, unsigned long region_start, region_end; unsigned long size; - region_start = ALIGN(start, PAGE_SIZE * KASAN_GRANULE_SIZE); - region_end = ALIGN_DOWN(end, PAGE_SIZE * KASAN_GRANULE_SIZE); + region_start = ALIGN(start, KASAN_MEMORY_PER_SHADOW_PAGE); + region_end = ALIGN_DOWN(end, KASAN_MEMORY_PER_SHADOW_PAGE); - free_region_start = ALIGN(free_region_start, - PAGE_SIZE * KASAN_GRANULE_SIZE); + free_region_start = ALIGN(free_region_start, KASAN_MEMORY_PER_SHADOW_PAGE); if (start != region_start && free_region_start < region_start) - region_start -= PAGE_SIZE * KASAN_GRANULE_SIZE; + region_start -= KASAN_MEMORY_PER_SHADOW_PAGE; - free_region_end = ALIGN_DOWN(free_region_end, - PAGE_SIZE * KASAN_GRANULE_SIZE); + free_region_end = ALIGN_DOWN(free_region_end, KASAN_MEMORY_PER_SHADOW_PAGE); if (end != region_end && free_region_end > region_end) - region_end += PAGE_SIZE * KASAN_GRANULE_SIZE; + region_end += KASAN_MEMORY_PER_SHADOW_PAGE; shadow_start = kasan_mem_to_shadow((void *)region_start); shadow_end = kasan_mem_to_shadow((void *)region_end); -- GitLab From 59fd51b2ba6b02e0f7c989fc12c3537988f8c8fe Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:00:39 -0800 Subject: [PATCH 1034/1894] kasan: rename report and tags files Rename generic_report.c to report_generic.c and tags_report.c to report_sw_tags.c, as their content is more relevant to report.c file. Also rename tags.c to sw_tags.c to better reflect that this file contains code for software tag-based mode. No functional changes. Link: https://lkml.kernel.org/r/a6105d416da97d389580015afed66c4c3cfd4c08.1606161801.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Reviewed-by: Marco Elver Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/Makefile | 16 ++++++++-------- mm/kasan/report.c | 2 +- mm/kasan/{generic_report.c => report_generic.c} | 0 mm/kasan/{tags_report.c => report_sw_tags.c} | 0 mm/kasan/{tags.c => sw_tags.c} | 0 5 files changed, 9 insertions(+), 9 deletions(-) rename mm/kasan/{generic_report.c => report_generic.c} (100%) rename mm/kasan/{tags_report.c => report_sw_tags.c} (100%) rename mm/kasan/{tags.c => sw_tags.c} (100%) diff --git a/mm/kasan/Makefile b/mm/kasan/Makefile index 7cc1031e1ef80..f1d68a34f3c90 100644 --- a/mm/kasan/Makefile +++ b/mm/kasan/Makefile @@ -6,13 +6,13 @@ KCOV_INSTRUMENT := n # Disable ftrace to avoid recursion. CFLAGS_REMOVE_common.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_generic.o = $(CC_FLAGS_FTRACE) -CFLAGS_REMOVE_generic_report.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_init.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_quarantine.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_report.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_report_generic.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_report_sw_tags.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_shadow.o = $(CC_FLAGS_FTRACE) -CFLAGS_REMOVE_tags.o = $(CC_FLAGS_FTRACE) -CFLAGS_REMOVE_tags_report.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_sw_tags.o = $(CC_FLAGS_FTRACE) # Function splitter causes unnecessary splits in __asan_load1/__asan_store1 # see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63533 @@ -23,14 +23,14 @@ CC_FLAGS_KASAN_RUNTIME += -DDISABLE_BRANCH_PROFILING CFLAGS_common.o := $(CC_FLAGS_KASAN_RUNTIME) CFLAGS_generic.o := $(CC_FLAGS_KASAN_RUNTIME) -CFLAGS_generic_report.o := $(CC_FLAGS_KASAN_RUNTIME) CFLAGS_init.o := $(CC_FLAGS_KASAN_RUNTIME) CFLAGS_quarantine.o := $(CC_FLAGS_KASAN_RUNTIME) CFLAGS_report.o := $(CC_FLAGS_KASAN_RUNTIME) +CFLAGS_report_generic.o := $(CC_FLAGS_KASAN_RUNTIME) +CFLAGS_report_sw_tags.o := $(CC_FLAGS_KASAN_RUNTIME) CFLAGS_shadow.o := $(CC_FLAGS_KASAN_RUNTIME) -CFLAGS_tags.o := $(CC_FLAGS_KASAN_RUNTIME) -CFLAGS_tags_report.o := $(CC_FLAGS_KASAN_RUNTIME) +CFLAGS_sw_tags.o := $(CC_FLAGS_KASAN_RUNTIME) obj-$(CONFIG_KASAN) := common.o report.o -obj-$(CONFIG_KASAN_GENERIC) += init.o generic.o generic_report.o shadow.o quarantine.o -obj-$(CONFIG_KASAN_SW_TAGS) += init.o shadow.o tags.o tags_report.o +obj-$(CONFIG_KASAN_GENERIC) += init.o generic.o report_generic.o shadow.o quarantine.o +obj-$(CONFIG_KASAN_SW_TAGS) += init.o report_sw_tags.o shadow.o sw_tags.o diff --git a/mm/kasan/report.c b/mm/kasan/report.c index d16ccbc7e4b2b..96a70041a6900 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * This file contains common generic and tag-based KASAN error reporting code. + * This file contains common KASAN error reporting code. * * Copyright (c) 2014 Samsung Electronics Co., Ltd. * Author: Andrey Ryabinin diff --git a/mm/kasan/generic_report.c b/mm/kasan/report_generic.c similarity index 100% rename from mm/kasan/generic_report.c rename to mm/kasan/report_generic.c diff --git a/mm/kasan/tags_report.c b/mm/kasan/report_sw_tags.c similarity index 100% rename from mm/kasan/tags_report.c rename to mm/kasan/report_sw_tags.c diff --git a/mm/kasan/tags.c b/mm/kasan/sw_tags.c similarity index 100% rename from mm/kasan/tags.c rename to mm/kasan/sw_tags.c -- GitLab From ffcc5cea46c0c3dde4eeb101fdf3a37da43863de Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:00:42 -0800 Subject: [PATCH 1035/1894] kasan: don't duplicate config dependencies Both KASAN_GENERIC and KASAN_SW_TAGS have common dependencies, move those to KASAN. Link: https://lkml.kernel.org/r/c1cc0d562608a318c607afe22db5ec2a7af72e47.1606161801.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Reviewed-by: Marco Elver Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.kasan | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 58dd3b86ef846..c0e9e78741227 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -24,6 +24,8 @@ menuconfig KASAN (HAVE_ARCH_KASAN_SW_TAGS && CC_HAS_KASAN_SW_TAGS) depends on (SLUB && SYSFS) || (SLAB && !DEBUG_SLAB) depends on CC_HAS_WORKING_NOSANITIZE_ADDRESS + select CONSTRUCTORS + select STACKDEPOT help Enables KASAN (KernelAddressSANitizer) - runtime memory debugger, designed to find out-of-bounds accesses and use-after-free bugs. @@ -46,10 +48,7 @@ choice config KASAN_GENERIC bool "Generic mode" depends on HAVE_ARCH_KASAN && CC_HAS_KASAN_GENERIC - depends on (SLUB && SYSFS) || (SLAB && !DEBUG_SLAB) select SLUB_DEBUG if SLUB - select CONSTRUCTORS - select STACKDEPOT help Enables generic KASAN mode. @@ -70,10 +69,7 @@ config KASAN_GENERIC config KASAN_SW_TAGS bool "Software tag-based mode" depends on HAVE_ARCH_KASAN_SW_TAGS && CC_HAS_KASAN_SW_TAGS - depends on (SLUB && SYSFS) || (SLAB && !DEBUG_SLAB) select SLUB_DEBUG if SLUB - select CONSTRUCTORS - select STACKDEPOT help Enables software tag-based KASAN mode. -- GitLab From 2cdbed63490d0d2bcbae60abcc5639caa5aba49b Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:00:46 -0800 Subject: [PATCH 1036/1894] kasan: hide invalid free check implementation This is a preparatory commit for the upcoming addition of a new hardware tag-based (MTE-based) KASAN mode. For software KASAN modes the check is based on the value in the shadow memory. Hardware tag-based KASAN won't be using shadow, so hide the implementation of the check in check_invalid_free(). Also simplify the code for software tag-based mode. No functional changes for software modes. Link: https://lkml.kernel.org/r/d01534a4b977f97d87515dc590e6348e1406de81.1606161801.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Reviewed-by: Marco Elver Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/common.c | 19 +------------------ mm/kasan/generic.c | 7 +++++++ mm/kasan/kasan.h | 2 ++ mm/kasan/sw_tags.c | 9 +++++++++ 4 files changed, 19 insertions(+), 18 deletions(-) diff --git a/mm/kasan/common.c b/mm/kasan/common.c index 88f57346c7bd3..663ffa71cd20a 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -277,25 +277,9 @@ void * __must_check kasan_init_slab_obj(struct kmem_cache *cache, return (void *)object; } -static inline bool shadow_invalid(u8 tag, s8 shadow_byte) -{ - if (IS_ENABLED(CONFIG_KASAN_GENERIC)) - return shadow_byte < 0 || - shadow_byte >= KASAN_GRANULE_SIZE; - - /* else CONFIG_KASAN_SW_TAGS: */ - if ((u8)shadow_byte == KASAN_TAG_INVALID) - return true; - if ((tag != KASAN_TAG_KERNEL) && (tag != (u8)shadow_byte)) - return true; - - return false; -} - static bool __kasan_slab_free(struct kmem_cache *cache, void *object, unsigned long ip, bool quarantine) { - s8 shadow_byte; u8 tag; void *tagged_object; unsigned long rounded_up_size; @@ -314,8 +298,7 @@ static bool __kasan_slab_free(struct kmem_cache *cache, void *object, if (unlikely(cache->flags & SLAB_TYPESAFE_BY_RCU)) return false; - shadow_byte = READ_ONCE(*(s8 *)kasan_mem_to_shadow(object)); - if (shadow_invalid(tag, shadow_byte)) { + if (check_invalid_free(tagged_object)) { kasan_report_invalid_free(tagged_object, ip); return true; } diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c index 71ae070b80c6b..662212d103b57 100644 --- a/mm/kasan/generic.c +++ b/mm/kasan/generic.c @@ -187,6 +187,13 @@ bool check_memory_region(unsigned long addr, size_t size, bool write, return check_memory_region_inline(addr, size, write, ret_ip); } +bool check_invalid_free(void *addr) +{ + s8 shadow_byte = READ_ONCE(*(s8 *)kasan_mem_to_shadow(addr)); + + return shadow_byte < 0 || shadow_byte >= KASAN_GRANULE_SIZE; +} + void kasan_cache_shrink(struct kmem_cache *cache) { quarantine_remove_cache(cache); diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index eec88bf28c641..e5b5f60bc9638 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -166,6 +166,8 @@ void unpoison_range(const void *address, size_t size); bool check_memory_region(unsigned long addr, size_t size, bool write, unsigned long ret_ip); +bool check_invalid_free(void *addr); + void *find_first_bad_addr(void *addr, size_t size); const char *get_bug_type(struct kasan_access_info *info); diff --git a/mm/kasan/sw_tags.c b/mm/kasan/sw_tags.c index c0b3f327812b9..64540109c461c 100644 --- a/mm/kasan/sw_tags.c +++ b/mm/kasan/sw_tags.c @@ -121,6 +121,15 @@ bool check_memory_region(unsigned long addr, size_t size, bool write, return true; } +bool check_invalid_free(void *addr) +{ + u8 tag = get_tag(addr); + u8 shadow_byte = READ_ONCE(*(u8 *)kasan_mem_to_shadow(reset_tag(addr))); + + return (shadow_byte == KASAN_TAG_INVALID) || + (tag != KASAN_TAG_KERNEL && tag != shadow_byte); +} + #define DEFINE_HWASAN_LOAD_STORE(size) \ void __hwasan_load##size##_noabort(unsigned long addr) \ { \ -- GitLab From 97fc712232368ddeabd91cdabf40da9b2155c033 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:00:49 -0800 Subject: [PATCH 1037/1894] kasan: decode stack frame only with KASAN_STACK_ENABLE Decoding routines aren't needed when CONFIG_KASAN_STACK_ENABLE is not enabled. Currently only generic KASAN mode implements stack error reporting. No functional changes for software modes. Link: https://lkml.kernel.org/r/05a24db36f5ec876af876a299bbea98c29468ebd.1606161801.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Reviewed-by: Marco Elver Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/kasan.h | 6 ++ mm/kasan/report.c | 162 -------------------------------------- mm/kasan/report_generic.c | 162 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 168 insertions(+), 162 deletions(-) diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index e5b5f60bc9638..488ca1ff59791 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -171,6 +171,12 @@ bool check_invalid_free(void *addr); void *find_first_bad_addr(void *addr, size_t size); const char *get_bug_type(struct kasan_access_info *info); +#if defined(CONFIG_KASAN_GENERIC) && CONFIG_KASAN_STACK +void print_address_stack_frame(const void *addr); +#else +static inline void print_address_stack_frame(const void *addr) { } +#endif + bool kasan_report(unsigned long addr, size_t size, bool is_write, unsigned long ip); void kasan_report_invalid_free(void *object, unsigned long ip); diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 96a70041a6900..d140d26cfb316 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -211,168 +211,6 @@ static inline bool init_task_stack_addr(const void *addr) sizeof(init_thread_union.stack)); } -static bool __must_check tokenize_frame_descr(const char **frame_descr, - char *token, size_t max_tok_len, - unsigned long *value) -{ - const char *sep = strchr(*frame_descr, ' '); - - if (sep == NULL) - sep = *frame_descr + strlen(*frame_descr); - - if (token != NULL) { - const size_t tok_len = sep - *frame_descr; - - if (tok_len + 1 > max_tok_len) { - pr_err("KASAN internal error: frame description too long: %s\n", - *frame_descr); - return false; - } - - /* Copy token (+ 1 byte for '\0'). */ - strlcpy(token, *frame_descr, tok_len + 1); - } - - /* Advance frame_descr past separator. */ - *frame_descr = sep + 1; - - if (value != NULL && kstrtoul(token, 10, value)) { - pr_err("KASAN internal error: not a valid number: %s\n", token); - return false; - } - - return true; -} - -static void print_decoded_frame_descr(const char *frame_descr) -{ - /* - * We need to parse the following string: - * "n alloc_1 alloc_2 ... alloc_n" - * where alloc_i looks like - * "offset size len name" - * or "offset size len name:line". - */ - - char token[64]; - unsigned long num_objects; - - if (!tokenize_frame_descr(&frame_descr, token, sizeof(token), - &num_objects)) - return; - - pr_err("\n"); - pr_err("this frame has %lu %s:\n", num_objects, - num_objects == 1 ? "object" : "objects"); - - while (num_objects--) { - unsigned long offset; - unsigned long size; - - /* access offset */ - if (!tokenize_frame_descr(&frame_descr, token, sizeof(token), - &offset)) - return; - /* access size */ - if (!tokenize_frame_descr(&frame_descr, token, sizeof(token), - &size)) - return; - /* name length (unused) */ - if (!tokenize_frame_descr(&frame_descr, NULL, 0, NULL)) - return; - /* object name */ - if (!tokenize_frame_descr(&frame_descr, token, sizeof(token), - NULL)) - return; - - /* Strip line number; without filename it's not very helpful. */ - strreplace(token, ':', '\0'); - - /* Finally, print object information. */ - pr_err(" [%lu, %lu) '%s'", offset, offset + size, token); - } -} - -static bool __must_check get_address_stack_frame_info(const void *addr, - unsigned long *offset, - const char **frame_descr, - const void **frame_pc) -{ - unsigned long aligned_addr; - unsigned long mem_ptr; - const u8 *shadow_bottom; - const u8 *shadow_ptr; - const unsigned long *frame; - - BUILD_BUG_ON(IS_ENABLED(CONFIG_STACK_GROWSUP)); - - /* - * NOTE: We currently only support printing frame information for - * accesses to the task's own stack. - */ - if (!object_is_on_stack(addr)) - return false; - - aligned_addr = round_down((unsigned long)addr, sizeof(long)); - mem_ptr = round_down(aligned_addr, KASAN_GRANULE_SIZE); - shadow_ptr = kasan_mem_to_shadow((void *)aligned_addr); - shadow_bottom = kasan_mem_to_shadow(end_of_stack(current)); - - while (shadow_ptr >= shadow_bottom && *shadow_ptr != KASAN_STACK_LEFT) { - shadow_ptr--; - mem_ptr -= KASAN_GRANULE_SIZE; - } - - while (shadow_ptr >= shadow_bottom && *shadow_ptr == KASAN_STACK_LEFT) { - shadow_ptr--; - mem_ptr -= KASAN_GRANULE_SIZE; - } - - if (shadow_ptr < shadow_bottom) - return false; - - frame = (const unsigned long *)(mem_ptr + KASAN_GRANULE_SIZE); - if (frame[0] != KASAN_CURRENT_STACK_FRAME_MAGIC) { - pr_err("KASAN internal error: frame info validation failed; invalid marker: %lu\n", - frame[0]); - return false; - } - - *offset = (unsigned long)addr - (unsigned long)frame; - *frame_descr = (const char *)frame[1]; - *frame_pc = (void *)frame[2]; - - return true; -} - -static void print_address_stack_frame(const void *addr) -{ - unsigned long offset; - const char *frame_descr; - const void *frame_pc; - - if (IS_ENABLED(CONFIG_KASAN_SW_TAGS)) - return; - - if (!get_address_stack_frame_info(addr, &offset, &frame_descr, - &frame_pc)) - return; - - /* - * get_address_stack_frame_info only returns true if the given addr is - * on the current task's stack. - */ - pr_err("\n"); - pr_err("addr %px is located in stack of task %s/%d at offset %lu in frame:\n", - addr, current->comm, task_pid_nr(current), offset); - pr_err(" %pS\n", frame_pc); - - if (!frame_descr) - return; - - print_decoded_frame_descr(frame_descr); -} - static void print_address_description(void *addr, u8 tag) { struct page *page = kasan_addr_to_page(addr); diff --git a/mm/kasan/report_generic.c b/mm/kasan/report_generic.c index 7d5b9e5c7cfe8..b543a1ed6078f 100644 --- a/mm/kasan/report_generic.c +++ b/mm/kasan/report_generic.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -122,6 +123,167 @@ const char *get_bug_type(struct kasan_access_info *info) return get_wild_bug_type(info); } +#if CONFIG_KASAN_STACK +static bool __must_check tokenize_frame_descr(const char **frame_descr, + char *token, size_t max_tok_len, + unsigned long *value) +{ + const char *sep = strchr(*frame_descr, ' '); + + if (sep == NULL) + sep = *frame_descr + strlen(*frame_descr); + + if (token != NULL) { + const size_t tok_len = sep - *frame_descr; + + if (tok_len + 1 > max_tok_len) { + pr_err("KASAN internal error: frame description too long: %s\n", + *frame_descr); + return false; + } + + /* Copy token (+ 1 byte for '\0'). */ + strlcpy(token, *frame_descr, tok_len + 1); + } + + /* Advance frame_descr past separator. */ + *frame_descr = sep + 1; + + if (value != NULL && kstrtoul(token, 10, value)) { + pr_err("KASAN internal error: not a valid number: %s\n", token); + return false; + } + + return true; +} + +static void print_decoded_frame_descr(const char *frame_descr) +{ + /* + * We need to parse the following string: + * "n alloc_1 alloc_2 ... alloc_n" + * where alloc_i looks like + * "offset size len name" + * or "offset size len name:line". + */ + + char token[64]; + unsigned long num_objects; + + if (!tokenize_frame_descr(&frame_descr, token, sizeof(token), + &num_objects)) + return; + + pr_err("\n"); + pr_err("this frame has %lu %s:\n", num_objects, + num_objects == 1 ? "object" : "objects"); + + while (num_objects--) { + unsigned long offset; + unsigned long size; + + /* access offset */ + if (!tokenize_frame_descr(&frame_descr, token, sizeof(token), + &offset)) + return; + /* access size */ + if (!tokenize_frame_descr(&frame_descr, token, sizeof(token), + &size)) + return; + /* name length (unused) */ + if (!tokenize_frame_descr(&frame_descr, NULL, 0, NULL)) + return; + /* object name */ + if (!tokenize_frame_descr(&frame_descr, token, sizeof(token), + NULL)) + return; + + /* Strip line number; without filename it's not very helpful. */ + strreplace(token, ':', '\0'); + + /* Finally, print object information. */ + pr_err(" [%lu, %lu) '%s'", offset, offset + size, token); + } +} + +static bool __must_check get_address_stack_frame_info(const void *addr, + unsigned long *offset, + const char **frame_descr, + const void **frame_pc) +{ + unsigned long aligned_addr; + unsigned long mem_ptr; + const u8 *shadow_bottom; + const u8 *shadow_ptr; + const unsigned long *frame; + + BUILD_BUG_ON(IS_ENABLED(CONFIG_STACK_GROWSUP)); + + /* + * NOTE: We currently only support printing frame information for + * accesses to the task's own stack. + */ + if (!object_is_on_stack(addr)) + return false; + + aligned_addr = round_down((unsigned long)addr, sizeof(long)); + mem_ptr = round_down(aligned_addr, KASAN_GRANULE_SIZE); + shadow_ptr = kasan_mem_to_shadow((void *)aligned_addr); + shadow_bottom = kasan_mem_to_shadow(end_of_stack(current)); + + while (shadow_ptr >= shadow_bottom && *shadow_ptr != KASAN_STACK_LEFT) { + shadow_ptr--; + mem_ptr -= KASAN_GRANULE_SIZE; + } + + while (shadow_ptr >= shadow_bottom && *shadow_ptr == KASAN_STACK_LEFT) { + shadow_ptr--; + mem_ptr -= KASAN_GRANULE_SIZE; + } + + if (shadow_ptr < shadow_bottom) + return false; + + frame = (const unsigned long *)(mem_ptr + KASAN_GRANULE_SIZE); + if (frame[0] != KASAN_CURRENT_STACK_FRAME_MAGIC) { + pr_err("KASAN internal error: frame info validation failed; invalid marker: %lu\n", + frame[0]); + return false; + } + + *offset = (unsigned long)addr - (unsigned long)frame; + *frame_descr = (const char *)frame[1]; + *frame_pc = (void *)frame[2]; + + return true; +} + +void print_address_stack_frame(const void *addr) +{ + unsigned long offset; + const char *frame_descr; + const void *frame_pc; + + if (!get_address_stack_frame_info(addr, &offset, &frame_descr, + &frame_pc)) + return; + + /* + * get_address_stack_frame_info only returns true if the given addr is + * on the current task's stack. + */ + pr_err("\n"); + pr_err("addr %px is located in stack of task %s/%d at offset %lu in frame:\n", + addr, current->comm, task_pid_nr(current), offset); + pr_err(" %pS\n", frame_pc); + + if (!frame_descr) + return; + + print_decoded_frame_descr(frame_descr); +} +#endif /* CONFIG_KASAN_STACK */ + #define DEFINE_ASAN_REPORT_LOAD(size) \ void __asan_report_load##size##_noabort(unsigned long addr) \ { \ -- GitLab From afe6ef80dcecf2cf7ccab0d94257b985e4c47d80 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:00:53 -0800 Subject: [PATCH 1038/1894] kasan, arm64: only init shadow for software modes This is a preparatory commit for the upcoming addition of a new hardware tag-based (MTE-based) KASAN mode. Hardware tag-based KASAN won't be using shadow memory. Only initialize it when one of the software KASAN modes are enabled. No functional changes for software modes. Link: https://lkml.kernel.org/r/d1742eea2cd728d150d49b144e49b6433405c7ba.1606161801.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Reviewed-by: Catalin Marinas Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Marco Elver Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/include/asm/kasan.h | 8 ++++++-- arch/arm64/mm/kasan_init.c | 15 ++++++++++++++- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/kasan.h b/arch/arm64/include/asm/kasan.h index b0dc4abc35891..f7ea70d02cab8 100644 --- a/arch/arm64/include/asm/kasan.h +++ b/arch/arm64/include/asm/kasan.h @@ -13,6 +13,12 @@ #define arch_kasan_get_tag(addr) __tag_get(addr) #ifdef CONFIG_KASAN +void kasan_init(void); +#else +static inline void kasan_init(void) { } +#endif + +#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) /* * KASAN_SHADOW_START: beginning of the kernel virtual addresses. @@ -33,12 +39,10 @@ #define _KASAN_SHADOW_START(va) (KASAN_SHADOW_END - (1UL << ((va) - KASAN_SHADOW_SCALE_SHIFT))) #define KASAN_SHADOW_START _KASAN_SHADOW_START(vabits_actual) -void kasan_init(void); void kasan_copy_shadow(pgd_t *pgdir); asmlinkage void kasan_early_init(void); #else -static inline void kasan_init(void) { } static inline void kasan_copy_shadow(pgd_t *pgdir) { } #endif diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index b24e43d20667e..ffeb80d5aa8d4 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -21,6 +21,8 @@ #include #include +#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) + static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE); /* @@ -208,7 +210,7 @@ static void __init clear_pgds(unsigned long start, set_pgd(pgd_offset_k(start), __pgd(0)); } -void __init kasan_init(void) +static void __init kasan_init_shadow(void) { u64 kimg_shadow_start, kimg_shadow_end; u64 mod_shadow_start, mod_shadow_end; @@ -269,6 +271,17 @@ void __init kasan_init(void) memset(kasan_early_shadow_page, KASAN_SHADOW_INIT, PAGE_SIZE); cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); +} + +#else /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) */ + +static inline void __init kasan_init_shadow(void) { } + +#endif /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ + +void __init kasan_init(void) +{ + kasan_init_shadow(); /* At this point kasan is fully initialized. Enable error messages */ init_task.kasan_depth = 0; -- GitLab From d73b49365ee65ac48074bdb5aa717bb4644dbbb7 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:00:56 -0800 Subject: [PATCH 1039/1894] kasan, arm64: only use kasan_depth for software modes This is a preparatory commit for the upcoming addition of a new hardware tag-based (MTE-based) KASAN mode. Hardware tag-based KASAN won't use kasan_depth. Only define and use it when one of the software KASAN modes are enabled. No functional changes for software modes. Link: https://lkml.kernel.org/r/e16f15aeda90bc7fb4dfc2e243a14b74cc5c8219.1606161801.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Reviewed-by: Catalin Marinas Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Marco Elver Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/mm/kasan_init.c | 11 ++++++++--- include/linux/kasan.h | 18 +++++++++--------- include/linux/sched.h | 2 +- init/init_task.c | 2 +- mm/kasan/common.c | 2 ++ mm/kasan/report.c | 2 ++ 6 files changed, 23 insertions(+), 14 deletions(-) diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index ffeb80d5aa8d4..5172799f831f9 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -273,17 +273,22 @@ static void __init kasan_init_shadow(void) cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); } +static void __init kasan_init_depth(void) +{ + init_task.kasan_depth = 0; +} + #else /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) */ static inline void __init kasan_init_shadow(void) { } +static inline void __init kasan_init_depth(void) { } + #endif /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ void __init kasan_init(void) { kasan_init_shadow(); - - /* At this point kasan is fully initialized. Enable error messages */ - init_task.kasan_depth = 0; + kasan_init_depth(); pr_info("KernelAddressSanitizer initialized\n"); } diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 9740c06a04a14..b272960e8396b 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -52,6 +52,12 @@ static inline void *kasan_mem_to_shadow(const void *addr) int kasan_add_zero_shadow(void *start, unsigned long size); void kasan_remove_zero_shadow(void *start, unsigned long size); +/* Enable reporting bugs after kasan_disable_current() */ +extern void kasan_enable_current(void); + +/* Disable reporting bugs for current task */ +extern void kasan_disable_current(void); + #else /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ static inline int kasan_add_zero_shadow(void *start, unsigned long size) @@ -62,16 +68,13 @@ static inline void kasan_remove_zero_shadow(void *start, unsigned long size) {} +static inline void kasan_enable_current(void) {} +static inline void kasan_disable_current(void) {} + #endif /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ #ifdef CONFIG_KASAN -/* Enable reporting bugs after kasan_disable_current() */ -extern void kasan_enable_current(void); - -/* Disable reporting bugs for current task */ -extern void kasan_disable_current(void); - void kasan_unpoison_range(const void *address, size_t size); void kasan_unpoison_task_stack(struct task_struct *task); @@ -122,9 +125,6 @@ static inline void kasan_unpoison_range(const void *address, size_t size) {} static inline void kasan_unpoison_task_stack(struct task_struct *task) {} -static inline void kasan_enable_current(void) {} -static inline void kasan_disable_current(void) {} - static inline void kasan_alloc_pages(struct page *page, unsigned int order) {} static inline void kasan_free_pages(struct page *page, unsigned int order) {} diff --git a/include/linux/sched.h b/include/linux/sched.h index 51d535b69bd6f..6e3a5eeec509a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1234,7 +1234,7 @@ struct task_struct { u64 timer_slack_ns; u64 default_timer_slack_ns; -#ifdef CONFIG_KASAN +#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) unsigned int kasan_depth; #endif diff --git a/init/init_task.c b/init/init_task.c index 15f6eb93a04fa..8a992d73e6fb7 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -176,7 +176,7 @@ struct task_struct init_task .numa_group = NULL, .numa_faults = NULL, #endif -#ifdef CONFIG_KASAN +#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) .kasan_depth = 1, #endif #ifdef CONFIG_KCSAN diff --git a/mm/kasan/common.c b/mm/kasan/common.c index 663ffa71cd20a..d5f23b2f170ab 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -46,6 +46,7 @@ void kasan_set_track(struct kasan_track *track, gfp_t flags) track->stack = kasan_save_stack(flags); } +#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) void kasan_enable_current(void) { current->kasan_depth++; @@ -55,6 +56,7 @@ void kasan_disable_current(void) { current->kasan_depth--; } +#endif /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ void kasan_unpoison_range(const void *address, size_t size) { diff --git a/mm/kasan/report.c b/mm/kasan/report.c index d140d26cfb316..914ab5cfc3ea2 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -292,8 +292,10 @@ static void print_shadow_for_address(const void *addr) static bool report_enabled(void) { +#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) if (current->kasan_depth) return false; +#endif if (test_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags)) return true; return !test_and_set_bit(KASAN_BIT_REPORTED, &kasan_flags); -- GitLab From 28ab35841ce0262b41074464d9fb6709bb26348f Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:01:00 -0800 Subject: [PATCH 1040/1894] kasan, arm64: move initialization message Software tag-based KASAN mode is fully initialized with kasan_init_tags(), while the generic mode only requires kasan_init(). Move the initialization message for tag-based mode into kasan_init_tags(). Also fix pr_fmt() usage for KASAN code: generic.c doesn't need it as it doesn't use any printing functions; tag-based mode should use "kasan:" instead of KBUILD_MODNAME (which stands for file name). Link: https://lkml.kernel.org/r/29a30ea4e1750450dd1f693d25b7b6cb05913ecf.1606161801.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Catalin Marinas Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Marco Elver Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/include/asm/kasan.h | 9 +++------ arch/arm64/mm/kasan_init.c | 13 +++++-------- mm/kasan/generic.c | 2 -- mm/kasan/sw_tags.c | 4 +++- 4 files changed, 11 insertions(+), 17 deletions(-) diff --git a/arch/arm64/include/asm/kasan.h b/arch/arm64/include/asm/kasan.h index f7ea70d02cab8..0aaf9044cd6a5 100644 --- a/arch/arm64/include/asm/kasan.h +++ b/arch/arm64/include/asm/kasan.h @@ -12,14 +12,10 @@ #define arch_kasan_reset_tag(addr) __tag_reset(addr) #define arch_kasan_get_tag(addr) __tag_get(addr) -#ifdef CONFIG_KASAN -void kasan_init(void); -#else -static inline void kasan_init(void) { } -#endif - #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) +void kasan_init(void); + /* * KASAN_SHADOW_START: beginning of the kernel virtual addresses. * KASAN_SHADOW_END: KASAN_SHADOW_START + 1/N of kernel virtual addresses, @@ -43,6 +39,7 @@ void kasan_copy_shadow(pgd_t *pgdir); asmlinkage void kasan_early_init(void); #else +static inline void kasan_init(void) { } static inline void kasan_copy_shadow(pgd_t *pgdir) { } #endif diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index 5172799f831f9..e35ce04beed1e 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -278,17 +278,14 @@ static void __init kasan_init_depth(void) init_task.kasan_depth = 0; } -#else /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) */ - -static inline void __init kasan_init_shadow(void) { } - -static inline void __init kasan_init_depth(void) { } - -#endif /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ - void __init kasan_init(void) { kasan_init_shadow(); kasan_init_depth(); +#if defined(CONFIG_KASAN_GENERIC) + /* CONFIG_KASAN_SW_TAGS also requires kasan_init_tags(). */ pr_info("KernelAddressSanitizer initialized\n"); +#endif } + +#endif /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c index 662212d103b57..707ea0a4c4c6e 100644 --- a/mm/kasan/generic.c +++ b/mm/kasan/generic.c @@ -9,8 +9,6 @@ * Andrey Konovalov */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - #include #include #include diff --git a/mm/kasan/sw_tags.c b/mm/kasan/sw_tags.c index 64540109c461c..9445cf4ccdc86 100644 --- a/mm/kasan/sw_tags.c +++ b/mm/kasan/sw_tags.c @@ -6,7 +6,7 @@ * Author: Andrey Konovalov */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#define pr_fmt(fmt) "kasan: " fmt #include #include @@ -41,6 +41,8 @@ void kasan_init_tags(void) for_each_possible_cpu(cpu) per_cpu(prng_state, cpu) = (u32)get_cycles(); + + pr_info("KernelAddressSanitizer initialized\n"); } /* -- GitLab From 60a3a5fe950f4e6c02e9fc6676dc96de043ed743 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:01:03 -0800 Subject: [PATCH 1041/1894] kasan, arm64: rename kasan_init_tags and mark as __init Rename kasan_init_tags() to kasan_init_sw_tags() as the upcoming hardware tag-based KASAN mode will have its own initialization routine. Also similarly to kasan_init() mark kasan_init_tags() as __init. Link: https://lkml.kernel.org/r/71e52af72a09f4b50c8042f16101c60e50649fbb.1606161801.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Catalin Marinas Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Marco Elver Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/kernel/setup.c | 2 +- arch/arm64/mm/kasan_init.c | 2 +- include/linux/kasan.h | 4 ++-- mm/kasan/sw_tags.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index c44eb4b801634..c18aacde8bb0e 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -358,7 +358,7 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p) smp_build_mpidr_hash(); /* Init percpu seeds for random tags after cpus are set up. */ - kasan_init_tags(); + kasan_init_sw_tags(); #ifdef CONFIG_ARM64_SW_TTBR0_PAN /* diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index e35ce04beed1e..d8e66c78440ec 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -283,7 +283,7 @@ void __init kasan_init(void) kasan_init_shadow(); kasan_init_depth(); #if defined(CONFIG_KASAN_GENERIC) - /* CONFIG_KASAN_SW_TAGS also requires kasan_init_tags(). */ + /* CONFIG_KASAN_SW_TAGS also requires kasan_init_sw_tags(). */ pr_info("KernelAddressSanitizer initialized\n"); #endif } diff --git a/include/linux/kasan.h b/include/linux/kasan.h index b272960e8396b..d7042d129dc1a 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -192,7 +192,7 @@ static inline void kasan_record_aux_stack(void *ptr) {} #ifdef CONFIG_KASAN_SW_TAGS -void kasan_init_tags(void); +void __init kasan_init_sw_tags(void); void *kasan_reset_tag(const void *addr); @@ -201,7 +201,7 @@ bool kasan_report(unsigned long addr, size_t size, #else /* CONFIG_KASAN_SW_TAGS */ -static inline void kasan_init_tags(void) { } +static inline void kasan_init_sw_tags(void) { } static inline void *kasan_reset_tag(const void *addr) { diff --git a/mm/kasan/sw_tags.c b/mm/kasan/sw_tags.c index 9445cf4ccdc86..7317d5229b2b9 100644 --- a/mm/kasan/sw_tags.c +++ b/mm/kasan/sw_tags.c @@ -35,7 +35,7 @@ static DEFINE_PER_CPU(u32, prng_state); -void kasan_init_tags(void) +void __init kasan_init_sw_tags(void) { int cpu; -- GitLab From 6882464faf74666dbce86b77686d78ff4e506af3 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:01:07 -0800 Subject: [PATCH 1042/1894] kasan: rename addr_has_shadow to addr_has_metadata This is a preparatory commit for the upcoming addition of a new hardware tag-based (MTE-based) KASAN mode. Hardware tag-based KASAN won't be using shadow memory, but will reuse this function. Rename "shadow" to implementation-neutral "metadata". No functional changes. Link: https://lkml.kernel.org/r/370466fba590a4596b55ffd38adfd990f8886db4.1606161801.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Reviewed-by: Marco Elver Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/kasan.h | 2 +- mm/kasan/report.c | 6 +++--- mm/kasan/report_generic.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 488ca1ff59791..c79d30c6fcdbf 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -147,7 +147,7 @@ static inline const void *kasan_shadow_to_mem(const void *shadow_addr) << KASAN_SHADOW_SCALE_SHIFT); } -static inline bool addr_has_shadow(const void *addr) +static inline bool addr_has_metadata(const void *addr) { return (addr >= kasan_shadow_to_mem((void *)KASAN_SHADOW_START)); } diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 914ab5cfc3ea2..a265c16c0f3f7 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -361,7 +361,7 @@ static void __kasan_report(unsigned long addr, size_t size, bool is_write, untagged_addr = reset_tag(tagged_addr); info.access_addr = tagged_addr; - if (addr_has_shadow(untagged_addr)) + if (addr_has_metadata(untagged_addr)) info.first_bad_addr = find_first_bad_addr(tagged_addr, size); else info.first_bad_addr = untagged_addr; @@ -372,11 +372,11 @@ static void __kasan_report(unsigned long addr, size_t size, bool is_write, start_report(&flags); print_error_description(&info); - if (addr_has_shadow(untagged_addr)) + if (addr_has_metadata(untagged_addr)) print_tags(get_tag(tagged_addr), info.first_bad_addr); pr_err("\n"); - if (addr_has_shadow(untagged_addr)) { + if (addr_has_metadata(untagged_addr)) { print_address_description(untagged_addr, get_tag(tagged_addr)); pr_err("\n"); print_shadow_for_address(info.first_bad_addr); diff --git a/mm/kasan/report_generic.c b/mm/kasan/report_generic.c index b543a1ed6078f..16ed550850e9b 100644 --- a/mm/kasan/report_generic.c +++ b/mm/kasan/report_generic.c @@ -118,7 +118,7 @@ const char *get_bug_type(struct kasan_access_info *info) if (info->access_addr + info->access_size < info->access_addr) return "out-of-bounds"; - if (addr_has_shadow(info->access_addr)) + if (addr_has_metadata(info->access_addr)) return get_shadow_bug_type(info); return get_wild_bug_type(info); } -- GitLab From db3de8f759c80712dae456019968c19672589fdc Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:01:10 -0800 Subject: [PATCH 1043/1894] kasan: rename print_shadow_for_address to print_memory_metadata This is a preparatory commit for the upcoming addition of a new hardware tag-based (MTE-based) KASAN mode. Hardware tag-based KASAN won't be using shadow memory, but will reuse this function. Rename "shadow" to implementation-neutral "metadata". No functional changes. Link: https://lkml.kernel.org/r/dd955c5aadaee16aef451a6189d19172166a23f5.1606161801.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Reviewed-by: Marco Elver Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/report.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/kasan/report.c b/mm/kasan/report.c index a265c16c0f3f7..08825fe2ce246 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -252,7 +252,7 @@ static int shadow_pointer_offset(const void *row, const void *shadow) (shadow - row) / SHADOW_BYTES_PER_BLOCK + 1; } -static void print_shadow_for_address(const void *addr) +static void print_memory_metadata(const void *addr) { int i; const void *shadow = kasan_mem_to_shadow(addr); @@ -338,7 +338,7 @@ void kasan_report_invalid_free(void *object, unsigned long ip) pr_err("\n"); print_address_description(object, tag); pr_err("\n"); - print_shadow_for_address(object); + print_memory_metadata(object); end_report(&flags); } @@ -379,7 +379,7 @@ static void __kasan_report(unsigned long addr, size_t size, bool is_write, if (addr_has_metadata(untagged_addr)) { print_address_description(untagged_addr, get_tag(tagged_addr)); pr_err("\n"); - print_shadow_for_address(info.first_bad_addr); + print_memory_metadata(info.first_bad_addr); } else { dump_stack(); } -- GitLab From 88b865974d17059e9c9286f08efbebe569e3067b Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:01:14 -0800 Subject: [PATCH 1044/1894] kasan: rename SHADOW layout macros to META This is a preparatory commit for the upcoming addition of a new hardware tag-based (MTE-based) KASAN mode. Hardware tag-based KASAN won't be using shadow memory, but will reuse these macros. Rename "SHADOW" to implementation-neutral "META". No functional changes. Link: https://lkml.kernel.org/r/f96244ec59dc17db35173ec352c5592b14aefaf8.1606161801.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Reviewed-by: Marco Elver Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/report.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 08825fe2ce246..4148a8bde1312 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -33,11 +33,11 @@ #include "kasan.h" #include "../slab.h" -/* Shadow layout customization. */ -#define SHADOW_BYTES_PER_BLOCK 1 -#define SHADOW_BLOCKS_PER_ROW 16 -#define SHADOW_BYTES_PER_ROW (SHADOW_BLOCKS_PER_ROW * SHADOW_BYTES_PER_BLOCK) -#define SHADOW_ROWS_AROUND_ADDR 2 +/* Metadata layout customization. */ +#define META_BYTES_PER_BLOCK 1 +#define META_BLOCKS_PER_ROW 16 +#define META_BYTES_PER_ROW (META_BLOCKS_PER_ROW * META_BYTES_PER_BLOCK) +#define META_ROWS_AROUND_ADDR 2 static unsigned long kasan_flags; @@ -240,7 +240,7 @@ static void print_address_description(void *addr, u8 tag) static bool row_is_guilty(const void *row, const void *guilty) { - return (row <= guilty) && (guilty < row + SHADOW_BYTES_PER_ROW); + return (row <= guilty) && (guilty < row + META_BYTES_PER_ROW); } static int shadow_pointer_offset(const void *row, const void *shadow) @@ -249,7 +249,7 @@ static int shadow_pointer_offset(const void *row, const void *shadow) * 3 + (BITS_PER_LONG/8)*2 chars. */ return 3 + (BITS_PER_LONG/8)*2 + (shadow - row)*2 + - (shadow - row) / SHADOW_BYTES_PER_BLOCK + 1; + (shadow - row) / META_BYTES_PER_BLOCK + 1; } static void print_memory_metadata(const void *addr) @@ -259,15 +259,15 @@ static void print_memory_metadata(const void *addr) const void *shadow_row; shadow_row = (void *)round_down((unsigned long)shadow, - SHADOW_BYTES_PER_ROW) - - SHADOW_ROWS_AROUND_ADDR * SHADOW_BYTES_PER_ROW; + META_BYTES_PER_ROW) + - META_ROWS_AROUND_ADDR * META_BYTES_PER_ROW; pr_err("Memory state around the buggy address:\n"); - for (i = -SHADOW_ROWS_AROUND_ADDR; i <= SHADOW_ROWS_AROUND_ADDR; i++) { + for (i = -META_ROWS_AROUND_ADDR; i <= META_ROWS_AROUND_ADDR; i++) { const void *kaddr = kasan_shadow_to_mem(shadow_row); char buffer[4 + (BITS_PER_LONG/8)*2]; - char shadow_buf[SHADOW_BYTES_PER_ROW]; + char shadow_buf[META_BYTES_PER_ROW]; snprintf(buffer, sizeof(buffer), (i == 0) ? ">%px: " : " %px: ", kaddr); @@ -276,17 +276,17 @@ static void print_memory_metadata(const void *addr) * function, because generic functions may try to * access kasan mapping for the passed address. */ - memcpy(shadow_buf, shadow_row, SHADOW_BYTES_PER_ROW); + memcpy(shadow_buf, shadow_row, META_BYTES_PER_ROW); print_hex_dump(KERN_ERR, buffer, - DUMP_PREFIX_NONE, SHADOW_BYTES_PER_ROW, 1, - shadow_buf, SHADOW_BYTES_PER_ROW, 0); + DUMP_PREFIX_NONE, META_BYTES_PER_ROW, 1, + shadow_buf, META_BYTES_PER_ROW, 0); if (row_is_guilty(shadow_row, shadow)) pr_err("%*c\n", shadow_pointer_offset(shadow_row, shadow), '^'); - shadow_row += SHADOW_BYTES_PER_ROW; + shadow_row += META_BYTES_PER_ROW; } } -- GitLab From 96e0279df6d8f2a1394de2b41815b0065c031950 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:01:17 -0800 Subject: [PATCH 1045/1894] kasan: separate metadata_fetch_row for each mode This is a preparatory commit for the upcoming addition of a new hardware tag-based (MTE-based) KASAN mode. Rework print_memory_metadata() to make it agnostic with regard to the way metadata is stored. Allow providing a separate metadata_fetch_row() implementation for each KASAN mode. Hardware tag-based KASAN will provide its own implementation that doesn't use shadow memory. No functional changes for software modes. Link: https://lkml.kernel.org/r/5fb1ec0152bb1f521505017800387ec3e36ffe18.1606161801.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Reviewed-by: Marco Elver Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/kasan.h | 8 ++++++ mm/kasan/report.c | 56 +++++++++++++++++++-------------------- mm/kasan/report_generic.c | 5 ++++ mm/kasan/report_sw_tags.c | 5 ++++ 4 files changed, 45 insertions(+), 29 deletions(-) diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index c79d30c6fcdbf..3b349a6e799dc 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -58,6 +58,13 @@ #define KASAN_ABI_VERSION 1 #endif +/* Metadata layout customization. */ +#define META_BYTES_PER_BLOCK 1 +#define META_BLOCKS_PER_ROW 16 +#define META_BYTES_PER_ROW (META_BLOCKS_PER_ROW * META_BYTES_PER_BLOCK) +#define META_MEM_BYTES_PER_ROW (META_BYTES_PER_ROW * KASAN_GRANULE_SIZE) +#define META_ROWS_AROUND_ADDR 2 + struct kasan_access_info { const void *access_addr; const void *first_bad_addr; @@ -170,6 +177,7 @@ bool check_invalid_free(void *addr); void *find_first_bad_addr(void *addr, size_t size); const char *get_bug_type(struct kasan_access_info *info); +void metadata_fetch_row(char *buffer, void *row); #if defined(CONFIG_KASAN_GENERIC) && CONFIG_KASAN_STACK void print_address_stack_frame(const void *addr); diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 4148a8bde1312..9a6569a57ca28 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -33,12 +33,6 @@ #include "kasan.h" #include "../slab.h" -/* Metadata layout customization. */ -#define META_BYTES_PER_BLOCK 1 -#define META_BLOCKS_PER_ROW 16 -#define META_BYTES_PER_ROW (META_BLOCKS_PER_ROW * META_BYTES_PER_BLOCK) -#define META_ROWS_AROUND_ADDR 2 - static unsigned long kasan_flags; #define KASAN_BIT_REPORTED 0 @@ -238,55 +232,59 @@ static void print_address_description(void *addr, u8 tag) print_address_stack_frame(addr); } -static bool row_is_guilty(const void *row, const void *guilty) +static bool meta_row_is_guilty(const void *row, const void *addr) { - return (row <= guilty) && (guilty < row + META_BYTES_PER_ROW); + return (row <= addr) && (addr < row + META_MEM_BYTES_PER_ROW); } -static int shadow_pointer_offset(const void *row, const void *shadow) +static int meta_pointer_offset(const void *row, const void *addr) { - /* The length of ">ff00ff00ff00ff00: " is - * 3 + (BITS_PER_LONG/8)*2 chars. + /* + * Memory state around the buggy address: + * ff00ff00ff00ff00: 00 00 00 05 fe fe fe fe fe fe fe fe fe fe fe fe + * ... + * + * The length of ">ff00ff00ff00ff00: " is + * 3 + (BITS_PER_LONG / 8) * 2 chars. + * The length of each granule metadata is 2 bytes + * plus 1 byte for space. */ - return 3 + (BITS_PER_LONG/8)*2 + (shadow - row)*2 + - (shadow - row) / META_BYTES_PER_BLOCK + 1; + return 3 + (BITS_PER_LONG / 8) * 2 + + (addr - row) / KASAN_GRANULE_SIZE * 3 + 1; } static void print_memory_metadata(const void *addr) { int i; - const void *shadow = kasan_mem_to_shadow(addr); - const void *shadow_row; + void *row; - shadow_row = (void *)round_down((unsigned long)shadow, - META_BYTES_PER_ROW) - - META_ROWS_AROUND_ADDR * META_BYTES_PER_ROW; + row = (void *)round_down((unsigned long)addr, META_MEM_BYTES_PER_ROW) + - META_ROWS_AROUND_ADDR * META_MEM_BYTES_PER_ROW; pr_err("Memory state around the buggy address:\n"); for (i = -META_ROWS_AROUND_ADDR; i <= META_ROWS_AROUND_ADDR; i++) { - const void *kaddr = kasan_shadow_to_mem(shadow_row); - char buffer[4 + (BITS_PER_LONG/8)*2]; - char shadow_buf[META_BYTES_PER_ROW]; + char buffer[4 + (BITS_PER_LONG / 8) * 2]; + char metadata[META_BYTES_PER_ROW]; snprintf(buffer, sizeof(buffer), - (i == 0) ? ">%px: " : " %px: ", kaddr); + (i == 0) ? ">%px: " : " %px: ", row); + /* * We should not pass a shadow pointer to generic * function, because generic functions may try to * access kasan mapping for the passed address. */ - memcpy(shadow_buf, shadow_row, META_BYTES_PER_ROW); + metadata_fetch_row(&metadata[0], row); + print_hex_dump(KERN_ERR, buffer, DUMP_PREFIX_NONE, META_BYTES_PER_ROW, 1, - shadow_buf, META_BYTES_PER_ROW, 0); + metadata, META_BYTES_PER_ROW, 0); - if (row_is_guilty(shadow_row, shadow)) - pr_err("%*c\n", - shadow_pointer_offset(shadow_row, shadow), - '^'); + if (meta_row_is_guilty(row, addr)) + pr_err("%*c\n", meta_pointer_offset(row, addr), '^'); - shadow_row += META_BYTES_PER_ROW; + row += META_MEM_BYTES_PER_ROW; } } diff --git a/mm/kasan/report_generic.c b/mm/kasan/report_generic.c index 16ed550850e9b..8a9c889872da3 100644 --- a/mm/kasan/report_generic.c +++ b/mm/kasan/report_generic.c @@ -123,6 +123,11 @@ const char *get_bug_type(struct kasan_access_info *info) return get_wild_bug_type(info); } +void metadata_fetch_row(char *buffer, void *row) +{ + memcpy(buffer, kasan_mem_to_shadow(row), META_BYTES_PER_ROW); +} + #if CONFIG_KASAN_STACK static bool __must_check tokenize_frame_descr(const char **frame_descr, char *token, size_t max_tok_len, diff --git a/mm/kasan/report_sw_tags.c b/mm/kasan/report_sw_tags.c index c87d5a343b4ef..add2dfe6169c0 100644 --- a/mm/kasan/report_sw_tags.c +++ b/mm/kasan/report_sw_tags.c @@ -80,6 +80,11 @@ void *find_first_bad_addr(void *addr, size_t size) return p; } +void metadata_fetch_row(char *buffer, void *row) +{ + memcpy(buffer, kasan_mem_to_shadow(row), META_BYTES_PER_ROW); +} + void print_tags(u8 addr_tag, const void *addr) { u8 *shadow = (u8 *)kasan_mem_to_shadow(addr); -- GitLab From 6a63a63ff1ac2959706dba218d5e17f9ec721c0c Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:01:20 -0800 Subject: [PATCH 1046/1894] kasan: introduce CONFIG_KASAN_HW_TAGS This patch adds a configuration option for a new KASAN mode called hardware tag-based KASAN. This mode uses the memory tagging approach like the software tag-based mode, but relies on arm64 Memory Tagging Extension feature for tag management and access checking. Link: https://lkml.kernel.org/r/44906a209d3a44f9c6f5a21841e90988e365601e.1606161801.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Co-developed-by: Vincenzo Frascino Signed-off-by: Vincenzo Frascino Reviewed-by: Marco Elver Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.kasan | 61 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 44 insertions(+), 17 deletions(-) diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index c0e9e78741227..f5fa4ba126bf6 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -6,7 +6,10 @@ config HAVE_ARCH_KASAN config HAVE_ARCH_KASAN_SW_TAGS bool -config HAVE_ARCH_KASAN_VMALLOC +config HAVE_ARCH_KASAN_HW_TAGS + bool + +config HAVE_ARCH_KASAN_VMALLOC bool config CC_HAS_KASAN_GENERIC @@ -15,16 +18,19 @@ config CC_HAS_KASAN_GENERIC config CC_HAS_KASAN_SW_TAGS def_bool $(cc-option, -fsanitize=kernel-hwaddress) +# This option is only required for software KASAN modes. +# Old GCC versions don't have proper support for no_sanitize_address. +# See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89124 for details. config CC_HAS_WORKING_NOSANITIZE_ADDRESS def_bool !CC_IS_GCC || GCC_VERSION >= 80300 menuconfig KASAN bool "KASAN: runtime memory debugger" - depends on (HAVE_ARCH_KASAN && CC_HAS_KASAN_GENERIC) || \ - (HAVE_ARCH_KASAN_SW_TAGS && CC_HAS_KASAN_SW_TAGS) + depends on (((HAVE_ARCH_KASAN && CC_HAS_KASAN_GENERIC) || \ + (HAVE_ARCH_KASAN_SW_TAGS && CC_HAS_KASAN_SW_TAGS)) && \ + CC_HAS_WORKING_NOSANITIZE_ADDRESS) || \ + HAVE_ARCH_KASAN_HW_TAGS depends on (SLUB && SYSFS) || (SLAB && !DEBUG_SLAB) - depends on CC_HAS_WORKING_NOSANITIZE_ADDRESS - select CONSTRUCTORS select STACKDEPOT help Enables KASAN (KernelAddressSANitizer) - runtime memory debugger, @@ -37,18 +43,24 @@ choice prompt "KASAN mode" default KASAN_GENERIC help - KASAN has two modes: generic KASAN (similar to userspace ASan, - x86_64/arm64/xtensa, enabled with CONFIG_KASAN_GENERIC) and - software tag-based KASAN (a version based on software memory - tagging, arm64 only, similar to userspace HWASan, enabled with - CONFIG_KASAN_SW_TAGS). + KASAN has three modes: + 1. generic KASAN (similar to userspace ASan, + x86_64/arm64/xtensa, enabled with CONFIG_KASAN_GENERIC), + 2. software tag-based KASAN (arm64 only, based on software + memory tagging (similar to userspace HWASan), enabled with + CONFIG_KASAN_SW_TAGS), and + 3. hardware tag-based KASAN (arm64 only, based on hardware + memory tagging, enabled with CONFIG_KASAN_HW_TAGS). + + All KASAN modes are strictly debugging features. - Both generic and tag-based KASAN are strictly debugging features. + For better error reports enable CONFIG_STACKTRACE. config KASAN_GENERIC bool "Generic mode" depends on HAVE_ARCH_KASAN && CC_HAS_KASAN_GENERIC select SLUB_DEBUG if SLUB + select CONSTRUCTORS help Enables generic KASAN mode. @@ -61,8 +73,6 @@ config KASAN_GENERIC and introduces an overhead of ~x1.5 for the rest of the allocations. The performance slowdown is ~x3. - For better error detection enable CONFIG_STACKTRACE. - Currently CONFIG_KASAN_GENERIC doesn't work with CONFIG_DEBUG_SLAB (the resulting kernel does not boot). @@ -70,11 +80,15 @@ config KASAN_SW_TAGS bool "Software tag-based mode" depends on HAVE_ARCH_KASAN_SW_TAGS && CC_HAS_KASAN_SW_TAGS select SLUB_DEBUG if SLUB + select CONSTRUCTORS help Enables software tag-based KASAN mode. - This mode requires Top Byte Ignore support by the CPU and therefore - is only supported for arm64. This mode requires Clang. + This mode require software memory tagging support in the form of + HWASan-like compiler instrumentation. + + Currently this mode is only implemented for arm64 CPUs and relies on + Top Byte Ignore. This mode requires Clang. This mode consumes about 1/16th of available memory at kernel start and introduces an overhead of ~20% for the rest of the allocations. @@ -82,15 +96,27 @@ config KASAN_SW_TAGS casting and comparison, as it embeds tags into the top byte of each pointer. - For better error detection enable CONFIG_STACKTRACE. - Currently CONFIG_KASAN_SW_TAGS doesn't work with CONFIG_DEBUG_SLAB (the resulting kernel does not boot). +config KASAN_HW_TAGS + bool "Hardware tag-based mode" + depends on HAVE_ARCH_KASAN_HW_TAGS + depends on SLUB + help + Enables hardware tag-based KASAN mode. + + This mode requires hardware memory tagging support, and can be used + by any architecture that provides it. + + Currently this mode is only implemented for arm64 CPUs starting from + ARMv8.5 and relies on Memory Tagging Extension and Top Byte Ignore. + endchoice choice prompt "Instrumentation type" + depends on KASAN_GENERIC || KASAN_SW_TAGS default KASAN_OUTLINE config KASAN_OUTLINE @@ -114,6 +140,7 @@ endchoice config KASAN_STACK_ENABLE bool "Enable stack instrumentation (unsafe)" if CC_IS_CLANG && !COMPILE_TEST + depends on KASAN_GENERIC || KASAN_SW_TAGS help The LLVM stack address sanitizer has a know problem that causes excessive stack usage in a lot of functions, see -- GitLab From f469c032c05e0572be806149307b45d0fc9ae706 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Tue, 22 Dec 2020 12:01:24 -0800 Subject: [PATCH 1047/1894] arm64: enable armv8.5-a asm-arch option Hardware tag-based KASAN relies on Memory Tagging Extension (MTE) which is an armv8.5-a architecture extension. Enable the correct asm option when the compiler supports it in order to allow the usage of ALTERNATIVE()s with MTE instructions. Link: https://lkml.kernel.org/r/d03d1157124ea3532eaeb77507988733f5734986.1606161801.git.andreyknvl@google.com Signed-off-by: Vincenzo Frascino Signed-off-by: Andrey Konovalov Reviewed-by: Catalin Marinas Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Marco Elver Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/Kconfig | 4 ++++ arch/arm64/Makefile | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 81463eb537bbe..69f968adbcf64 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1571,6 +1571,9 @@ endmenu menu "ARMv8.5 architectural features" +config AS_HAS_ARMV8_5 + def_bool $(cc-option,-Wa$(comma)-march=armv8.5-a) + config ARM64_BTI bool "Branch Target Identification support" default y @@ -1645,6 +1648,7 @@ config ARM64_MTE bool "Memory Tagging Extension support" default y depends on ARM64_AS_HAS_MTE && ARM64_TAGGED_ADDR_ABI + depends on AS_HAS_ARMV8_5 select ARCH_USES_HIGH_VMA_FLAGS help Memory Tagging (part of the ARMv8.5 Extensions) provides diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 6a87d592bd001..05f46a60b245e 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -96,6 +96,11 @@ ifeq ($(CONFIG_AS_HAS_ARMV8_4), y) asm-arch := armv8.4-a endif +ifeq ($(CONFIG_AS_HAS_ARMV8_5), y) +# make sure to pass the newest target architecture to -march. +asm-arch := armv8.5-a +endif + ifdef asm-arch KBUILD_CFLAGS += -Wa,-march=$(asm-arch) \ -DARM64_ASM_ARCH='"$(asm-arch)"' -- GitLab From 85f49cae4dfcfae16f17418466e00370091de03d Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Tue, 22 Dec 2020 12:01:28 -0800 Subject: [PATCH 1048/1894] arm64: mte: add in-kernel MTE helpers Provide helper functions to manipulate allocation and pointer tags for kernel addresses. Low-level helper functions (mte_assign_*, written in assembly) operate tag values from the [0x0, 0xF] range. High-level helper functions (mte_get/set_*) use the [0xF0, 0xFF] range to preserve compatibility with normal kernel pointers that have 0xFF in their top byte. MTE_GRANULE_SIZE and related definitions are moved to mte-def.h header that doesn't have any dependencies and is safe to include into any low-level header. Link: https://lkml.kernel.org/r/c31bf759b4411b2d98cdd801eb928e241584fd1f.1606161801.git.andreyknvl@google.com Signed-off-by: Vincenzo Frascino Co-developed-by: Andrey Konovalov Signed-off-by: Andrey Konovalov Reviewed-by: Catalin Marinas Tested-by: Vincenzo Frascino Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Marco Elver Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/include/asm/esr.h | 1 + arch/arm64/include/asm/mte-def.h | 15 ++++++++ arch/arm64/include/asm/mte-kasan.h | 56 ++++++++++++++++++++++++++++++ arch/arm64/include/asm/mte.h | 20 +++++++---- arch/arm64/kernel/mte.c | 48 +++++++++++++++++++++++++ arch/arm64/lib/mte.S | 16 +++++++++ 6 files changed, 150 insertions(+), 6 deletions(-) create mode 100644 arch/arm64/include/asm/mte-def.h create mode 100644 arch/arm64/include/asm/mte-kasan.h diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 85a3e49f92f4a..29f97eb3dad41 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -106,6 +106,7 @@ #define ESR_ELx_FSC_TYPE (0x3C) #define ESR_ELx_FSC_LEVEL (0x03) #define ESR_ELx_FSC_EXTABT (0x10) +#define ESR_ELx_FSC_MTE (0x11) #define ESR_ELx_FSC_SERROR (0x11) #define ESR_ELx_FSC_ACCESS (0x08) #define ESR_ELx_FSC_FAULT (0x04) diff --git a/arch/arm64/include/asm/mte-def.h b/arch/arm64/include/asm/mte-def.h new file mode 100644 index 0000000000000..8401ac5840c71 --- /dev/null +++ b/arch/arm64/include/asm/mte-def.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 ARM Ltd. + */ +#ifndef __ASM_MTE_DEF_H +#define __ASM_MTE_DEF_H + +#define MTE_GRANULE_SIZE UL(16) +#define MTE_GRANULE_MASK (~(MTE_GRANULE_SIZE - 1)) +#define MTE_TAG_SHIFT 56 +#define MTE_TAG_SIZE 4 +#define MTE_TAG_MASK GENMASK((MTE_TAG_SHIFT + (MTE_TAG_SIZE - 1)), MTE_TAG_SHIFT) +#define MTE_TAG_MAX (MTE_TAG_MASK >> MTE_TAG_SHIFT) + +#endif /* __ASM_MTE_DEF_H */ diff --git a/arch/arm64/include/asm/mte-kasan.h b/arch/arm64/include/asm/mte-kasan.h new file mode 100644 index 0000000000000..3a70fb1807fdd --- /dev/null +++ b/arch/arm64/include/asm/mte-kasan.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 ARM Ltd. + */ +#ifndef __ASM_MTE_KASAN_H +#define __ASM_MTE_KASAN_H + +#include + +#ifndef __ASSEMBLY__ + +#include + +/* + * The functions below are meant to be used only for the + * KASAN_HW_TAGS interface defined in asm/memory.h. + */ +#ifdef CONFIG_ARM64_MTE + +static inline u8 mte_get_ptr_tag(void *ptr) +{ + /* Note: The format of KASAN tags is 0xF */ + u8 tag = 0xF0 | (u8)(((u64)(ptr)) >> MTE_TAG_SHIFT); + + return tag; +} + +u8 mte_get_mem_tag(void *addr); +u8 mte_get_random_tag(void); +void *mte_set_mem_tag_range(void *addr, size_t size, u8 tag); + +#else /* CONFIG_ARM64_MTE */ + +static inline u8 mte_get_ptr_tag(void *ptr) +{ + return 0xFF; +} + +static inline u8 mte_get_mem_tag(void *addr) +{ + return 0xFF; +} +static inline u8 mte_get_random_tag(void) +{ + return 0xFF; +} +static inline void *mte_set_mem_tag_range(void *addr, size_t size, u8 tag) +{ + return addr; +} + +#endif /* CONFIG_ARM64_MTE */ + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_MTE_KASAN_H */ diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h index 1c99fcadb58ca..cf1cd181dcb2a 100644 --- a/arch/arm64/include/asm/mte.h +++ b/arch/arm64/include/asm/mte.h @@ -5,14 +5,16 @@ #ifndef __ASM_MTE_H #define __ASM_MTE_H -#define MTE_GRANULE_SIZE UL(16) -#define MTE_GRANULE_MASK (~(MTE_GRANULE_SIZE - 1)) -#define MTE_TAG_SHIFT 56 -#define MTE_TAG_SIZE 4 +#include +#include + +#define __MTE_PREAMBLE ARM64_ASM_PREAMBLE ".arch_extension memtag\n" #ifndef __ASSEMBLY__ +#include #include +#include #include @@ -45,7 +47,9 @@ long get_mte_ctrl(struct task_struct *task); int mte_ptrace_copy_tags(struct task_struct *child, long request, unsigned long addr, unsigned long data); -#else +void mte_assign_mem_tag_range(void *addr, size_t size); + +#else /* CONFIG_ARM64_MTE */ /* unused if !CONFIG_ARM64_MTE, silence the compiler */ #define PG_mte_tagged 0 @@ -80,7 +84,11 @@ static inline int mte_ptrace_copy_tags(struct task_struct *child, return -EIO; } -#endif +static inline void mte_assign_mem_tag_range(void *addr, size_t size) +{ +} + +#endif /* CONFIG_ARM64_MTE */ #endif /* __ASSEMBLY__ */ #endif /* __ASM_MTE_H */ diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index ef15c8a2a49dc..78f81c459cb14 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -13,10 +13,13 @@ #include #include #include +#include #include +#include #include #include +#include #include #include @@ -72,6 +75,51 @@ int memcmp_pages(struct page *page1, struct page *page2) return ret; } +u8 mte_get_mem_tag(void *addr) +{ + if (!system_supports_mte()) + return 0xFF; + + asm(__MTE_PREAMBLE "ldg %0, [%0]" + : "+r" (addr)); + + return mte_get_ptr_tag(addr); +} + +u8 mte_get_random_tag(void) +{ + void *addr; + + if (!system_supports_mte()) + return 0xFF; + + asm(__MTE_PREAMBLE "irg %0, %0" + : "+r" (addr)); + + return mte_get_ptr_tag(addr); +} + +void *mte_set_mem_tag_range(void *addr, size_t size, u8 tag) +{ + void *ptr = addr; + + if ((!system_supports_mte()) || (size == 0)) + return addr; + + /* Make sure that size is MTE granule aligned. */ + WARN_ON(size & (MTE_GRANULE_SIZE - 1)); + + /* Make sure that the address is MTE granule aligned. */ + WARN_ON((u64)addr & (MTE_GRANULE_SIZE - 1)); + + tag = 0xF0 | tag; + ptr = (void *)__tag_set(ptr, tag); + + mte_assign_mem_tag_range(ptr, size); + + return ptr; +} + static void update_sctlr_el1_tcf0(u64 tcf0) { /* ISB required for the kernel uaccess routines */ diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S index 351537c12f36e..9e1a12e100533 100644 --- a/arch/arm64/lib/mte.S +++ b/arch/arm64/lib/mte.S @@ -149,3 +149,19 @@ SYM_FUNC_START(mte_restore_page_tags) ret SYM_FUNC_END(mte_restore_page_tags) + +/* + * Assign allocation tags for a region of memory based on the pointer tag + * x0 - source pointer + * x1 - size + * + * Note: The address must be non-NULL and MTE_GRANULE_SIZE aligned and + * size must be non-zero and MTE_GRANULE_SIZE aligned. + */ +SYM_FUNC_START(mte_assign_mem_tag_range) +1: stg x0, [x0] + add x0, x0, #MTE_GRANULE_SIZE + subs x1, x1, #MTE_GRANULE_SIZE + b.gt 1b + ret +SYM_FUNC_END(mte_assign_mem_tag_range) -- GitLab From e5b8d9218951e59df986f627ec93569a0d22149b Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Tue, 22 Dec 2020 12:01:31 -0800 Subject: [PATCH 1049/1894] arm64: mte: reset the page tag in page->flags The hardware tag-based KASAN for compatibility with the other modes stores the tag associated to a page in page->flags. Due to this the kernel faults on access when it allocates a page with an initial tag and the user changes the tags. Reset the tag associated by the kernel to a page in all the meaningful places to prevent kernel faults on access. Note: An alternative to this approach could be to modify page_to_virt(). This though could end up being racy, in fact if a CPU checks the PG_mte_tagged bit and decides that the page is not tagged but another CPU maps the same with PROT_MTE and becomes tagged the subsequent kernel access would fail. Link: https://lkml.kernel.org/r/9073d4e973747a6f78d5bdd7ebe17f290d087096.1606161801.git.andreyknvl@google.com Signed-off-by: Vincenzo Frascino Signed-off-by: Andrey Konovalov Reviewed-by: Catalin Marinas Tested-by: Vincenzo Frascino Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Marco Elver Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/kernel/hibernate.c | 5 +++++ arch/arm64/kernel/mte.c | 9 +++++++++ arch/arm64/mm/copypage.c | 9 +++++++++ arch/arm64/mm/mteswap.c | 9 +++++++++ 4 files changed, 32 insertions(+) diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 42003774d261d..9c9f47e9f7f47 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -371,6 +371,11 @@ static void swsusp_mte_restore_tags(void) unsigned long pfn = xa_state.xa_index; struct page *page = pfn_to_online_page(pfn); + /* + * It is not required to invoke page_kasan_tag_reset(page) + * at this point since the tags stored in page->flags are + * already restored. + */ mte_restore_page_tags(page_address(page), tags); mte_free_tag_storage(tags); diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index 78f81c459cb14..3a6216b0002ef 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -34,6 +34,15 @@ static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap) return; } + page_kasan_tag_reset(page); + /* + * We need smp_wmb() in between setting the flags and clearing the + * tags because if another thread reads page->flags and builds a + * tagged address out of it, there is an actual dependency to the + * memory access, but on the current thread we do not guarantee that + * the new page->flags are visible before the tags were updated. + */ + smp_wmb(); mte_clear_page_tags(page_address(page)); } diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c index 70a71f38b6a9e..b5447e53cd73e 100644 --- a/arch/arm64/mm/copypage.c +++ b/arch/arm64/mm/copypage.c @@ -23,6 +23,15 @@ void copy_highpage(struct page *to, struct page *from) if (system_supports_mte() && test_bit(PG_mte_tagged, &from->flags)) { set_bit(PG_mte_tagged, &to->flags); + page_kasan_tag_reset(to); + /* + * We need smp_wmb() in between setting the flags and clearing the + * tags because if another thread reads page->flags and builds a + * tagged address out of it, there is an actual dependency to the + * memory access, but on the current thread we do not guarantee that + * the new page->flags are visible before the tags were updated. + */ + smp_wmb(); mte_copy_page_tags(kto, kfrom); } } diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c index c52c1847079c1..7c4ef56265ee1 100644 --- a/arch/arm64/mm/mteswap.c +++ b/arch/arm64/mm/mteswap.c @@ -53,6 +53,15 @@ bool mte_restore_tags(swp_entry_t entry, struct page *page) if (!tags) return false; + page_kasan_tag_reset(page); + /* + * We need smp_wmb() in between setting the flags and clearing the + * tags because if another thread reads page->flags and builds a + * tagged address out of it, there is an actual dependency to the + * memory access, but on the current thread we do not guarantee that + * the new page->flags are visible before the tags were updated. + */ + smp_wmb(); mte_restore_page_tags(page_address(page), tags); return true; -- GitLab From 98c970da8b35e919f985818eda7c1bcbcec8f4c4 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Tue, 22 Dec 2020 12:01:35 -0800 Subject: [PATCH 1050/1894] arm64: mte: add in-kernel tag fault handler Add the implementation of the in-kernel fault handler. When a tag fault happens on a kernel address: * MTE is disabled on the current CPU, * the execution continues. When a tag fault happens on a user address: * the kernel executes do_bad_area() and panics. The tag fault handler for kernel addresses is currently empty and will be filled in by a future commit. Link: https://lkml.kernel.org/r/20201203102628.GB2224@gaia Link: https://lkml.kernel.org/r/ad31529b073e22840b7a2246172c2b67747ed7c4.1606161801.git.andreyknvl@google.com Signed-off-by: Vincenzo Frascino Co-developed-by: Andrey Konovalov Signed-off-by: Andrey Konovalov Signed-off-by: Catalin Marinas Reviewed-by: Catalin Marinas Tested-by: Vincenzo Frascino Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Marco Elver Cc: Vasily Gorbik Cc: Will Deacon [catalin.marinas@arm.com: ensure CONFIG_ARM64_PAN is enabled with MTE] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/Kconfig | 2 ++ arch/arm64/include/asm/uaccess.h | 23 ++++++++++++++++ arch/arm64/mm/fault.c | 45 ++++++++++++++++++++++++++++++++ 3 files changed, 70 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 69f968adbcf64..a33718f8b62fb 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1649,6 +1649,8 @@ config ARM64_MTE default y depends on ARM64_AS_HAS_MTE && ARM64_TAGGED_ADDR_ABI depends on AS_HAS_ARMV8_5 + # Required for tag checking in the uaccess routines + depends on ARM64_PAN select ARCH_USES_HIGH_VMA_FLAGS help Memory Tagging (part of the ARMv8.5 Extensions) provides diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index abb31aa1f8cad..6f986e09a7815 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -159,8 +159,28 @@ static inline void __uaccess_enable_hw_pan(void) CONFIG_ARM64_PAN)); } +/* + * The Tag Check Flag (TCF) mode for MTE is per EL, hence TCF0 + * affects EL0 and TCF affects EL1 irrespective of which TTBR is + * used. + * The kernel accesses TTBR0 usually with LDTR/STTR instructions + * when UAO is available, so these would act as EL0 accesses using + * TCF0. + * However futex.h code uses exclusives which would be executed as + * EL1, this can potentially cause a tag check fault even if the + * user disables TCF0. + * + * To address the problem we set the PSTATE.TCO bit in uaccess_enable() + * and reset it in uaccess_disable(). + * + * The Tag check override (TCO) bit disables temporarily the tag checking + * preventing the issue. + */ static inline void uaccess_disable_privileged(void) { + asm volatile(ALTERNATIVE("nop", SET_PSTATE_TCO(0), + ARM64_MTE, CONFIG_KASAN_HW_TAGS)); + if (uaccess_ttbr0_disable()) return; @@ -169,6 +189,9 @@ static inline void uaccess_disable_privileged(void) static inline void uaccess_enable_privileged(void) { + asm volatile(ALTERNATIVE("nop", SET_PSTATE_TCO(1), + ARM64_MTE, CONFIG_KASAN_HW_TAGS)); + if (uaccess_ttbr0_enable()) return; diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 2848952b178d6..46ac15249eca8 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -296,6 +297,44 @@ static void die_kernel_fault(const char *msg, unsigned long addr, do_exit(SIGKILL); } +static void report_tag_fault(unsigned long addr, unsigned int esr, + struct pt_regs *regs) +{ +} + +static void do_tag_recovery(unsigned long addr, unsigned int esr, + struct pt_regs *regs) +{ + static bool reported; + + if (!READ_ONCE(reported)) { + report_tag_fault(addr, esr, regs); + WRITE_ONCE(reported, true); + } + + /* + * Disable MTE Tag Checking on the local CPU for the current EL. + * It will be done lazily on the other CPUs when they will hit a + * tag fault. + */ + sysreg_clear_set(sctlr_el1, SCTLR_ELx_TCF_MASK, SCTLR_ELx_TCF_NONE); + isb(); +} + +static bool is_el1_mte_sync_tag_check_fault(unsigned int esr) +{ + unsigned int ec = ESR_ELx_EC(esr); + unsigned int fsc = esr & ESR_ELx_FSC; + + if (ec != ESR_ELx_EC_DABT_CUR) + return false; + + if (fsc == ESR_ELx_FSC_MTE) + return true; + + return false; +} + static void __do_kernel_fault(unsigned long addr, unsigned int esr, struct pt_regs *regs) { @@ -312,6 +351,12 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr, "Ignoring spurious kernel translation fault at virtual address %016lx\n", addr)) return; + if (is_el1_mte_sync_tag_check_fault(esr)) { + do_tag_recovery(addr, esr, regs); + + return; + } + if (is_el1_permission_fault(addr, esr, regs)) { if (esr & ESR_ELx_WNR) msg = "write to read-only memory"; -- GitLab From bfc62c5985274e926ee959dd3aaf999d4bdfbb1d Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Tue, 22 Dec 2020 12:01:38 -0800 Subject: [PATCH 1051/1894] arm64: kasan: allow enabling in-kernel MTE Hardware tag-based KASAN relies on Memory Tagging Extension (MTE) feature and requires it to be enabled. MTE supports This patch adds a new mte_enable_kernel() helper, that enables MTE in Synchronous mode in EL1 and is intended to be called from KASAN runtime during initialization. The Tag Checking operation causes a synchronous data abort as a consequence of a tag check fault when MTE is configured in synchronous mode. As part of this change enable match-all tag for EL1 to allow the kernel to access user pages without faulting. This is required because the kernel does not have knowledge of the tags set by the user in a page. Note: For MTE, the TCF bit field in SCTLR_EL1 affects only EL1 in a similar way as TCF0 affects EL0. MTE that is built on top of the Top Byte Ignore (TBI) feature hence we enable it as part of this patch as well. Link: https://lkml.kernel.org/r/7352b0a0899af65c2785416c8ca6bf3845b66fa1.1606161801.git.andreyknvl@google.com Signed-off-by: Vincenzo Frascino Co-developed-by: Andrey Konovalov Signed-off-by: Andrey Konovalov Reviewed-by: Catalin Marinas Tested-by: Vincenzo Frascino Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Marco Elver Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/include/asm/mte-kasan.h | 6 ++++++ arch/arm64/kernel/mte.c | 7 +++++++ arch/arm64/mm/proc.S | 23 ++++++++++++++++++++--- 3 files changed, 33 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/mte-kasan.h b/arch/arm64/include/asm/mte-kasan.h index 3a70fb1807fdd..71ff6c6786ac4 100644 --- a/arch/arm64/include/asm/mte-kasan.h +++ b/arch/arm64/include/asm/mte-kasan.h @@ -29,6 +29,8 @@ u8 mte_get_mem_tag(void *addr); u8 mte_get_random_tag(void); void *mte_set_mem_tag_range(void *addr, size_t size, u8 tag); +void mte_enable_kernel(void); + #else /* CONFIG_ARM64_MTE */ static inline u8 mte_get_ptr_tag(void *ptr) @@ -49,6 +51,10 @@ static inline void *mte_set_mem_tag_range(void *addr, size_t size, u8 tag) return addr; } +static inline void mte_enable_kernel(void) +{ +} + #endif /* CONFIG_ARM64_MTE */ #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index 3a6216b0002ef..fd88fe98bc100 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -129,6 +129,13 @@ void *mte_set_mem_tag_range(void *addr, size_t size, u8 tag) return ptr; } +void mte_enable_kernel(void) +{ + /* Enable MTE Sync Mode for EL1. */ + sysreg_clear_set(sctlr_el1, SCTLR_ELx_TCF_MASK, SCTLR_ELx_TCF_SYNC); + isb(); +} + static void update_sctlr_el1_tcf0(u64 tcf0) { /* ISB required for the kernel uaccess routines */ diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index a0831bf8a0182..37a54b57178a7 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -40,9 +40,15 @@ #define TCR_CACHE_FLAGS TCR_IRGN_WBWA | TCR_ORGN_WBWA #ifdef CONFIG_KASAN_SW_TAGS -#define TCR_KASAN_FLAGS TCR_TBI1 | TCR_TBID1 +#define TCR_KASAN_SW_FLAGS TCR_TBI1 | TCR_TBID1 #else -#define TCR_KASAN_FLAGS 0 +#define TCR_KASAN_SW_FLAGS 0 +#endif + +#ifdef CONFIG_KASAN_HW_TAGS +#define TCR_KASAN_HW_FLAGS SYS_TCR_EL1_TCMA1 | TCR_TBI1 +#else +#define TCR_KASAN_HW_FLAGS 0 #endif /* @@ -427,6 +433,10 @@ SYM_FUNC_START(__cpu_setup) */ mov_q x5, MAIR_EL1_SET #ifdef CONFIG_ARM64_MTE + mte_tcr .req x20 + + mov mte_tcr, #0 + /* * Update MAIR_EL1, GCR_EL1 and TFSR*_EL1 if MTE is supported * (ID_AA64PFR1_EL1[11:8] > 1). @@ -447,6 +457,9 @@ SYM_FUNC_START(__cpu_setup) /* clear any pending tag check faults in TFSR*_EL1 */ msr_s SYS_TFSR_EL1, xzr msr_s SYS_TFSRE0_EL1, xzr + + /* set the TCR_EL1 bits */ + mov_q mte_tcr, TCR_KASAN_HW_FLAGS 1: #endif msr mair_el1, x5 @@ -456,7 +469,11 @@ SYM_FUNC_START(__cpu_setup) */ mov_q x10, TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \ TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_ASID16 | \ - TCR_TBI0 | TCR_A1 | TCR_KASAN_FLAGS + TCR_TBI0 | TCR_A1 | TCR_KASAN_SW_FLAGS +#ifdef CONFIG_ARM64_MTE + orr x10, x10, mte_tcr + .unreq mte_tcr +#endif tcr_clear_errata_bits x10, x9, x5 #ifdef CONFIG_ARM64_VA_BITS_52 -- GitLab From 620954a67bcec6ca6b902baaaa1e3f2601b371a7 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Tue, 22 Dec 2020 12:01:42 -0800 Subject: [PATCH 1052/1894] arm64: mte: convert gcr_user into an exclude mask The gcr_user mask is a per thread mask that represents the tags that are excluded from random generation when the Memory Tagging Extension is present and an 'irg' instruction is invoked. gcr_user affects the behavior on EL0 only. Currently that mask is an include mask and it is controlled by the user via prctl() while GCR_EL1 accepts an exclude mask. Convert the include mask into an exclude one to make it easier the register setting. Note: This change will affect gcr_kernel (for EL1) introduced with a future patch. Link: https://lkml.kernel.org/r/946dd31be833b660334c4f93410acf6d6c4cf3c4.1606161801.git.andreyknvl@google.com Signed-off-by: Vincenzo Frascino Signed-off-by: Andrey Konovalov Reviewed-by: Catalin Marinas Tested-by: Vincenzo Frascino Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Marco Elver Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/include/asm/processor.h | 2 +- arch/arm64/kernel/mte.c | 29 +++++++++++++++-------------- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 724249f37af5d..ca2cd75d32863 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -152,7 +152,7 @@ struct thread_struct { #endif #ifdef CONFIG_ARM64_MTE u64 sctlr_tcf0; - u64 gcr_user_incl; + u64 gcr_user_excl; #endif }; diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index fd88fe98bc100..2e1b1d14c0db5 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -156,23 +156,22 @@ static void set_sctlr_el1_tcf0(u64 tcf0) preempt_enable(); } -static void update_gcr_el1_excl(u64 incl) +static void update_gcr_el1_excl(u64 excl) { - u64 excl = ~incl & SYS_GCR_EL1_EXCL_MASK; /* - * Note that 'incl' is an include mask (controlled by the user via - * prctl()) while GCR_EL1 accepts an exclude mask. + * Note that the mask controlled by the user via prctl() is an + * include while GCR_EL1 accepts an exclude mask. * No need for ISB since this only affects EL0 currently, implicit * with ERET. */ sysreg_clear_set_s(SYS_GCR_EL1, SYS_GCR_EL1_EXCL_MASK, excl); } -static void set_gcr_el1_excl(u64 incl) +static void set_gcr_el1_excl(u64 excl) { - current->thread.gcr_user_incl = incl; - update_gcr_el1_excl(incl); + current->thread.gcr_user_excl = excl; + update_gcr_el1_excl(excl); } void flush_mte_state(void) @@ -187,7 +186,7 @@ void flush_mte_state(void) /* disable tag checking */ set_sctlr_el1_tcf0(SCTLR_EL1_TCF0_NONE); /* reset tag generation mask */ - set_gcr_el1_excl(0); + set_gcr_el1_excl(SYS_GCR_EL1_EXCL_MASK); } void mte_thread_switch(struct task_struct *next) @@ -198,7 +197,7 @@ void mte_thread_switch(struct task_struct *next) /* avoid expensive SCTLR_EL1 accesses if no change */ if (current->thread.sctlr_tcf0 != next->thread.sctlr_tcf0) update_sctlr_el1_tcf0(next->thread.sctlr_tcf0); - update_gcr_el1_excl(next->thread.gcr_user_incl); + update_gcr_el1_excl(next->thread.gcr_user_excl); } void mte_suspend_exit(void) @@ -206,13 +205,14 @@ void mte_suspend_exit(void) if (!system_supports_mte()) return; - update_gcr_el1_excl(current->thread.gcr_user_incl); + update_gcr_el1_excl(current->thread.gcr_user_excl); } long set_mte_ctrl(struct task_struct *task, unsigned long arg) { u64 tcf0; - u64 gcr_incl = (arg & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT; + u64 gcr_excl = ~((arg & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT) & + SYS_GCR_EL1_EXCL_MASK; if (!system_supports_mte()) return 0; @@ -233,10 +233,10 @@ long set_mte_ctrl(struct task_struct *task, unsigned long arg) if (task != current) { task->thread.sctlr_tcf0 = tcf0; - task->thread.gcr_user_incl = gcr_incl; + task->thread.gcr_user_excl = gcr_excl; } else { set_sctlr_el1_tcf0(tcf0); - set_gcr_el1_excl(gcr_incl); + set_gcr_el1_excl(gcr_excl); } return 0; @@ -245,11 +245,12 @@ long set_mte_ctrl(struct task_struct *task, unsigned long arg) long get_mte_ctrl(struct task_struct *task) { unsigned long ret; + u64 incl = ~task->thread.gcr_user_excl & SYS_GCR_EL1_EXCL_MASK; if (!system_supports_mte()) return 0; - ret = task->thread.gcr_user_incl << PR_MTE_TAG_SHIFT; + ret = incl << PR_MTE_TAG_SHIFT; switch (task->thread.sctlr_tcf0) { case SCTLR_EL1_TCF0_NONE: -- GitLab From bad1e1c663e0a72f9cf7b230a00d821678f80455 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Tue, 22 Dec 2020 12:01:45 -0800 Subject: [PATCH 1053/1894] arm64: mte: switch GCR_EL1 in kernel entry and exit When MTE is present, the GCR_EL1 register contains the tags mask that allows to exclude tags from the random generation via the IRG instruction. With the introduction of the new Tag-Based KASAN API that provides a mechanism to reserve tags for special reasons, the MTE implementation has to make sure that the GCR_EL1 setting for the kernel does not affect the userspace processes and viceversa. Save and restore the kernel/user mask in GCR_EL1 in kernel entry and exit. Link: https://lkml.kernel.org/r/578b03294708cc7258fad0dc9c2a2e809e5a8214.1606161801.git.andreyknvl@google.com Signed-off-by: Vincenzo Frascino Co-developed-by: Andrey Konovalov Signed-off-by: Andrey Konovalov Reviewed-by: Catalin Marinas Tested-by: Vincenzo Frascino Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Marco Elver Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/include/asm/mte-def.h | 1 - arch/arm64/include/asm/mte-kasan.h | 5 ++++ arch/arm64/include/asm/mte.h | 2 ++ arch/arm64/kernel/asm-offsets.c | 3 +++ arch/arm64/kernel/entry.S | 41 ++++++++++++++++++++++++++++++ arch/arm64/kernel/mte.c | 31 +++++++++++++++++++--- 6 files changed, 79 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/mte-def.h b/arch/arm64/include/asm/mte-def.h index 8401ac5840c71..2d73a1612f099 100644 --- a/arch/arm64/include/asm/mte-def.h +++ b/arch/arm64/include/asm/mte-def.h @@ -10,6 +10,5 @@ #define MTE_TAG_SHIFT 56 #define MTE_TAG_SIZE 4 #define MTE_TAG_MASK GENMASK((MTE_TAG_SHIFT + (MTE_TAG_SIZE - 1)), MTE_TAG_SHIFT) -#define MTE_TAG_MAX (MTE_TAG_MASK >> MTE_TAG_SHIFT) #endif /* __ASM_MTE_DEF_H */ diff --git a/arch/arm64/include/asm/mte-kasan.h b/arch/arm64/include/asm/mte-kasan.h index 71ff6c6786ac4..26349a4b5e2ea 100644 --- a/arch/arm64/include/asm/mte-kasan.h +++ b/arch/arm64/include/asm/mte-kasan.h @@ -30,6 +30,7 @@ u8 mte_get_random_tag(void); void *mte_set_mem_tag_range(void *addr, size_t size, u8 tag); void mte_enable_kernel(void); +void mte_init_tags(u64 max_tag); #else /* CONFIG_ARM64_MTE */ @@ -55,6 +56,10 @@ static inline void mte_enable_kernel(void) { } +static inline void mte_init_tags(u64 max_tag) +{ +} + #endif /* CONFIG_ARM64_MTE */ #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h index cf1cd181dcb2a..d02aff9f493d0 100644 --- a/arch/arm64/include/asm/mte.h +++ b/arch/arm64/include/asm/mte.h @@ -18,6 +18,8 @@ #include +extern u64 gcr_kernel_excl; + void mte_clear_page_tags(void *addr); unsigned long mte_copy_tags_from_user(void *to, const void __user *from, unsigned long n); diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 5e82488f1b828..f42fd9e339815 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -46,6 +46,9 @@ int main(void) #ifdef CONFIG_ARM64_PTR_AUTH DEFINE(THREAD_KEYS_USER, offsetof(struct task_struct, thread.keys_user)); DEFINE(THREAD_KEYS_KERNEL, offsetof(struct task_struct, thread.keys_kernel)); +#endif +#ifdef CONFIG_ARM64_MTE + DEFINE(THREAD_GCR_EL1_USER, offsetof(struct task_struct, thread.gcr_user_excl)); #endif BLANK(); DEFINE(S_X0, offsetof(struct pt_regs, regs[0])); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 51c7621560996..2a93fa5f4e49d 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -173,6 +173,43 @@ alternative_else_nop_endif #endif .endm + .macro mte_set_gcr, tmp, tmp2 +#ifdef CONFIG_ARM64_MTE + /* + * Calculate and set the exclude mask preserving + * the RRND (bit[16]) setting. + */ + mrs_s \tmp2, SYS_GCR_EL1 + bfi \tmp2, \tmp, #0, #16 + msr_s SYS_GCR_EL1, \tmp2 + isb +#endif + .endm + + .macro mte_set_kernel_gcr, tmp, tmp2 +#ifdef CONFIG_KASAN_HW_TAGS +alternative_if_not ARM64_MTE + b 1f +alternative_else_nop_endif + ldr_l \tmp, gcr_kernel_excl + + mte_set_gcr \tmp, \tmp2 +1: +#endif + .endm + + .macro mte_set_user_gcr, tsk, tmp, tmp2 +#ifdef CONFIG_ARM64_MTE +alternative_if_not ARM64_MTE + b 1f +alternative_else_nop_endif + ldr \tmp, [\tsk, #THREAD_GCR_EL1_USER] + + mte_set_gcr \tmp, \tmp2 +1: +#endif + .endm + .macro kernel_entry, el, regsize = 64 .if \regsize == 32 mov w0, w0 // zero upper 32 bits of x0 @@ -212,6 +249,8 @@ alternative_else_nop_endif ptrauth_keys_install_kernel tsk, x20, x22, x23 + mte_set_kernel_gcr x22, x23 + scs_load tsk, x20 .else add x21, sp, #S_FRAME_SIZE @@ -315,6 +354,8 @@ alternative_else_nop_endif /* No kernel C function calls after this as user keys are set. */ ptrauth_keys_install_user tsk, x0, x1, x2 + mte_set_user_gcr tsk, x0, x1 + apply_ssbd 0, x0, x1 .endif diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index 2e1b1d14c0db5..dc9ada64feed8 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -23,6 +23,8 @@ #include #include +u64 gcr_kernel_excl __ro_after_init; + static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap) { pte_t old_pte = READ_ONCE(*ptep); @@ -129,6 +131,26 @@ void *mte_set_mem_tag_range(void *addr, size_t size, u8 tag) return ptr; } +void mte_init_tags(u64 max_tag) +{ + static bool gcr_kernel_excl_initialized; + + if (!gcr_kernel_excl_initialized) { + /* + * The format of the tags in KASAN is 0xFF and in MTE is 0xF. + * This conversion extracts an MTE tag from a KASAN tag. + */ + u64 incl = GENMASK(FIELD_GET(MTE_TAG_MASK >> MTE_TAG_SHIFT, + max_tag), 0); + + gcr_kernel_excl = ~incl & SYS_GCR_EL1_EXCL_MASK; + gcr_kernel_excl_initialized = true; + } + + /* Enable the kernel exclude mask for random tags generation. */ + write_sysreg_s(SYS_GCR_EL1_RRND | gcr_kernel_excl, SYS_GCR_EL1); +} + void mte_enable_kernel(void) { /* Enable MTE Sync Mode for EL1. */ @@ -171,7 +193,11 @@ static void update_gcr_el1_excl(u64 excl) static void set_gcr_el1_excl(u64 excl) { current->thread.gcr_user_excl = excl; - update_gcr_el1_excl(excl); + + /* + * SYS_GCR_EL1 will be set to current->thread.gcr_user_excl value + * by mte_set_user_gcr() in kernel_exit, + */ } void flush_mte_state(void) @@ -197,7 +223,6 @@ void mte_thread_switch(struct task_struct *next) /* avoid expensive SCTLR_EL1 accesses if no change */ if (current->thread.sctlr_tcf0 != next->thread.sctlr_tcf0) update_sctlr_el1_tcf0(next->thread.sctlr_tcf0); - update_gcr_el1_excl(next->thread.gcr_user_excl); } void mte_suspend_exit(void) @@ -205,7 +230,7 @@ void mte_suspend_exit(void) if (!system_supports_mte()) return; - update_gcr_el1_excl(current->thread.gcr_user_excl); + update_gcr_el1_excl(gcr_kernel_excl); } long set_mte_ctrl(struct task_struct *task, unsigned long arg) -- GitLab From c746170d6a48b59d1233b375905f7faef6ce80bc Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Tue, 22 Dec 2020 12:01:49 -0800 Subject: [PATCH 1054/1894] kasan, mm: untag page address in free_reserved_area free_reserved_area() memsets the pages belonging to a given memory area. As that memory hasn't been allocated via page_alloc, the KASAN tags that those pages have are 0x00. As the result the memset might result in a tag mismatch. Untag the address to avoid spurious faults. Link: https://lkml.kernel.org/r/ebef6425f4468d063e2f09c1b62ccbb2236b71d3.1606161801.git.andreyknvl@google.com Cc: Andrew Morton Signed-off-by: Vincenzo Frascino Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Marco Elver Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3beeb8d722f37..1887852fb934f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -7671,6 +7671,11 @@ unsigned long free_reserved_area(void *start, void *end, int poison, const char * alias for the memset(). */ direct_map_addr = page_address(page); + /* + * Perform a kasan-unchecked memset() since this memory + * has not been initialized. + */ + direct_map_addr = kasan_reset_tag(direct_map_addr); if ((unsigned int)poison <= 0xFF) memset(direct_map_addr, poison, PAGE_SIZE); -- GitLab From dc09b29fd0718300fad79d327d275b6ffb6d3315 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:01:52 -0800 Subject: [PATCH 1055/1894] arm64: kasan: align allocations for HW_TAGS Hardware tag-based KASAN uses the memory tagging approach, which requires all allocations to be aligned to the memory granule size. Align the allocations to MTE_GRANULE_SIZE via ARCH_SLAB_MINALIGN when CONFIG_KASAN_HW_TAGS is enabled. Link: https://lkml.kernel.org/r/fe64131606b1c2aabfd34ae99554c0d9df18eb19.1606161801.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Reviewed-by: Catalin Marinas Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Marco Elver Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/include/asm/cache.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index 63d43b5f82f67..77cbbe3625f21 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -6,6 +6,7 @@ #define __ASM_CACHE_H #include +#include #define CTR_L1IP_SHIFT 14 #define CTR_L1IP_MASK 3 @@ -51,6 +52,8 @@ #ifdef CONFIG_KASAN_SW_TAGS #define ARCH_SLAB_MINALIGN (1ULL << KASAN_SHADOW_SCALE_SHIFT) +#elif defined(CONFIG_KASAN_HW_TAGS) +#define ARCH_SLAB_MINALIGN MTE_GRANULE_SIZE #endif #ifndef __ASSEMBLY__ -- GitLab From ccbe2aaba1ed37441d8206a8c95b6199cbee2823 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:01:56 -0800 Subject: [PATCH 1056/1894] arm64: kasan: add arch layer for memory tagging helpers This patch add a set of arch_*() memory tagging helpers currently only defined for arm64 when hardware tag-based KASAN is enabled. These helpers will be used by KASAN runtime to implement the hardware tag-based mode. The arch-level indirection level is introduced to simplify adding hardware tag-based KASAN support for other architectures in the future by defining the appropriate arch_*() macros. Link: https://lkml.kernel.org/r/fc9e5bb71201c03131a2fc00a74125723568dda9.1606161801.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Co-developed-by: Vincenzo Frascino Signed-off-by: Vincenzo Frascino Reviewed-by: Catalin Marinas Tested-by: Vincenzo Frascino Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Marco Elver Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/include/asm/memory.h | 9 +++++++++ mm/kasan/kasan.h | 26 ++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 556cb2d62b5bb..3bc08e6cf82e7 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -230,6 +230,15 @@ static inline const void *__tag_set(const void *addr, u8 tag) return (const void *)(__addr | __tag_shifted(tag)); } +#ifdef CONFIG_KASAN_HW_TAGS +#define arch_enable_tagging() mte_enable_kernel() +#define arch_init_tags(max_tag) mte_init_tags(max_tag) +#define arch_get_random_tag() mte_get_random_tag() +#define arch_get_mem_tag(addr) mte_get_mem_tag(addr) +#define arch_set_mem_tag_range(addr, size, tag) \ + mte_set_mem_tag_range((addr), (size), (tag)) +#endif /* CONFIG_KASAN_HW_TAGS */ + /* * Physical vs virtual RAM address space conversion. These are * private definitions which should NOT be used outside memory.h diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 3b349a6e799dc..bc4f281561575 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -243,6 +243,32 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag) #define reset_tag(addr) ((void *)arch_kasan_reset_tag(addr)) #define get_tag(addr) arch_kasan_get_tag(addr) +#ifdef CONFIG_KASAN_HW_TAGS + +#ifndef arch_enable_tagging +#define arch_enable_tagging() +#endif +#ifndef arch_init_tags +#define arch_init_tags(max_tag) +#endif +#ifndef arch_get_random_tag +#define arch_get_random_tag() (0xFF) +#endif +#ifndef arch_get_mem_tag +#define arch_get_mem_tag(addr) (0xFF) +#endif +#ifndef arch_set_mem_tag_range +#define arch_set_mem_tag_range(addr, size, tag) ((void *)(addr)) +#endif + +#define hw_enable_tagging() arch_enable_tagging() +#define hw_init_tags(max_tag) arch_init_tags(max_tag) +#define hw_get_random_tag() arch_get_random_tag() +#define hw_get_mem_tag(addr) arch_get_mem_tag(addr) +#define hw_set_mem_tag_range(addr, size, tag) arch_set_mem_tag_range((addr), (size), (tag)) + +#endif /* CONFIG_KASAN_HW_TAGS */ + /* * Exported functions for interfaces called from assembly or from generated * code. Declarations here to avoid warning about missing declarations. -- GitLab From 6c6a04fe367f58f066dd5da2c86087b4bd74365e Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:01:59 -0800 Subject: [PATCH 1057/1894] kasan: define KASAN_GRANULE_SIZE for HW_TAGS Hardware tag-based KASAN has granules of MTE_GRANULE_SIZE. Define KASAN_GRANULE_SIZE to MTE_GRANULE_SIZE for CONFIG_KASAN_HW_TAGS. Link: https://lkml.kernel.org/r/3d15794b3d1b27447fd7fdf862c073192ba657bd.1606161801.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Reviewed-by: Marco Elver Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/kasan.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index bc4f281561575..92cb2c16e3140 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -5,7 +5,13 @@ #include #include +#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) #define KASAN_GRANULE_SIZE (1UL << KASAN_SHADOW_SCALE_SHIFT) +#else +#include +#define KASAN_GRANULE_SIZE MTE_GRANULE_SIZE +#endif + #define KASAN_GRANULE_MASK (KASAN_GRANULE_SIZE - 1) #define KASAN_MEMORY_PER_SHADOW_PAGE (KASAN_GRANULE_SIZE << PAGE_SHIFT) -- GitLab From 8a494023b80e29bb3638be18a6710a1c884ee68e Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:02:03 -0800 Subject: [PATCH 1058/1894] kasan, x86, s390: update undef CONFIG_KASAN With the intoduction of hardware tag-based KASAN some kernel checks of this kind: ifdef CONFIG_KASAN will be updated to: if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) x86 and s390 use a trick to #undef CONFIG_KASAN for some of the code that isn't linked with KASAN runtime and shouldn't have any KASAN annotations. Also #undef CONFIG_KASAN_GENERIC with CONFIG_KASAN. Link: https://lkml.kernel.org/r/9d84bfaaf8fabe0fc89f913c9e420a30bd31a260.1606161801.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Reviewed-by: Marco Elver Acked-by: Vasily Gorbik Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/boot/string.c | 1 + arch/x86/boot/compressed/misc.h | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/s390/boot/string.c b/arch/s390/boot/string.c index b11e8108773a5..faccb33b462cf 100644 --- a/arch/s390/boot/string.c +++ b/arch/s390/boot/string.c @@ -3,6 +3,7 @@ #include #include #undef CONFIG_KASAN +#undef CONFIG_KASAN_GENERIC #include "../lib/string.c" int strncmp(const char *cs, const char *ct, size_t count) diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index d9a631c5973c7..901ea5ebec22a 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -12,6 +12,7 @@ #undef CONFIG_PARAVIRT_XXL #undef CONFIG_PARAVIRT_SPINLOCKS #undef CONFIG_KASAN +#undef CONFIG_KASAN_GENERIC /* cpu_feature_enabled() cannot be used this early */ #define USE_EARLY_PGTABLE_L5 -- GitLab From 0fea6e9af889f1a4e072f5de999e07fe6859fc88 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:02:06 -0800 Subject: [PATCH 1059/1894] kasan, arm64: expand CONFIG_KASAN checks Some #ifdef CONFIG_KASAN checks are only relevant for software KASAN modes (either related to shadow memory or compiler instrumentation). Expand those into CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS. Link: https://lkml.kernel.org/r/e6971e432dbd72bb897ff14134ebb7e169bdcf0c.1606161801.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Reviewed-by: Catalin Marinas Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Marco Elver Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/Kconfig | 2 +- arch/arm64/Makefile | 2 +- arch/arm64/include/asm/assembler.h | 2 +- arch/arm64/include/asm/memory.h | 2 +- arch/arm64/include/asm/string.h | 5 +++-- arch/arm64/kernel/head.S | 2 +- arch/arm64/kernel/image-vars.h | 2 +- arch/arm64/kernel/kaslr.c | 3 ++- arch/arm64/kernel/module.c | 6 ++++-- arch/arm64/mm/ptdump.c | 6 +++--- include/linux/kasan-checks.h | 2 +- include/linux/kasan.h | 7 ++++--- include/linux/moduleloader.h | 3 ++- include/linux/string.h | 2 +- mm/ptdump.c | 13 ++++++++----- scripts/Makefile.lib | 2 ++ 16 files changed, 36 insertions(+), 25 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index a33718f8b62fb..9386b108b132f 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -334,7 +334,7 @@ config BROKEN_GAS_INST config KASAN_SHADOW_OFFSET hex - depends on KASAN + depends on KASAN_GENERIC || KASAN_SW_TAGS default 0xdfff800000000000 if (ARM64_VA_BITS_48 || ARM64_VA_BITS_52) && !KASAN_SW_TAGS default 0xdfffc00000000000 if ARM64_VA_BITS_47 && !KASAN_SW_TAGS default 0xdffffe0000000000 if ARM64_VA_BITS_42 && !KASAN_SW_TAGS diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 05f46a60b245e..6be9b37502503 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -137,7 +137,7 @@ head-y := arch/arm64/kernel/head.o ifeq ($(CONFIG_KASAN_SW_TAGS), y) KASAN_SHADOW_SCALE_SHIFT := 4 -else +else ifeq ($(CONFIG_KASAN_GENERIC), y) KASAN_SHADOW_SCALE_SHIFT := 3 endif diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index ddbe6bf00e336..bf125c5911168 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -473,7 +473,7 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU #define NOKPROBE(x) #endif -#ifdef CONFIG_KASAN +#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) #define EXPORT_SYMBOL_NOKASAN(name) #else #define EXPORT_SYMBOL_NOKASAN(name) EXPORT_SYMBOL(name) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 3bc08e6cf82e7..cd671fb6707c5 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -72,7 +72,7 @@ * address space for the shadow region respectively. They can bloat the stack * significantly, so double the (minimum) stack size when they are in use. */ -#ifdef CONFIG_KASAN +#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) #define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL) #define KASAN_SHADOW_END ((UL(1) << (64 - KASAN_SHADOW_SCALE_SHIFT)) \ + KASAN_SHADOW_OFFSET) diff --git a/arch/arm64/include/asm/string.h b/arch/arm64/include/asm/string.h index b31e8e87a0db9..3a3264ff47b97 100644 --- a/arch/arm64/include/asm/string.h +++ b/arch/arm64/include/asm/string.h @@ -5,7 +5,7 @@ #ifndef __ASM_STRING_H #define __ASM_STRING_H -#ifndef CONFIG_KASAN +#if !(defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) #define __HAVE_ARCH_STRRCHR extern char *strrchr(const char *, int c); @@ -48,7 +48,8 @@ extern void *__memset(void *, int, __kernel_size_t); void memcpy_flushcache(void *dst, const void *src, size_t cnt); #endif -#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) +#if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \ + !defined(__SANITIZE_ADDRESS__) /* * For files that are not instrumented (e.g. mm/slub.c) we diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 42b23ce679dc7..a0dc987724eda 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -433,7 +433,7 @@ SYM_FUNC_START_LOCAL(__primary_switched) bl __pi_memset dsb ishst // Make zero page visible to PTW -#ifdef CONFIG_KASAN +#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) bl kasan_early_init #endif #ifdef CONFIG_RANDOMIZE_BASE diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 39289d75118d7..f676243abac62 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -37,7 +37,7 @@ __efistub_strncmp = __pi_strncmp; __efistub_strrchr = __pi_strrchr; __efistub___clean_dcache_area_poc = __pi___clean_dcache_area_poc; -#ifdef CONFIG_KASAN +#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) __efistub___memcpy = __pi_memcpy; __efistub___memmove = __pi_memmove; __efistub___memset = __pi_memset; diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index 0921aa1520b07..1c74c45b94942 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -161,7 +161,8 @@ u64 __init kaslr_early_init(u64 dt_phys) /* use the top 16 bits to randomize the linear region */ memstart_offset_seed = seed >> 48; - if (IS_ENABLED(CONFIG_KASAN)) + if (IS_ENABLED(CONFIG_KASAN_GENERIC) || + IS_ENABLED(CONFIG_KASAN_SW_TAGS)) /* * KASAN does not expect the module region to intersect the * vmalloc region, since shadow memory is allocated for each diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 2a1ad95d9b2cc..fe21e0f06492e 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -30,7 +30,8 @@ void *module_alloc(unsigned long size) if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS)) gfp_mask |= __GFP_NOWARN; - if (IS_ENABLED(CONFIG_KASAN)) + if (IS_ENABLED(CONFIG_KASAN_GENERIC) || + IS_ENABLED(CONFIG_KASAN_SW_TAGS)) /* don't exceed the static module region - see below */ module_alloc_end = MODULES_END; @@ -39,7 +40,8 @@ void *module_alloc(unsigned long size) NUMA_NO_NODE, __builtin_return_address(0)); if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && - !IS_ENABLED(CONFIG_KASAN)) + !IS_ENABLED(CONFIG_KASAN_GENERIC) && + !IS_ENABLED(CONFIG_KASAN_SW_TAGS)) /* * KASAN can only deal with module allocations being served * from the reserved module region, since the remainder of diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c index 807dc634bbd24..04137a8f3d2de 100644 --- a/arch/arm64/mm/ptdump.c +++ b/arch/arm64/mm/ptdump.c @@ -29,7 +29,7 @@ enum address_markers_idx { PAGE_OFFSET_NR = 0, PAGE_END_NR, -#ifdef CONFIG_KASAN +#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) KASAN_START_NR, #endif }; @@ -37,7 +37,7 @@ enum address_markers_idx { static struct addr_marker address_markers[] = { { PAGE_OFFSET, "Linear Mapping start" }, { 0 /* PAGE_END */, "Linear Mapping end" }, -#ifdef CONFIG_KASAN +#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) { 0 /* KASAN_SHADOW_START */, "Kasan shadow start" }, { KASAN_SHADOW_END, "Kasan shadow end" }, #endif @@ -383,7 +383,7 @@ void ptdump_check_wx(void) static int ptdump_init(void) { address_markers[PAGE_END_NR].start_address = PAGE_END; -#ifdef CONFIG_KASAN +#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) address_markers[KASAN_START_NR].start_address = KASAN_SHADOW_START; #endif ptdump_initialize(); diff --git a/include/linux/kasan-checks.h b/include/linux/kasan-checks.h index ac6aba632f2dd..ca5e89fb10d3d 100644 --- a/include/linux/kasan-checks.h +++ b/include/linux/kasan-checks.h @@ -9,7 +9,7 @@ * even in compilation units that selectively disable KASAN, but must use KASAN * to validate access to an address. Never use these in header files! */ -#ifdef CONFIG_KASAN +#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) bool __kasan_check_read(const volatile void *p, unsigned int size); bool __kasan_check_write(const volatile void *p, unsigned int size); #else diff --git a/include/linux/kasan.h b/include/linux/kasan.h index d7042d129dc1a..b1381ee6922a9 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -238,7 +238,8 @@ static inline void kasan_release_vmalloc(unsigned long start, #endif /* CONFIG_KASAN_VMALLOC */ -#if defined(CONFIG_KASAN) && !defined(CONFIG_KASAN_VMALLOC) +#if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \ + !defined(CONFIG_KASAN_VMALLOC) /* * These functions provide a special case to support backing module @@ -248,12 +249,12 @@ static inline void kasan_release_vmalloc(unsigned long start, int kasan_module_alloc(void *addr, size_t size); void kasan_free_shadow(const struct vm_struct *vm); -#else /* CONFIG_KASAN && !CONFIG_KASAN_VMALLOC */ +#else /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */ static inline int kasan_module_alloc(void *addr, size_t size) { return 0; } static inline void kasan_free_shadow(const struct vm_struct *vm) {} -#endif /* CONFIG_KASAN && !CONFIG_KASAN_VMALLOC */ +#endif /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */ #ifdef CONFIG_KASAN_INLINE void kasan_non_canonical_hook(unsigned long addr); diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h index 4fa67a8b22652..9e09d11ffe5b3 100644 --- a/include/linux/moduleloader.h +++ b/include/linux/moduleloader.h @@ -96,7 +96,8 @@ void module_arch_cleanup(struct module *mod); /* Any cleanup before freeing mod->module_init */ void module_arch_freeing_init(struct module *mod); -#if defined(CONFIG_KASAN) && !defined(CONFIG_KASAN_VMALLOC) +#if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \ + !defined(CONFIG_KASAN_VMALLOC) #include #define MODULE_ALIGN (PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT) #else diff --git a/include/linux/string.h b/include/linux/string.h index 1cd63a8a23ab9..4fcfb56abcf5f 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -267,7 +267,7 @@ void __write_overflow(void) __compiletime_error("detected write beyond size of o #if !defined(__NO_FORTIFY) && defined(__OPTIMIZE__) && defined(CONFIG_FORTIFY_SOURCE) -#ifdef CONFIG_KASAN +#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) extern void *__underlying_memchr(const void *p, int c, __kernel_size_t size) __RENAME(memchr); extern int __underlying_memcmp(const void *p, const void *q, __kernel_size_t size) __RENAME(memcmp); extern void *__underlying_memcpy(void *p, const void *q, __kernel_size_t size) __RENAME(memcpy); diff --git a/mm/ptdump.c b/mm/ptdump.c index ba88ec43ff218..4354c1422d57c 100644 --- a/mm/ptdump.c +++ b/mm/ptdump.c @@ -4,7 +4,7 @@ #include #include -#ifdef CONFIG_KASAN +#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) /* * This is an optimization for KASAN=y case. Since all kasan page tables * eventually point to the kasan_early_shadow_page we could call note_page() @@ -31,7 +31,8 @@ static int ptdump_pgd_entry(pgd_t *pgd, unsigned long addr, struct ptdump_state *st = walk->private; pgd_t val = READ_ONCE(*pgd); -#if CONFIG_PGTABLE_LEVELS > 4 && defined(CONFIG_KASAN) +#if CONFIG_PGTABLE_LEVELS > 4 && \ + (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) if (pgd_page(val) == virt_to_page(lm_alias(kasan_early_shadow_p4d))) return note_kasan_page_table(walk, addr); #endif @@ -51,7 +52,8 @@ static int ptdump_p4d_entry(p4d_t *p4d, unsigned long addr, struct ptdump_state *st = walk->private; p4d_t val = READ_ONCE(*p4d); -#if CONFIG_PGTABLE_LEVELS > 3 && defined(CONFIG_KASAN) +#if CONFIG_PGTABLE_LEVELS > 3 && \ + (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) if (p4d_page(val) == virt_to_page(lm_alias(kasan_early_shadow_pud))) return note_kasan_page_table(walk, addr); #endif @@ -71,7 +73,8 @@ static int ptdump_pud_entry(pud_t *pud, unsigned long addr, struct ptdump_state *st = walk->private; pud_t val = READ_ONCE(*pud); -#if CONFIG_PGTABLE_LEVELS > 2 && defined(CONFIG_KASAN) +#if CONFIG_PGTABLE_LEVELS > 2 && \ + (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) if (pud_page(val) == virt_to_page(lm_alias(kasan_early_shadow_pmd))) return note_kasan_page_table(walk, addr); #endif @@ -91,7 +94,7 @@ static int ptdump_pmd_entry(pmd_t *pmd, unsigned long addr, struct ptdump_state *st = walk->private; pmd_t val = READ_ONCE(*pmd); -#if defined(CONFIG_KASAN) +#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) if (pmd_page(val) == virt_to_page(lm_alias(kasan_early_shadow_pte))) return note_kasan_page_table(walk, addr); #endif diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 94133708889d7..213677a5ed33e 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -148,10 +148,12 @@ endif # we don't want to check (depends on variables KASAN_SANITIZE_obj.o, KASAN_SANITIZE) # ifeq ($(CONFIG_KASAN),y) +ifneq ($(CONFIG_KASAN_HW_TAGS),y) _c_flags += $(if $(patsubst n%,, \ $(KASAN_SANITIZE_$(basetarget).o)$(KASAN_SANITIZE)y), \ $(CFLAGS_KASAN), $(CFLAGS_KASAN_NOSANITIZE)) endif +endif ifeq ($(CONFIG_UBSAN),y) _c_flags += $(if $(patsubst n%,, \ -- GitLab From 2e903b91479782b7dedd869603423d77e079d3de Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:02:10 -0800 Subject: [PATCH 1060/1894] kasan, arm64: implement HW_TAGS runtime Provide implementation of KASAN functions required for the hardware tag-based mode. Those include core functions for memory and pointer tagging (tags_hw.c) and bug reporting (report_tags_hw.c). Also adapt common KASAN code to support the new mode. Link: https://lkml.kernel.org/r/cfd0fbede579a6b66755c98c88c108e54f9c56bf.1606161801.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Acked-by: Catalin Marinas Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Marco Elver Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/include/asm/memory.h | 4 +- arch/arm64/kernel/cpufeature.c | 3 ++ arch/arm64/kernel/smp.c | 2 + include/linux/kasan.h | 24 +++++++--- include/linux/mm.h | 2 +- include/linux/page-flags-layout.h | 2 +- mm/kasan/Makefile | 5 ++ mm/kasan/common.c | 15 +++--- mm/kasan/hw_tags.c | 80 +++++++++++++++++++++++++++++++ mm/kasan/kasan.h | 19 ++++++-- mm/kasan/report_hw_tags.c | 42 ++++++++++++++++ mm/kasan/report_sw_tags.c | 2 +- mm/kasan/shadow.c | 2 +- mm/kasan/sw_tags.c | 2 +- 14 files changed, 178 insertions(+), 26 deletions(-) create mode 100644 mm/kasan/hw_tags.c create mode 100644 mm/kasan/report_hw_tags.c diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index cd671fb6707c5..18fce223b67b2 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -214,7 +214,7 @@ static inline unsigned long kaslr_offset(void) (__force __typeof__(addr))__addr; \ }) -#ifdef CONFIG_KASAN_SW_TAGS +#if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS) #define __tag_shifted(tag) ((u64)(tag) << 56) #define __tag_reset(addr) __untagged_addr(addr) #define __tag_get(addr) (__u8)((u64)(addr) >> 56) @@ -222,7 +222,7 @@ static inline unsigned long kaslr_offset(void) #define __tag_shifted(tag) 0UL #define __tag_reset(addr) (addr) #define __tag_get(addr) 0 -#endif /* CONFIG_KASAN_SW_TAGS */ +#endif /* CONFIG_KASAN_SW_TAGS || CONFIG_KASAN_HW_TAGS */ static inline const void *__tag_set(const void *addr, u8 tag) { diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index d87cfc6246e08..7ffb5f1d8b682 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -70,6 +70,7 @@ #include #include #include +#include #include #include #include @@ -1710,6 +1711,8 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap) cleared_zero_page = true; mte_clear_page_tags(lm_alias(empty_zero_page)); } + + kasan_init_hw_tags_cpu(); } #endif /* CONFIG_ARM64_MTE */ diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 2499b895efea4..19b1705ae5cb6 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -462,6 +462,8 @@ void __init smp_prepare_boot_cpu(void) /* Conditionally switch to GIC PMR for interrupt masking */ if (system_uses_irq_prio_masking()) init_gic_priority_masking(); + + kasan_init_hw_tags(); } static u64 __init of_get_cpu_mpidr(struct device_node *dn) diff --git a/include/linux/kasan.h b/include/linux/kasan.h index b1381ee6922a9..d22ec4c9c1bd1 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -190,25 +190,35 @@ static inline void kasan_record_aux_stack(void *ptr) {} #endif /* CONFIG_KASAN_GENERIC */ -#ifdef CONFIG_KASAN_SW_TAGS - -void __init kasan_init_sw_tags(void); +#if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS) void *kasan_reset_tag(const void *addr); bool kasan_report(unsigned long addr, size_t size, bool is_write, unsigned long ip); -#else /* CONFIG_KASAN_SW_TAGS */ - -static inline void kasan_init_sw_tags(void) { } +#else /* CONFIG_KASAN_SW_TAGS || CONFIG_KASAN_HW_TAGS */ static inline void *kasan_reset_tag(const void *addr) { return (void *)addr; } -#endif /* CONFIG_KASAN_SW_TAGS */ +#endif /* CONFIG_KASAN_SW_TAGS || CONFIG_KASAN_HW_TAGS*/ + +#ifdef CONFIG_KASAN_SW_TAGS +void __init kasan_init_sw_tags(void); +#else +static inline void kasan_init_sw_tags(void) { } +#endif + +#ifdef CONFIG_KASAN_HW_TAGS +void kasan_init_hw_tags_cpu(void); +void __init kasan_init_hw_tags(void); +#else +static inline void kasan_init_hw_tags_cpu(void) { } +static inline void kasan_init_hw_tags(void) { } +#endif #ifdef CONFIG_KASAN_VMALLOC diff --git a/include/linux/mm.h b/include/linux/mm.h index 362579ad0758b..024ec0a00c72c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1421,7 +1421,7 @@ static inline bool cpupid_match_pid(struct task_struct *task, int cpupid) } #endif /* CONFIG_NUMA_BALANCING */ -#ifdef CONFIG_KASAN_SW_TAGS +#if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS) static inline u8 page_kasan_tag(const struct page *page) { return (page->flags >> KASAN_TAG_PGSHIFT) & KASAN_TAG_MASK; diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h index e200eef6a7fde..7d4ec26d8a3ed 100644 --- a/include/linux/page-flags-layout.h +++ b/include/linux/page-flags-layout.h @@ -77,7 +77,7 @@ #define LAST_CPUPID_SHIFT 0 #endif -#ifdef CONFIG_KASAN_SW_TAGS +#if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS) #define KASAN_TAG_WIDTH 8 #else #define KASAN_TAG_WIDTH 0 diff --git a/mm/kasan/Makefile b/mm/kasan/Makefile index f1d68a34f3c90..9fe39a66388a8 100644 --- a/mm/kasan/Makefile +++ b/mm/kasan/Makefile @@ -10,8 +10,10 @@ CFLAGS_REMOVE_init.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_quarantine.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_report.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_report_generic.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_report_hw_tags.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_report_sw_tags.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_shadow.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_hw_tags.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_sw_tags.o = $(CC_FLAGS_FTRACE) # Function splitter causes unnecessary splits in __asan_load1/__asan_store1 @@ -27,10 +29,13 @@ CFLAGS_init.o := $(CC_FLAGS_KASAN_RUNTIME) CFLAGS_quarantine.o := $(CC_FLAGS_KASAN_RUNTIME) CFLAGS_report.o := $(CC_FLAGS_KASAN_RUNTIME) CFLAGS_report_generic.o := $(CC_FLAGS_KASAN_RUNTIME) +CFLAGS_report_hw_tags.o := $(CC_FLAGS_KASAN_RUNTIME) CFLAGS_report_sw_tags.o := $(CC_FLAGS_KASAN_RUNTIME) CFLAGS_shadow.o := $(CC_FLAGS_KASAN_RUNTIME) +CFLAGS_hw_tags.o := $(CC_FLAGS_KASAN_RUNTIME) CFLAGS_sw_tags.o := $(CC_FLAGS_KASAN_RUNTIME) obj-$(CONFIG_KASAN) := common.o report.o obj-$(CONFIG_KASAN_GENERIC) += init.o generic.o report_generic.o shadow.o quarantine.o +obj-$(CONFIG_KASAN_HW_TAGS) += hw_tags.o report_hw_tags.o obj-$(CONFIG_KASAN_SW_TAGS) += init.o report_sw_tags.o shadow.o sw_tags.o diff --git a/mm/kasan/common.c b/mm/kasan/common.c index d5f23b2f170ab..02613883846ed 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -118,7 +118,7 @@ void kasan_free_pages(struct page *page, unsigned int order) */ static inline unsigned int optimal_redzone(unsigned int object_size) { - if (IS_ENABLED(CONFIG_KASAN_SW_TAGS)) + if (!IS_ENABLED(CONFIG_KASAN_GENERIC)) return 0; return @@ -183,14 +183,14 @@ size_t kasan_metadata_size(struct kmem_cache *cache) struct kasan_alloc_meta *get_alloc_info(struct kmem_cache *cache, const void *object) { - return (void *)object + cache->kasan_info.alloc_meta_offset; + return (void *)reset_tag(object) + cache->kasan_info.alloc_meta_offset; } struct kasan_free_meta *get_free_info(struct kmem_cache *cache, const void *object) { BUILD_BUG_ON(sizeof(struct kasan_free_meta) > 32); - return (void *)object + cache->kasan_info.free_meta_offset; + return (void *)reset_tag(object) + cache->kasan_info.free_meta_offset; } void kasan_poison_slab(struct page *page) @@ -272,9 +272,8 @@ void * __must_check kasan_init_slab_obj(struct kmem_cache *cache, alloc_info = get_alloc_info(cache, object); __memset(alloc_info, 0, sizeof(*alloc_info)); - if (IS_ENABLED(CONFIG_KASAN_SW_TAGS)) - object = set_tag(object, - assign_tag(cache, object, true, false)); + if (IS_ENABLED(CONFIG_KASAN_SW_TAGS) || IS_ENABLED(CONFIG_KASAN_HW_TAGS)) + object = set_tag(object, assign_tag(cache, object, true, false)); return (void *)object; } @@ -342,10 +341,10 @@ static void *__kasan_kmalloc(struct kmem_cache *cache, const void *object, redzone_end = round_up((unsigned long)object + cache->object_size, KASAN_GRANULE_SIZE); - if (IS_ENABLED(CONFIG_KASAN_SW_TAGS)) + if (IS_ENABLED(CONFIG_KASAN_SW_TAGS) || IS_ENABLED(CONFIG_KASAN_HW_TAGS)) tag = assign_tag(cache, object, false, keep_tag); - /* Tag is ignored in set_tag without CONFIG_KASAN_SW_TAGS */ + /* Tag is ignored in set_tag without CONFIG_KASAN_SW/HW_TAGS */ unpoison_range(set_tag(object, tag), size); poison_range((void *)redzone_start, redzone_end - redzone_start, KASAN_KMALLOC_REDZONE); diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c new file mode 100644 index 0000000000000..66419e908e21a --- /dev/null +++ b/mm/kasan/hw_tags.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This file contains core hardware tag-based KASAN code. + * + * Copyright (c) 2020 Google, Inc. + * Author: Andrey Konovalov + */ + +#define pr_fmt(fmt) "kasan: " fmt + +#include +#include +#include +#include +#include +#include + +#include "kasan.h" + +/* kasan_init_hw_tags_cpu() is called for each CPU. */ +void kasan_init_hw_tags_cpu(void) +{ + hw_init_tags(KASAN_TAG_MAX); + hw_enable_tagging(); +} + +/* kasan_init_hw_tags() is called once on boot CPU. */ +void __init kasan_init_hw_tags(void) +{ + pr_info("KernelAddressSanitizer initialized\n"); +} + +void *kasan_reset_tag(const void *addr) +{ + return reset_tag(addr); +} + +void poison_range(const void *address, size_t size, u8 value) +{ + hw_set_mem_tag_range(reset_tag(address), + round_up(size, KASAN_GRANULE_SIZE), value); +} + +void unpoison_range(const void *address, size_t size) +{ + hw_set_mem_tag_range(reset_tag(address), + round_up(size, KASAN_GRANULE_SIZE), get_tag(address)); +} + +u8 random_tag(void) +{ + return hw_get_random_tag(); +} + +bool check_invalid_free(void *addr) +{ + u8 ptr_tag = get_tag(addr); + u8 mem_tag = hw_get_mem_tag(addr); + + return (mem_tag == KASAN_TAG_INVALID) || + (ptr_tag != KASAN_TAG_KERNEL && ptr_tag != mem_tag); +} + +void kasan_set_free_info(struct kmem_cache *cache, + void *object, u8 tag) +{ + struct kasan_alloc_meta *alloc_meta; + + alloc_meta = get_alloc_info(cache, object); + kasan_set_track(&alloc_meta->free_track[0], GFP_NOWAIT); +} + +struct kasan_track *kasan_get_free_track(struct kmem_cache *cache, + void *object, u8 tag) +{ + struct kasan_alloc_meta *alloc_meta; + + alloc_meta = get_alloc_info(cache, object); + return &alloc_meta->free_track[0]; +} diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 92cb2c16e3140..64560cc711910 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -154,6 +154,11 @@ struct kasan_alloc_meta *get_alloc_info(struct kmem_cache *cache, struct kasan_free_meta *get_free_info(struct kmem_cache *cache, const void *object); +void poison_range(const void *address, size_t size, u8 value); +void unpoison_range(const void *address, size_t size); + +#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) + static inline const void *kasan_shadow_to_mem(const void *shadow_addr) { return (void *)(((unsigned long)shadow_addr - KASAN_SHADOW_OFFSET) @@ -165,9 +170,6 @@ static inline bool addr_has_metadata(const void *addr) return (addr >= kasan_shadow_to_mem((void *)KASAN_SHADOW_START)); } -void poison_range(const void *address, size_t size, u8 value); -void unpoison_range(const void *address, size_t size); - /** * check_memory_region - Check memory region, and report if invalid access. * @addr: the accessed address @@ -179,6 +181,15 @@ void unpoison_range(const void *address, size_t size); bool check_memory_region(unsigned long addr, size_t size, bool write, unsigned long ret_ip); +#else /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ + +static inline bool addr_has_metadata(const void *addr) +{ + return true; +} + +#endif /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ + bool check_invalid_free(void *addr); void *find_first_bad_addr(void *addr, size_t size); @@ -215,7 +226,7 @@ static inline void quarantine_reduce(void) { } static inline void quarantine_remove_cache(struct kmem_cache *cache) { } #endif -#ifdef CONFIG_KASAN_SW_TAGS +#if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS) void print_tags(u8 addr_tag, const void *addr); diff --git a/mm/kasan/report_hw_tags.c b/mm/kasan/report_hw_tags.c new file mode 100644 index 0000000000000..da543eb832cd6 --- /dev/null +++ b/mm/kasan/report_hw_tags.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This file contains hardware tag-based KASAN specific error reporting code. + * + * Copyright (c) 2020 Google, Inc. + * Author: Andrey Konovalov + */ + +#include +#include +#include +#include +#include +#include + +#include "kasan.h" + +const char *get_bug_type(struct kasan_access_info *info) +{ + return "invalid-access"; +} + +void *find_first_bad_addr(void *addr, size_t size) +{ + return reset_tag(addr); +} + +void metadata_fetch_row(char *buffer, void *row) +{ + int i; + + for (i = 0; i < META_BYTES_PER_ROW; i++) + buffer[i] = hw_get_mem_tag(row + i * KASAN_GRANULE_SIZE); +} + +void print_tags(u8 addr_tag, const void *addr) +{ + u8 memory_tag = hw_get_mem_tag((void *)addr); + + pr_err("Pointer tag: [%02x], memory tag: [%02x]\n", + addr_tag, memory_tag); +} diff --git a/mm/kasan/report_sw_tags.c b/mm/kasan/report_sw_tags.c index add2dfe6169c0..aebc44a29e83c 100644 --- a/mm/kasan/report_sw_tags.c +++ b/mm/kasan/report_sw_tags.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * This file contains tag-based KASAN specific error reporting code. + * This file contains software tag-based KASAN specific error reporting code. * * Copyright (c) 2014 Samsung Electronics Co., Ltd. * Author: Andrey Ryabinin diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c index ba84e51065852..ac6a5f57df33d 100644 --- a/mm/kasan/shadow.c +++ b/mm/kasan/shadow.c @@ -107,7 +107,7 @@ void unpoison_range(const void *address, size_t size) if (IS_ENABLED(CONFIG_KASAN_SW_TAGS)) *shadow = tag; - else + else /* CONFIG_KASAN_GENERIC */ *shadow = size & KASAN_GRANULE_MASK; } } diff --git a/mm/kasan/sw_tags.c b/mm/kasan/sw_tags.c index 7317d5229b2b9..a518483f39653 100644 --- a/mm/kasan/sw_tags.c +++ b/mm/kasan/sw_tags.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * This file contains core tag-based KASAN code. + * This file contains core software tag-based KASAN code. * * Copyright (c) 2018 Google, Inc. * Author: Andrey Konovalov -- GitLab From 4291e9ee618956eeae02b17f35b272193a6cda5a Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:02:13 -0800 Subject: [PATCH 1061/1894] kasan, arm64: print report from tag fault handler Add error reporting for hardware tag-based KASAN. When CONFIG_KASAN_HW_TAGS is enabled, print KASAN report from the arm64 tag fault handler. SAS bits aren't set in ESR for all faults reported in EL1, so it's impossible to find out the size of the access the caused the fault. Adapt KASAN reporting code to handle this case. Link: https://lkml.kernel.org/r/b559c82b6a969afedf53b4694b475f0234067a1a.1606161801.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Co-developed-by: Vincenzo Frascino Signed-off-by: Vincenzo Frascino Reviewed-by: Catalin Marinas Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Marco Elver Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/mm/fault.c | 14 ++++++++++++++ mm/kasan/report.c | 11 ++++++++--- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 46ac15249eca8..3c40da479899d 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -297,10 +298,23 @@ static void die_kernel_fault(const char *msg, unsigned long addr, do_exit(SIGKILL); } +#ifdef CONFIG_KASAN_HW_TAGS static void report_tag_fault(unsigned long addr, unsigned int esr, struct pt_regs *regs) { + bool is_write = ((esr & ESR_ELx_WNR) >> ESR_ELx_WNR_SHIFT) != 0; + + /* + * SAS bits aren't set for all faults reported in EL1, so we can't + * find out access size. + */ + kasan_report(addr, 0, is_write, regs->pc); } +#else +/* Tag faults aren't enabled without CONFIG_KASAN_HW_TAGS. */ +static inline void report_tag_fault(unsigned long addr, unsigned int esr, + struct pt_regs *regs) { } +#endif static void do_tag_recovery(unsigned long addr, unsigned int esr, struct pt_regs *regs) diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 9a6569a57ca28..929fcb2a02046 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -62,9 +62,14 @@ static void print_error_description(struct kasan_access_info *info) { pr_err("BUG: KASAN: %s in %pS\n", get_bug_type(info), (void *)info->ip); - pr_err("%s of size %zu at addr %px by task %s/%d\n", - info->is_write ? "Write" : "Read", info->access_size, - info->access_addr, current->comm, task_pid_nr(current)); + if (info->access_size) + pr_err("%s of size %zu at addr %px by task %s/%d\n", + info->is_write ? "Write" : "Read", info->access_size, + info->access_addr, current->comm, task_pid_nr(current)); + else + pr_err("%s at addr %px by task %s/%d\n", + info->is_write ? "Write" : "Read", + info->access_addr, current->comm, task_pid_nr(current)); } static DEFINE_SPINLOCK(report_lock); -- GitLab From aa1ef4d7b3f67f7f17aa4aa34f5ec513c7e4db6c Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:02:17 -0800 Subject: [PATCH 1062/1894] kasan, mm: reset tags when accessing metadata Kernel allocator code accesses metadata for slab objects, that may lie out-of-bounds of the object itself, or be accessed when an object is freed. Such accesses trigger tag faults and lead to false-positive reports with hardware tag-based KASAN. Software KASAN modes disable instrumentation for allocator code via KASAN_SANITIZE Makefile macro, and rely on kasan_enable/disable_current() annotations which are used to ignore KASAN reports. With hardware tag-based KASAN neither of those options are available, as it doesn't use compiler instrumetation, no tag faults are ignored, and MTE is disabled after the first one. Instead, reset tags when accessing metadata (currently only for SLUB). Link: https://lkml.kernel.org/r/a0f3cefbc49f34c843b664110842de4db28179d0.1606161801.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Acked-by: Marco Elver Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 4 +++- mm/page_poison.c | 2 +- mm/slub.c | 29 ++++++++++++++++------------- 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 1887852fb934f..7a2c89b211150 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1204,8 +1204,10 @@ static void kernel_init_free_pages(struct page *page, int numpages) /* s390's use of memset() could override KASAN redzones. */ kasan_disable_current(); - for (i = 0; i < numpages; i++) + for (i = 0; i < numpages; i++) { + page_kasan_tag_reset(page + i); clear_highpage(page + i); + } kasan_enable_current(); } diff --git a/mm/page_poison.c b/mm/page_poison.c index 06ec518b20894..65cdf844c8adb 100644 --- a/mm/page_poison.c +++ b/mm/page_poison.c @@ -25,7 +25,7 @@ static void poison_page(struct page *page) /* KASAN still think the page is in-use, so skip it. */ kasan_disable_current(); - memset(addr, PAGE_POISON, PAGE_SIZE); + memset(kasan_reset_tag(addr), PAGE_POISON, PAGE_SIZE); kasan_enable_current(); kunmap_atomic(addr); } diff --git a/mm/slub.c b/mm/slub.c index 4552319148f68..0c8b43a5b3b03 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -249,7 +249,7 @@ static inline void *freelist_ptr(const struct kmem_cache *s, void *ptr, { #ifdef CONFIG_SLAB_FREELIST_HARDENED /* - * When CONFIG_KASAN_SW_TAGS is enabled, ptr_addr might be tagged. + * When CONFIG_KASAN_SW/HW_TAGS is enabled, ptr_addr might be tagged. * Normally, this doesn't cause any issues, as both set_freepointer() * and get_freepointer() are called with a pointer with the same tag. * However, there are some issues with CONFIG_SLUB_DEBUG code. For @@ -275,6 +275,7 @@ static inline void *freelist_dereference(const struct kmem_cache *s, static inline void *get_freepointer(struct kmem_cache *s, void *object) { + object = kasan_reset_tag(object); return freelist_dereference(s, object + s->offset); } @@ -304,6 +305,7 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp) BUG_ON(object == fp); /* naive detection of double free or corruption */ #endif + freeptr_addr = (unsigned long)kasan_reset_tag((void *)freeptr_addr); *(void **)freeptr_addr = freelist_ptr(s, fp, freeptr_addr); } @@ -538,8 +540,8 @@ static void print_section(char *level, char *text, u8 *addr, unsigned int length) { metadata_access_enable(); - print_hex_dump(level, text, DUMP_PREFIX_ADDRESS, 16, 1, addr, - length, 1); + print_hex_dump(level, kasan_reset_tag(text), DUMP_PREFIX_ADDRESS, + 16, 1, addr, length, 1); metadata_access_disable(); } @@ -570,7 +572,7 @@ static struct track *get_track(struct kmem_cache *s, void *object, p = object + get_info_end(s); - return p + alloc; + return kasan_reset_tag(p + alloc); } static void set_track(struct kmem_cache *s, void *object, @@ -583,7 +585,8 @@ static void set_track(struct kmem_cache *s, void *object, unsigned int nr_entries; metadata_access_enable(); - nr_entries = stack_trace_save(p->addrs, TRACK_ADDRS_COUNT, 3); + nr_entries = stack_trace_save(kasan_reset_tag(p->addrs), + TRACK_ADDRS_COUNT, 3); metadata_access_disable(); if (nr_entries < TRACK_ADDRS_COUNT) @@ -747,7 +750,7 @@ static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page, static void init_object(struct kmem_cache *s, void *object, u8 val) { - u8 *p = object; + u8 *p = kasan_reset_tag(object); if (s->flags & SLAB_RED_ZONE) memset(p - s->red_left_pad, val, s->red_left_pad); @@ -777,7 +780,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page, u8 *addr = page_address(page); metadata_access_enable(); - fault = memchr_inv(start, value, bytes); + fault = memchr_inv(kasan_reset_tag(start), value, bytes); metadata_access_disable(); if (!fault) return 1; @@ -873,7 +876,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page) pad = end - remainder; metadata_access_enable(); - fault = memchr_inv(pad, POISON_INUSE, remainder); + fault = memchr_inv(kasan_reset_tag(pad), POISON_INUSE, remainder); metadata_access_disable(); if (!fault) return 1; @@ -1118,7 +1121,7 @@ void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr) return; metadata_access_enable(); - memset(addr, POISON_INUSE, page_size(page)); + memset(kasan_reset_tag(addr), POISON_INUSE, page_size(page)); metadata_access_disable(); } @@ -1566,10 +1569,10 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, * Clear the object and the metadata, but don't touch * the redzone. */ - memset(object, 0, s->object_size); + memset(kasan_reset_tag(object), 0, s->object_size); rsize = (s->flags & SLAB_RED_ZONE) ? s->red_left_pad : 0; - memset((char *)object + s->inuse, 0, + memset((char *)kasan_reset_tag(object) + s->inuse, 0, s->size - s->inuse - rsize); } @@ -2881,10 +2884,10 @@ redo: stat(s, ALLOC_FASTPATH); } - maybe_wipe_obj_freeptr(s, object); + maybe_wipe_obj_freeptr(s, kasan_reset_tag(object)); if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object) - memset(object, 0, s->object_size); + memset(kasan_reset_tag(object), 0, s->object_size); slab_post_alloc_hook(s, objcg, gfpflags, 1, &object); -- GitLab From 94ab5b61ee16250b7dab38eec5ba9f666350402c Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:02:20 -0800 Subject: [PATCH 1063/1894] kasan, arm64: enable CONFIG_KASAN_HW_TAGS Hardware tag-based KASAN is now ready, enable the configuration option. Link: https://lkml.kernel.org/r/a6fa50d3bb6b318e05c6389a44095be96442b8b0.1606161801.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Acked-by: Catalin Marinas Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Marco Elver Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 9386b108b132f..d0d94f77d000a 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -137,6 +137,7 @@ config ARM64 select HAVE_ARCH_JUMP_LABEL_RELATIVE select HAVE_ARCH_KASAN if !(ARM64_16K_PAGES && ARM64_VA_BITS_48) select HAVE_ARCH_KASAN_SW_TAGS if HAVE_ARCH_KASAN + select HAVE_ARCH_KASAN_HW_TAGS if (HAVE_ARCH_KASAN && ARM64_MTE) select HAVE_ARCH_KGDB select HAVE_ARCH_MMAP_RND_BITS select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT -- GitLab From 948e32539f4ac7a778d50eb3eed307c6f3068f75 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:02:24 -0800 Subject: [PATCH 1064/1894] kasan: add documentation for hardware tag-based mode Add documentation for hardware tag-based KASAN mode and also add some clarifications for software tag-based mode. Link: https://lkml.kernel.org/r/20ed1d387685e89fc31be068f890f070ef9fd5d5.1606161801.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Reviewed-by: Marco Elver Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/dev-tools/kasan.rst | 80 +++++++++++++++++++++++-------- 1 file changed, 59 insertions(+), 21 deletions(-) diff --git a/Documentation/dev-tools/kasan.rst b/Documentation/dev-tools/kasan.rst index 924d19aa1248b..14ae8a9249994 100644 --- a/Documentation/dev-tools/kasan.rst +++ b/Documentation/dev-tools/kasan.rst @@ -5,12 +5,14 @@ Overview -------- KernelAddressSANitizer (KASAN) is a dynamic memory error detector designed to -find out-of-bound and use-after-free bugs. KASAN has two modes: generic KASAN -(similar to userspace ASan) and software tag-based KASAN (similar to userspace -HWASan). +find out-of-bound and use-after-free bugs. KASAN has three modes: +1. generic KASAN (similar to userspace ASan), +2. software tag-based KASAN (similar to userspace HWASan), +3. hardware tag-based KASAN (based on hardware memory tagging). -KASAN uses compile-time instrumentation to insert validity checks before every -memory access, and therefore requires a compiler version that supports that. +Software KASAN modes (1 and 2) use compile-time instrumentation to insert +validity checks before every memory access, and therefore require a compiler +version that supports that. Generic KASAN is supported in both GCC and Clang. With GCC it requires version 8.3.0 or later. Any supported Clang version is compatible, but detection of @@ -19,7 +21,7 @@ out-of-bounds accesses for global variables is only supported since Clang 11. Tag-based KASAN is only supported in Clang. Currently generic KASAN is supported for the x86_64, arm64, xtensa, s390 and -riscv architectures, and tag-based KASAN is supported only for arm64. +and riscv architectures, and tag-based KASAN modes are supported only for arm64. Usage ----- @@ -28,14 +30,16 @@ To enable KASAN configure kernel with:: CONFIG_KASAN = y -and choose between CONFIG_KASAN_GENERIC (to enable generic KASAN) and -CONFIG_KASAN_SW_TAGS (to enable software tag-based KASAN). +and choose between CONFIG_KASAN_GENERIC (to enable generic KASAN), +CONFIG_KASAN_SW_TAGS (to enable software tag-based KASAN), and +CONFIG_KASAN_HW_TAGS (to enable hardware tag-based KASAN). -You also need to choose between CONFIG_KASAN_OUTLINE and CONFIG_KASAN_INLINE. -Outline and inline are compiler instrumentation types. The former produces -smaller binary while the latter is 1.1 - 2 times faster. +For software modes, you also need to choose between CONFIG_KASAN_OUTLINE and +CONFIG_KASAN_INLINE. Outline and inline are compiler instrumentation types. +The former produces smaller binary while the latter is 1.1 - 2 times faster. -Both KASAN modes work with both SLUB and SLAB memory allocators. +Both software KASAN modes work with both SLUB and SLAB memory allocators, +hardware tag-based KASAN currently only support SLUB. For better bug detection and nicer reporting, enable CONFIG_STACKTRACE. To augment reports with last allocation and freeing stack of the physical page, @@ -197,17 +201,24 @@ call_rcu() and workqueue queuing. Software tag-based KASAN ~~~~~~~~~~~~~~~~~~~~~~~~ -Tag-based KASAN uses the Top Byte Ignore (TBI) feature of modern arm64 CPUs to -store a pointer tag in the top byte of kernel pointers. Like generic KASAN it -uses shadow memory to store memory tags associated with each 16-byte memory +Software tag-based KASAN requires software memory tagging support in the form +of HWASan-like compiler instrumentation (see HWASan documentation for details). + +Software tag-based KASAN is currently only implemented for arm64 architecture. + +Software tag-based KASAN uses the Top Byte Ignore (TBI) feature of arm64 CPUs +to store a pointer tag in the top byte of kernel pointers. Like generic KASAN +it uses shadow memory to store memory tags associated with each 16-byte memory cell (therefore it dedicates 1/16th of the kernel memory for shadow memory). -On each memory allocation tag-based KASAN generates a random tag, tags the -allocated memory with this tag, and embeds this tag into the returned pointer. +On each memory allocation software tag-based KASAN generates a random tag, tags +the allocated memory with this tag, and embeds this tag into the returned +pointer. + Software tag-based KASAN uses compile-time instrumentation to insert checks before each memory access. These checks make sure that tag of the memory that is being accessed is equal to tag of the pointer that is used to access this -memory. In case of a tag mismatch tag-based KASAN prints a bug report. +memory. In case of a tag mismatch software tag-based KASAN prints a bug report. Software tag-based KASAN also has two instrumentation modes (outline, that emits callbacks to check memory accesses; and inline, that performs the shadow @@ -216,9 +227,36 @@ simply printed from the function that performs the access check. With inline instrumentation a brk instruction is emitted by the compiler, and a dedicated brk handler is used to print bug reports. -A potential expansion of this mode is a hardware tag-based mode, which would -use hardware memory tagging support instead of compiler instrumentation and -manual shadow memory manipulation. +Software tag-based KASAN uses 0xFF as a match-all pointer tag (accesses through +pointers with 0xFF pointer tag aren't checked). The value 0xFE is currently +reserved to tag freed memory regions. + +Software tag-based KASAN currently only supports tagging of +kmem_cache_alloc/kmalloc and page_alloc memory. + +Hardware tag-based KASAN +~~~~~~~~~~~~~~~~~~~~~~~~ + +Hardware tag-based KASAN is similar to the software mode in concept, but uses +hardware memory tagging support instead of compiler instrumentation and +shadow memory. + +Hardware tag-based KASAN is currently only implemented for arm64 architecture +and based on both arm64 Memory Tagging Extension (MTE) introduced in ARMv8.5 +Instruction Set Architecture, and Top Byte Ignore (TBI). + +Special arm64 instructions are used to assign memory tags for each allocation. +Same tags are assigned to pointers to those allocations. On every memory +access, hardware makes sure that tag of the memory that is being accessed is +equal to tag of the pointer that is used to access this memory. In case of a +tag mismatch a fault is generated and a report is printed. + +Hardware tag-based KASAN uses 0xFF as a match-all pointer tag (accesses through +pointers with 0xFF pointer tag aren't checked). The value 0xFE is currently +reserved to tag freed memory regions. + +Hardware tag-based KASAN currently only supports tagging of +kmem_cache_alloc/kmalloc and page_alloc memory. What memory accesses are sanitised by KASAN? -------------------------------------------- -- GitLab From 29f080881601c90d39c8fa31c125ac70b8894b5e Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Tue, 22 Dec 2020 12:02:27 -0800 Subject: [PATCH 1065/1894] kselftest/arm64: check GCR_EL1 after context switch This test is specific to MTE and verifies that the GCR_EL1 register is context switched correctly. It spawns 1024 processes and each process spawns 5 threads. Each thread writes a random setting of GCR_EL1 through the prctl() system call and reads it back verifying that it is the same. If the values are not the same it reports a failure. Note: The test has been extended to verify that even SYNC and ASYNC mode setting is preserved correctly over context switching. Link: https://lkml.kernel.org/r/b51a165426e906e7ec8a68d806ef3f8cd92581a6.1606161801.git.andreyknvl@google.com Signed-off-by: Vincenzo Frascino Signed-off-by: Andrey Konovalov Acked-by: Catalin Marinas Tested-by: Vincenzo Frascino Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Marco Elver Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/arm64/mte/Makefile | 2 +- .../arm64/mte/check_gcr_el1_cswitch.c | 154 ++++++++++++++++++ 2 files changed, 155 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c diff --git a/tools/testing/selftests/arm64/mte/Makefile b/tools/testing/selftests/arm64/mte/Makefile index 2480226dfe57c..0b3af552632a6 100644 --- a/tools/testing/selftests/arm64/mte/Makefile +++ b/tools/testing/selftests/arm64/mte/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 # Copyright (C) 2020 ARM Limited -CFLAGS += -std=gnu99 -I. +CFLAGS += -std=gnu99 -I. -lpthread SRCS := $(filter-out mte_common_util.c,$(wildcard *.c)) PROGS := $(patsubst %.c,%,$(SRCS)) diff --git a/tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c b/tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c new file mode 100644 index 0000000000000..a876db1f096ab --- /dev/null +++ b/tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c @@ -0,0 +1,154 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2020 ARM Limited + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "kselftest.h" +#include "mte_common_util.h" + +#define PR_SET_TAGGED_ADDR_CTRL 55 +#define PR_GET_TAGGED_ADDR_CTRL 56 +# define PR_TAGGED_ADDR_ENABLE (1UL << 0) +# define PR_MTE_TCF_SHIFT 1 +# define PR_MTE_TCF_NONE (0UL << PR_MTE_TCF_SHIFT) +# define PR_MTE_TCF_SYNC (1UL << PR_MTE_TCF_SHIFT) +# define PR_MTE_TCF_ASYNC (2UL << PR_MTE_TCF_SHIFT) +# define PR_MTE_TCF_MASK (3UL << PR_MTE_TCF_SHIFT) +# define PR_MTE_TAG_SHIFT 3 +# define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT) + +#include "mte_def.h" + +#define NUM_ITERATIONS 1024 +#define MAX_THREADS 5 +#define THREAD_ITERATIONS 1000 + +void *execute_thread(void *x) +{ + pid_t pid = *((pid_t *)x); + pid_t tid = gettid(); + uint64_t prctl_tag_mask; + uint64_t prctl_set; + uint64_t prctl_get; + uint64_t prctl_tcf; + + srand(time(NULL) ^ (pid << 16) ^ (tid << 16)); + + prctl_tag_mask = rand() & 0xffff; + + if (prctl_tag_mask % 2) + prctl_tcf = PR_MTE_TCF_SYNC; + else + prctl_tcf = PR_MTE_TCF_ASYNC; + + prctl_set = PR_TAGGED_ADDR_ENABLE | prctl_tcf | (prctl_tag_mask << PR_MTE_TAG_SHIFT); + + for (int j = 0; j < THREAD_ITERATIONS; j++) { + if (prctl(PR_SET_TAGGED_ADDR_CTRL, prctl_set, 0, 0, 0)) { + perror("prctl() failed"); + goto fail; + } + + prctl_get = prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0); + + if (prctl_set != prctl_get) { + ksft_print_msg("Error: prctl_set: 0x%lx != prctl_get: 0x%lx\n", + prctl_set, prctl_get); + goto fail; + } + } + + return (void *)KSFT_PASS; + +fail: + return (void *)KSFT_FAIL; +} + +int execute_test(pid_t pid) +{ + pthread_t thread_id[MAX_THREADS]; + int thread_data[MAX_THREADS]; + + for (int i = 0; i < MAX_THREADS; i++) + pthread_create(&thread_id[i], NULL, + execute_thread, (void *)&pid); + + for (int i = 0; i < MAX_THREADS; i++) + pthread_join(thread_id[i], (void *)&thread_data[i]); + + for (int i = 0; i < MAX_THREADS; i++) + if (thread_data[i] == KSFT_FAIL) + return KSFT_FAIL; + + return KSFT_PASS; +} + +int mte_gcr_fork_test(void) +{ + pid_t pid; + int results[NUM_ITERATIONS]; + pid_t cpid; + int res; + + for (int i = 0; i < NUM_ITERATIONS; i++) { + pid = fork(); + + if (pid < 0) + return KSFT_FAIL; + + if (pid == 0) { + cpid = getpid(); + + res = execute_test(cpid); + + exit(res); + } + } + + for (int i = 0; i < NUM_ITERATIONS; i++) { + wait(&res); + + if (WIFEXITED(res)) + results[i] = WEXITSTATUS(res); + else + --i; + } + + for (int i = 0; i < NUM_ITERATIONS; i++) + if (results[i] == KSFT_FAIL) + return KSFT_FAIL; + + return KSFT_PASS; +} + +int main(int argc, char *argv[]) +{ + int err; + + err = mte_default_setup(); + if (err) + return err; + + ksft_set_plan(1); + + evaluate_test(mte_gcr_fork_test(), + "Verify that GCR_EL1 is set correctly on context switch\n"); + + mte_restore_setup(); + ksft_print_cnts(); + + return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL; +} -- GitLab From c696de9f12b7ddeddc05d378fc4dc0f66e9a8c95 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:02:31 -0800 Subject: [PATCH 1066/1894] kasan: simplify quarantine_put call site Patch series "kasan: boot parameters for hardware tag-based mode", v4. === Overview Hardware tag-based KASAN mode [1] is intended to eventually be used in production as a security mitigation. Therefore there's a need for finer control over KASAN features and for an existence of a kill switch. This patchset adds a few boot parameters for hardware tag-based KASAN that allow to disable or otherwise control particular KASAN features, as well as provides some initial optimizations for running KASAN in production. There's another planned patchset what will further optimize hardware tag-based KASAN, provide proper benchmarking and tests, and will fully enable tag-based KASAN for production use. Hardware tag-based KASAN relies on arm64 Memory Tagging Extension (MTE) [2] to perform memory and pointer tagging. Please see [3] and [4] for detailed analysis of how MTE helps to fight memory safety problems. The features that can be controlled are: 1. Whether KASAN is enabled at all. 2. Whether KASAN collects and saves alloc/free stacks. 3. Whether KASAN panics on a detected bug or not. The patch titled "kasan: add and integrate kasan boot parameters" of this series adds a few new boot parameters. kasan.mode allows to choose one of three main modes: - kasan.mode=off - KASAN is disabled, no tag checks are performed - kasan.mode=prod - only essential production features are enabled - kasan.mode=full - all KASAN features are enabled The chosen mode provides default control values for the features mentioned above. However it's also possible to override the default values by providing: - kasan.stacktrace=off/on - enable stacks collection (default: on for mode=full, otherwise off) - kasan.fault=report/panic - only report tag fault or also panic (default: report) If kasan.mode parameter is not provided, it defaults to full when CONFIG_DEBUG_KERNEL is enabled, and to prod otherwise. It is essential that switching between these modes doesn't require rebuilding the kernel with different configs, as this is required by the Android GKI (Generic Kernel Image) initiative. === Benchmarks For now I've only performed a few simple benchmarks such as measuring kernel boot time and slab memory usage after boot. There's an upcoming patchset which will optimize KASAN further and include more detailed benchmarking results. The benchmarks were performed in QEMU and the results below exclude the slowdown caused by QEMU memory tagging emulation (as it's different from the slowdown that will be introduced by hardware and is therefore irrelevant). KASAN_HW_TAGS=y + kasan.mode=off introduces no performance or memory impact compared to KASAN_HW_TAGS=n. kasan.mode=prod (manually excluding tagging) introduces 3% of performance and no memory impact (except memory used by hardware to store tags) compared to kasan.mode=off. kasan.mode=full has about 40% performance and 30% memory impact over kasan.mode=prod. Both come from alloc/free stack collection. === Notes This patchset is available here: https://github.com/xairy/linux/tree/up-boot-mte-v4 This patchset is based on v11 of "kasan: add hardware tag-based mode for arm64" patchset [1]. For testing in QEMU hardware tag-based KASAN requires: 1. QEMU built from master [6] (use "-machine virt,mte=on -cpu max" arguments to run). 2. GCC version 10. [1] https://lore.kernel.org/linux-arm-kernel/cover.1606161801.git.andreyknvl@google.com/T/#t [2] https://community.arm.com/developer/ip-products/processors/b/processors-ip-blog/posts/enhancing-memory-safety [3] https://arxiv.org/pdf/1802.09517.pdf [4] https://github.com/microsoft/MSRC-Security-Research/blob/master/papers/2020/Security%20analysis%20of%20memory%20tagging.pdf [5] https://source.android.com/devices/architecture/kernel/generic-kernel-image [6] https://github.com/qemu/qemu === Tags Tested-by: Vincenzo Frascino This patch (of 19): Move get_free_info() call into quarantine_put() to simplify the call site. No functional changes. Link: https://lkml.kernel.org/r/cover.1606162397.git.andreyknvl@google.com Link: https://lkml.kernel.org/r/312d0a3ef92cc6dc4fa5452cbc1714f9393ca239.1606162397.git.andreyknvl@google.com Link: https://linux-review.googlesource.com/id/Iab0f04e7ebf8d83247024b7190c67c3c34c7940f Signed-off-by: Andrey Konovalov Reviewed-by: Dmitry Vyukov Reviewed-by: Marco Elver Tested-by: Vincenzo Frascino Cc: Catalin Marinas Cc: Will Deacon Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Evgenii Stepanov Cc: Branislav Rankov Cc: Kevin Brodsky Cc: Vasily Gorbik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/common.c | 2 +- mm/kasan/kasan.h | 5 ++--- mm/kasan/quarantine.c | 3 ++- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/mm/kasan/common.c b/mm/kasan/common.c index 02613883846ed..34a9d4a8a5ffe 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -313,7 +313,7 @@ static bool __kasan_slab_free(struct kmem_cache *cache, void *object, kasan_set_free_info(cache, object, tag); - quarantine_put(get_free_info(cache, object), cache); + quarantine_put(cache, object); return IS_ENABLED(CONFIG_KASAN_GENERIC); } diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 64560cc711910..13c511e85d5f1 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -216,12 +216,11 @@ struct kasan_track *kasan_get_free_track(struct kmem_cache *cache, #if defined(CONFIG_KASAN_GENERIC) && \ (defined(CONFIG_SLAB) || defined(CONFIG_SLUB)) -void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache); +void quarantine_put(struct kmem_cache *cache, void *object); void quarantine_reduce(void); void quarantine_remove_cache(struct kmem_cache *cache); #else -static inline void quarantine_put(struct kasan_free_meta *info, - struct kmem_cache *cache) { } +static inline void quarantine_put(struct kmem_cache *cache, void *object) { } static inline void quarantine_reduce(void) { } static inline void quarantine_remove_cache(struct kmem_cache *cache) { } #endif diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c index 693b05e2c2aa7..88fab9ff2d6e1 100644 --- a/mm/kasan/quarantine.c +++ b/mm/kasan/quarantine.c @@ -163,11 +163,12 @@ static void qlist_free_all(struct qlist_head *q, struct kmem_cache *cache) qlist_init(q); } -void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache) +void quarantine_put(struct kmem_cache *cache, void *object) { unsigned long flags; struct qlist_head *q; struct qlist_head temp = QLIST_INIT; + struct kasan_free_meta *info = get_free_info(cache, object); /* * Note: irq must be disabled until after we move the batch to the -- GitLab From 6476792f1015a356e6864076c210b328b64d08cc Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:02:34 -0800 Subject: [PATCH 1067/1894] kasan: rename get_alloc/free_info Rename get_alloc_info() and get_free_info() to kasan_get_alloc_meta() and kasan_get_free_meta() to better reflect what those do and avoid confusion with kasan_set_free_info(). No functional changes. Link: https://lkml.kernel.org/r/27b7c036b754af15a2839e945f6d8bfce32b4c2f.1606162397.git.andreyknvl@google.com Link: https://linux-review.googlesource.com/id/Ib6e4ba61c8b12112b403d3479a9799ac8fff8de1 Signed-off-by: Andrey Konovalov Reviewed-by: Dmitry Vyukov Reviewed-by: Marco Elver Tested-by: Vincenzo Frascino Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/common.c | 16 ++++++++-------- mm/kasan/generic.c | 12 ++++++------ mm/kasan/hw_tags.c | 4 ++-- mm/kasan/kasan.h | 8 ++++---- mm/kasan/quarantine.c | 4 ++-- mm/kasan/report.c | 12 ++++++------ mm/kasan/report_sw_tags.c | 2 +- mm/kasan/sw_tags.c | 4 ++-- 8 files changed, 31 insertions(+), 31 deletions(-) diff --git a/mm/kasan/common.c b/mm/kasan/common.c index 34a9d4a8a5ffe..11bc85c09e3af 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -180,14 +180,14 @@ size_t kasan_metadata_size(struct kmem_cache *cache) sizeof(struct kasan_free_meta) : 0); } -struct kasan_alloc_meta *get_alloc_info(struct kmem_cache *cache, - const void *object) +struct kasan_alloc_meta *kasan_get_alloc_meta(struct kmem_cache *cache, + const void *object) { return (void *)reset_tag(object) + cache->kasan_info.alloc_meta_offset; } -struct kasan_free_meta *get_free_info(struct kmem_cache *cache, - const void *object) +struct kasan_free_meta *kasan_get_free_meta(struct kmem_cache *cache, + const void *object) { BUILD_BUG_ON(sizeof(struct kasan_free_meta) > 32); return (void *)reset_tag(object) + cache->kasan_info.free_meta_offset; @@ -264,13 +264,13 @@ static u8 assign_tag(struct kmem_cache *cache, const void *object, void * __must_check kasan_init_slab_obj(struct kmem_cache *cache, const void *object) { - struct kasan_alloc_meta *alloc_info; + struct kasan_alloc_meta *alloc_meta; if (!(cache->flags & SLAB_KASAN)) return (void *)object; - alloc_info = get_alloc_info(cache, object); - __memset(alloc_info, 0, sizeof(*alloc_info)); + alloc_meta = kasan_get_alloc_meta(cache, object); + __memset(alloc_meta, 0, sizeof(*alloc_meta)); if (IS_ENABLED(CONFIG_KASAN_SW_TAGS) || IS_ENABLED(CONFIG_KASAN_HW_TAGS)) object = set_tag(object, assign_tag(cache, object, true, false)); @@ -350,7 +350,7 @@ static void *__kasan_kmalloc(struct kmem_cache *cache, const void *object, KASAN_KMALLOC_REDZONE); if (cache->flags & SLAB_KASAN) - kasan_set_track(&get_alloc_info(cache, object)->alloc_track, flags); + kasan_set_track(&kasan_get_alloc_meta(cache, object)->alloc_track, flags); return set_tag(object, tag); } diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c index 707ea0a4c4c6e..4c233b3393012 100644 --- a/mm/kasan/generic.c +++ b/mm/kasan/generic.c @@ -328,7 +328,7 @@ void kasan_record_aux_stack(void *addr) { struct page *page = kasan_addr_to_page(addr); struct kmem_cache *cache; - struct kasan_alloc_meta *alloc_info; + struct kasan_alloc_meta *alloc_meta; void *object; if (!(page && PageSlab(page))) @@ -336,10 +336,10 @@ void kasan_record_aux_stack(void *addr) cache = page->slab_cache; object = nearest_obj(cache, page, addr); - alloc_info = get_alloc_info(cache, object); + alloc_meta = kasan_get_alloc_meta(cache, object); - alloc_info->aux_stack[1] = alloc_info->aux_stack[0]; - alloc_info->aux_stack[0] = kasan_save_stack(GFP_NOWAIT); + alloc_meta->aux_stack[1] = alloc_meta->aux_stack[0]; + alloc_meta->aux_stack[0] = kasan_save_stack(GFP_NOWAIT); } void kasan_set_free_info(struct kmem_cache *cache, @@ -347,7 +347,7 @@ void kasan_set_free_info(struct kmem_cache *cache, { struct kasan_free_meta *free_meta; - free_meta = get_free_info(cache, object); + free_meta = kasan_get_free_meta(cache, object); kasan_set_track(&free_meta->free_track, GFP_NOWAIT); /* @@ -361,5 +361,5 @@ struct kasan_track *kasan_get_free_track(struct kmem_cache *cache, { if (*(u8 *)kasan_mem_to_shadow(object) != KASAN_KMALLOC_FREETRACK) return NULL; - return &get_free_info(cache, object)->free_track; + return &kasan_get_free_meta(cache, object)->free_track; } diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c index 66419e908e21a..cb849c8da9789 100644 --- a/mm/kasan/hw_tags.c +++ b/mm/kasan/hw_tags.c @@ -66,7 +66,7 @@ void kasan_set_free_info(struct kmem_cache *cache, { struct kasan_alloc_meta *alloc_meta; - alloc_meta = get_alloc_info(cache, object); + alloc_meta = kasan_get_alloc_meta(cache, object); kasan_set_track(&alloc_meta->free_track[0], GFP_NOWAIT); } @@ -75,6 +75,6 @@ struct kasan_track *kasan_get_free_track(struct kmem_cache *cache, { struct kasan_alloc_meta *alloc_meta; - alloc_meta = get_alloc_info(cache, object); + alloc_meta = kasan_get_alloc_meta(cache, object); return &alloc_meta->free_track[0]; } diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 13c511e85d5f1..0eab7e4cecb8b 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -149,10 +149,10 @@ struct kasan_free_meta { #endif }; -struct kasan_alloc_meta *get_alloc_info(struct kmem_cache *cache, - const void *object); -struct kasan_free_meta *get_free_info(struct kmem_cache *cache, - const void *object); +struct kasan_alloc_meta *kasan_get_alloc_meta(struct kmem_cache *cache, + const void *object); +struct kasan_free_meta *kasan_get_free_meta(struct kmem_cache *cache, + const void *object); void poison_range(const void *address, size_t size, u8 value); void unpoison_range(const void *address, size_t size); diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c index 88fab9ff2d6e1..8fb71e11df3ff 100644 --- a/mm/kasan/quarantine.c +++ b/mm/kasan/quarantine.c @@ -168,7 +168,7 @@ void quarantine_put(struct kmem_cache *cache, void *object) unsigned long flags; struct qlist_head *q; struct qlist_head temp = QLIST_INIT; - struct kasan_free_meta *info = get_free_info(cache, object); + struct kasan_free_meta *meta = kasan_get_free_meta(cache, object); /* * Note: irq must be disabled until after we move the batch to the @@ -185,7 +185,7 @@ void quarantine_put(struct kmem_cache *cache, void *object) local_irq_restore(flags); return; } - qlist_put(q, &info->quarantine_link, cache->size); + qlist_put(q, &meta->quarantine_link, cache->size); if (unlikely(q->bytes > QUARANTINE_PERCPU_SIZE)) { qlist_move_all(q, &temp); diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 929fcb2a02046..85ce2cb2cd2b6 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -164,12 +164,12 @@ static void describe_object_addr(struct kmem_cache *cache, void *object, static void describe_object(struct kmem_cache *cache, void *object, const void *addr, u8 tag) { - struct kasan_alloc_meta *alloc_info = get_alloc_info(cache, object); + struct kasan_alloc_meta *alloc_meta = kasan_get_alloc_meta(cache, object); if (cache->flags & SLAB_KASAN) { struct kasan_track *free_track; - print_track(&alloc_info->alloc_track, "Allocated"); + print_track(&alloc_meta->alloc_track, "Allocated"); pr_err("\n"); free_track = kasan_get_free_track(cache, object, tag); if (free_track) { @@ -178,14 +178,14 @@ static void describe_object(struct kmem_cache *cache, void *object, } #ifdef CONFIG_KASAN_GENERIC - if (alloc_info->aux_stack[0]) { + if (alloc_meta->aux_stack[0]) { pr_err("Last potentially related work creation:\n"); - print_stack(alloc_info->aux_stack[0]); + print_stack(alloc_meta->aux_stack[0]); pr_err("\n"); } - if (alloc_info->aux_stack[1]) { + if (alloc_meta->aux_stack[1]) { pr_err("Second to last potentially related work creation:\n"); - print_stack(alloc_info->aux_stack[1]); + print_stack(alloc_meta->aux_stack[1]); pr_err("\n"); } #endif diff --git a/mm/kasan/report_sw_tags.c b/mm/kasan/report_sw_tags.c index aebc44a29e83c..317100fd95b91 100644 --- a/mm/kasan/report_sw_tags.c +++ b/mm/kasan/report_sw_tags.c @@ -46,7 +46,7 @@ const char *get_bug_type(struct kasan_access_info *info) if (page && PageSlab(page)) { cache = page->slab_cache; object = nearest_obj(cache, page, (void *)addr); - alloc_meta = get_alloc_info(cache, object); + alloc_meta = kasan_get_alloc_meta(cache, object); for (i = 0; i < KASAN_NR_FREE_STACKS; i++) if (alloc_meta->free_pointer_tag[i] == tag) diff --git a/mm/kasan/sw_tags.c b/mm/kasan/sw_tags.c index a518483f39653..6d7648cc3b985 100644 --- a/mm/kasan/sw_tags.c +++ b/mm/kasan/sw_tags.c @@ -174,7 +174,7 @@ void kasan_set_free_info(struct kmem_cache *cache, struct kasan_alloc_meta *alloc_meta; u8 idx = 0; - alloc_meta = get_alloc_info(cache, object); + alloc_meta = kasan_get_alloc_meta(cache, object); #ifdef CONFIG_KASAN_SW_TAGS_IDENTIFY idx = alloc_meta->free_track_idx; @@ -191,7 +191,7 @@ struct kasan_track *kasan_get_free_track(struct kmem_cache *cache, struct kasan_alloc_meta *alloc_meta; int i = 0; - alloc_meta = get_alloc_info(cache, object); + alloc_meta = kasan_get_alloc_meta(cache, object); #ifdef CONFIG_KASAN_SW_TAGS_IDENTIFY for (i = 0; i < KASAN_NR_FREE_STACKS; i++) { -- GitLab From 8bb0009b19465da5a0cd394b5a6ccc2eaf418f23 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:02:38 -0800 Subject: [PATCH 1068/1894] kasan: introduce set_alloc_info Add set_alloc_info() helper and move kasan_set_track() into it. This will simplify the code for one of the upcoming changes. No functional changes. Link: https://lkml.kernel.org/r/b2393e8f1e311a70fc3aaa2196461b6acdee7d21.1606162397.git.andreyknvl@google.com Link: https://linux-review.googlesource.com/id/I0316193cbb4ecc9b87b7c2eee0dd79f8ec908c1a Signed-off-by: Andrey Konovalov Reviewed-by: Dmitry Vyukov Reviewed-by: Marco Elver Tested-by: Vincenzo Frascino Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/common.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/mm/kasan/common.c b/mm/kasan/common.c index 11bc85c09e3af..6c38fd0a9e5ce 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -323,6 +323,11 @@ bool kasan_slab_free(struct kmem_cache *cache, void *object, unsigned long ip) return __kasan_slab_free(cache, object, ip, true); } +static void set_alloc_info(struct kmem_cache *cache, void *object, gfp_t flags) +{ + kasan_set_track(&kasan_get_alloc_meta(cache, object)->alloc_track, flags); +} + static void *__kasan_kmalloc(struct kmem_cache *cache, const void *object, size_t size, gfp_t flags, bool keep_tag) { @@ -350,7 +355,7 @@ static void *__kasan_kmalloc(struct kmem_cache *cache, const void *object, KASAN_KMALLOC_REDZONE); if (cache->flags & SLAB_KASAN) - kasan_set_track(&kasan_get_alloc_meta(cache, object)->alloc_track, flags); + set_alloc_info(cache, (void *)object, flags); return set_tag(object, tag); } -- GitLab From d56a9ef84bd0e1e8fba7a837ab12a4ec8476579f Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:02:42 -0800 Subject: [PATCH 1069/1894] kasan, arm64: unpoison stack only with CONFIG_KASAN_STACK There's a config option CONFIG_KASAN_STACK that has to be enabled for KASAN to use stack instrumentation and perform validity checks for stack variables. There's no need to unpoison stack when CONFIG_KASAN_STACK is not enabled. Only call kasan_unpoison_task_stack[_below]() when CONFIG_KASAN_STACK is enabled. Note, that CONFIG_KASAN_STACK is an option that is currently always defined when CONFIG_KASAN is enabled, and therefore has to be tested with #if instead of #ifdef. Link: https://lkml.kernel.org/r/d09dd3f8abb388da397fd11598c5edeaa83fe559.1606162397.git.andreyknvl@google.com Link: https://linux-review.googlesource.com/id/If8a891e9fe01ea543e00b576852685afec0887e3 Signed-off-by: Andrey Konovalov Reviewed-by: Marco Elver Acked-by: Catalin Marinas Reviewed-by: Dmitry Vyukov Tested-by: Vincenzo Frascino Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/kernel/sleep.S | 2 +- arch/x86/kernel/acpi/wakeup_64.S | 2 +- include/linux/kasan.h | 10 ++++++---- mm/kasan/common.c | 2 ++ 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index 4be7f7eed8750..6bdef7362c0eb 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -133,7 +133,7 @@ SYM_FUNC_START(_cpu_resume) */ bl cpu_do_resume -#ifdef CONFIG_KASAN +#if defined(CONFIG_KASAN) && CONFIG_KASAN_STACK mov x0, sp bl kasan_unpoison_task_stack_below #endif diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S index c8daa92f38dcd..5d3a0b8fd3798 100644 --- a/arch/x86/kernel/acpi/wakeup_64.S +++ b/arch/x86/kernel/acpi/wakeup_64.S @@ -112,7 +112,7 @@ SYM_FUNC_START(do_suspend_lowlevel) movq pt_regs_r14(%rax), %r14 movq pt_regs_r15(%rax), %r15 -#ifdef CONFIG_KASAN +#if defined(CONFIG_KASAN) && CONFIG_KASAN_STACK /* * The suspend path may have poisoned some areas deeper in the stack, * which we now need to unpoison. diff --git a/include/linux/kasan.h b/include/linux/kasan.h index d22ec4c9c1bd1..e638255ce9060 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -77,8 +77,6 @@ static inline void kasan_disable_current(void) {} void kasan_unpoison_range(const void *address, size_t size); -void kasan_unpoison_task_stack(struct task_struct *task); - void kasan_alloc_pages(struct page *page, unsigned int order); void kasan_free_pages(struct page *page, unsigned int order); @@ -123,8 +121,6 @@ void kasan_restore_multi_shot(bool enabled); static inline void kasan_unpoison_range(const void *address, size_t size) {} -static inline void kasan_unpoison_task_stack(struct task_struct *task) {} - static inline void kasan_alloc_pages(struct page *page, unsigned int order) {} static inline void kasan_free_pages(struct page *page, unsigned int order) {} @@ -176,6 +172,12 @@ static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; } #endif /* CONFIG_KASAN */ +#if defined(CONFIG_KASAN) && CONFIG_KASAN_STACK +void kasan_unpoison_task_stack(struct task_struct *task); +#else +static inline void kasan_unpoison_task_stack(struct task_struct *task) {} +#endif + #ifdef CONFIG_KASAN_GENERIC void kasan_cache_shrink(struct kmem_cache *cache); diff --git a/mm/kasan/common.c b/mm/kasan/common.c index 6c38fd0a9e5ce..2754ce0c83343 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -63,6 +63,7 @@ void kasan_unpoison_range(const void *address, size_t size) unpoison_range(address, size); } +#if CONFIG_KASAN_STACK static void __kasan_unpoison_stack(struct task_struct *task, const void *sp) { void *base = task_stack_page(task); @@ -89,6 +90,7 @@ asmlinkage void kasan_unpoison_task_stack_below(const void *watermark) unpoison_range(base, watermark - base); } +#endif /* CONFIG_KASAN_STACK */ void kasan_alloc_pages(struct page *page, unsigned int order) { -- GitLab From 38dd767daed1af5b5751441b95c4b28767a34fe3 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:02:45 -0800 Subject: [PATCH 1070/1894] kasan: allow VMAP_STACK for HW_TAGS mode Even though hardware tag-based mode currently doesn't support checking vmalloc allocations, it doesn't use shadow memory and works with VMAP_STACK as is. Change VMAP_STACK definition accordingly. Link: https://lkml.kernel.org/r/ecdb2a1658ebd88eb276dee2493518ac0e82de41.1606162397.git.andreyknvl@google.com Link: https://linux-review.googlesource.com/id/I3552cbc12321dec82cd7372676e9372a2eb452ac Signed-off-by: Andrey Konovalov Reviewed-by: Marco Elver Acked-by: Catalin Marinas Reviewed-by: Dmitry Vyukov Tested-by: Vincenzo Frascino Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/Kconfig | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index a08999dfcf162..78c6f05b10f91 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -976,16 +976,16 @@ config VMAP_STACK default y bool "Use a virtually-mapped stack" depends on HAVE_ARCH_VMAP_STACK - depends on !KASAN || KASAN_VMALLOC + depends on !KASAN || KASAN_HW_TAGS || KASAN_VMALLOC help Enable this if you want the use virtually-mapped kernel stacks with guard pages. This causes kernel stack overflows to be caught immediately rather than causing difficult-to-diagnose corruption. - To use this with KASAN, the architecture must support backing - virtual mappings with real shadow memory, and KASAN_VMALLOC must - be enabled. + To use this with software KASAN modes, the architecture must support + backing virtual mappings with real shadow memory, and KASAN_VMALLOC + must be enabled. config ARCH_OPTIONAL_KERNEL_RWX def_bool n -- GitLab From 77f57c983065d0569ee1b4af80f07224b439af57 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:02:49 -0800 Subject: [PATCH 1071/1894] kasan: remove __kasan_unpoison_stack There's no need for __kasan_unpoison_stack() helper, as it's only currently used in a single place. Removing it also removes unneeded arithmetic. No functional changes. Link: https://lkml.kernel.org/r/93e78948704a42ea92f6248ff8a725613d721161.1606162397.git.andreyknvl@google.com Link: https://linux-review.googlesource.com/id/Ie5ba549d445292fe629b4a96735e4034957bcc50 Signed-off-by: Andrey Konovalov Reviewed-by: Dmitry Vyukov Reviewed-by: Marco Elver Tested-by: Vincenzo Frascino Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/common.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/mm/kasan/common.c b/mm/kasan/common.c index 2754ce0c83343..b71dfe7c50595 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -64,18 +64,12 @@ void kasan_unpoison_range(const void *address, size_t size) } #if CONFIG_KASAN_STACK -static void __kasan_unpoison_stack(struct task_struct *task, const void *sp) -{ - void *base = task_stack_page(task); - size_t size = sp - base; - - unpoison_range(base, size); -} - /* Unpoison the entire stack for a task. */ void kasan_unpoison_task_stack(struct task_struct *task) { - __kasan_unpoison_stack(task, task_stack_page(task) + THREAD_SIZE); + void *base = task_stack_page(task); + + unpoison_range(base, THREAD_SIZE); } /* Unpoison the stack for the current task beyond a watermark sp value. */ -- GitLab From c0054c565ae598073d6c27762c7d4f7de49a45d9 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:02:52 -0800 Subject: [PATCH 1072/1894] kasan: inline kasan_reset_tag for tag-based modes Using kasan_reset_tag() currently results in a function call. As it's called quite often from the allocator code, this leads to a noticeable slowdown. Move it to include/linux/kasan.h and turn it into a static inline function. Also remove the now unneeded reset_tag() internal KASAN macro and use kasan_reset_tag() instead. Link: https://lkml.kernel.org/r/6940383a3a9dfb416134d338d8fac97a9ebb8686.1606162397.git.andreyknvl@google.com Link: https://linux-review.googlesource.com/id/I4d2061acfe91d480a75df00b07c22d8494ef14b5 Signed-off-by: Andrey Konovalov Reviewed-by: Marco Elver Reviewed-by: Dmitry Vyukov Tested-by: Vincenzo Frascino Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 5 ++++- mm/kasan/common.c | 6 +++--- mm/kasan/hw_tags.c | 9 ++------- mm/kasan/kasan.h | 4 ---- mm/kasan/report.c | 4 ++-- mm/kasan/report_hw_tags.c | 2 +- mm/kasan/report_sw_tags.c | 4 ++-- mm/kasan/shadow.c | 4 ++-- mm/kasan/sw_tags.c | 9 ++------- 9 files changed, 18 insertions(+), 29 deletions(-) diff --git a/include/linux/kasan.h b/include/linux/kasan.h index e638255ce9060..3bb72de94f90d 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -194,7 +194,10 @@ static inline void kasan_record_aux_stack(void *ptr) {} #if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS) -void *kasan_reset_tag(const void *addr); +static inline void *kasan_reset_tag(const void *addr) +{ + return (void *)arch_kasan_reset_tag(addr); +} bool kasan_report(unsigned long addr, size_t size, bool is_write, unsigned long ip); diff --git a/mm/kasan/common.c b/mm/kasan/common.c index b71dfe7c50595..780ec27459ab0 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -179,14 +179,14 @@ size_t kasan_metadata_size(struct kmem_cache *cache) struct kasan_alloc_meta *kasan_get_alloc_meta(struct kmem_cache *cache, const void *object) { - return (void *)reset_tag(object) + cache->kasan_info.alloc_meta_offset; + return kasan_reset_tag(object) + cache->kasan_info.alloc_meta_offset; } struct kasan_free_meta *kasan_get_free_meta(struct kmem_cache *cache, const void *object) { BUILD_BUG_ON(sizeof(struct kasan_free_meta) > 32); - return (void *)reset_tag(object) + cache->kasan_info.free_meta_offset; + return kasan_reset_tag(object) + cache->kasan_info.free_meta_offset; } void kasan_poison_slab(struct page *page) @@ -283,7 +283,7 @@ static bool __kasan_slab_free(struct kmem_cache *cache, void *object, tag = get_tag(object); tagged_object = object; - object = reset_tag(object); + object = kasan_reset_tag(object); if (unlikely(nearest_obj(cache, virt_to_head_page(object), object) != object)) { diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c index cb849c8da9789..227599e54e8eb 100644 --- a/mm/kasan/hw_tags.c +++ b/mm/kasan/hw_tags.c @@ -30,20 +30,15 @@ void __init kasan_init_hw_tags(void) pr_info("KernelAddressSanitizer initialized\n"); } -void *kasan_reset_tag(const void *addr) -{ - return reset_tag(addr); -} - void poison_range(const void *address, size_t size, u8 value) { - hw_set_mem_tag_range(reset_tag(address), + hw_set_mem_tag_range(kasan_reset_tag(address), round_up(size, KASAN_GRANULE_SIZE), value); } void unpoison_range(const void *address, size_t size) { - hw_set_mem_tag_range(reset_tag(address), + hw_set_mem_tag_range(kasan_reset_tag(address), round_up(size, KASAN_GRANULE_SIZE), get_tag(address)); } diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 0eab7e4cecb8b..5e8cd20803694 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -248,15 +248,11 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag) return addr; } #endif -#ifndef arch_kasan_reset_tag -#define arch_kasan_reset_tag(addr) ((void *)(addr)) -#endif #ifndef arch_kasan_get_tag #define arch_kasan_get_tag(addr) 0 #endif #define set_tag(addr, tag) ((void *)arch_kasan_set_tag((addr), (tag))) -#define reset_tag(addr) ((void *)arch_kasan_reset_tag(addr)) #define get_tag(addr) arch_kasan_get_tag(addr) #ifdef CONFIG_KASAN_HW_TAGS diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 85ce2cb2cd2b6..00c590efdaeab 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -328,7 +328,7 @@ void kasan_report_invalid_free(void *object, unsigned long ip) unsigned long flags; u8 tag = get_tag(object); - object = reset_tag(object); + object = kasan_reset_tag(object); #if IS_ENABLED(CONFIG_KUNIT) if (current->kunit_test) @@ -361,7 +361,7 @@ static void __kasan_report(unsigned long addr, size_t size, bool is_write, disable_trace_on_warning(); tagged_addr = (void *)addr; - untagged_addr = reset_tag(tagged_addr); + untagged_addr = kasan_reset_tag(tagged_addr); info.access_addr = tagged_addr; if (addr_has_metadata(untagged_addr)) diff --git a/mm/kasan/report_hw_tags.c b/mm/kasan/report_hw_tags.c index da543eb832cd6..57114f0e14d10 100644 --- a/mm/kasan/report_hw_tags.c +++ b/mm/kasan/report_hw_tags.c @@ -22,7 +22,7 @@ const char *get_bug_type(struct kasan_access_info *info) void *find_first_bad_addr(void *addr, size_t size) { - return reset_tag(addr); + return kasan_reset_tag(addr); } void metadata_fetch_row(char *buffer, void *row) diff --git a/mm/kasan/report_sw_tags.c b/mm/kasan/report_sw_tags.c index 317100fd95b91..7604b46239d40 100644 --- a/mm/kasan/report_sw_tags.c +++ b/mm/kasan/report_sw_tags.c @@ -41,7 +41,7 @@ const char *get_bug_type(struct kasan_access_info *info) int i; tag = get_tag(info->access_addr); - addr = reset_tag(info->access_addr); + addr = kasan_reset_tag(info->access_addr); page = kasan_addr_to_page(addr); if (page && PageSlab(page)) { cache = page->slab_cache; @@ -72,7 +72,7 @@ const char *get_bug_type(struct kasan_access_info *info) void *find_first_bad_addr(void *addr, size_t size) { u8 tag = get_tag(addr); - void *p = reset_tag(addr); + void *p = kasan_reset_tag(addr); void *end = p + size; while (p < end && tag == *(u8 *)kasan_mem_to_shadow(p)) diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c index ac6a5f57df33d..44a2b748f9d3c 100644 --- a/mm/kasan/shadow.c +++ b/mm/kasan/shadow.c @@ -81,7 +81,7 @@ void poison_range(const void *address, size_t size, u8 value) * some of the callers (e.g. kasan_poison_object_data) pass tagged * addresses to this function. */ - address = reset_tag(address); + address = kasan_reset_tag(address); shadow_start = kasan_mem_to_shadow(address); shadow_end = kasan_mem_to_shadow(address + size); @@ -98,7 +98,7 @@ void unpoison_range(const void *address, size_t size) * some of the callers (e.g. kasan_unpoison_object_data) pass tagged * addresses to this function. */ - address = reset_tag(address); + address = kasan_reset_tag(address); poison_range(address, size, tag); diff --git a/mm/kasan/sw_tags.c b/mm/kasan/sw_tags.c index 6d7648cc3b985..e17de2619bbfc 100644 --- a/mm/kasan/sw_tags.c +++ b/mm/kasan/sw_tags.c @@ -67,11 +67,6 @@ u8 random_tag(void) return (u8)(state % (KASAN_TAG_MAX + 1)); } -void *kasan_reset_tag(const void *addr) -{ - return reset_tag(addr); -} - bool check_memory_region(unsigned long addr, size_t size, bool write, unsigned long ret_ip) { @@ -107,7 +102,7 @@ bool check_memory_region(unsigned long addr, size_t size, bool write, if (tag == KASAN_TAG_KERNEL) return true; - untagged_addr = reset_tag((const void *)addr); + untagged_addr = kasan_reset_tag((const void *)addr); if (unlikely(untagged_addr < kasan_shadow_to_mem((void *)KASAN_SHADOW_START))) { return !kasan_report(addr, size, write, ret_ip); @@ -126,7 +121,7 @@ bool check_memory_region(unsigned long addr, size_t size, bool write, bool check_invalid_free(void *addr) { u8 tag = get_tag(addr); - u8 shadow_byte = READ_ONCE(*(u8 *)kasan_mem_to_shadow(reset_tag(addr))); + u8 shadow_byte = READ_ONCE(*(u8 *)kasan_mem_to_shadow(kasan_reset_tag(addr))); return (shadow_byte == KASAN_TAG_INVALID) || (tag != KASAN_TAG_KERNEL && tag != shadow_byte); -- GitLab From d8dd397120be14a80220c78dad2bbcb9b0768959 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:02:56 -0800 Subject: [PATCH 1073/1894] kasan: inline random_tag for HW_TAGS Using random_tag() currently results in a function call. Move its definition to mm/kasan/kasan.h and turn it into a static inline function for hardware tag-based mode to avoid uneeded function calls. Link: https://lkml.kernel.org/r/be438471690e351e1d792e6bb432e8c03ccb15d3.1606162397.git.andreyknvl@google.com Link: https://linux-review.googlesource.com/id/Iac5b2faf9a912900e16cca6834d621f5d4abf427 Signed-off-by: Andrey Konovalov Reviewed-by: Marco Elver Reviewed-by: Dmitry Vyukov Tested-by: Vincenzo Frascino Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/hw_tags.c | 5 ----- mm/kasan/kasan.h | 31 ++++++++++++++----------------- 2 files changed, 14 insertions(+), 22 deletions(-) diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c index 227599e54e8eb..09c8e9d2035d7 100644 --- a/mm/kasan/hw_tags.c +++ b/mm/kasan/hw_tags.c @@ -42,11 +42,6 @@ void unpoison_range(const void *address, size_t size) round_up(size, KASAN_GRANULE_SIZE), get_tag(address)); } -u8 random_tag(void) -{ - return hw_get_random_tag(); -} - bool check_invalid_free(void *addr) { u8 ptr_tag = get_tag(addr); diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 5e8cd20803694..7876a2547b7d1 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -190,6 +190,12 @@ static inline bool addr_has_metadata(const void *addr) #endif /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ +#if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS) +void print_tags(u8 addr_tag, const void *addr); +#else +static inline void print_tags(u8 addr_tag, const void *addr) { } +#endif + bool check_invalid_free(void *addr); void *find_first_bad_addr(void *addr, size_t size); @@ -225,23 +231,6 @@ static inline void quarantine_reduce(void) { } static inline void quarantine_remove_cache(struct kmem_cache *cache) { } #endif -#if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS) - -void print_tags(u8 addr_tag, const void *addr); - -u8 random_tag(void); - -#else - -static inline void print_tags(u8 addr_tag, const void *addr) { } - -static inline u8 random_tag(void) -{ - return 0; -} - -#endif - #ifndef arch_kasan_set_tag static inline const void *arch_kasan_set_tag(const void *addr, u8 tag) { @@ -281,6 +270,14 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag) #endif /* CONFIG_KASAN_HW_TAGS */ +#ifdef CONFIG_KASAN_SW_TAGS +u8 random_tag(void); +#elif defined(CONFIG_KASAN_HW_TAGS) +static inline u8 random_tag(void) { return hw_get_random_tag(); } +#else +static inline u8 random_tag(void) { return 0; } +#endif + /* * Exported functions for interfaces called from assembly or from generated * code. Declarations here to avoid warning about missing declarations. -- GitLab From bffe690708c8b4fdb8f0bff8ff22b347fc6c709a Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:02:59 -0800 Subject: [PATCH 1074/1894] kasan: open-code kasan_unpoison_slab There's the external annotation kasan_unpoison_slab() that is currently defined as static inline and uses kasan_unpoison_range(). Open-code this function in mempool.c. Otherwise with an upcoming change this function will result in an unnecessary function call. Link: https://lkml.kernel.org/r/131a6694a978a9a8b150187e539eecc8bcbf759b.1606162397.git.andreyknvl@google.com Link: https://linux-review.googlesource.com/id/Ia7c8b659f79209935cbaab3913bf7f082cc43a0e Signed-off-by: Andrey Konovalov Reviewed-by: Marco Elver Tested-by: Vincenzo Frascino Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 6 ------ mm/mempool.c | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 3bb72de94f90d..7350de3e9fe42 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -107,11 +107,6 @@ struct kasan_cache { int free_meta_offset; }; -size_t __ksize(const void *); -static inline void kasan_unpoison_slab(const void *ptr) -{ - kasan_unpoison_range(ptr, __ksize(ptr)); -} size_t kasan_metadata_size(struct kmem_cache *cache); bool kasan_save_enable_multi_shot(void); @@ -167,7 +162,6 @@ static inline bool kasan_slab_free(struct kmem_cache *s, void *object, return false; } -static inline void kasan_unpoison_slab(const void *ptr) { } static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; } #endif /* CONFIG_KASAN */ diff --git a/mm/mempool.c b/mm/mempool.c index f473cdddaff0f..583a9865b1818 100644 --- a/mm/mempool.c +++ b/mm/mempool.c @@ -112,7 +112,7 @@ static __always_inline void kasan_poison_element(mempool_t *pool, void *element) static void kasan_unpoison_element(mempool_t *pool, void *element) { if (pool->alloc == mempool_alloc_slab || pool->alloc == mempool_kmalloc) - kasan_unpoison_slab(element); + kasan_unpoison_range(element, __ksize(element)); else if (pool->alloc == mempool_alloc_pages) kasan_alloc_pages(element, (unsigned long)pool->pool_data); } -- GitLab From 57345fa68a2769e3bd2b6ca01794fba74e6fa938 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:03:03 -0800 Subject: [PATCH 1075/1894] kasan: inline (un)poison_range and check_invalid_free Using (un)poison_range() or check_invalid_free() currently results in function calls. Move their definitions to mm/kasan/kasan.h and turn them into static inline functions for hardware tag-based mode to avoid unneeded function calls. Link: https://lkml.kernel.org/r/7007955b69eb31b5376a7dc1e0f4ac49138504f2.1606162397.git.andreyknvl@google.com Link: https://linux-review.googlesource.com/id/Ia9d8191024a12d1374675b3d27197f10193f50bb Signed-off-by: Andrey Konovalov Reviewed-by: Marco Elver Tested-by: Vincenzo Frascino Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/hw_tags.c | 21 --------------------- mm/kasan/kasan.h | 36 +++++++++++++++++++++++++++++++----- 2 files changed, 31 insertions(+), 26 deletions(-) diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c index 09c8e9d2035d7..863fed4edd3ff 100644 --- a/mm/kasan/hw_tags.c +++ b/mm/kasan/hw_tags.c @@ -30,27 +30,6 @@ void __init kasan_init_hw_tags(void) pr_info("KernelAddressSanitizer initialized\n"); } -void poison_range(const void *address, size_t size, u8 value) -{ - hw_set_mem_tag_range(kasan_reset_tag(address), - round_up(size, KASAN_GRANULE_SIZE), value); -} - -void unpoison_range(const void *address, size_t size) -{ - hw_set_mem_tag_range(kasan_reset_tag(address), - round_up(size, KASAN_GRANULE_SIZE), get_tag(address)); -} - -bool check_invalid_free(void *addr) -{ - u8 ptr_tag = get_tag(addr); - u8 mem_tag = hw_get_mem_tag(addr); - - return (mem_tag == KASAN_TAG_INVALID) || - (ptr_tag != KASAN_TAG_KERNEL && ptr_tag != mem_tag); -} - void kasan_set_free_info(struct kmem_cache *cache, void *object, u8 tag) { diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 7876a2547b7d1..6e811d0f6242b 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -154,9 +154,6 @@ struct kasan_alloc_meta *kasan_get_alloc_meta(struct kmem_cache *cache, struct kasan_free_meta *kasan_get_free_meta(struct kmem_cache *cache, const void *object); -void poison_range(const void *address, size_t size, u8 value); -void unpoison_range(const void *address, size_t size); - #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) static inline const void *kasan_shadow_to_mem(const void *shadow_addr) @@ -196,8 +193,6 @@ void print_tags(u8 addr_tag, const void *addr); static inline void print_tags(u8 addr_tag, const void *addr) { } #endif -bool check_invalid_free(void *addr); - void *find_first_bad_addr(void *addr, size_t size); const char *get_bug_type(struct kasan_access_info *info); void metadata_fetch_row(char *buffer, void *row); @@ -278,6 +273,37 @@ static inline u8 random_tag(void) { return hw_get_random_tag(); } static inline u8 random_tag(void) { return 0; } #endif +#ifdef CONFIG_KASAN_HW_TAGS + +static inline void poison_range(const void *address, size_t size, u8 value) +{ + hw_set_mem_tag_range(kasan_reset_tag(address), + round_up(size, KASAN_GRANULE_SIZE), value); +} + +static inline void unpoison_range(const void *address, size_t size) +{ + hw_set_mem_tag_range(kasan_reset_tag(address), + round_up(size, KASAN_GRANULE_SIZE), get_tag(address)); +} + +static inline bool check_invalid_free(void *addr) +{ + u8 ptr_tag = get_tag(addr); + u8 mem_tag = hw_get_mem_tag(addr); + + return (mem_tag == KASAN_TAG_INVALID) || + (ptr_tag != KASAN_TAG_KERNEL && ptr_tag != mem_tag); +} + +#else /* CONFIG_KASAN_HW_TAGS */ + +void poison_range(const void *address, size_t size, u8 value); +void unpoison_range(const void *address, size_t size); +bool check_invalid_free(void *addr); + +#endif /* CONFIG_KASAN_HW_TAGS */ + /* * Exported functions for interfaces called from assembly or from generated * code. Declarations here to avoid warning about missing declarations. -- GitLab From 8028caaca7a8107a52bb28fd92dea35bebd7ae8e Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:03:06 -0800 Subject: [PATCH 1076/1894] kasan: add and integrate kasan boot parameters Hardware tag-based KASAN mode is intended to eventually be used in production as a security mitigation. Therefore there's a need for finer control over KASAN features and for an existence of a kill switch. This change adds a few boot parameters for hardware tag-based KASAN that allow to disable or otherwise control particular KASAN features. The features that can be controlled are: 1. Whether KASAN is enabled at all. 2. Whether KASAN collects and saves alloc/free stacks. 3. Whether KASAN panics on a detected bug or not. With this change a new boot parameter kasan.mode allows to choose one of three main modes: - kasan.mode=off - KASAN is disabled, no tag checks are performed - kasan.mode=prod - only essential production features are enabled - kasan.mode=full - all KASAN features are enabled The chosen mode provides default control values for the features mentioned above. However it's also possible to override the default values by providing: - kasan.stacktrace=off/on - enable alloc/free stack collection (default: on for mode=full, otherwise off) - kasan.fault=report/panic - only report tag fault or also panic (default: report) If kasan.mode parameter is not provided, it defaults to full when CONFIG_DEBUG_KERNEL is enabled, and to prod otherwise. It is essential that switching between these modes doesn't require rebuilding the kernel with different configs, as this is required by the Android GKI (Generic Kernel Image) initiative [1]. [1] https://source.android.com/devices/architecture/kernel/generic-kernel-image [andreyknvl@google.com: don't use read-only static keys] Link: https://lkml.kernel.org/r/f2ded589eba1597f7360a972226083de9afd86e2.1607537948.git.andreyknvl@google.com Link: https://lkml.kernel.org/r/cb093613879d8d8841173f090133eddeb4c35f1f.1606162397.git.andreyknvl@google.com Link: https://linux-review.googlesource.com/id/If7d37003875b2ed3e0935702c8015c223d6416a4 Signed-off-by: Andrey Konovalov Reviewed-by: Marco Elver Reviewed-by: Dmitry Vyukov Tested-by: Vincenzo Frascino Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/common.c | 22 +++++-- mm/kasan/hw_tags.c | 151 +++++++++++++++++++++++++++++++++++++++++++++ mm/kasan/kasan.h | 16 +++++ mm/kasan/report.c | 14 ++++- 4 files changed, 196 insertions(+), 7 deletions(-) diff --git a/mm/kasan/common.c b/mm/kasan/common.c index 780ec27459ab0..219c2979bd3e6 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -134,6 +134,11 @@ void kasan_cache_create(struct kmem_cache *cache, unsigned int *size, unsigned int redzone_size; int redzone_adjust; + if (!kasan_stack_collection_enabled()) { + *flags |= SLAB_KASAN; + return; + } + /* Add alloc meta. */ cache->kasan_info.alloc_meta_offset = *size; *size += sizeof(struct kasan_alloc_meta); @@ -170,6 +175,8 @@ void kasan_cache_create(struct kmem_cache *cache, unsigned int *size, size_t kasan_metadata_size(struct kmem_cache *cache) { + if (!kasan_stack_collection_enabled()) + return 0; return (cache->kasan_info.alloc_meta_offset ? sizeof(struct kasan_alloc_meta) : 0) + (cache->kasan_info.free_meta_offset ? @@ -262,11 +269,13 @@ void * __must_check kasan_init_slab_obj(struct kmem_cache *cache, { struct kasan_alloc_meta *alloc_meta; - if (!(cache->flags & SLAB_KASAN)) - return (void *)object; + if (kasan_stack_collection_enabled()) { + if (!(cache->flags & SLAB_KASAN)) + return (void *)object; - alloc_meta = kasan_get_alloc_meta(cache, object); - __memset(alloc_meta, 0, sizeof(*alloc_meta)); + alloc_meta = kasan_get_alloc_meta(cache, object); + __memset(alloc_meta, 0, sizeof(*alloc_meta)); + } if (IS_ENABLED(CONFIG_KASAN_SW_TAGS) || IS_ENABLED(CONFIG_KASAN_HW_TAGS)) object = set_tag(object, assign_tag(cache, object, true, false)); @@ -303,6 +312,9 @@ static bool __kasan_slab_free(struct kmem_cache *cache, void *object, rounded_up_size = round_up(cache->object_size, KASAN_GRANULE_SIZE); poison_range(object, rounded_up_size, KASAN_KMALLOC_FREE); + if (!kasan_stack_collection_enabled()) + return false; + if ((IS_ENABLED(CONFIG_KASAN_GENERIC) && !quarantine) || unlikely(!(cache->flags & SLAB_KASAN))) return false; @@ -350,7 +362,7 @@ static void *__kasan_kmalloc(struct kmem_cache *cache, const void *object, poison_range((void *)redzone_start, redzone_end - redzone_start, KASAN_KMALLOC_REDZONE); - if (cache->flags & SLAB_KASAN) + if (kasan_stack_collection_enabled() && (cache->flags & SLAB_KASAN)) set_alloc_info(cache, (void *)object, flags); return set_tag(object, tag); diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c index 863fed4edd3ff..4ccc3c873c82f 100644 --- a/mm/kasan/hw_tags.c +++ b/mm/kasan/hw_tags.c @@ -8,18 +8,115 @@ #define pr_fmt(fmt) "kasan: " fmt +#include #include #include #include #include +#include #include #include #include "kasan.h" +enum kasan_arg_mode { + KASAN_ARG_MODE_DEFAULT, + KASAN_ARG_MODE_OFF, + KASAN_ARG_MODE_PROD, + KASAN_ARG_MODE_FULL, +}; + +enum kasan_arg_stacktrace { + KASAN_ARG_STACKTRACE_DEFAULT, + KASAN_ARG_STACKTRACE_OFF, + KASAN_ARG_STACKTRACE_ON, +}; + +enum kasan_arg_fault { + KASAN_ARG_FAULT_DEFAULT, + KASAN_ARG_FAULT_REPORT, + KASAN_ARG_FAULT_PANIC, +}; + +static enum kasan_arg_mode kasan_arg_mode __ro_after_init; +static enum kasan_arg_stacktrace kasan_arg_stacktrace __ro_after_init; +static enum kasan_arg_fault kasan_arg_fault __ro_after_init; + +/* Whether KASAN is enabled at all. */ +DEFINE_STATIC_KEY_FALSE(kasan_flag_enabled); +EXPORT_SYMBOL(kasan_flag_enabled); + +/* Whether to collect alloc/free stack traces. */ +DEFINE_STATIC_KEY_FALSE(kasan_flag_stacktrace); + +/* Whether panic or disable tag checking on fault. */ +bool kasan_flag_panic __ro_after_init; + +/* kasan.mode=off/prod/full */ +static int __init early_kasan_mode(char *arg) +{ + if (!arg) + return -EINVAL; + + if (!strcmp(arg, "off")) + kasan_arg_mode = KASAN_ARG_MODE_OFF; + else if (!strcmp(arg, "prod")) + kasan_arg_mode = KASAN_ARG_MODE_PROD; + else if (!strcmp(arg, "full")) + kasan_arg_mode = KASAN_ARG_MODE_FULL; + else + return -EINVAL; + + return 0; +} +early_param("kasan.mode", early_kasan_mode); + +/* kasan.stack=off/on */ +static int __init early_kasan_flag_stacktrace(char *arg) +{ + if (!arg) + return -EINVAL; + + if (!strcmp(arg, "off")) + kasan_arg_stacktrace = KASAN_ARG_STACKTRACE_OFF; + else if (!strcmp(arg, "on")) + kasan_arg_stacktrace = KASAN_ARG_STACKTRACE_ON; + else + return -EINVAL; + + return 0; +} +early_param("kasan.stacktrace", early_kasan_flag_stacktrace); + +/* kasan.fault=report/panic */ +static int __init early_kasan_fault(char *arg) +{ + if (!arg) + return -EINVAL; + + if (!strcmp(arg, "report")) + kasan_arg_fault = KASAN_ARG_FAULT_REPORT; + else if (!strcmp(arg, "panic")) + kasan_arg_fault = KASAN_ARG_FAULT_PANIC; + else + return -EINVAL; + + return 0; +} +early_param("kasan.fault", early_kasan_fault); + /* kasan_init_hw_tags_cpu() is called for each CPU. */ void kasan_init_hw_tags_cpu(void) { + /* + * There's no need to check that the hardware is MTE-capable here, + * as this function is only called for MTE-capable hardware. + */ + + /* If KASAN is disabled, do nothing. */ + if (kasan_arg_mode == KASAN_ARG_MODE_OFF) + return; + hw_init_tags(KASAN_TAG_MAX); hw_enable_tagging(); } @@ -27,6 +124,60 @@ void kasan_init_hw_tags_cpu(void) /* kasan_init_hw_tags() is called once on boot CPU. */ void __init kasan_init_hw_tags(void) { + /* If hardware doesn't support MTE, do nothing. */ + if (!system_supports_mte()) + return; + + /* Choose KASAN mode if kasan boot parameter is not provided. */ + if (kasan_arg_mode == KASAN_ARG_MODE_DEFAULT) { + if (IS_ENABLED(CONFIG_DEBUG_KERNEL)) + kasan_arg_mode = KASAN_ARG_MODE_FULL; + else + kasan_arg_mode = KASAN_ARG_MODE_PROD; + } + + /* Preset parameter values based on the mode. */ + switch (kasan_arg_mode) { + case KASAN_ARG_MODE_DEFAULT: + /* Shouldn't happen as per the check above. */ + WARN_ON(1); + return; + case KASAN_ARG_MODE_OFF: + /* If KASAN is disabled, do nothing. */ + return; + case KASAN_ARG_MODE_PROD: + static_branch_enable(&kasan_flag_enabled); + break; + case KASAN_ARG_MODE_FULL: + static_branch_enable(&kasan_flag_enabled); + static_branch_enable(&kasan_flag_stacktrace); + break; + } + + /* Now, optionally override the presets. */ + + switch (kasan_arg_stacktrace) { + case KASAN_ARG_STACKTRACE_DEFAULT: + break; + case KASAN_ARG_STACKTRACE_OFF: + static_branch_disable(&kasan_flag_stacktrace); + break; + case KASAN_ARG_STACKTRACE_ON: + static_branch_enable(&kasan_flag_stacktrace); + break; + } + + switch (kasan_arg_fault) { + case KASAN_ARG_FAULT_DEFAULT: + break; + case KASAN_ARG_FAULT_REPORT: + kasan_flag_panic = false; + break; + case KASAN_ARG_FAULT_PANIC: + kasan_flag_panic = true; + break; + } + pr_info("KernelAddressSanitizer initialized\n"); } diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 6e811d0f6242b..ca43de9811de5 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -5,6 +5,22 @@ #include #include +#ifdef CONFIG_KASAN_HW_TAGS +#include +DECLARE_STATIC_KEY_FALSE(kasan_flag_stacktrace); +static inline bool kasan_stack_collection_enabled(void) +{ + return static_branch_unlikely(&kasan_flag_stacktrace); +} +#else +static inline bool kasan_stack_collection_enabled(void) +{ + return true; +} +#endif + +extern bool kasan_flag_panic __ro_after_init; + #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) #define KASAN_GRANULE_SIZE (1UL << KASAN_SHADOW_SCALE_SHIFT) #else diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 00c590efdaeab..ed9555a814df0 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -99,6 +99,10 @@ static void end_report(unsigned long *flags) panic_on_warn = 0; panic("panic_on_warn set ...\n"); } +#ifdef CONFIG_KASAN_HW_TAGS + if (kasan_flag_panic) + panic("kasan.fault=panic set ...\n"); +#endif kasan_enable_current(); } @@ -161,8 +165,8 @@ static void describe_object_addr(struct kmem_cache *cache, void *object, (void *)(object_addr + cache->object_size)); } -static void describe_object(struct kmem_cache *cache, void *object, - const void *addr, u8 tag) +static void describe_object_stacks(struct kmem_cache *cache, void *object, + const void *addr, u8 tag) { struct kasan_alloc_meta *alloc_meta = kasan_get_alloc_meta(cache, object); @@ -190,7 +194,13 @@ static void describe_object(struct kmem_cache *cache, void *object, } #endif } +} +static void describe_object(struct kmem_cache *cache, void *object, + const void *addr, u8 tag) +{ + if (kasan_stack_collection_enabled()) + describe_object_stacks(cache, object, addr, tag); describe_object_addr(cache, object, addr); } -- GitLab From 34303244f2615add92076a4bf2d4f39323bde4f2 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:03:10 -0800 Subject: [PATCH 1077/1894] kasan, mm: check kasan_enabled in annotations Declare the kasan_enabled static key in include/linux/kasan.h and in include/linux/mm.h and check it in all kasan annotations. This allows to avoid any slowdown caused by function calls when kasan_enabled is disabled. Link: https://lkml.kernel.org/r/9f90e3c0aa840dbb4833367c2335193299f69023.1606162397.git.andreyknvl@google.com Link: https://linux-review.googlesource.com/id/I2589451d3c96c97abbcbf714baabe6161c6f153e Co-developed-by: Vincenzo Frascino Signed-off-by: Vincenzo Frascino Signed-off-by: Andrey Konovalov Reviewed-by: Marco Elver Reviewed-by: Dmitry Vyukov Tested-by: Vincenzo Frascino Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 213 ++++++++++++++++++++++++++++++++---------- include/linux/mm.h | 22 +++-- mm/kasan/common.c | 56 +++++------ 3 files changed, 210 insertions(+), 81 deletions(-) diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 7350de3e9fe42..9176849c49349 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -2,6 +2,7 @@ #ifndef _LINUX_KASAN_H #define _LINUX_KASAN_H +#include #include struct kmem_cache; @@ -75,54 +76,176 @@ static inline void kasan_disable_current(void) {} #ifdef CONFIG_KASAN -void kasan_unpoison_range(const void *address, size_t size); +struct kasan_cache { + int alloc_meta_offset; + int free_meta_offset; +}; -void kasan_alloc_pages(struct page *page, unsigned int order); -void kasan_free_pages(struct page *page, unsigned int order); +#ifdef CONFIG_KASAN_HW_TAGS +DECLARE_STATIC_KEY_FALSE(kasan_flag_enabled); +static __always_inline bool kasan_enabled(void) +{ + return static_branch_likely(&kasan_flag_enabled); +} +#else +static inline bool kasan_enabled(void) +{ + return true; +} +#endif -void kasan_cache_create(struct kmem_cache *cache, unsigned int *size, - slab_flags_t *flags); +void __kasan_unpoison_range(const void *addr, size_t size); +static __always_inline void kasan_unpoison_range(const void *addr, size_t size) +{ + if (kasan_enabled()) + __kasan_unpoison_range(addr, size); +} -void kasan_poison_slab(struct page *page); -void kasan_unpoison_object_data(struct kmem_cache *cache, void *object); -void kasan_poison_object_data(struct kmem_cache *cache, void *object); -void * __must_check kasan_init_slab_obj(struct kmem_cache *cache, - const void *object); +void __kasan_alloc_pages(struct page *page, unsigned int order); +static __always_inline void kasan_alloc_pages(struct page *page, + unsigned int order) +{ + if (kasan_enabled()) + __kasan_alloc_pages(page, order); +} -void * __must_check kasan_kmalloc_large(const void *ptr, size_t size, - gfp_t flags); -void kasan_kfree_large(void *ptr, unsigned long ip); -void kasan_poison_kfree(void *ptr, unsigned long ip); -void * __must_check kasan_kmalloc(struct kmem_cache *s, const void *object, - size_t size, gfp_t flags); -void * __must_check kasan_krealloc(const void *object, size_t new_size, - gfp_t flags); +void __kasan_free_pages(struct page *page, unsigned int order); +static __always_inline void kasan_free_pages(struct page *page, + unsigned int order) +{ + if (kasan_enabled()) + __kasan_free_pages(page, order); +} -void * __must_check kasan_slab_alloc(struct kmem_cache *s, void *object, - gfp_t flags); -bool kasan_slab_free(struct kmem_cache *s, void *object, unsigned long ip); +void __kasan_cache_create(struct kmem_cache *cache, unsigned int *size, + slab_flags_t *flags); +static __always_inline void kasan_cache_create(struct kmem_cache *cache, + unsigned int *size, slab_flags_t *flags) +{ + if (kasan_enabled()) + __kasan_cache_create(cache, size, flags); +} -struct kasan_cache { - int alloc_meta_offset; - int free_meta_offset; -}; +size_t __kasan_metadata_size(struct kmem_cache *cache); +static __always_inline size_t kasan_metadata_size(struct kmem_cache *cache) +{ + if (kasan_enabled()) + return __kasan_metadata_size(cache); + return 0; +} + +void __kasan_poison_slab(struct page *page); +static __always_inline void kasan_poison_slab(struct page *page) +{ + if (kasan_enabled()) + __kasan_poison_slab(page); +} + +void __kasan_unpoison_object_data(struct kmem_cache *cache, void *object); +static __always_inline void kasan_unpoison_object_data(struct kmem_cache *cache, + void *object) +{ + if (kasan_enabled()) + __kasan_unpoison_object_data(cache, object); +} + +void __kasan_poison_object_data(struct kmem_cache *cache, void *object); +static __always_inline void kasan_poison_object_data(struct kmem_cache *cache, + void *object) +{ + if (kasan_enabled()) + __kasan_poison_object_data(cache, object); +} + +void * __must_check __kasan_init_slab_obj(struct kmem_cache *cache, + const void *object); +static __always_inline void * __must_check kasan_init_slab_obj( + struct kmem_cache *cache, const void *object) +{ + if (kasan_enabled()) + return __kasan_init_slab_obj(cache, object); + return (void *)object; +} + +bool __kasan_slab_free(struct kmem_cache *s, void *object, unsigned long ip); +static __always_inline bool kasan_slab_free(struct kmem_cache *s, void *object, + unsigned long ip) +{ + if (kasan_enabled()) + return __kasan_slab_free(s, object, ip); + return false; +} + +void * __must_check __kasan_slab_alloc(struct kmem_cache *s, + void *object, gfp_t flags); +static __always_inline void * __must_check kasan_slab_alloc( + struct kmem_cache *s, void *object, gfp_t flags) +{ + if (kasan_enabled()) + return __kasan_slab_alloc(s, object, flags); + return object; +} + +void * __must_check __kasan_kmalloc(struct kmem_cache *s, const void *object, + size_t size, gfp_t flags); +static __always_inline void * __must_check kasan_kmalloc(struct kmem_cache *s, + const void *object, size_t size, gfp_t flags) +{ + if (kasan_enabled()) + return __kasan_kmalloc(s, object, size, flags); + return (void *)object; +} -size_t kasan_metadata_size(struct kmem_cache *cache); +void * __must_check __kasan_kmalloc_large(const void *ptr, + size_t size, gfp_t flags); +static __always_inline void * __must_check kasan_kmalloc_large(const void *ptr, + size_t size, gfp_t flags) +{ + if (kasan_enabled()) + return __kasan_kmalloc_large(ptr, size, flags); + return (void *)ptr; +} + +void * __must_check __kasan_krealloc(const void *object, + size_t new_size, gfp_t flags); +static __always_inline void * __must_check kasan_krealloc(const void *object, + size_t new_size, gfp_t flags) +{ + if (kasan_enabled()) + return __kasan_krealloc(object, new_size, flags); + return (void *)object; +} + +void __kasan_poison_kfree(void *ptr, unsigned long ip); +static __always_inline void kasan_poison_kfree(void *ptr, unsigned long ip) +{ + if (kasan_enabled()) + __kasan_poison_kfree(ptr, ip); +} + +void __kasan_kfree_large(void *ptr, unsigned long ip); +static __always_inline void kasan_kfree_large(void *ptr, unsigned long ip) +{ + if (kasan_enabled()) + __kasan_kfree_large(ptr, ip); +} bool kasan_save_enable_multi_shot(void); void kasan_restore_multi_shot(bool enabled); #else /* CONFIG_KASAN */ +static inline bool kasan_enabled(void) +{ + return false; +} static inline void kasan_unpoison_range(const void *address, size_t size) {} - static inline void kasan_alloc_pages(struct page *page, unsigned int order) {} static inline void kasan_free_pages(struct page *page, unsigned int order) {} - static inline void kasan_cache_create(struct kmem_cache *cache, unsigned int *size, slab_flags_t *flags) {} - +static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; } static inline void kasan_poison_slab(struct page *page) {} static inline void kasan_unpoison_object_data(struct kmem_cache *cache, void *object) {} @@ -133,36 +256,32 @@ static inline void *kasan_init_slab_obj(struct kmem_cache *cache, { return (void *)object; } - -static inline void *kasan_kmalloc_large(void *ptr, size_t size, gfp_t flags) +static inline bool kasan_slab_free(struct kmem_cache *s, void *object, + unsigned long ip) { - return ptr; + return false; +} +static inline void *kasan_slab_alloc(struct kmem_cache *s, void *object, + gfp_t flags) +{ + return object; } -static inline void kasan_kfree_large(void *ptr, unsigned long ip) {} -static inline void kasan_poison_kfree(void *ptr, unsigned long ip) {} static inline void *kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size, gfp_t flags) { return (void *)object; } +static inline void *kasan_kmalloc_large(const void *ptr, size_t size, gfp_t flags) +{ + return (void *)ptr; +} static inline void *kasan_krealloc(const void *object, size_t new_size, gfp_t flags) { return (void *)object; } - -static inline void *kasan_slab_alloc(struct kmem_cache *s, void *object, - gfp_t flags) -{ - return object; -} -static inline bool kasan_slab_free(struct kmem_cache *s, void *object, - unsigned long ip) -{ - return false; -} - -static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; } +static inline void kasan_poison_kfree(void *ptr, unsigned long ip) {} +static inline void kasan_kfree_large(void *ptr, unsigned long ip) {} #endif /* CONFIG_KASAN */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 024ec0a00c72c..5299b90a6c403 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -31,6 +31,7 @@ #include #include #include +#include struct mempolicy; struct anon_vma; @@ -1422,22 +1423,30 @@ static inline bool cpupid_match_pid(struct task_struct *task, int cpupid) #endif /* CONFIG_NUMA_BALANCING */ #if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS) + static inline u8 page_kasan_tag(const struct page *page) { - return (page->flags >> KASAN_TAG_PGSHIFT) & KASAN_TAG_MASK; + if (kasan_enabled()) + return (page->flags >> KASAN_TAG_PGSHIFT) & KASAN_TAG_MASK; + return 0xff; } static inline void page_kasan_tag_set(struct page *page, u8 tag) { - page->flags &= ~(KASAN_TAG_MASK << KASAN_TAG_PGSHIFT); - page->flags |= (tag & KASAN_TAG_MASK) << KASAN_TAG_PGSHIFT; + if (kasan_enabled()) { + page->flags &= ~(KASAN_TAG_MASK << KASAN_TAG_PGSHIFT); + page->flags |= (tag & KASAN_TAG_MASK) << KASAN_TAG_PGSHIFT; + } } static inline void page_kasan_tag_reset(struct page *page) { - page_kasan_tag_set(page, 0xff); + if (kasan_enabled()) + page_kasan_tag_set(page, 0xff); } -#else + +#else /* CONFIG_KASAN_SW_TAGS || CONFIG_KASAN_HW_TAGS */ + static inline u8 page_kasan_tag(const struct page *page) { return 0xff; @@ -1445,7 +1454,8 @@ static inline u8 page_kasan_tag(const struct page *page) static inline void page_kasan_tag_set(struct page *page, u8 tag) { } static inline void page_kasan_tag_reset(struct page *page) { } -#endif + +#endif /* CONFIG_KASAN_SW_TAGS || CONFIG_KASAN_HW_TAGS */ static inline struct zone *page_zone(const struct page *page) { diff --git a/mm/kasan/common.c b/mm/kasan/common.c index 219c2979bd3e6..ae0130cf9de3d 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -58,7 +58,7 @@ void kasan_disable_current(void) } #endif /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ -void kasan_unpoison_range(const void *address, size_t size) +void __kasan_unpoison_range(const void *address, size_t size) { unpoison_range(address, size); } @@ -86,7 +86,7 @@ asmlinkage void kasan_unpoison_task_stack_below(const void *watermark) } #endif /* CONFIG_KASAN_STACK */ -void kasan_alloc_pages(struct page *page, unsigned int order) +void __kasan_alloc_pages(struct page *page, unsigned int order) { u8 tag; unsigned long i; @@ -100,7 +100,7 @@ void kasan_alloc_pages(struct page *page, unsigned int order) unpoison_range(page_address(page), PAGE_SIZE << order); } -void kasan_free_pages(struct page *page, unsigned int order) +void __kasan_free_pages(struct page *page, unsigned int order) { if (likely(!PageHighMem(page))) poison_range(page_address(page), @@ -127,8 +127,8 @@ static inline unsigned int optimal_redzone(unsigned int object_size) object_size <= (1 << 16) - 1024 ? 1024 : 2048; } -void kasan_cache_create(struct kmem_cache *cache, unsigned int *size, - slab_flags_t *flags) +void __kasan_cache_create(struct kmem_cache *cache, unsigned int *size, + slab_flags_t *flags) { unsigned int orig_size = *size; unsigned int redzone_size; @@ -173,7 +173,7 @@ void kasan_cache_create(struct kmem_cache *cache, unsigned int *size, *flags |= SLAB_KASAN; } -size_t kasan_metadata_size(struct kmem_cache *cache) +size_t __kasan_metadata_size(struct kmem_cache *cache) { if (!kasan_stack_collection_enabled()) return 0; @@ -196,7 +196,7 @@ struct kasan_free_meta *kasan_get_free_meta(struct kmem_cache *cache, return kasan_reset_tag(object) + cache->kasan_info.free_meta_offset; } -void kasan_poison_slab(struct page *page) +void __kasan_poison_slab(struct page *page) { unsigned long i; @@ -206,12 +206,12 @@ void kasan_poison_slab(struct page *page) KASAN_KMALLOC_REDZONE); } -void kasan_unpoison_object_data(struct kmem_cache *cache, void *object) +void __kasan_unpoison_object_data(struct kmem_cache *cache, void *object) { unpoison_range(object, cache->object_size); } -void kasan_poison_object_data(struct kmem_cache *cache, void *object) +void __kasan_poison_object_data(struct kmem_cache *cache, void *object) { poison_range(object, round_up(cache->object_size, KASAN_GRANULE_SIZE), @@ -264,7 +264,7 @@ static u8 assign_tag(struct kmem_cache *cache, const void *object, #endif } -void * __must_check kasan_init_slab_obj(struct kmem_cache *cache, +void * __must_check __kasan_init_slab_obj(struct kmem_cache *cache, const void *object) { struct kasan_alloc_meta *alloc_meta; @@ -283,7 +283,7 @@ void * __must_check kasan_init_slab_obj(struct kmem_cache *cache, return (void *)object; } -static bool __kasan_slab_free(struct kmem_cache *cache, void *object, +static bool ____kasan_slab_free(struct kmem_cache *cache, void *object, unsigned long ip, bool quarantine) { u8 tag; @@ -326,9 +326,9 @@ static bool __kasan_slab_free(struct kmem_cache *cache, void *object, return IS_ENABLED(CONFIG_KASAN_GENERIC); } -bool kasan_slab_free(struct kmem_cache *cache, void *object, unsigned long ip) +bool __kasan_slab_free(struct kmem_cache *cache, void *object, unsigned long ip) { - return __kasan_slab_free(cache, object, ip, true); + return ____kasan_slab_free(cache, object, ip, true); } static void set_alloc_info(struct kmem_cache *cache, void *object, gfp_t flags) @@ -336,7 +336,7 @@ static void set_alloc_info(struct kmem_cache *cache, void *object, gfp_t flags) kasan_set_track(&kasan_get_alloc_meta(cache, object)->alloc_track, flags); } -static void *__kasan_kmalloc(struct kmem_cache *cache, const void *object, +static void *____kasan_kmalloc(struct kmem_cache *cache, const void *object, size_t size, gfp_t flags, bool keep_tag) { unsigned long redzone_start; @@ -368,20 +368,20 @@ static void *__kasan_kmalloc(struct kmem_cache *cache, const void *object, return set_tag(object, tag); } -void * __must_check kasan_slab_alloc(struct kmem_cache *cache, void *object, - gfp_t flags) +void * __must_check __kasan_slab_alloc(struct kmem_cache *cache, + void *object, gfp_t flags) { - return __kasan_kmalloc(cache, object, cache->object_size, flags, false); + return ____kasan_kmalloc(cache, object, cache->object_size, flags, false); } -void * __must_check kasan_kmalloc(struct kmem_cache *cache, const void *object, - size_t size, gfp_t flags) +void * __must_check __kasan_kmalloc(struct kmem_cache *cache, const void *object, + size_t size, gfp_t flags) { - return __kasan_kmalloc(cache, object, size, flags, true); + return ____kasan_kmalloc(cache, object, size, flags, true); } -EXPORT_SYMBOL(kasan_kmalloc); +EXPORT_SYMBOL(__kasan_kmalloc); -void * __must_check kasan_kmalloc_large(const void *ptr, size_t size, +void * __must_check __kasan_kmalloc_large(const void *ptr, size_t size, gfp_t flags) { struct page *page; @@ -406,7 +406,7 @@ void * __must_check kasan_kmalloc_large(const void *ptr, size_t size, return (void *)ptr; } -void * __must_check kasan_krealloc(const void *object, size_t size, gfp_t flags) +void * __must_check __kasan_krealloc(const void *object, size_t size, gfp_t flags) { struct page *page; @@ -416,13 +416,13 @@ void * __must_check kasan_krealloc(const void *object, size_t size, gfp_t flags) page = virt_to_head_page(object); if (unlikely(!PageSlab(page))) - return kasan_kmalloc_large(object, size, flags); + return __kasan_kmalloc_large(object, size, flags); else - return __kasan_kmalloc(page->slab_cache, object, size, + return ____kasan_kmalloc(page->slab_cache, object, size, flags, true); } -void kasan_poison_kfree(void *ptr, unsigned long ip) +void __kasan_poison_kfree(void *ptr, unsigned long ip) { struct page *page; @@ -435,11 +435,11 @@ void kasan_poison_kfree(void *ptr, unsigned long ip) } poison_range(ptr, page_size(page), KASAN_FREE_PAGE); } else { - __kasan_slab_free(page->slab_cache, ptr, ip, false); + ____kasan_slab_free(page->slab_cache, ptr, ip, false); } } -void kasan_kfree_large(void *ptr, unsigned long ip) +void __kasan_kfree_large(void *ptr, unsigned long ip) { if (ptr != page_address(virt_to_head_page(ptr))) kasan_report_invalid_free(ptr, ip); -- GitLab From eeb3160c2419e0f1045537acac7b19cba64112f4 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:03:13 -0800 Subject: [PATCH 1078/1894] kasan, mm: rename kasan_poison_kfree Rename kasan_poison_kfree() to kasan_slab_free_mempool() as it better reflects what this annotation does. Also add a comment that explains the PageSlab() check. No functional changes. Link: https://lkml.kernel.org/r/141675fb493555e984c5dca555e9d9f768c7bbaa.1606162397.git.andreyknvl@google.com Link: https://linux-review.googlesource.com/id/I5026f87364e556b506ef1baee725144bb04b8810 Signed-off-by: Andrey Konovalov Reviewed-by: Marco Elver Tested-by: Vincenzo Frascino Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 16 ++++++++-------- mm/kasan/common.c | 40 +++++++++++++++++++++++----------------- mm/mempool.c | 2 +- 3 files changed, 32 insertions(+), 26 deletions(-) diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 9176849c49349..6f0c5d9aa43f2 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -176,6 +176,13 @@ static __always_inline bool kasan_slab_free(struct kmem_cache *s, void *object, return false; } +void __kasan_slab_free_mempool(void *ptr, unsigned long ip); +static __always_inline void kasan_slab_free_mempool(void *ptr, unsigned long ip) +{ + if (kasan_enabled()) + __kasan_slab_free_mempool(ptr, ip); +} + void * __must_check __kasan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags); static __always_inline void * __must_check kasan_slab_alloc( @@ -216,13 +223,6 @@ static __always_inline void * __must_check kasan_krealloc(const void *object, return (void *)object; } -void __kasan_poison_kfree(void *ptr, unsigned long ip); -static __always_inline void kasan_poison_kfree(void *ptr, unsigned long ip) -{ - if (kasan_enabled()) - __kasan_poison_kfree(ptr, ip); -} - void __kasan_kfree_large(void *ptr, unsigned long ip); static __always_inline void kasan_kfree_large(void *ptr, unsigned long ip) { @@ -261,6 +261,7 @@ static inline bool kasan_slab_free(struct kmem_cache *s, void *object, { return false; } +static inline void kasan_slab_free_mempool(void *ptr, unsigned long ip) {} static inline void *kasan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags) { @@ -280,7 +281,6 @@ static inline void *kasan_krealloc(const void *object, size_t new_size, { return (void *)object; } -static inline void kasan_poison_kfree(void *ptr, unsigned long ip) {} static inline void kasan_kfree_large(void *ptr, unsigned long ip) {} #endif /* CONFIG_KASAN */ diff --git a/mm/kasan/common.c b/mm/kasan/common.c index ae0130cf9de3d..d0f8d7a955cd9 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -331,6 +331,29 @@ bool __kasan_slab_free(struct kmem_cache *cache, void *object, unsigned long ip) return ____kasan_slab_free(cache, object, ip, true); } +void __kasan_slab_free_mempool(void *ptr, unsigned long ip) +{ + struct page *page; + + page = virt_to_head_page(ptr); + + /* + * Even though this function is only called for kmem_cache_alloc and + * kmalloc backed mempool allocations, those allocations can still be + * !PageSlab() when the size provided to kmalloc is larger than + * KMALLOC_MAX_SIZE, and kmalloc falls back onto page_alloc. + */ + if (unlikely(!PageSlab(page))) { + if (ptr != page_address(page)) { + kasan_report_invalid_free(ptr, ip); + return; + } + poison_range(ptr, page_size(page), KASAN_FREE_PAGE); + } else { + ____kasan_slab_free(page->slab_cache, ptr, ip, false); + } +} + static void set_alloc_info(struct kmem_cache *cache, void *object, gfp_t flags) { kasan_set_track(&kasan_get_alloc_meta(cache, object)->alloc_track, flags); @@ -422,23 +445,6 @@ void * __must_check __kasan_krealloc(const void *object, size_t size, gfp_t flag flags, true); } -void __kasan_poison_kfree(void *ptr, unsigned long ip) -{ - struct page *page; - - page = virt_to_head_page(ptr); - - if (unlikely(!PageSlab(page))) { - if (ptr != page_address(page)) { - kasan_report_invalid_free(ptr, ip); - return; - } - poison_range(ptr, page_size(page), KASAN_FREE_PAGE); - } else { - ____kasan_slab_free(page->slab_cache, ptr, ip, false); - } -} - void __kasan_kfree_large(void *ptr, unsigned long ip) { if (ptr != page_address(virt_to_head_page(ptr))) diff --git a/mm/mempool.c b/mm/mempool.c index 583a9865b1818..624ed51b060f0 100644 --- a/mm/mempool.c +++ b/mm/mempool.c @@ -104,7 +104,7 @@ static inline void poison_element(mempool_t *pool, void *element) static __always_inline void kasan_poison_element(mempool_t *pool, void *element) { if (pool->alloc == mempool_alloc_slab || pool->alloc == mempool_kmalloc) - kasan_poison_kfree(element, _RET_IP_); + kasan_slab_free_mempool(element, _RET_IP_); else if (pool->alloc == mempool_alloc_pages) kasan_free_pages(element, (unsigned long)pool->pool_data); } -- GitLab From d99f6a10c161227ae7a698470b1cff7b33734d4a Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:03:17 -0800 Subject: [PATCH 1079/1894] kasan: don't round_up too much For hardware tag-based mode kasan_poison_memory() already rounds up the size. Do the same for software modes and remove round_up() from the common code. Link: https://lkml.kernel.org/r/47b232474f1f89dc072aeda0fa58daa6efade377.1606162397.git.andreyknvl@google.com Link: https://linux-review.googlesource.com/id/Ib397128fac6eba874008662b4964d65352db4aa4 Signed-off-by: Andrey Konovalov Reviewed-by: Dmitry Vyukov Reviewed-by: Marco Elver Tested-by: Vincenzo Frascino Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/common.c | 8 ++------ mm/kasan/shadow.c | 1 + 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/mm/kasan/common.c b/mm/kasan/common.c index d0f8d7a955cd9..e3cfe156f6935 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -213,9 +213,7 @@ void __kasan_unpoison_object_data(struct kmem_cache *cache, void *object) void __kasan_poison_object_data(struct kmem_cache *cache, void *object) { - poison_range(object, - round_up(cache->object_size, KASAN_GRANULE_SIZE), - KASAN_KMALLOC_REDZONE); + poison_range(object, cache->object_size, KASAN_KMALLOC_REDZONE); } /* @@ -288,7 +286,6 @@ static bool ____kasan_slab_free(struct kmem_cache *cache, void *object, { u8 tag; void *tagged_object; - unsigned long rounded_up_size; tag = get_tag(object); tagged_object = object; @@ -309,8 +306,7 @@ static bool ____kasan_slab_free(struct kmem_cache *cache, void *object, return true; } - rounded_up_size = round_up(cache->object_size, KASAN_GRANULE_SIZE); - poison_range(object, rounded_up_size, KASAN_KMALLOC_FREE); + poison_range(object, cache->object_size, KASAN_KMALLOC_FREE); if (!kasan_stack_collection_enabled()) return false; diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c index 44a2b748f9d3c..7c2c08c55f325 100644 --- a/mm/kasan/shadow.c +++ b/mm/kasan/shadow.c @@ -82,6 +82,7 @@ void poison_range(const void *address, size_t size, u8 value) * addresses to this function. */ address = kasan_reset_tag(address); + size = round_up(size, KASAN_GRANULE_SIZE); shadow_start = kasan_mem_to_shadow(address); shadow_end = kasan_mem_to_shadow(address + size); -- GitLab From 1ef3133bd3b8627a99af2535a923a488563737a6 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:03:20 -0800 Subject: [PATCH 1080/1894] kasan: simplify assign_tag and set_tag calls set_tag() already ignores the tag for the generic mode, so just call it as is. Add a check for the generic mode to assign_tag(), and simplify its call in ____kasan_kmalloc(). Link: https://lkml.kernel.org/r/121eeab245f98555862b289d2ba9269c868fbbcf.1606162397.git.andreyknvl@google.com Link: https://linux-review.googlesource.com/id/I18905ca78fb4a3d60e1a34a4ca00247272480438 Signed-off-by: Andrey Konovalov Reviewed-by: Dmitry Vyukov Reviewed-by: Marco Elver Tested-by: Vincenzo Frascino Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/common.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/mm/kasan/common.c b/mm/kasan/common.c index e3cfe156f6935..17b22107f8eba 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -233,6 +233,9 @@ void __kasan_poison_object_data(struct kmem_cache *cache, void *object) static u8 assign_tag(struct kmem_cache *cache, const void *object, bool init, bool keep_tag) { + if (IS_ENABLED(CONFIG_KASAN_GENERIC)) + return 0xff; + /* * 1. When an object is kmalloc()'ed, two hooks are called: * kasan_slab_alloc() and kasan_kmalloc(). We assign the @@ -275,8 +278,8 @@ void * __must_check __kasan_init_slab_obj(struct kmem_cache *cache, __memset(alloc_meta, 0, sizeof(*alloc_meta)); } - if (IS_ENABLED(CONFIG_KASAN_SW_TAGS) || IS_ENABLED(CONFIG_KASAN_HW_TAGS)) - object = set_tag(object, assign_tag(cache, object, true, false)); + /* Tag is ignored in set_tag() without CONFIG_KASAN_SW/HW_TAGS */ + object = set_tag(object, assign_tag(cache, object, true, false)); return (void *)object; } @@ -360,7 +363,7 @@ static void *____kasan_kmalloc(struct kmem_cache *cache, const void *object, { unsigned long redzone_start; unsigned long redzone_end; - u8 tag = 0xff; + u8 tag; if (gfpflags_allow_blocking(flags)) quarantine_reduce(); @@ -372,9 +375,7 @@ static void *____kasan_kmalloc(struct kmem_cache *cache, const void *object, KASAN_GRANULE_SIZE); redzone_end = round_up((unsigned long)object + cache->object_size, KASAN_GRANULE_SIZE); - - if (IS_ENABLED(CONFIG_KASAN_SW_TAGS) || IS_ENABLED(CONFIG_KASAN_HW_TAGS)) - tag = assign_tag(cache, object, false, keep_tag); + tag = assign_tag(cache, object, false, keep_tag); /* Tag is ignored in set_tag without CONFIG_KASAN_SW/HW_TAGS */ unpoison_range(set_tag(object, tag), size); -- GitLab From 3933c1757163e8fb471a2d306ba769a04a698900 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:03:24 -0800 Subject: [PATCH 1081/1894] kasan: clarify comment in __kasan_kfree_large Currently it says that the memory gets poisoned by page_alloc code. Clarify this by mentioning the specific callback that poisons the memory. Link: https://lkml.kernel.org/r/1c8380fe0332a3bcc720fe29f1e0bef2e2974416.1606162397.git.andreyknvl@google.com Link: https://linux-review.googlesource.com/id/I1334dffb69b87d7986fab88a1a039cc3ea764725 Signed-off-by: Andrey Konovalov Reviewed-by: Dmitry Vyukov Reviewed-by: Marco Elver Tested-by: Vincenzo Frascino Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/kasan/common.c b/mm/kasan/common.c index 17b22107f8eba..1a8ccaed09162 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -446,5 +446,5 @@ void __kasan_kfree_large(void *ptr, unsigned long ip) { if (ptr != page_address(virt_to_head_page(ptr))) kasan_report_invalid_free(ptr, ip); - /* The object will be poisoned by page_alloc. */ + /* The object will be poisoned by kasan_free_pages(). */ } -- GitLab From 97593cad003c668e2532cb2939a24a031f8de52d Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:03:28 -0800 Subject: [PATCH 1082/1894] kasan: sanitize objects when metadata doesn't fit KASAN marks caches that are sanitized with the SLAB_KASAN cache flag. Currently if the metadata that is appended after the object (stores e.g. stack trace ids) doesn't fit into KMALLOC_MAX_SIZE (can only happen with SLAB, see the comment in the patch), KASAN turns off sanitization completely. With this change sanitization of the object data is always enabled. However the metadata is only stored when it fits. Instead of checking for SLAB_KASAN flag accross the code to find out whether the metadata is there, use cache->kasan_info.alloc/free_meta_offset. As 0 can be a valid value for free_meta_offset, introduce KASAN_NO_FREE_META as an indicator that the free metadata is missing. Without this change all sanitized KASAN objects would be put into quarantine with generic KASAN. With this change, only the objects that have metadata (i.e. when it fits) are put into quarantine, the rest is freed right away. Along the way rework __kasan_cache_create() and add claryfying comments. Link: https://lkml.kernel.org/r/aee34b87a5e4afe586c2ac6a0b32db8dc4dcc2dc.1606162397.git.andreyknvl@google.com Link: https://linux-review.googlesource.com/id/Icd947e2bea054cb5cfbdc6cf6652227d97032dcb Co-developed-by: Vincenzo Frascino Signed-off-by: Vincenzo Frascino Signed-off-by: Andrey Konovalov Reviewed-by: Marco Elver Tested-by: Vincenzo Frascino Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/common.c | 116 ++++++++++++++++++++++++-------------- mm/kasan/generic.c | 9 +-- mm/kasan/hw_tags.c | 6 +- mm/kasan/kasan.h | 17 +++++- mm/kasan/quarantine.c | 18 +++++- mm/kasan/report.c | 43 +++++++------- mm/kasan/report_sw_tags.c | 9 ++- mm/kasan/sw_tags.c | 4 ++ 8 files changed, 147 insertions(+), 75 deletions(-) diff --git a/mm/kasan/common.c b/mm/kasan/common.c index 1a8ccaed09162..0cd583d2fe1c2 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -114,9 +114,6 @@ void __kasan_free_pages(struct page *page, unsigned int order) */ static inline unsigned int optimal_redzone(unsigned int object_size) { - if (!IS_ENABLED(CONFIG_KASAN_GENERIC)) - return 0; - return object_size <= 64 - 16 ? 16 : object_size <= 128 - 32 ? 32 : @@ -130,47 +127,77 @@ static inline unsigned int optimal_redzone(unsigned int object_size) void __kasan_cache_create(struct kmem_cache *cache, unsigned int *size, slab_flags_t *flags) { - unsigned int orig_size = *size; - unsigned int redzone_size; - int redzone_adjust; + unsigned int ok_size; + unsigned int optimal_size; + + /* + * SLAB_KASAN is used to mark caches as ones that are sanitized by + * KASAN. Currently this flag is used in two places: + * 1. In slab_ksize() when calculating the size of the accessible + * memory within the object. + * 2. In slab_common.c to prevent merging of sanitized caches. + */ + *flags |= SLAB_KASAN; - if (!kasan_stack_collection_enabled()) { - *flags |= SLAB_KASAN; + if (!kasan_stack_collection_enabled()) return; - } - /* Add alloc meta. */ + ok_size = *size; + + /* Add alloc meta into redzone. */ cache->kasan_info.alloc_meta_offset = *size; *size += sizeof(struct kasan_alloc_meta); - /* Add free meta. */ - if (IS_ENABLED(CONFIG_KASAN_GENERIC) && - (cache->flags & SLAB_TYPESAFE_BY_RCU || cache->ctor || - cache->object_size < sizeof(struct kasan_free_meta))) { - cache->kasan_info.free_meta_offset = *size; - *size += sizeof(struct kasan_free_meta); + /* + * If alloc meta doesn't fit, don't add it. + * This can only happen with SLAB, as it has KMALLOC_MAX_SIZE equal + * to KMALLOC_MAX_CACHE_SIZE and doesn't fall back to page_alloc for + * larger sizes. + */ + if (*size > KMALLOC_MAX_SIZE) { + cache->kasan_info.alloc_meta_offset = 0; + *size = ok_size; + /* Continue, since free meta might still fit. */ } - redzone_size = optimal_redzone(cache->object_size); - redzone_adjust = redzone_size - (*size - cache->object_size); - if (redzone_adjust > 0) - *size += redzone_adjust; - - *size = min_t(unsigned int, KMALLOC_MAX_SIZE, - max(*size, cache->object_size + redzone_size)); + /* Only the generic mode uses free meta or flexible redzones. */ + if (!IS_ENABLED(CONFIG_KASAN_GENERIC)) { + cache->kasan_info.free_meta_offset = KASAN_NO_FREE_META; + return; + } /* - * If the metadata doesn't fit, don't enable KASAN at all. + * Add free meta into redzone when it's not possible to store + * it in the object. This is the case when: + * 1. Object is SLAB_TYPESAFE_BY_RCU, which means that it can + * be touched after it was freed, or + * 2. Object has a constructor, which means it's expected to + * retain its content until the next allocation, or + * 3. Object is too small. + * Otherwise cache->kasan_info.free_meta_offset = 0 is implied. */ - if (*size <= cache->kasan_info.alloc_meta_offset || - *size <= cache->kasan_info.free_meta_offset) { - cache->kasan_info.alloc_meta_offset = 0; - cache->kasan_info.free_meta_offset = 0; - *size = orig_size; - return; + if ((cache->flags & SLAB_TYPESAFE_BY_RCU) || cache->ctor || + cache->object_size < sizeof(struct kasan_free_meta)) { + ok_size = *size; + + cache->kasan_info.free_meta_offset = *size; + *size += sizeof(struct kasan_free_meta); + + /* If free meta doesn't fit, don't add it. */ + if (*size > KMALLOC_MAX_SIZE) { + cache->kasan_info.free_meta_offset = KASAN_NO_FREE_META; + *size = ok_size; + } } - *flags |= SLAB_KASAN; + /* Calculate size with optimal redzone. */ + optimal_size = cache->object_size + optimal_redzone(cache->object_size); + /* Limit it with KMALLOC_MAX_SIZE (relevant for SLAB only). */ + if (optimal_size > KMALLOC_MAX_SIZE) + optimal_size = KMALLOC_MAX_SIZE; + /* Use optimal size if the size with added metas is not large enough. */ + if (*size < optimal_size) + *size = optimal_size; } size_t __kasan_metadata_size(struct kmem_cache *cache) @@ -186,15 +213,21 @@ size_t __kasan_metadata_size(struct kmem_cache *cache) struct kasan_alloc_meta *kasan_get_alloc_meta(struct kmem_cache *cache, const void *object) { + if (!cache->kasan_info.alloc_meta_offset) + return NULL; return kasan_reset_tag(object) + cache->kasan_info.alloc_meta_offset; } +#ifdef CONFIG_KASAN_GENERIC struct kasan_free_meta *kasan_get_free_meta(struct kmem_cache *cache, const void *object) { BUILD_BUG_ON(sizeof(struct kasan_free_meta) > 32); + if (cache->kasan_info.free_meta_offset == KASAN_NO_FREE_META) + return NULL; return kasan_reset_tag(object) + cache->kasan_info.free_meta_offset; } +#endif void __kasan_poison_slab(struct page *page) { @@ -271,11 +304,9 @@ void * __must_check __kasan_init_slab_obj(struct kmem_cache *cache, struct kasan_alloc_meta *alloc_meta; if (kasan_stack_collection_enabled()) { - if (!(cache->flags & SLAB_KASAN)) - return (void *)object; - alloc_meta = kasan_get_alloc_meta(cache, object); - __memset(alloc_meta, 0, sizeof(*alloc_meta)); + if (alloc_meta) + __memset(alloc_meta, 0, sizeof(*alloc_meta)); } /* Tag is ignored in set_tag() without CONFIG_KASAN_SW/HW_TAGS */ @@ -314,15 +345,12 @@ static bool ____kasan_slab_free(struct kmem_cache *cache, void *object, if (!kasan_stack_collection_enabled()) return false; - if ((IS_ENABLED(CONFIG_KASAN_GENERIC) && !quarantine) || - unlikely(!(cache->flags & SLAB_KASAN))) + if ((IS_ENABLED(CONFIG_KASAN_GENERIC) && !quarantine)) return false; kasan_set_free_info(cache, object, tag); - quarantine_put(cache, object); - - return IS_ENABLED(CONFIG_KASAN_GENERIC); + return quarantine_put(cache, object); } bool __kasan_slab_free(struct kmem_cache *cache, void *object, unsigned long ip) @@ -355,7 +383,11 @@ void __kasan_slab_free_mempool(void *ptr, unsigned long ip) static void set_alloc_info(struct kmem_cache *cache, void *object, gfp_t flags) { - kasan_set_track(&kasan_get_alloc_meta(cache, object)->alloc_track, flags); + struct kasan_alloc_meta *alloc_meta; + + alloc_meta = kasan_get_alloc_meta(cache, object); + if (alloc_meta) + kasan_set_track(&alloc_meta->alloc_track, flags); } static void *____kasan_kmalloc(struct kmem_cache *cache, const void *object, @@ -382,7 +414,7 @@ static void *____kasan_kmalloc(struct kmem_cache *cache, const void *object, poison_range((void *)redzone_start, redzone_end - redzone_start, KASAN_KMALLOC_REDZONE); - if (kasan_stack_collection_enabled() && (cache->flags & SLAB_KASAN)) + if (kasan_stack_collection_enabled()) set_alloc_info(cache, (void *)object, flags); return set_tag(object, tag); diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c index 4c233b3393012..1dd5a0f993726 100644 --- a/mm/kasan/generic.c +++ b/mm/kasan/generic.c @@ -348,11 +348,11 @@ void kasan_set_free_info(struct kmem_cache *cache, struct kasan_free_meta *free_meta; free_meta = kasan_get_free_meta(cache, object); - kasan_set_track(&free_meta->free_track, GFP_NOWAIT); + if (!free_meta) + return; - /* - * the object was freed and has free track set - */ + kasan_set_track(&free_meta->free_track, GFP_NOWAIT); + /* The object was freed and has free track set. */ *(u8 *)kasan_mem_to_shadow(object) = KASAN_KMALLOC_FREETRACK; } @@ -361,5 +361,6 @@ struct kasan_track *kasan_get_free_track(struct kmem_cache *cache, { if (*(u8 *)kasan_mem_to_shadow(object) != KASAN_KMALLOC_FREETRACK) return NULL; + /* Free meta must be present with KASAN_KMALLOC_FREETRACK. */ return &kasan_get_free_meta(cache, object)->free_track; } diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c index 4ccc3c873c82f..55bd6f09c70ff 100644 --- a/mm/kasan/hw_tags.c +++ b/mm/kasan/hw_tags.c @@ -187,7 +187,8 @@ void kasan_set_free_info(struct kmem_cache *cache, struct kasan_alloc_meta *alloc_meta; alloc_meta = kasan_get_alloc_meta(cache, object); - kasan_set_track(&alloc_meta->free_track[0], GFP_NOWAIT); + if (alloc_meta) + kasan_set_track(&alloc_meta->free_track[0], GFP_NOWAIT); } struct kasan_track *kasan_get_free_track(struct kmem_cache *cache, @@ -196,5 +197,8 @@ struct kasan_track *kasan_get_free_track(struct kmem_cache *cache, struct kasan_alloc_meta *alloc_meta; alloc_meta = kasan_get_alloc_meta(cache, object); + if (!alloc_meta) + return NULL; + return &alloc_meta->free_track[0]; } diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index ca43de9811de5..cc4d9e1d49b1d 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -155,20 +155,31 @@ struct kasan_alloc_meta { struct qlist_node { struct qlist_node *next; }; + +/* + * Generic mode either stores free meta in the object itself or in the redzone + * after the object. In the former case free meta offset is 0, in the latter + * case it has some sane value smaller than INT_MAX. Use INT_MAX as free meta + * offset when free meta isn't present. + */ +#define KASAN_NO_FREE_META INT_MAX + struct kasan_free_meta { +#ifdef CONFIG_KASAN_GENERIC /* This field is used while the object is in the quarantine. * Otherwise it might be used for the allocator freelist. */ struct qlist_node quarantine_link; -#ifdef CONFIG_KASAN_GENERIC struct kasan_track free_track; #endif }; struct kasan_alloc_meta *kasan_get_alloc_meta(struct kmem_cache *cache, const void *object); +#ifdef CONFIG_KASAN_GENERIC struct kasan_free_meta *kasan_get_free_meta(struct kmem_cache *cache, const void *object); +#endif #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) @@ -233,11 +244,11 @@ struct kasan_track *kasan_get_free_track(struct kmem_cache *cache, #if defined(CONFIG_KASAN_GENERIC) && \ (defined(CONFIG_SLAB) || defined(CONFIG_SLUB)) -void quarantine_put(struct kmem_cache *cache, void *object); +bool quarantine_put(struct kmem_cache *cache, void *object); void quarantine_reduce(void); void quarantine_remove_cache(struct kmem_cache *cache); #else -static inline void quarantine_put(struct kmem_cache *cache, void *object) { } +static inline bool quarantine_put(struct kmem_cache *cache, void *object) { return false; } static inline void quarantine_reduce(void) { } static inline void quarantine_remove_cache(struct kmem_cache *cache) { } #endif diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c index 8fb71e11df3ff..55783125a7671 100644 --- a/mm/kasan/quarantine.c +++ b/mm/kasan/quarantine.c @@ -137,7 +137,12 @@ static void qlink_free(struct qlist_node *qlink, struct kmem_cache *cache) if (IS_ENABLED(CONFIG_SLAB)) local_irq_save(flags); + /* + * As the object now gets freed from the quaratine, assume that its + * free track is no longer valid. + */ *(u8 *)kasan_mem_to_shadow(object) = KASAN_KMALLOC_FREE; + ___cache_free(cache, object, _THIS_IP_); if (IS_ENABLED(CONFIG_SLAB)) @@ -163,13 +168,20 @@ static void qlist_free_all(struct qlist_head *q, struct kmem_cache *cache) qlist_init(q); } -void quarantine_put(struct kmem_cache *cache, void *object) +bool quarantine_put(struct kmem_cache *cache, void *object) { unsigned long flags; struct qlist_head *q; struct qlist_head temp = QLIST_INIT; struct kasan_free_meta *meta = kasan_get_free_meta(cache, object); + /* + * If there's no metadata for this object, don't put it into + * quarantine. + */ + if (!meta) + return false; + /* * Note: irq must be disabled until after we move the batch to the * global quarantine. Otherwise quarantine_remove_cache() can miss @@ -183,7 +195,7 @@ void quarantine_put(struct kmem_cache *cache, void *object) q = this_cpu_ptr(&cpu_quarantine); if (q->offline) { local_irq_restore(flags); - return; + return false; } qlist_put(q, &meta->quarantine_link, cache->size); if (unlikely(q->bytes > QUARANTINE_PERCPU_SIZE)) { @@ -206,6 +218,8 @@ void quarantine_put(struct kmem_cache *cache, void *object) } local_irq_restore(flags); + + return true; } void quarantine_reduce(void) diff --git a/mm/kasan/report.c b/mm/kasan/report.c index ed9555a814df0..c0fb217975501 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -168,32 +168,35 @@ static void describe_object_addr(struct kmem_cache *cache, void *object, static void describe_object_stacks(struct kmem_cache *cache, void *object, const void *addr, u8 tag) { - struct kasan_alloc_meta *alloc_meta = kasan_get_alloc_meta(cache, object); - - if (cache->flags & SLAB_KASAN) { - struct kasan_track *free_track; + struct kasan_alloc_meta *alloc_meta; + struct kasan_track *free_track; + alloc_meta = kasan_get_alloc_meta(cache, object); + if (alloc_meta) { print_track(&alloc_meta->alloc_track, "Allocated"); pr_err("\n"); - free_track = kasan_get_free_track(cache, object, tag); - if (free_track) { - print_track(free_track, "Freed"); - pr_err("\n"); - } + } + + free_track = kasan_get_free_track(cache, object, tag); + if (free_track) { + print_track(free_track, "Freed"); + pr_err("\n"); + } #ifdef CONFIG_KASAN_GENERIC - if (alloc_meta->aux_stack[0]) { - pr_err("Last potentially related work creation:\n"); - print_stack(alloc_meta->aux_stack[0]); - pr_err("\n"); - } - if (alloc_meta->aux_stack[1]) { - pr_err("Second to last potentially related work creation:\n"); - print_stack(alloc_meta->aux_stack[1]); - pr_err("\n"); - } -#endif + if (!alloc_meta) + return; + if (alloc_meta->aux_stack[0]) { + pr_err("Last potentially related work creation:\n"); + print_stack(alloc_meta->aux_stack[0]); + pr_err("\n"); } + if (alloc_meta->aux_stack[1]) { + pr_err("Second to last potentially related work creation:\n"); + print_stack(alloc_meta->aux_stack[1]); + pr_err("\n"); + } +#endif } static void describe_object(struct kmem_cache *cache, void *object, diff --git a/mm/kasan/report_sw_tags.c b/mm/kasan/report_sw_tags.c index 7604b46239d40..1b026793ad57d 100644 --- a/mm/kasan/report_sw_tags.c +++ b/mm/kasan/report_sw_tags.c @@ -48,9 +48,12 @@ const char *get_bug_type(struct kasan_access_info *info) object = nearest_obj(cache, page, (void *)addr); alloc_meta = kasan_get_alloc_meta(cache, object); - for (i = 0; i < KASAN_NR_FREE_STACKS; i++) - if (alloc_meta->free_pointer_tag[i] == tag) - return "use-after-free"; + if (alloc_meta) { + for (i = 0; i < KASAN_NR_FREE_STACKS; i++) { + if (alloc_meta->free_pointer_tag[i] == tag) + return "use-after-free"; + } + } return "out-of-bounds"; } diff --git a/mm/kasan/sw_tags.c b/mm/kasan/sw_tags.c index e17de2619bbfc..5dcd830805b2f 100644 --- a/mm/kasan/sw_tags.c +++ b/mm/kasan/sw_tags.c @@ -170,6 +170,8 @@ void kasan_set_free_info(struct kmem_cache *cache, u8 idx = 0; alloc_meta = kasan_get_alloc_meta(cache, object); + if (!alloc_meta) + return; #ifdef CONFIG_KASAN_SW_TAGS_IDENTIFY idx = alloc_meta->free_track_idx; @@ -187,6 +189,8 @@ struct kasan_track *kasan_get_free_track(struct kmem_cache *cache, int i = 0; alloc_meta = kasan_get_alloc_meta(cache, object); + if (!alloc_meta) + return NULL; #ifdef CONFIG_KASAN_SW_TAGS_IDENTIFY for (i = 0; i < KASAN_NR_FREE_STACKS; i++) { -- GitLab From e86f8b09f215e3755cd2d56930487dec2de02433 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:03:31 -0800 Subject: [PATCH 1083/1894] kasan, mm: allow cache merging with no metadata The reason cache merging is disabled with KASAN is because KASAN puts its metadata right after the allocated object. When the merged caches have slightly different sizes, the metadata ends up in different places, which KASAN doesn't support. It might be possible to adjust the metadata allocation algorithm and make it friendly to the cache merging code. Instead this change takes a simpler approach and allows merging caches when no metadata is present. Which is the case for hardware tag-based KASAN with kasan.mode=prod. Link: https://lkml.kernel.org/r/37497e940bfd4b32c0a93a702a9ae4cf061d5392.1606162397.git.andreyknvl@google.com Link: https://linux-review.googlesource.com/id/Ia114847dfb2244f297d2cb82d592bf6a07455dba Co-developed-by: Vincenzo Frascino Signed-off-by: Vincenzo Frascino Signed-off-by: Andrey Konovalov Reviewed-by: Dmitry Vyukov Reviewed-by: Marco Elver Tested-by: Vincenzo Frascino Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 21 +++++++++++++++++++-- mm/kasan/common.c | 11 +++++++++++ mm/slab_common.c | 3 ++- 3 files changed, 32 insertions(+), 3 deletions(-) diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 6f0c5d9aa43f2..5e0655fb2a6f7 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -82,17 +82,30 @@ struct kasan_cache { }; #ifdef CONFIG_KASAN_HW_TAGS + DECLARE_STATIC_KEY_FALSE(kasan_flag_enabled); + static __always_inline bool kasan_enabled(void) { return static_branch_likely(&kasan_flag_enabled); } -#else + +#else /* CONFIG_KASAN_HW_TAGS */ + static inline bool kasan_enabled(void) { return true; } -#endif + +#endif /* CONFIG_KASAN_HW_TAGS */ + +slab_flags_t __kasan_never_merge(void); +static __always_inline slab_flags_t kasan_never_merge(void) +{ + if (kasan_enabled()) + return __kasan_never_merge(); + return 0; +} void __kasan_unpoison_range(const void *addr, size_t size); static __always_inline void kasan_unpoison_range(const void *addr, size_t size) @@ -239,6 +252,10 @@ static inline bool kasan_enabled(void) { return false; } +static inline slab_flags_t kasan_never_merge(void) +{ + return 0; +} static inline void kasan_unpoison_range(const void *address, size_t size) {} static inline void kasan_alloc_pages(struct page *page, unsigned int order) {} static inline void kasan_free_pages(struct page *page, unsigned int order) {} diff --git a/mm/kasan/common.c b/mm/kasan/common.c index 0cd583d2fe1c2..b25167664ead4 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -86,6 +86,17 @@ asmlinkage void kasan_unpoison_task_stack_below(const void *watermark) } #endif /* CONFIG_KASAN_STACK */ +/* + * Only allow cache merging when stack collection is disabled and no metadata + * is present. + */ +slab_flags_t __kasan_never_merge(void) +{ + if (kasan_stack_collection_enabled()) + return SLAB_KASAN; + return 0; +} + void __kasan_alloc_pages(struct page *page, unsigned int order) { u8 tag; diff --git a/mm/slab_common.c b/mm/slab_common.c index 573fbacd9ef50..e981c80d216c2 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -53,7 +54,7 @@ static DECLARE_WORK(slab_caches_to_rcu_destroy_work, */ #define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \ SLAB_TRACE | SLAB_TYPESAFE_BY_RCU | SLAB_NOLEAKTRACE | \ - SLAB_FAILSLAB | SLAB_KASAN) + SLAB_FAILSLAB | kasan_never_merge()) #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \ SLAB_CACHE_DMA32 | SLAB_ACCOUNT) -- GitLab From 625d867347c9e84d1ac3c953e1b689f65b603bed Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:03:35 -0800 Subject: [PATCH 1084/1894] kasan: update documentation This change updates KASAN documentation to reflect the addition of boot parameters and also reworks and clarifies some of the existing sections, in particular: defines what a memory granule is, mentions quarantine, makes Kunit section more readable. Link: https://lkml.kernel.org/r/748daf013e17d925b0fe00c1c3b5dce726dd2430.1606162397.git.andreyknvl@google.com Link: https://linux-review.googlesource.com/id/Ib1f83e91be273264b25f42b04448ac96b858849f Signed-off-by: Andrey Konovalov Reviewed-by: Dmitry Vyukov Reviewed-by: Marco Elver Tested-by: Vincenzo Frascino Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/dev-tools/kasan.rst | 184 +++++++++++++++++++----------- 1 file changed, 115 insertions(+), 69 deletions(-) diff --git a/Documentation/dev-tools/kasan.rst b/Documentation/dev-tools/kasan.rst index 14ae8a9249994..e03513f6b0790 100644 --- a/Documentation/dev-tools/kasan.rst +++ b/Documentation/dev-tools/kasan.rst @@ -4,8 +4,9 @@ The Kernel Address Sanitizer (KASAN) Overview -------- -KernelAddressSANitizer (KASAN) is a dynamic memory error detector designed to -find out-of-bound and use-after-free bugs. KASAN has three modes: +KernelAddressSANitizer (KASAN) is a dynamic memory safety error detector +designed to find out-of-bound and use-after-free bugs. KASAN has three modes: + 1. generic KASAN (similar to userspace ASan), 2. software tag-based KASAN (similar to userspace HWASan), 3. hardware tag-based KASAN (based on hardware memory tagging). @@ -39,23 +40,13 @@ CONFIG_KASAN_INLINE. Outline and inline are compiler instrumentation types. The former produces smaller binary while the latter is 1.1 - 2 times faster. Both software KASAN modes work with both SLUB and SLAB memory allocators, -hardware tag-based KASAN currently only support SLUB. -For better bug detection and nicer reporting, enable CONFIG_STACKTRACE. +while the hardware tag-based KASAN currently only support SLUB. + +For better error reports that include stack traces, enable CONFIG_STACKTRACE. To augment reports with last allocation and freeing stack of the physical page, it is recommended to enable also CONFIG_PAGE_OWNER and boot with page_owner=on. -To disable instrumentation for specific files or directories, add a line -similar to the following to the respective kernel Makefile: - -- For a single file (e.g. main.o):: - - KASAN_SANITIZE_main.o := n - -- For all files in one directory:: - - KASAN_SANITIZE := n - Error reports ~~~~~~~~~~~~~ @@ -140,22 +131,75 @@ freed (in case of a use-after-free bug report). Next comes a description of the accessed slab object and information about the accessed memory page. In the last section the report shows memory state around the accessed address. -Reading this part requires some understanding of how KASAN works. - -The state of each 8 aligned bytes of memory is encoded in one shadow byte. -Those 8 bytes can be accessible, partially accessible, freed or be a redzone. -We use the following encoding for each shadow byte: 0 means that all 8 bytes -of the corresponding memory region are accessible; number N (1 <= N <= 7) means -that the first N bytes are accessible, and other (8 - N) bytes are not; -any negative value indicates that the entire 8-byte word is inaccessible. -We use different negative values to distinguish between different kinds of -inaccessible memory like redzones or freed memory (see mm/kasan/kasan.h). +Internally KASAN tracks memory state separately for each memory granule, which +is either 8 or 16 aligned bytes depending on KASAN mode. Each number in the +memory state section of the report shows the state of one of the memory +granules that surround the accessed address. + +For generic KASAN the size of each memory granule is 8. The state of each +granule is encoded in one shadow byte. Those 8 bytes can be accessible, +partially accessible, freed or be a part of a redzone. KASAN uses the following +encoding for each shadow byte: 0 means that all 8 bytes of the corresponding +memory region are accessible; number N (1 <= N <= 7) means that the first N +bytes are accessible, and other (8 - N) bytes are not; any negative value +indicates that the entire 8-byte word is inaccessible. KASAN uses different +negative values to distinguish between different kinds of inaccessible memory +like redzones or freed memory (see mm/kasan/kasan.h). In the report above the arrows point to the shadow byte 03, which means that the accessed address is partially accessible. For tag-based KASAN this last report section shows the memory tags around the -accessed address (see Implementation details section). +accessed address (see `Implementation details`_ section). + +Boot parameters +~~~~~~~~~~~~~~~ + +Hardware tag-based KASAN mode (see the section about different mode below) is +intended for use in production as a security mitigation. Therefore it supports +boot parameters that allow to disable KASAN competely or otherwise control +particular KASAN features. + +The things that can be controlled are: + +1. Whether KASAN is enabled at all. +2. Whether KASAN collects and saves alloc/free stacks. +3. Whether KASAN panics on a detected bug or not. + +The ``kasan.mode`` boot parameter allows to choose one of three main modes: + +- ``kasan.mode=off`` - KASAN is disabled, no tag checks are performed +- ``kasan.mode=prod`` - only essential production features are enabled +- ``kasan.mode=full`` - all KASAN features are enabled + +The chosen mode provides default control values for the features mentioned +above. However it's also possible to override the default values by providing: + +- ``kasan.stacktrace=off`` or ``=on`` - enable alloc/free stack collection + (default: ``on`` for ``mode=full``, + otherwise ``off``) +- ``kasan.fault=report`` or ``=panic`` - only print KASAN report or also panic + (default: ``report``) + +If ``kasan.mode`` parameter is not provided, it defaults to ``full`` when +``CONFIG_DEBUG_KERNEL`` is enabled, and to ``prod`` otherwise. + +For developers +~~~~~~~~~~~~~~ + +Software KASAN modes use compiler instrumentation to insert validity checks. +Such instrumentation might be incompatible with some part of the kernel, and +therefore needs to be disabled. To disable instrumentation for specific files +or directories, add a line similar to the following to the respective kernel +Makefile: + +- For a single file (e.g. main.o):: + + KASAN_SANITIZE_main.o := n + +- For all files in one directory:: + + KASAN_SANITIZE := n Implementation details @@ -164,10 +208,10 @@ Implementation details Generic KASAN ~~~~~~~~~~~~~ -From a high level, our approach to memory error detection is similar to that -of kmemcheck: use shadow memory to record whether each byte of memory is safe -to access, and use compile-time instrumentation to insert checks of shadow -memory on each memory access. +From a high level perspective, KASAN's approach to memory error detection is +similar to that of kmemcheck: use shadow memory to record whether each byte of +memory is safe to access, and use compile-time instrumentation to insert checks +of shadow memory on each memory access. Generic KASAN dedicates 1/8th of kernel memory to its shadow memory (e.g. 16TB to cover 128TB on x86_64) and uses direct mapping with a scale and offset to @@ -198,6 +242,9 @@ Generic KASAN also reports the last 2 call stacks to creation of work that potentially has access to an object. Call stacks for the following are shown: call_rcu() and workqueue queuing. +Generic KASAN is the only mode that delays the reuse of freed object via +quarantine (see mm/kasan/quarantine.c for implementation). + Software tag-based KASAN ~~~~~~~~~~~~~~~~~~~~~~~~ @@ -305,15 +352,15 @@ therefore be wasteful. Furthermore, to ensure that different mappings use different shadow pages, mappings would have to be aligned to ``KASAN_GRANULE_SIZE * PAGE_SIZE``. -Instead, we share backing space across multiple mappings. We allocate +Instead, KASAN shares backing space across multiple mappings. It allocates a backing page when a mapping in vmalloc space uses a particular page of the shadow region. This page can be shared by other vmalloc mappings later on. -We hook in to the vmap infrastructure to lazily clean up unused shadow +KASAN hooks into the vmap infrastructure to lazily clean up unused shadow memory. -To avoid the difficulties around swapping mappings around, we expect +To avoid the difficulties around swapping mappings around, KASAN expects that the part of the shadow region that covers the vmalloc space will not be covered by the early shadow page, but will be left unmapped. This will require changes in arch-specific code. @@ -324,24 +371,31 @@ architectures that do not have a fixed module region. CONFIG_KASAN_KUNIT_TEST & CONFIG_TEST_KASAN_MODULE -------------------------------------------------- -``CONFIG_KASAN_KUNIT_TEST`` utilizes the KUnit Test Framework for testing. -This means each test focuses on a small unit of functionality and -there are a few ways these tests can be run. +KASAN tests consist on two parts: + +1. Tests that are integrated with the KUnit Test Framework. Enabled with +``CONFIG_KASAN_KUNIT_TEST``. These tests can be run and partially verified +automatically in a few different ways, see the instructions below. -Each test will print the KASAN report if an error is detected and then -print the number of the test and the status of the test: +2. Tests that are currently incompatible with KUnit. Enabled with +``CONFIG_TEST_KASAN_MODULE`` and can only be run as a module. These tests can +only be verified manually, by loading the kernel module and inspecting the +kernel log for KASAN reports. -pass:: +Each KUnit-compatible KASAN test prints a KASAN report if an error is detected. +Then the test prints its number and status. + +When a test passes:: ok 28 - kmalloc_double_kzfree -or, if kmalloc failed:: +When a test fails due to a failed ``kmalloc``:: # kmalloc_large_oob_right: ASSERTION FAILED at lib/test_kasan.c:163 Expected ptr is not null, but is not ok 4 - kmalloc_large_oob_right -or, if a KASAN report was expected, but not found:: +When a test fails due to a missing KASAN report:: # kmalloc_double_kzfree: EXPECTATION FAILED at lib/test_kasan.c:629 Expected kasan_data->report_expected == kasan_data->report_found, but @@ -349,46 +403,38 @@ or, if a KASAN report was expected, but not found:: kasan_data->report_found == 0 not ok 28 - kmalloc_double_kzfree -All test statuses are tracked as they run and an overall status will -be printed at the end:: +At the end the cumulative status of all KASAN tests is printed. On success:: ok 1 - kasan -or:: +Or, if one of the tests failed:: not ok 1 - kasan -(1) Loadable Module -~~~~~~~~~~~~~~~~~~~~ + +There are a few ways to run KUnit-compatible KASAN tests. + +1. Loadable module +~~~~~~~~~~~~~~~~~~ With ``CONFIG_KUNIT`` enabled, ``CONFIG_KASAN_KUNIT_TEST`` can be built as -a loadable module and run on any architecture that supports KASAN -using something like insmod or modprobe. The module is called ``test_kasan``. +a loadable module and run on any architecture that supports KASAN by loading +the module with insmod or modprobe. The module is called ``test_kasan``. -(2) Built-In -~~~~~~~~~~~~~ +2. Built-In +~~~~~~~~~~~ With ``CONFIG_KUNIT`` built-in, ``CONFIG_KASAN_KUNIT_TEST`` can be built-in -on any architecture that supports KASAN. These and any other KUnit -tests enabled will run and print the results at boot as a late-init -call. +on any architecure that supports KASAN. These and any other KUnit tests enabled +will run and print the results at boot as a late-init call. -(3) Using kunit_tool -~~~~~~~~~~~~~~~~~~~~~ +3. Using kunit_tool +~~~~~~~~~~~~~~~~~~~ -With ``CONFIG_KUNIT`` and ``CONFIG_KASAN_KUNIT_TEST`` built-in, we can also -use kunit_tool to see the results of these along with other KUnit -tests in a more readable way. This will not print the KASAN reports -of tests that passed. Use `KUnit documentation `_ for more up-to-date -information on kunit_tool. +With ``CONFIG_KUNIT`` and ``CONFIG_KASAN_KUNIT_TEST`` built-in, it's also +possible use ``kunit_tool`` to see the results of these and other KUnit tests +in a more readable way. This will not print the KASAN reports of the tests that +passed. Use `KUnit documentation `_ +for more up-to-date information on ``kunit_tool``. .. _KUnit: https://www.kernel.org/doc/html/latest/dev-tools/kunit/index.html - -``CONFIG_TEST_KASAN_MODULE`` is a set of KASAN tests that could not be -converted to KUnit. These tests can be run only as a module with -``CONFIG_TEST_KASAN_MODULE`` built as a loadable module and -``CONFIG_KASAN`` built-in. The type of error expected and the -function being run is printed before the expression expected to give -an error. Then the error is printed, if found, and that test -should be interpreted to pass only if the error was the one expected -by the test. -- GitLab From e14fd4ba8fb47fcf5f244366ec01ae94490cd86a Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 22 Dec 2020 09:40:10 -0800 Subject: [PATCH 1085/1894] x86/split-lock: Avoid returning with interrupts enabled When a split lock is detected always make sure to disable interrupts before returning from the trap handler. The kernel exit code assumes that all exits run with interrupts disabled, otherwise the SWAPGS sequence can race against interrupts and cause recursing page faults and later panics. The problem will only happen on CPUs with split lock disable functionality, so Icelake Server, Tiger Lake, Snow Ridge, Jacobsville. Fixes: ca4c6a9858c2 ("x86/traps: Make interrupt enable/disable symmetric in C code") Fixes: bce9b042ec73 ("x86/traps: Disable interrupts in exc_aligment_check()") # v5.8+ Signed-off-by: Andi Kleen Cc: Peter Zijlstra Cc: Fenghua Yu Cc: Tony Luck Reviewed-by: Thomas Gleixner Signed-off-by: Linus Torvalds --- arch/x86/kernel/traps.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index fb55981f2a0d0..7f5aec758f0ee 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -303,11 +303,12 @@ DEFINE_IDTENTRY_ERRORCODE(exc_alignment_check) local_irq_enable(); if (handle_user_split_lock(regs, error_code)) - return; + goto out; do_trap(X86_TRAP_AC, SIGBUS, "alignment check", regs, error_code, BUS_ADRALN, NULL); +out: local_irq_disable(); } -- GitLab From 942cb357ae7d9249088e3687ee6a00ed2745a0c7 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Tue, 22 Dec 2020 15:34:24 -0800 Subject: [PATCH 1086/1894] Smack: Handle io_uring kernel thread privileges Smack assumes that kernel threads are privileged for smackfs operations. This was necessary because the credential of the kernel thread was not related to a user operation. With io_uring the credential does reflect a user's rights and can be used. Suggested-by: Jens Axboe Acked-by: Jens Axboe Acked-by: Eric W. Biederman Signed-off-by: Casey Schaufler --- security/smack/smack_access.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c index efe2406a39609..7eabb448acab4 100644 --- a/security/smack/smack_access.c +++ b/security/smack/smack_access.c @@ -688,9 +688,10 @@ bool smack_privileged_cred(int cap, const struct cred *cred) bool smack_privileged(int cap) { /* - * All kernel tasks are privileged + * Kernel threads may not have credentials we can use. + * The io_uring kernel threads do have reliable credentials. */ - if (unlikely(current->flags & PF_KTHREAD)) + if ((current->flags & (PF_KTHREAD | PF_IO_WORKER)) == PF_KTHREAD) return true; return smack_privileged_cred(cap, current_cred()); -- GitLab From 9faadcc8abe4b83d0263216dc3a6321d5bbd616b Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 21 Dec 2020 18:34:05 +0000 Subject: [PATCH 1087/1894] io_uring: fix double io_uring free Once we created a file for current context during setup, we should not call io_ring_ctx_wait_and_kill() directly as it'll be done by fput(file) Cc: stable@vger.kernel.org # 5.10 Reported-by: syzbot+c9937dfb2303a5f18640@syzkaller.appspotmail.com Signed-off-by: Pavel Begunkov [axboe: fix unused 'ret' for !CONFIG_UNIX] Signed-off-by: Jens Axboe --- fs/io_uring.c | 71 ++++++++++++++++++++++++++++----------------------- 1 file changed, 39 insertions(+), 32 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 846c635d06200..7c0b77d49cd24 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -9379,55 +9379,52 @@ static int io_allocate_scq_urings(struct io_ring_ctx *ctx, return 0; } +static int io_uring_install_fd(struct io_ring_ctx *ctx, struct file *file) +{ + int ret, fd; + + fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC); + if (fd < 0) + return fd; + + ret = io_uring_add_task_file(ctx, file); + if (ret) { + put_unused_fd(fd); + return ret; + } + fd_install(fd, file); + return fd; +} + /* * Allocate an anonymous fd, this is what constitutes the application * visible backing of an io_uring instance. The application mmaps this * fd to gain access to the SQ/CQ ring details. If UNIX sockets are enabled, * we have to tie this fd to a socket for file garbage collection purposes. */ -static int io_uring_get_fd(struct io_ring_ctx *ctx) +static struct file *io_uring_get_file(struct io_ring_ctx *ctx) { struct file *file; +#if defined(CONFIG_UNIX) int ret; - int fd; -#if defined(CONFIG_UNIX) ret = sock_create_kern(&init_net, PF_UNIX, SOCK_RAW, IPPROTO_IP, &ctx->ring_sock); if (ret) - return ret; + return ERR_PTR(ret); #endif - ret = get_unused_fd_flags(O_RDWR | O_CLOEXEC); - if (ret < 0) - goto err; - fd = ret; - file = anon_inode_getfile("[io_uring]", &io_uring_fops, ctx, O_RDWR | O_CLOEXEC); - if (IS_ERR(file)) { - put_unused_fd(fd); - ret = PTR_ERR(file); - goto err; - } - #if defined(CONFIG_UNIX) - ctx->ring_sock->file = file; -#endif - ret = io_uring_add_task_file(ctx, file); - if (ret) { - fput(file); - put_unused_fd(fd); - goto err; + if (IS_ERR(file)) { + sock_release(ctx->ring_sock); + ctx->ring_sock = NULL; + } else { + ctx->ring_sock->file = file; } - fd_install(fd, file); - return fd; -err: -#if defined(CONFIG_UNIX) - sock_release(ctx->ring_sock); - ctx->ring_sock = NULL; #endif - return ret; + return file; } static int io_uring_create(unsigned entries, struct io_uring_params *p, @@ -9435,6 +9432,7 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p, { struct user_struct *user = NULL; struct io_ring_ctx *ctx; + struct file *file; bool limit_mem; int ret; @@ -9582,13 +9580,22 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p, goto err; } + file = io_uring_get_file(ctx); + if (IS_ERR(file)) { + ret = PTR_ERR(file); + goto err; + } + /* * Install ring fd as the very last thing, so we don't risk someone * having closed it before we finish setup */ - ret = io_uring_get_fd(ctx); - if (ret < 0) - goto err; + ret = io_uring_install_fd(ctx, file); + if (ret < 0) { + /* fput will clean it up */ + fput(file); + return ret; + } trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags); return ret; -- GitLab From c07e6719511e77c4b289f62bfe96423eb6ea061d Mon Sep 17 00:00:00 2001 From: Xiaoguang Wang Date: Mon, 14 Dec 2020 23:49:41 +0800 Subject: [PATCH 1088/1894] io_uring: hold uring_lock while completing failed polled io in io_wq_submit_work() io_iopoll_complete() does not hold completion_lock to complete polled io, so in io_wq_submit_work(), we can not call io_req_complete() directly, to complete polled io, otherwise there maybe concurrent access to cqring, defer_list, etc, which is not safe. Commit dad1b1242fd5 ("io_uring: always let io_iopoll_complete() complete polled io") has fixed this issue, but Pavel reported that IOPOLL apart from rw can do buf reg/unreg requests( IORING_OP_PROVIDE_BUFFERS or IORING_OP_REMOVE_BUFFERS), so the fix is not good. Given that io_iopoll_complete() is always called under uring_lock, so here for polled io, we can also get uring_lock to fix this issue. Fixes: dad1b1242fd5 ("io_uring: always let io_iopoll_complete() complete polled io") Cc: # 5.5+ Signed-off-by: Xiaoguang Wang Reviewed-by: Pavel Begunkov [axboe: don't deref 'req' after completing it'] Signed-off-by: Jens Axboe --- fs/io_uring.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 7c0b77d49cd24..7e35283fc0b10 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -6332,19 +6332,28 @@ static struct io_wq_work *io_wq_submit_work(struct io_wq_work *work) } if (ret) { + struct io_ring_ctx *lock_ctx = NULL; + + if (req->ctx->flags & IORING_SETUP_IOPOLL) + lock_ctx = req->ctx; + /* - * io_iopoll_complete() does not hold completion_lock to complete - * polled io, so here for polled io, just mark it done and still let - * io_iopoll_complete() complete it. + * io_iopoll_complete() does not hold completion_lock to + * complete polled io, so here for polled io, we can not call + * io_req_complete() directly, otherwise there maybe concurrent + * access to cqring, defer_list, etc, which is not safe. Given + * that io_iopoll_complete() is always called under uring_lock, + * so here for polled io, we also get uring_lock to complete + * it. */ - if (req->ctx->flags & IORING_SETUP_IOPOLL) { - struct kiocb *kiocb = &req->rw.kiocb; + if (lock_ctx) + mutex_lock(&lock_ctx->uring_lock); - kiocb_done(kiocb, ret, NULL); - } else { - req_set_fail_links(req); - io_req_complete(req, ret); - } + req_set_fail_links(req); + io_req_complete(req, ret); + + if (lock_ctx) + mutex_unlock(&lock_ctx->uring_lock); } return io_steal_work(req); -- GitLab From 9bfaf9c729a924c048eaf2032ce932b3c724dc27 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 21 Dec 2020 16:46:59 -0700 Subject: [PATCH 1089/1894] dt-bindings: Drop unnecessary *-supply schemas properties *-supply properties are always a single phandle, so binding schemas don't need a type $ref nor 'maxItems'. A meta-schema check for this is pending once these existing cases are fixed. Cc: Jonathan Cameron Cc: Dmitry Torokhov Cc: Mauro Carvalho Chehab Cc: Maxime Ripard Cc: dri-devel@lists.freedesktop.org Cc: linux-iio@vger.kernel.org Cc: linux-input@vger.kernel.org Cc: linux-media@vger.kernel.org Acked-by: Sam Ravnborg Reviewed-by: Laurent Pinchart Acked-by: Sakari Ailus Acked-by: Lee Jones Acked-by: Mark Brown Link: https://lore.kernel.org/r/20201221234659.824881-1-robh@kernel.org Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/display/bridge/anx6345.yaml | 2 -- .../devicetree/bindings/display/bridge/ite,it6505.yaml | 2 -- .../devicetree/bindings/display/bridge/lvds-codec.yaml | 3 +-- Documentation/devicetree/bindings/display/bridge/ps8640.yaml | 2 -- .../devicetree/bindings/display/bridge/simple-bridge.yaml | 1 - .../devicetree/bindings/display/bridge/thine,thc63lvd1024.yaml | 1 - .../devicetree/bindings/display/bridge/toshiba,tc358775.yaml | 2 -- Documentation/devicetree/bindings/iio/adc/lltc,ltc2496.yaml | 3 +-- Documentation/devicetree/bindings/iio/humidity/ti,hdc2010.yaml | 3 +-- .../devicetree/bindings/input/fsl,mpr121-touchkey.yaml | 3 +-- .../devicetree/bindings/input/touchscreen/edt-ft5x06.yaml | 3 +-- Documentation/devicetree/bindings/media/i2c/maxim,max9286.yaml | 1 - Documentation/devicetree/bindings/media/i2c/mipi-ccs.yaml | 3 --- Documentation/devicetree/bindings/media/i2c/sony,imx214.yaml | 3 --- Documentation/devicetree/bindings/media/i2c/sony,imx274.yaml | 3 --- Documentation/devicetree/bindings/mfd/st,stmfx.yaml | 3 +-- .../devicetree/bindings/regulator/anatop-regulator.yaml | 1 - 17 files changed, 6 insertions(+), 33 deletions(-) diff --git a/Documentation/devicetree/bindings/display/bridge/anx6345.yaml b/Documentation/devicetree/bindings/display/bridge/anx6345.yaml index 8c0e4f285fbcb..fccd63521a8ca 100644 --- a/Documentation/devicetree/bindings/display/bridge/anx6345.yaml +++ b/Documentation/devicetree/bindings/display/bridge/anx6345.yaml @@ -26,11 +26,9 @@ properties: description: GPIO connected to active low reset dvdd12-supply: - maxItems: 1 description: Regulator for 1.2V digital core power. dvdd25-supply: - maxItems: 1 description: Regulator for 2.5V digital core power. ports: diff --git a/Documentation/devicetree/bindings/display/bridge/ite,it6505.yaml b/Documentation/devicetree/bindings/display/bridge/ite,it6505.yaml index efbb3d0117dc6..02cfc0a3b5502 100644 --- a/Documentation/devicetree/bindings/display/bridge/ite,it6505.yaml +++ b/Documentation/devicetree/bindings/display/bridge/ite,it6505.yaml @@ -35,11 +35,9 @@ properties: maxItems: 1 ovdd-supply: - maxItems: 1 description: I/O voltage pwr18-supply: - maxItems: 1 description: core voltage interrupts: diff --git a/Documentation/devicetree/bindings/display/bridge/lvds-codec.yaml b/Documentation/devicetree/bindings/display/bridge/lvds-codec.yaml index e5e3c72630cf8..66a14d60ce1d5 100644 --- a/Documentation/devicetree/bindings/display/bridge/lvds-codec.yaml +++ b/Documentation/devicetree/bindings/display/bridge/lvds-codec.yaml @@ -79,8 +79,7 @@ properties: The GPIO used to control the power down line of this device. maxItems: 1 - power-supply: - maxItems: 1 + power-supply: true required: - compatible diff --git a/Documentation/devicetree/bindings/display/bridge/ps8640.yaml b/Documentation/devicetree/bindings/display/bridge/ps8640.yaml index 7e27cfcf770dd..763c7909473ef 100644 --- a/Documentation/devicetree/bindings/display/bridge/ps8640.yaml +++ b/Documentation/devicetree/bindings/display/bridge/ps8640.yaml @@ -35,11 +35,9 @@ properties: description: GPIO connected to active low reset. vdd12-supply: - maxItems: 1 description: Regulator for 1.2V digital core power. vdd33-supply: - maxItems: 1 description: Regulator for 3.3V digital core power. ports: diff --git a/Documentation/devicetree/bindings/display/bridge/simple-bridge.yaml b/Documentation/devicetree/bindings/display/bridge/simple-bridge.yaml index 3ddb35fcf0a2c..64e8a1c24b402 100644 --- a/Documentation/devicetree/bindings/display/bridge/simple-bridge.yaml +++ b/Documentation/devicetree/bindings/display/bridge/simple-bridge.yaml @@ -60,7 +60,6 @@ properties: description: GPIO controlling bridge enable vdd-supply: - maxItems: 1 description: Power supply for the bridge required: diff --git a/Documentation/devicetree/bindings/display/bridge/thine,thc63lvd1024.yaml b/Documentation/devicetree/bindings/display/bridge/thine,thc63lvd1024.yaml index 469ac4a342739..3d5ce08a57927 100644 --- a/Documentation/devicetree/bindings/display/bridge/thine,thc63lvd1024.yaml +++ b/Documentation/devicetree/bindings/display/bridge/thine,thc63lvd1024.yaml @@ -74,7 +74,6 @@ properties: description: Power down GPIO signal, pin name "/PDWN", active low. vcc-supply: - maxItems: 1 description: Power supply for the TTL output, TTL CLOCKOUT signal, LVDS input, PLL and digital circuitry. diff --git a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358775.yaml b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358775.yaml index fd3113aa9ccdc..b5959cc78b8d3 100644 --- a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358775.yaml +++ b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358775.yaml @@ -28,11 +28,9 @@ properties: description: i2c address of the bridge, 0x0f vdd-supply: - maxItems: 1 description: 1.2V LVDS Power Supply vddio-supply: - maxItems: 1 description: 1.8V IO Power Supply stby-gpios: diff --git a/Documentation/devicetree/bindings/iio/adc/lltc,ltc2496.yaml b/Documentation/devicetree/bindings/iio/adc/lltc,ltc2496.yaml index 6a991e9f78e28..2716d4e953296 100644 --- a/Documentation/devicetree/bindings/iio/adc/lltc,ltc2496.yaml +++ b/Documentation/devicetree/bindings/iio/adc/lltc,ltc2496.yaml @@ -17,8 +17,7 @@ properties: - lltc,ltc2496 vref-supply: - description: phandle to an external regulator providing the reference voltage - $ref: /schemas/types.yaml#/definitions/phandle + description: Power supply for the reference voltage reg: description: spi chipselect number according to the usual spi bindings diff --git a/Documentation/devicetree/bindings/iio/humidity/ti,hdc2010.yaml b/Documentation/devicetree/bindings/iio/humidity/ti,hdc2010.yaml index 7037f82ec7530..88384b69f917c 100644 --- a/Documentation/devicetree/bindings/iio/humidity/ti,hdc2010.yaml +++ b/Documentation/devicetree/bindings/iio/humidity/ti,hdc2010.yaml @@ -22,8 +22,7 @@ properties: - ti,hdc2010 - ti,hdc2080 - vdd-supply: - maxItems: 1 + vdd-supply: true reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/input/fsl,mpr121-touchkey.yaml b/Documentation/devicetree/bindings/input/fsl,mpr121-touchkey.yaml index 378a85c09d341..878464f128dc4 100644 --- a/Documentation/devicetree/bindings/input/fsl,mpr121-touchkey.yaml +++ b/Documentation/devicetree/bindings/input/fsl,mpr121-touchkey.yaml @@ -31,8 +31,7 @@ properties: interrupts: maxItems: 1 - vdd-supply: - maxItems: 1 + vdd-supply: true linux,keycodes: minItems: 1 diff --git a/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml b/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml index 4ce109476a0ee..bfc3a8b5e1182 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml +++ b/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml @@ -55,8 +55,7 @@ properties: wakeup-source: true - vcc-supply: - maxItems: 1 + vcc-supply: true gain: description: Allows setting the sensitivity in the range from 0 to 31. diff --git a/Documentation/devicetree/bindings/media/i2c/maxim,max9286.yaml b/Documentation/devicetree/bindings/media/i2c/maxim,max9286.yaml index 9ea827092fdd9..68ee8c7d9e79d 100644 --- a/Documentation/devicetree/bindings/media/i2c/maxim,max9286.yaml +++ b/Documentation/devicetree/bindings/media/i2c/maxim,max9286.yaml @@ -40,7 +40,6 @@ properties: poc-supply: description: Regulator providing Power over Coax to the cameras - maxItems: 1 enable-gpios: description: GPIO connected to the \#PWDN pin with inverted polarity diff --git a/Documentation/devicetree/bindings/media/i2c/mipi-ccs.yaml b/Documentation/devicetree/bindings/media/i2c/mipi-ccs.yaml index 0df0334d2d0db..bb3528315f20c 100644 --- a/Documentation/devicetree/bindings/media/i2c/mipi-ccs.yaml +++ b/Documentation/devicetree/bindings/media/i2c/mipi-ccs.yaml @@ -39,15 +39,12 @@ properties: vana-supply: description: Analogue voltage supply (VANA), sensor dependent. - maxItems: 1 vcore-supply: description: Core voltage supply (VCore), sensor dependent. - maxItems: 1 vio-supply: description: I/O voltage supply (VIO), sensor dependent. - maxItems: 1 clocks: description: External clock to the sensor. diff --git a/Documentation/devicetree/bindings/media/i2c/sony,imx214.yaml b/Documentation/devicetree/bindings/media/i2c/sony,imx214.yaml index 1a3590dd0e98a..eb12526a462fa 100644 --- a/Documentation/devicetree/bindings/media/i2c/sony,imx214.yaml +++ b/Documentation/devicetree/bindings/media/i2c/sony,imx214.yaml @@ -37,15 +37,12 @@ properties: vdddo-supply: description: Chip digital IO regulator (1.8V). - maxItems: 1 vdda-supply: description: Chip analog regulator (2.7V). - maxItems: 1 vddd-supply: description: Chip digital core regulator (1.12V). - maxItems: 1 flash-leds: description: See ../video-interfaces.txt diff --git a/Documentation/devicetree/bindings/media/i2c/sony,imx274.yaml b/Documentation/devicetree/bindings/media/i2c/sony,imx274.yaml index f697e1a20beb7..a66acb20d59be 100644 --- a/Documentation/devicetree/bindings/media/i2c/sony,imx274.yaml +++ b/Documentation/devicetree/bindings/media/i2c/sony,imx274.yaml @@ -33,15 +33,12 @@ properties: vana-supply: description: Sensor 2.8 V analog supply. - maxItems: 1 vdig-supply: description: Sensor 1.8 V digital core supply. - maxItems: 1 vddl-supply: description: Sensor digital IO 1.2 V supply. - maxItems: 1 port: type: object diff --git a/Documentation/devicetree/bindings/mfd/st,stmfx.yaml b/Documentation/devicetree/bindings/mfd/st,stmfx.yaml index 888ab4b5df458..19e9afb385acb 100644 --- a/Documentation/devicetree/bindings/mfd/st,stmfx.yaml +++ b/Documentation/devicetree/bindings/mfd/st,stmfx.yaml @@ -26,8 +26,7 @@ properties: drive-open-drain: true - vdd-supply: - maxItems: 1 + vdd-supply: true pinctrl: type: object diff --git a/Documentation/devicetree/bindings/regulator/anatop-regulator.yaml b/Documentation/devicetree/bindings/regulator/anatop-regulator.yaml index e7b3abe303639..0a66338c7e5ae 100644 --- a/Documentation/devicetree/bindings/regulator/anatop-regulator.yaml +++ b/Documentation/devicetree/bindings/regulator/anatop-regulator.yaml @@ -59,7 +59,6 @@ properties: description: u32 value representing regulator enable bit offset. vin-supply: - $ref: '/schemas/types.yaml#/definitions/phandle' description: input supply phandle. required: -- GitLab From 246eedd70da91d57bf485bd558c50f7b2286c462 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 21 Dec 2020 21:01:21 -0700 Subject: [PATCH 1090/1894] dt-bindings: net: qcom,ipa: Drop unnecessary type ref on 'memory-region' 'memory-region' is a common property, so it doesn't need a type ref here. Cc: "David S. Miller" Cc: Jakub Kicinski Cc: Alex Elder Cc: netdev@vger.kernel.org Acked-by: Alex Elder Link: https://lore.kernel.org/r/20201222040121.1314370-1-robh@kernel.org Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/net/qcom,ipa.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/qcom,ipa.yaml b/Documentation/devicetree/bindings/net/qcom,ipa.yaml index d0cbbcf1b0e59..8a2d12644675b 100644 --- a/Documentation/devicetree/bindings/net/qcom,ipa.yaml +++ b/Documentation/devicetree/bindings/net/qcom,ipa.yaml @@ -121,7 +121,6 @@ properties: receive and act on notifications of modem up/down events. memory-region: - $ref: /schemas/types.yaml#/definitions/phandle-array maxItems: 1 description: If present, a phandle for a reserved memory area that holds -- GitLab From 2b8f061a4f505aad11fd36adb24c3138ad09b96b Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 21 Dec 2020 21:06:45 -0700 Subject: [PATCH 1091/1894] dt-bindings: Drop redundant maxItems/items 'maxItems' equal to the 'items' list length is redundant. 'maxItems' is preferred for a single entry while greater than 1 should have an 'items' list. A meta-schema check for this is pending once these existing cases are fixed. Cc: Greg Kroah-Hartman Cc: dri-devel@lists.freedesktop.org Cc: dmaengine@vger.kernel.org Cc: alsa-devel@alsa-project.org Cc: linux-usb@vger.kernel.org Acked-by: Sam Ravnborg Reviewed-by: Laurent Pinchart Acked-by: Vinod Koul Acked-by: Jassi Brar Acked-by: Mark Brown Link: https://lore.kernel.org/r/20201222040645.1323611-1-robh@kernel.org Signed-off-by: Rob Herring --- .../devicetree/bindings/display/xlnx/xlnx,zynqmp-dpsub.yaml | 1 - Documentation/devicetree/bindings/dma/renesas,rcar-dmac.yaml | 1 - Documentation/devicetree/bindings/mailbox/arm,mhu.yaml | 1 - .../devicetree/bindings/sound/nvidia,tegra30-hda.yaml | 2 -- Documentation/devicetree/bindings/usb/renesas,usb-xhci.yaml | 1 - Documentation/devicetree/bindings/usb/renesas,usbhs.yaml | 3 --- 6 files changed, 9 deletions(-) diff --git a/Documentation/devicetree/bindings/display/xlnx/xlnx,zynqmp-dpsub.yaml b/Documentation/devicetree/bindings/display/xlnx/xlnx,zynqmp-dpsub.yaml index 7b9d468c3e52c..403d57977ee73 100644 --- a/Documentation/devicetree/bindings/display/xlnx/xlnx,zynqmp-dpsub.yaml +++ b/Documentation/devicetree/bindings/display/xlnx/xlnx,zynqmp-dpsub.yaml @@ -98,7 +98,6 @@ properties: maxItems: 1 dmas: - maxItems: 4 items: - description: Video layer, plane 0 (RGB or luma) - description: Video layer, plane 1 (U/V or U) diff --git a/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.yaml b/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.yaml index b548e4723936e..c07eb6f2fc8d2 100644 --- a/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.yaml +++ b/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.yaml @@ -73,7 +73,6 @@ properties: maxItems: 1 clock-names: - maxItems: 1 items: - const: fck diff --git a/Documentation/devicetree/bindings/mailbox/arm,mhu.yaml b/Documentation/devicetree/bindings/mailbox/arm,mhu.yaml index d43791a2dde75..d07eb00b97c84 100644 --- a/Documentation/devicetree/bindings/mailbox/arm,mhu.yaml +++ b/Documentation/devicetree/bindings/mailbox/arm,mhu.yaml @@ -61,7 +61,6 @@ properties: - description: low-priority non-secure - description: high-priority non-secure - description: Secure - maxItems: 3 clocks: maxItems: 1 diff --git a/Documentation/devicetree/bindings/sound/nvidia,tegra30-hda.yaml b/Documentation/devicetree/bindings/sound/nvidia,tegra30-hda.yaml index e543a6123792c..b55775e21de67 100644 --- a/Documentation/devicetree/bindings/sound/nvidia,tegra30-hda.yaml +++ b/Documentation/devicetree/bindings/sound/nvidia,tegra30-hda.yaml @@ -44,7 +44,6 @@ properties: maxItems: 3 clock-names: - maxItems: 3 items: - const: hda - const: hda2hdmi @@ -54,7 +53,6 @@ properties: maxItems: 3 reset-names: - maxItems: 3 items: - const: hda - const: hda2hdmi diff --git a/Documentation/devicetree/bindings/usb/renesas,usb-xhci.yaml b/Documentation/devicetree/bindings/usb/renesas,usb-xhci.yaml index 0f078bd0a3e50..22603256ddf8f 100644 --- a/Documentation/devicetree/bindings/usb/renesas,usb-xhci.yaml +++ b/Documentation/devicetree/bindings/usb/renesas,usb-xhci.yaml @@ -51,7 +51,6 @@ properties: maxItems: 1 phy-names: - maxItems: 1 items: - const: usb diff --git a/Documentation/devicetree/bindings/usb/renesas,usbhs.yaml b/Documentation/devicetree/bindings/usb/renesas,usbhs.yaml index 737c1f47b7deb..54c361d4a7af0 100644 --- a/Documentation/devicetree/bindings/usb/renesas,usbhs.yaml +++ b/Documentation/devicetree/bindings/usb/renesas,usbhs.yaml @@ -74,11 +74,8 @@ properties: phys: maxItems: 1 - items: - - description: phandle + phy specifier pair. phy-names: - maxItems: 1 items: - const: usb -- GitLab From f87777a3c30cf50c66a20e1d153f0e003bb30774 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 19 Dec 2020 14:50:36 +0100 Subject: [PATCH 1092/1894] net: stmmac: dwmac-meson8b: ignore the second clock input The dwmac glue registers on Amlogic Meson8b and newer SoCs has two clock inputs: - Meson8b and Meson8m2: MPLL2 and MPLL2 (the same parent is wired to both inputs) - GXBB, GXL, GXM, AXG, G12A, G12B, SM1: FCLK_DIV2 and MPLL2 All known vendor kernels and u-boots are using the first input only. We let the common clock framework automatically choose the "right" parent. For some boards this causes a problem though, specificially with G12A and newer SoCs. The clock input is used for generating the 125MHz RGMII TX clock. For the two input clocks this means on G12A: - FCLK_DIV2: 999999985Hz / 8 = 124999998.125Hz - MPLL2: 499999993Hz / 4 = 124999998.25Hz In theory MPLL2 is the "better" clock input because it's gets us 0.125Hz closer to the requested frequency than FCLK_DIV2. In reality however there is a resource conflict because MPLL2 is needed to generate some of the audio clocks. dwmac-meson8b probes first and sets up the clock tree with MPLL2. This works fine until the audio driver comes and "steals" the MPLL2 clocks and configures it with it's own rate (294909637Hz). The common clock framework happily changes the MPLL2 rate but does not reconfigure our RGMII TX clock tree, which then ends up at 73727409Hz, which is more than 40% off the requested 125MHz. Don't use the second clock input for now to force the common clock framework to always select the first parent. This mimics the behavior from the vendor driver and fixes the clock resource conflict with the audio driver on G12A boards. Once the common clock framework can handle this situation this change can be reverted again. Fixes: 566e8251625304 ("net: stmmac: add a glue driver for the Amlogic Meson 8b / GXBB DWMAC") Reported-by: Thomas Graichen Signed-off-by: Martin Blumenstingl Tested-by: thomas graichen Link: https://lore.kernel.org/r/20201219135036.3216017-1-martin.blumenstingl@googlemail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c index 459ae715b33d1..f184b00f51166 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c @@ -135,7 +135,7 @@ static int meson8b_init_rgmii_tx_clk(struct meson8b_dwmac *dwmac) struct device *dev = dwmac->dev; static const struct clk_parent_data mux_parents[] = { { .fw_name = "clkin0", }, - { .fw_name = "clkin1", }, + { .index = -1, }, }; static const struct clk_div_table div_table[] = { { .div = 2, .val = 2, }, -- GitLab From 8b0f64b113d617c995ffdf50196948c3e99c6e49 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 19 Dec 2020 10:55:38 -0800 Subject: [PATCH 1093/1894] MAINTAINERS: remove names from mailing list maintainers When searching for inactive maintainers it's useful to filter out mailing list addresses. Such "maintainers" will obviously never feature in a "From:" line of an email or a review tag. Since "L:" entries only provide the address of a mailing list without a fancy name extend this pattern to "M:" entries. Alternatively we could reserve M: entries for humans only and move the fake "maintainers" to L:. While I'd personally prefer to reserve M: for humans only, I'm not 100% that's a great choice either, given most L: entries are in fact open mailing lists with public archives. Link: https://lore.kernel.org/r/20201219185538.750076-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- MAINTAINERS | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 28d7acdb05915..20cd4cc08dd10 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -203,8 +203,8 @@ F: include/uapi/linux/nl80211.h F: net/wireless/ 8169 10/100/1000 GIGABIT ETHERNET DRIVER -M: Realtek linux nic maintainers M: Heiner Kallweit +M: nic_swsd@realtek.com L: netdev@vger.kernel.org S: Maintained F: drivers/net/ethernet/realtek/r8169* @@ -2119,7 +2119,7 @@ N: atmel ARM/Microchip Sparx5 SoC support M: Lars Povlsen M: Steen Hegelund -M: Microchip Linux Driver Support +M: UNGLinuxDriver@microchip.com L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Supported T: git git://github.com/microchip-ung/linux-upstream.git @@ -3958,7 +3958,7 @@ F: net/can/ CAN-J1939 NETWORK LAYER M: Robin van der Gracht M: Oleksij Rempel -R: Pengutronix Kernel Team +R: kernel@pengutronix.de L: linux-can@vger.kernel.org S: Maintained F: Documentation/networking/j1939.rst @@ -11651,7 +11651,7 @@ F: drivers/media/platform/atmel/atmel-isi.h MICROCHIP KSZ SERIES ETHERNET SWITCH DRIVER M: Woojung Huh -M: Microchip Linux Driver Support +M: UNGLinuxDriver@microchip.com L: netdev@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml @@ -11661,7 +11661,7 @@ F: net/dsa/tag_ksz.c MICROCHIP LAN743X ETHERNET DRIVER M: Bryan Whitehead -M: Microchip Linux Driver Support +M: UNGLinuxDriver@microchip.com L: netdev@vger.kernel.org S: Maintained F: drivers/net/ethernet/microchip/lan743x_* @@ -11755,7 +11755,7 @@ F: drivers/net/wireless/microchip/wilc1000/ MICROSEMI MIPS SOCS M: Alexandre Belloni -M: Microchip Linux Driver Support +M: UNGLinuxDriver@microchip.com L: linux-mips@vger.kernel.org S: Supported F: Documentation/devicetree/bindings/mips/mscc.txt @@ -12809,10 +12809,10 @@ F: tools/objtool/ F: include/linux/objtool.h OCELOT ETHERNET SWITCH DRIVER -M: Microchip Linux Driver Support M: Vladimir Oltean M: Claudiu Manoil M: Alexandre Belloni +M: UNGLinuxDriver@microchip.com L: netdev@vger.kernel.org S: Supported F: drivers/net/dsa/ocelot/* @@ -13874,7 +13874,7 @@ F: drivers/platform/x86/peaq-wmi.c PENSANDO ETHERNET DRIVERS M: Shannon Nelson -M: Pensando Drivers +M: drivers@pensando.io L: netdev@vger.kernel.org S: Supported F: Documentation/networking/device_drivers/ethernet/pensando/ionic.rst @@ -14653,7 +14653,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git F: drivers/net/wireless/ath/ath11k/ QUALCOMM ATHEROS ATH9K WIRELESS DRIVER -M: QCA ath9k Development +M: ath9k-devel@qca.qualcomm.com L: linux-wireless@vger.kernel.org S: Supported W: https://wireless.wiki.kernel.org/en/users/Drivers/ath9k @@ -18338,7 +18338,7 @@ F: include/linux/usb/isp116x.h USB LAN78XX ETHERNET DRIVER M: Woojung Huh -M: Microchip Linux Driver Support +M: UNGLinuxDriver@microchip.com L: netdev@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/net/microchip,lan78xx.txt @@ -18452,7 +18452,7 @@ F: drivers/net/usb/smsc75xx.* USB SMSC95XX ETHERNET DRIVER M: Steve Glendinning -M: Microchip Linux Driver Support +M: UNGLinuxDriver@microchip.com L: netdev@vger.kernel.org S: Maintained F: drivers/net/usb/smsc95xx.* @@ -18999,7 +18999,7 @@ F: drivers/input/mouse/vmmouse.h VMWARE VMXNET3 ETHERNET DRIVER M: Ronak Doshi -M: "VMware, Inc." +M: pv-drivers@vmware.com L: netdev@vger.kernel.org S: Maintained F: drivers/net/vmxnet3/ -- GitLab From a0c8be56affa7d5ffbdec24c992223be54db3b6e Mon Sep 17 00:00:00 2001 From: Lijun Pan Date: Sat, 19 Dec 2020 15:39:19 -0600 Subject: [PATCH 1094/1894] ibmvnic: fix login buffer memory leak Commit 34f0f4e3f488 ("ibmvnic: Fix login buffer memory leaks") frees login_rsp_buffer in release_resources() and send_login() because handle_login_rsp() does not free it. Commit f3ae59c0c015 ("ibmvnic: store RX and TX subCRQ handle array in ibmvnic_adapter struct") frees login_rsp_buffer in handle_login_rsp(). It seems unnecessary to free it in release_resources() and send_login(). There are chances that handle_login_rsp returns earlier without freeing buffers. Double-checking the buffer is harmless since release_login_buffer and release_login_rsp_buffer will do nothing if buffer is already freed. Fixes: f3ae59c0c015 ("ibmvnic: store RX and TX subCRQ handle array in ibmvnic_adapter struct") Fixes: 34f0f4e3f488 ("ibmvnic: Fix login buffer memory leaks") Signed-off-by: Lijun Pan Link: https://lore.kernel.org/r/20201219213919.21045-1-ljp@linux.ibm.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmvnic.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index f302504faa8a5..bd0281020d519 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -955,6 +955,7 @@ static void release_resources(struct ibmvnic_adapter *adapter) release_rx_pools(adapter); release_napi(adapter); + release_login_buffer(adapter); release_login_rsp_buffer(adapter); } @@ -3873,7 +3874,9 @@ static int send_login(struct ibmvnic_adapter *adapter) return -1; } + release_login_buffer(adapter); release_login_rsp_buffer(adapter); + client_data_len = vnic_client_data_len(adapter); buffer_size = -- GitLab From 58f60329a6be35a5653edb3fd2023ccef9eb9943 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Sun, 20 Dec 2020 16:29:30 +0800 Subject: [PATCH 1095/1894] net: ethernet: mvneta: Fix error handling in mvneta_probe When mvneta_port_power_up() fails, we should execute cleanup functions after label err_netdev to avoid memleak. Fixes: 41c2b6b4f0f80 ("net: ethernet: mvneta: Add back interface mode validation") Signed-off-by: Dinghao Liu Link: https://lore.kernel.org/r/20201220082930.21623-1-dinghao.liu@zju.edu.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/mvneta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 563ceac3060f9..3369ec717a51c 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -5255,7 +5255,7 @@ static int mvneta_probe(struct platform_device *pdev) err = mvneta_port_power_up(pp, pp->phy_interface); if (err < 0) { dev_err(&pdev->dev, "can't power up port\n"); - return err; + goto err_netdev; } /* Armada3700 network controller does not support per-cpu -- GitLab From 1d898b283576c38dedcb6b21fcbb65968ab03581 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Sun, 20 Dec 2020 10:49:47 +0200 Subject: [PATCH 1096/1894] docs: netdev-FAQ: fix question headers formatting Join adjacent questions to a single question line. This fixes the formatting of questions that were not part of the heading. Also, drop Q: and A: prefixes. We don't need them now that questions and answers are visually separated. Signed-off-by: Baruch Siach Link: https://lore.kernel.org/r/f76078ba5547744f2ec178984c32fbc7dcd29a2b.1608454187.git.baruch@tkos.co.il Signed-off-by: Jakub Kicinski --- Documentation/networking/netdev-FAQ.rst | 126 +++++++++++------------- 1 file changed, 59 insertions(+), 67 deletions(-) diff --git a/Documentation/networking/netdev-FAQ.rst b/Documentation/networking/netdev-FAQ.rst index 4b9ed5874d5ad..ae2ae37cd9216 100644 --- a/Documentation/networking/netdev-FAQ.rst +++ b/Documentation/networking/netdev-FAQ.rst @@ -6,9 +6,9 @@ netdev FAQ ========== -Q: What is netdev? ------------------- -A: It is a mailing list for all network-related Linux stuff. This +What is netdev? +--------------- +It is a mailing list for all network-related Linux stuff. This includes anything found under net/ (i.e. core code like IPv6) and drivers/net (i.e. hardware specific drivers) in the Linux source tree. @@ -25,9 +25,9 @@ Aside from subsystems like that mentioned above, all network-related Linux development (i.e. RFC, review, comments, etc.) takes place on netdev. -Q: How do the changes posted to netdev make their way into Linux? ------------------------------------------------------------------ -A: There are always two trees (git repositories) in play. Both are +How do the changes posted to netdev make their way into Linux? +-------------------------------------------------------------- +There are always two trees (git repositories) in play. Both are driven by David Miller, the main network maintainer. There is the ``net`` tree, and the ``net-next`` tree. As you can probably guess from the names, the ``net`` tree is for fixes to existing code already in the @@ -37,9 +37,9 @@ for the future release. You can find the trees here: - https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git - https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git -Q: How often do changes from these trees make it to the mainline Linus tree? ----------------------------------------------------------------------------- -A: To understand this, you need to know a bit of background information on +How often do changes from these trees make it to the mainline Linus tree? +------------------------------------------------------------------------- +To understand this, you need to know a bit of background information on the cadence of Linux development. Each new release starts off with a two week "merge window" where the main maintainers feed their new stuff to Linus for merging into the mainline tree. After the two weeks, the @@ -81,7 +81,8 @@ focus for ``net`` is on stabilization and bug fixes. Finally, the vX.Y gets released, and the whole cycle starts over. -Q: So where are we now in this cycle? +So where are we now in this cycle? +---------------------------------- Load the mainline (Linus) page here: @@ -91,9 +92,9 @@ and note the top of the "tags" section. If it is rc1, it is early in the dev cycle. If it was tagged rc7 a week ago, then a release is probably imminent. -Q: How do I indicate which tree (net vs. net-next) my patch should be in? -------------------------------------------------------------------------- -A: Firstly, think whether you have a bug fix or new "next-like" content. +How do I indicate which tree (net vs. net-next) my patch should be in? +---------------------------------------------------------------------- +Firstly, think whether you have a bug fix or new "next-like" content. Then once decided, assuming that you use git, use the prefix flag, i.e. :: @@ -105,48 +106,45 @@ in the above is just the subject text of the outgoing e-mail, and you can manually change it yourself with whatever MUA you are comfortable with. -Q: I sent a patch and I'm wondering what happened to it? --------------------------------------------------------- -Q: How can I tell whether it got merged? -A: Start by looking at the main patchworks queue for netdev: +I sent a patch and I'm wondering what happened to it - how can I tell whether it got merged? +-------------------------------------------------------------------------------------------- +Start by looking at the main patchworks queue for netdev: https://patchwork.kernel.org/project/netdevbpf/list/ The "State" field will tell you exactly where things are at with your patch. -Q: The above only says "Under Review". How can I find out more? ----------------------------------------------------------------- -A: Generally speaking, the patches get triaged quickly (in less than +The above only says "Under Review". How can I find out more? +------------------------------------------------------------- +Generally speaking, the patches get triaged quickly (in less than 48h). So be patient. Asking the maintainer for status updates on your patch is a good way to ensure your patch is ignored or pushed to the bottom of the priority list. -Q: I submitted multiple versions of the patch series ----------------------------------------------------- -Q: should I directly update patchwork for the previous versions of these -patch series? -A: No, please don't interfere with the patch status on patchwork, leave +I submitted multiple versions of the patch series. Should I directly update patchwork for the previous versions of these patch series? +-------------------------------------------------------------------------------------------------------------------------------------- +No, please don't interfere with the patch status on patchwork, leave it to the maintainer to figure out what is the most recent and current version that should be applied. If there is any doubt, the maintainer will reply and ask what should be done. -Q: I made changes to only a few patches in a patch series should I resend only those changed? ---------------------------------------------------------------------------------------------- -A: No, please resend the entire patch series and make sure you do number your +I made changes to only a few patches in a patch series should I resend only those changed? +------------------------------------------------------------------------------------------ +No, please resend the entire patch series and make sure you do number your patches such that it is clear this is the latest and greatest set of patches that can be applied. -Q: I submitted multiple versions of a patch series and it looks like a version other than the last one has been accepted, what should I do? -------------------------------------------------------------------------------------------------------------------------------------------- -A: There is no revert possible, once it is pushed out, it stays like that. +I submitted multiple versions of a patch series and it looks like a version other than the last one has been accepted, what should I do? +---------------------------------------------------------------------------------------------------------------------------------------- +There is no revert possible, once it is pushed out, it stays like that. Please send incremental versions on top of what has been merged in order to fix the patches the way they would look like if your latest patch series was to be merged. -Q: How can I tell what patches are queued up for backporting to the various stable releases? --------------------------------------------------------------------------------------------- -A: Normally Greg Kroah-Hartman collects stable commits himself, but for +How can I tell what patches are queued up for backporting to the various stable releases? +----------------------------------------------------------------------------------------- +Normally Greg Kroah-Hartman collects stable commits himself, but for networking, Dave collects up patches he deems critical for the networking subsystem, and then hands them off to Greg. @@ -169,11 +167,9 @@ simply clone the repo, and then git grep the mainline commit ID, e.g. releases/3.9.8/ipv6-fix-possible-crashes-in-ip6_cork_release.patch stable/stable-queue$ -Q: I see a network patch and I think it should be backported to stable. ------------------------------------------------------------------------ -Q: Should I request it via stable@vger.kernel.org like the references in -the kernel's Documentation/process/stable-kernel-rules.rst file say? -A: No, not for networking. Check the stable queues as per above first +I see a network patch and I think it should be backported to stable. Should I request it via stable@vger.kernel.org like the references in the kernel's Documentation/process/stable-kernel-rules.rst file say? +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +No, not for networking. Check the stable queues as per above first to see if it is already queued. If not, then send a mail to netdev, listing the upstream commit ID and why you think it should be a stable candidate. @@ -190,11 +186,9 @@ mainline, the better the odds that it is an OK candidate for stable. So scrambling to request a commit be added the day after it appears should be avoided. -Q: I have created a network patch and I think it should be backported to stable. --------------------------------------------------------------------------------- -Q: Should I add a Cc: stable@vger.kernel.org like the references in the -kernel's Documentation/ directory say? -A: No. See above answer. In short, if you think it really belongs in +I have created a network patch and I think it should be backported to stable. Should I add a Cc: stable@vger.kernel.org like the references in the kernel's Documentation/ directory say? +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +No. See above answer. In short, if you think it really belongs in stable, then ensure you write a decent commit log that describes who gets impacted by the bug fix and how it manifests itself, and when the bug was introduced. If you do that properly, then the commit will get @@ -207,18 +201,18 @@ marker line as described in :ref:`Documentation/process/submitting-patches.rst ` to temporarily embed that information into the patch that you send. -Q: Are all networking bug fixes backported to all stable releases? ------------------------------------------------------------------- -A: Due to capacity, Dave could only take care of the backports for the +Are all networking bug fixes backported to all stable releases? +--------------------------------------------------------------- +Due to capacity, Dave could only take care of the backports for the last two stable releases. For earlier stable releases, each stable branch maintainer is supposed to take care of them. If you find any patch is missing from an earlier stable branch, please notify stable@vger.kernel.org with either a commit ID or a formal patch backported, and CC Dave and other relevant networking developers. -Q: Is the comment style convention different for the networking content? ------------------------------------------------------------------------- -A: Yes, in a largely trivial way. Instead of this:: +Is the comment style convention different for the networking content? +--------------------------------------------------------------------- +Yes, in a largely trivial way. Instead of this:: /* * foobar blah blah blah @@ -231,32 +225,30 @@ it is requested that you make it look like this:: * another line of text */ -Q: I am working in existing code that has the former comment style and not the latter. --------------------------------------------------------------------------------------- -Q: Should I submit new code in the former style or the latter? -A: Make it the latter style, so that eventually all code in the domain +I am working in existing code that has the former comment style and not the latter. Should I submit new code in the former style or the latter? +----------------------------------------------------------------------------------------------------------------------------------------------- +Make it the latter style, so that eventually all code in the domain of netdev is of this format. -Q: I found a bug that might have possible security implications or similar. ---------------------------------------------------------------------------- -Q: Should I mail the main netdev maintainer off-list?** -A: No. The current netdev maintainer has consistently requested that +I found a bug that might have possible security implications or similar. Should I mail the main netdev maintainer off-list? +--------------------------------------------------------------------------------------------------------------------------- +No. The current netdev maintainer has consistently requested that people use the mailing lists and not reach out directly. If you aren't OK with that, then perhaps consider mailing security@kernel.org or reading about http://oss-security.openwall.org/wiki/mailing-lists/distros as possible alternative mechanisms. -Q: What level of testing is expected before I submit my change? ---------------------------------------------------------------- -A: If your changes are against ``net-next``, the expectation is that you +What level of testing is expected before I submit my change? +------------------------------------------------------------ +If your changes are against ``net-next``, the expectation is that you have tested by layering your changes on top of ``net-next``. Ideally you will have done run-time testing specific to your change, but at a minimum, your changes should survive an ``allyesconfig`` and an ``allmodconfig`` build without new warnings or failures. -Q: How do I post corresponding changes to user space components? ----------------------------------------------------------------- -A: User space code exercising kernel features should be posted +How do I post corresponding changes to user space components? +------------------------------------------------------------- +User space code exercising kernel features should be posted alongside kernel patches. This gives reviewers a chance to see how any new interface is used and how well it works. @@ -280,9 +272,9 @@ to the mailing list, e.g.:: Posting as one thread is discouraged because it confuses patchwork (as of patchwork 2.2.2). -Q: Any other tips to help ensure my net/net-next patch gets OK'd? ------------------------------------------------------------------ -A: Attention to detail. Re-read your own work as if you were the +Any other tips to help ensure my net/net-next patch gets OK'd? +-------------------------------------------------------------- +Attention to detail. Re-read your own work as if you were the reviewer. You can start with using ``checkpatch.pl``, perhaps even with the ``--strict`` flag. But do not be mindlessly robotic in doing so. If your change is a bug fix, make sure your commit log indicates the -- GitLab From 5d5647dad259bb416fd5d3d87012760386d97530 Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Mon, 21 Dec 2020 06:55:30 -0800 Subject: [PATCH 1097/1894] qede: fix offload for IPIP tunnel packets IPIP tunnels packets are unknown to device, hence these packets are incorrectly parsed and caused the packet corruption, so disable offlods for such packets at run time. Signed-off-by: Manish Chopra Signed-off-by: Sudarsana Kalluru Signed-off-by: Igor Russkikh Link: https://lore.kernel.org/r/20201221145530.7771-1-manishc@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qede/qede_fp.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c index a2494bf850079..ca0ee29a57b50 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_fp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c @@ -1799,6 +1799,11 @@ netdev_features_t qede_features_check(struct sk_buff *skb, ntohs(udp_hdr(skb)->dest) != gnv_port)) return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); + } else if (l4_proto == IPPROTO_IPIP) { + /* IPIP tunnels are unknown to the device or at least unsupported natively, + * offloads for them can't be done trivially, so disable them for such skb. + */ + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); } } -- GitLab From c5b840a28b5257173f65b756cc6f3d06870c7881 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 16 Dec 2020 14:14:59 +0100 Subject: [PATCH 1098/1894] crypto: keembay - CRYPTO_DEV_KEEMBAY_OCS_AES_SM4 should depend on ARCH_KEEMBAY The Intel Keem Bay Offload and Crypto Subsystem (OCS) is only present on Intel Keem Bay SoCs. Hence add a dependency on ARCH_KEEMBAY, to prevent asking the user about this driver when configuring a kernel without Intel Keem Bay platform support. While at it, fix a misspelling of "cipher". Fixes: 88574332451380f4 ("crypto: keembay - Add support for Keem Bay OCS AES/SM4") Signed-off-by: Geert Uytterhoeven Acked-by: Daniele Alessandrelli Signed-off-by: Herbert Xu --- drivers/crypto/keembay/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/keembay/Kconfig b/drivers/crypto/keembay/Kconfig index 3c16797b25b94..6f62c838a3fa0 100644 --- a/drivers/crypto/keembay/Kconfig +++ b/drivers/crypto/keembay/Kconfig @@ -1,12 +1,12 @@ config CRYPTO_DEV_KEEMBAY_OCS_AES_SM4 tristate "Support for Intel Keem Bay OCS AES/SM4 HW acceleration" - depends on OF || COMPILE_TEST + depends on ARCH_KEEMBAY || COMPILE_TEST select CRYPTO_SKCIPHER select CRYPTO_AEAD select CRYPTO_ENGINE help Support for Intel Keem Bay Offload and Crypto Subsystem (OCS) AES and - SM4 cihper hardware acceleration for use with Crypto API. + SM4 cipher hardware acceleration for use with Crypto API. Provides HW acceleration for the following transformations: cbc(aes), ctr(aes), ccm(aes), gcm(aes), cbc(sm4), ctr(sm4), ccm(sm4) -- GitLab From 167316a5a08370064d7f8b96835dae271a8ce3ee Mon Sep 17 00:00:00 2001 From: Daniele Alessandrelli Date: Thu, 17 Dec 2020 16:35:10 +0000 Subject: [PATCH 1099/1894] crypto: keembay - Add dependency on HAS_IOMEM Add dependency for CRYPTO_DEV_KEEMBAY_OCS_AES_SM4 on HAS_IOMEM to prevent build failures. Fixes: 88574332451380f4 ("crypto: keembay - Add support for Keem Bay OCS AES/SM4") Reported-by: kernel test robot Signed-off-by: Daniele Alessandrelli Signed-off-by: Herbert Xu --- drivers/crypto/keembay/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/crypto/keembay/Kconfig b/drivers/crypto/keembay/Kconfig index 6f62c838a3fa0..f2e17b0c4fa05 100644 --- a/drivers/crypto/keembay/Kconfig +++ b/drivers/crypto/keembay/Kconfig @@ -1,5 +1,6 @@ config CRYPTO_DEV_KEEMBAY_OCS_AES_SM4 tristate "Support for Intel Keem Bay OCS AES/SM4 HW acceleration" + depends on HAS_IOMEM depends on ARCH_KEEMBAY || COMPILE_TEST select CRYPTO_SKCIPHER select CRYPTO_AEAD -- GitLab From c0e583ab2016de8dedfb73934d4c4e8ff5bd896c Mon Sep 17 00:00:00 2001 From: Marco Chiappero Date: Tue, 22 Dec 2020 13:00:24 +0000 Subject: [PATCH 1100/1894] crypto: qat - add CRYPTO_AES to Kconfig dependencies This patch includes a missing dependency (CRYPTO_AES) which may lead to an "undefined reference to `aes_expandkey'" linking error. Fixes: 5106dfeaeabe ("crypto: qat - add AES-XTS support for QAT GEN4 devices") Reported-by: kernel test robot Signed-off-by: Marco Chiappero Signed-off-by: Herbert Xu --- drivers/crypto/qat/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/crypto/qat/Kconfig b/drivers/crypto/qat/Kconfig index beb379b23dc37..846a3d90b41a3 100644 --- a/drivers/crypto/qat/Kconfig +++ b/drivers/crypto/qat/Kconfig @@ -11,6 +11,7 @@ config CRYPTO_DEV_QAT select CRYPTO_SHA1 select CRYPTO_SHA256 select CRYPTO_SHA512 + select CRYPTO_AES select FW_LOADER config CRYPTO_DEV_QAT_DH895xCC -- GitLab From f86de9b1c0663b0a3ca2dcddec9aa910ff0fbf2c Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Fri, 23 Oct 2020 14:46:47 +0800 Subject: [PATCH 1101/1894] ALSA: hda/realtek - Fix speaker volume control on Lenovo C940 Cannot adjust speaker's volume on Lenovo C940. Applying the alc298_fixup_speaker_volume function can fix the issue. [ Additional note: C940 has I2S amp for the speaker and this needs the same initialization as Dell machines. The patch was slightly modified so that the quirk entry is moved next to the corresponding Dell quirk entry. -- tiwai ] Signed-off-by: Kailang Yang Cc: Link: https://lore.kernel.org/r/ea25b4e5c468491aa2e9d6cb1f2fced3@realtek.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index dde5ba2095415..5b03259622a8a 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6289,6 +6289,7 @@ enum { ALC221_FIXUP_HP_FRONT_MIC, ALC292_FIXUP_TPT460, ALC298_FIXUP_SPK_VOLUME, + ALC298_FIXUP_LENOVO_SPK_VOLUME, ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER, ALC269_FIXUP_ATIV_BOOK_8, ALC221_FIXUP_HP_MIC_NO_PRESENCE, @@ -7119,6 +7120,10 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE, }, + [ALC298_FIXUP_LENOVO_SPK_VOLUME] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc298_fixup_speaker_volume, + }, [ALC295_FIXUP_DISABLE_DAC3] = { .type = HDA_FIXUP_FUNC, .v.func = alc295_fixup_disable_dac3, @@ -8126,6 +8131,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x3151, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x17aa, 0x3176, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x17aa, 0x3178, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x17aa, 0x3818, "Lenovo C940", ALC298_FIXUP_LENOVO_SPK_VOLUME), SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI), SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC), SND_PCI_QUIRK(0x17aa, 0x3978, "Lenovo B50-70", ALC269_FIXUP_DMIC_THINKPAD_ACPI), -- GitLab From c1e8952395c1f44a6304c71401519d19ed2ac56a Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Wed, 23 Dec 2020 15:34:57 +0800 Subject: [PATCH 1102/1894] ALSA: hda/realtek - Modify Dell platform name Dell platform SSID:0x0a58 change platform name. Use the generic name instead for avoiding confusion. Fixes: 150927c3674d ("ALSA: hda/realtek - Supported Dell fixed type headset") Signed-off-by: Kailang Yang Cc: Link: https://lore.kernel.org/r/efe7c196158241aa817229df7835d645@realtek.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 5b03259622a8a..f49cf0f227c44 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7890,7 +7890,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x09bf, "Dell Precision", ALC233_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0a2e, "Dell", ALC236_FIXUP_DELL_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1028, 0x0a30, "Dell", ALC236_FIXUP_DELL_AIO_HEADSET_MIC), - SND_PCI_QUIRK(0x1028, 0x0a58, "Dell Precision 3650 Tower", ALC255_FIXUP_DELL_HEADSET_MIC), + SND_PCI_QUIRK(0x1028, 0x0a58, "Dell", ALC255_FIXUP_DELL_HEADSET_MIC), SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), -- GitLab From 117ae250cfa3718f21bd07df0650dfbe3bc3a823 Mon Sep 17 00:00:00 2001 From: Yi Li Date: Wed, 23 Dec 2020 23:04:21 +0800 Subject: [PATCH 1103/1894] bcache:remove a superfluous check in register_bcache There have no reassign the bdev after check It is IS_ERR. the double check !IS_ERR(bdev) is superfluous. After commit 4e7b5671c6a8 ("block: remove i_bdev"), "Switch the block device lookup interfaces to directly work with a dev_t so that struct block_device references are only acquired by the blkdev_get variants (and the blk-cgroup special case). This means that we now don't need an extra reference in the inode and can generally simplify handling of struct block_device to keep the lookups contained in the core block layer code." so after lookup_bdev call, there no need to do bdput. remove a superfluous check the bdev & don't call bdput after lookup_bdev. Fixes: 4e7b5671c6a8("block: remove i_bdev") Signed-off-by: Yi Li Reviewed-by: Christoph Hellwig Signed-off-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/super.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 0e06d721cd8e3..a4752ac410dc4 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -2535,8 +2535,6 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, else err = "device busy"; mutex_unlock(&bch_register_lock); - if (!IS_ERR(bdev)) - bdput(bdev); if (attr == &ksysfs_register_quiet) goto done; } -- GitLab From 46926127d76359b46659c556df7b4aa1b6325d90 Mon Sep 17 00:00:00 2001 From: Zheng Yongjun Date: Wed, 23 Dec 2020 23:04:22 +0800 Subject: [PATCH 1104/1894] md/bcache: convert comma to semicolon Replace a comma between expression statements by a semicolon. Signed-off-by: Zheng Yongjun Signed-off-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 554e3afc9b688..00a520c03f417 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -404,7 +404,7 @@ STORE(__cached_dev) if (!env) return -ENOMEM; add_uevent_var(env, "DRIVER=bcache"); - add_uevent_var(env, "CACHED_UUID=%pU", dc->sb.uuid), + add_uevent_var(env, "CACHED_UUID=%pU", dc->sb.uuid); add_uevent_var(env, "CACHED_LABEL=%s", buf); kobject_uevent_env(&disk_to_dev(dc->disk.disk)->kobj, KOBJ_CHANGE, -- GitLab From 3557ae187c32203d1bb8b48ee1e2e7bdb23d98d5 Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Tue, 1 Dec 2020 15:01:53 +0000 Subject: [PATCH 1105/1894] KVM: Documentation: Add arm64 KVM_RUN error codes The API documentation states that general error codes are not detailed, but errors with specific meanings are. On arm64, KVM_RUN can return error numbers with a different meaning than what is described by POSIX or the C99 standard (as taken from man 3 errno). Absent from the newly documented error codes is ERANGE which can be returned when making a change to the EL2 stage 1 tables if the address is larger than the largest supported input address. Assuming no bugs in the implementation, that is not possible because the input addresses which are mapped are the result of applying the macro kern_hyp_va() on kernel virtual addresses. CC: Paolo Bonzini Signed-off-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201201150157.223625-2-alexandru.elisei@arm.com --- Documentation/virt/kvm/api.rst | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index e00a66d723728..4e5316ed10e90 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -380,9 +380,14 @@ This ioctl is obsolete and has been removed. Errors: - ===== ============================= + ======= ============================================================== EINTR an unmasked signal is pending - ===== ============================= + ENOEXEC the vcpu hasn't been initialized or the guest tried to execute + instructions from device memory (arm64) + ENOSYS data abort outside memslots with no syndrome info and + KVM_CAP_ARM_NISV_TO_USER not enabled (arm64) + EPERM SVE feature set but not finalized (arm64) + ======= ============================================================== This ioctl is used to run a guest virtual cpu. While there are no explicit parameters, there is an implicit parameter block that can be -- GitLab From f16570ba47ff2b3766ebeaba6f4b80ad48cfd6a1 Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Tue, 1 Dec 2020 15:01:54 +0000 Subject: [PATCH 1106/1894] KVM: arm64: arch_timer: Remove VGIC initialization check kvm_timer_enable() is called in kvm_vcpu_first_run_init() after kvm_vgic_map_resources() if the VGIC wasn't ready. kvm_vgic_map_resources() is the only place where kvm->arch.vgic.ready is set to true. For a v2 VGIC, kvm_vgic_map_resources() will attempt to initialize the VGIC and set the initialized flag. For a v3 VGIC, kvm_vgic_map_resources() will return an error code if the VGIC isn't already initialized. The end result is that if we've reached kvm_timer_enable(), the VGIC is initialzed and ready and vgic_initialized() will always be true, so remove this check. Signed-off-by: Alexandru Elisei Reviewed-by: Eric Auger [maz: added comment about vgic initialisation, as suggested by Eric] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201201150157.223625-3-alexandru.elisei@arm.com --- arch/arm64/kvm/arch_timer.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 32ba6fbc38141..74e0699661e90 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -1129,9 +1129,10 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) if (!irqchip_in_kernel(vcpu->kvm)) goto no_vgic; - if (!vgic_initialized(vcpu->kvm)) - return -ENODEV; - + /* + * At this stage, we have the guarantee that the vgic is both + * available and initialized. + */ if (!timer_irqs_are_valid(vcpu)) { kvm_debug("incorrectly configured timer irqs\n"); return -EINVAL; -- GitLab From 1c91f06d296de4f0c27022f5ec464e047d471215 Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Tue, 1 Dec 2020 15:01:55 +0000 Subject: [PATCH 1107/1894] KVM: arm64: Move double-checked lock to kvm_vgic_map_resources() kvm_vgic_map_resources() is called when a VCPU if first run and it maps all the VGIC MMIO regions. To prevent double-initialization, the VGIC uses the ready variable to keep track of the state of resources and the global KVM mutex to protect against concurrent accesses. After the lock is taken, the variable is checked again in case another VCPU took the lock between the current VCPU reading ready equals false and taking the lock. The double-checked lock pattern is spread across four different functions: in kvm_vcpu_first_run_init(), in kvm_vgic_map_resource() and in vgic_{v2,v3}_map_resources(), which makes it hard to reason about and introduces minor code duplication. Consolidate the checks in kvm_vgic_map_resources(), where the lock is taken. No functional change intended. Signed-off-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201201150157.223625-4-alexandru.elisei@arm.com --- arch/arm64/kvm/arm.c | 8 +++----- arch/arm64/kvm/vgic/vgic-init.c | 6 ++++++ arch/arm64/kvm/vgic/vgic-v2.c | 3 --- arch/arm64/kvm/vgic/vgic-v3.c | 3 --- 4 files changed, 9 insertions(+), 11 deletions(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index e207e4541f552..ab782c480e9a5 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -580,11 +580,9 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) * Map the VGIC hardware resources before running a vcpu the * first time on this VM. */ - if (unlikely(!vgic_ready(kvm))) { - ret = kvm_vgic_map_resources(kvm); - if (ret) - return ret; - } + ret = kvm_vgic_map_resources(kvm); + if (ret) + return ret; } else { /* * Tell the rest of the code that there are userspace irqchip diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 32e32d67a127f..a2f4d1c85f001 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -428,7 +428,13 @@ int kvm_vgic_map_resources(struct kvm *kvm) struct vgic_dist *dist = &kvm->arch.vgic; int ret = 0; + if (likely(vgic_ready(kvm))) + return 0; + mutex_lock(&kvm->lock); + if (vgic_ready(kvm)) + goto out; + if (!irqchip_in_kernel(kvm)) goto out; diff --git a/arch/arm64/kvm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c index ebf53a4e12963..7f38c1a936395 100644 --- a/arch/arm64/kvm/vgic/vgic-v2.c +++ b/arch/arm64/kvm/vgic/vgic-v2.c @@ -306,9 +306,6 @@ int vgic_v2_map_resources(struct kvm *kvm) struct vgic_dist *dist = &kvm->arch.vgic; int ret = 0; - if (vgic_ready(kvm)) - goto out; - if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) || IS_VGIC_ADDR_UNDEF(dist->vgic_cpu_base)) { kvm_err("Need to set vgic cpu and dist addresses first\n"); diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index 9cdf39a94a635..35029c5cb0f12 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -500,9 +500,6 @@ int vgic_v3_map_resources(struct kvm *kvm) int ret = 0; int c; - if (vgic_ready(kvm)) - goto out; - kvm_for_each_vcpu(c, vcpu, kvm) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; -- GitLab From de33212f768c5d9e2fe791b008cb26f92f0aa31c Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Tue, 22 Dec 2020 21:54:21 -0500 Subject: [PATCH 1108/1894] virtio_net: Fix recursive call to cpus_read_lock() virtnet_set_channels can recursively call cpus_read_lock if CONFIG_XPS and CONFIG_HOTPLUG are enabled. The path is: virtnet_set_channels - calls get_online_cpus(), which is a trivial wrapper around cpus_read_lock() netif_set_real_num_tx_queues netif_reset_xps_queues_gt netif_reset_xps_queues - calls cpus_read_lock() This call chain and potential deadlock happens when the number of TX queues is reduced. This commit the removes netif_set_real_num_[tr]x_queues calls from inside the get/put_online_cpus section, as they don't require that it be held. Fixes: 47be24796c13 ("virtio-net: fix the set affinity bug when CPU IDs are not consecutive") Signed-off-by: Jeff Dike Acked-by: Jason Wang Acked-by: Michael S. Tsirkin Link: https://lore.kernel.org/r/20201223025421.671-1-jdike@akamai.com Signed-off-by: Jakub Kicinski --- drivers/net/virtio_net.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 052975ea0af4c..ee611d1072945 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2093,14 +2093,16 @@ static int virtnet_set_channels(struct net_device *dev, get_online_cpus(); err = _virtnet_set_queues(vi, queue_pairs); - if (!err) { - netif_set_real_num_tx_queues(dev, queue_pairs); - netif_set_real_num_rx_queues(dev, queue_pairs); - - virtnet_set_affinity(vi); + if (err) { + put_online_cpus(); + goto err; } + virtnet_set_affinity(vi); put_online_cpus(); + netif_set_real_num_tx_queues(dev, queue_pairs); + netif_set_real_num_rx_queues(dev, queue_pairs); + err: return err; } -- GitLab From c06ccf3ebb7503706ea49fd248e709287ef385a3 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 23 Dec 2020 18:45:57 +0100 Subject: [PATCH 1109/1894] ALSA: usb-audio: Fix UBSAN warnings for MIDI jacks The calculation of in_cables and out_cables bitmaps are done with the bit shift by the value from the descriptor, which is an arbitrary value, and can lead to UBSAN shift-out-of-bounds warnings. Fix it by filtering the bad descriptor values with the check of the upper bound 0x10 (the cable bitmaps are 16 bits). Reported-by: syzbot+92e45ae45543f89e8c88@syzkaller.appspotmail.com Cc: Link: https://lore.kernel.org/r/20201223174557.10249-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/midi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/usb/midi.c b/sound/usb/midi.c index c8213652470c4..0c23fa6d8525d 100644 --- a/sound/usb/midi.c +++ b/sound/usb/midi.c @@ -1889,6 +1889,8 @@ static int snd_usbmidi_get_ms_info(struct snd_usb_midi *umidi, ms_ep = find_usb_ms_endpoint_descriptor(hostep); if (!ms_ep) continue; + if (ms_ep->bNumEmbMIDIJack > 0x10) + continue; if (usb_endpoint_dir_out(ep)) { if (endpoints[epidx].out_ep) { if (++epidx >= MIDI_MAX_ENDPOINTS) { @@ -2141,6 +2143,8 @@ static int snd_usbmidi_detect_roland(struct snd_usb_midi *umidi, cs_desc[1] == USB_DT_CS_INTERFACE && cs_desc[2] == 0xf1 && cs_desc[3] == 0x02) { + if (cs_desc[4] > 0x10 || cs_desc[5] > 0x10) + continue; endpoint->in_cables = (1 << cs_desc[4]) - 1; endpoint->out_cables = (1 << cs_desc[5]) - 1; return snd_usbmidi_detect_endpoints(umidi, endpoint, 1); -- GitLab From b250bf5f924f7b42725fc9e4135aa0b667dfb119 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 22 Dec 2020 09:16:13 -0600 Subject: [PATCH 1110/1894] net: ipa: fix interconnect enable bug When the core clock rate and interconnect bandwidth specifications were moved into configuration data, a copy/paste bug was introduced, causing the memory interconnect bandwidth to be set three times rather than enabling the three different interconnects. Fix this bug. Fixes: 91d02f9551501 ("net: ipa: use config data for clocking") Signed-off-by: Alex Elder Reviewed-by: Georgi Djakov Link: https://lore.kernel.org/r/20201222151613.5730-1-elder@linaro.org Signed-off-by: Jakub Kicinski --- drivers/net/ipa/ipa_clock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ipa/ipa_clock.c b/drivers/net/ipa/ipa_clock.c index 9dcf16f399b7a..135c393437f12 100644 --- a/drivers/net/ipa/ipa_clock.c +++ b/drivers/net/ipa/ipa_clock.c @@ -115,13 +115,13 @@ static int ipa_interconnect_enable(struct ipa *ipa) return ret; data = &clock->interconnect_data[IPA_INTERCONNECT_IMEM]; - ret = icc_set_bw(clock->memory_path, data->average_rate, + ret = icc_set_bw(clock->imem_path, data->average_rate, data->peak_rate); if (ret) goto err_memory_path_disable; data = &clock->interconnect_data[IPA_INTERCONNECT_CONFIG]; - ret = icc_set_bw(clock->memory_path, data->average_rate, + ret = icc_set_bw(clock->config_path, data->average_rate, data->peak_rate); if (ret) goto err_imem_path_disable; -- GitLab From 35b14475257f553a7cd60ce4b2571304644f652b Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Sat, 21 Nov 2020 21:01:47 +0800 Subject: [PATCH 1111/1894] drm/amdgpu: check number of gfx ring before init cp gfx MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Check number of gfx ring, rather than asic type, before cp gfx engine initialization so driver just need to make sure number of gfx ring is initialized correctly in gfx early_init phase. No need to add additional asic type check everywhere when there is new asic with gfx pipe removed. Signed-off-by: Hawking Zhang Reviewed-by: Feifei Xu Acked-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index fc9bb94eaaf42..ef430f285472b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1647,7 +1647,7 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) } /* No CPG in Arcturus */ - if (adev->asic_type != CHIP_ARCTURUS) { + if (adev->gfx.num_gfx_rings) { r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name); if (r) return r; @@ -3822,7 +3822,7 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) gfx_v9_0_enable_gui_idle_interrupt(adev, false); if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { - if (adev->asic_type != CHIP_ARCTURUS) { + if (adev->gfx.num_gfx_rings) { /* legacy firmware loading */ r = gfx_v9_0_cp_gfx_load_microcode(adev); if (r) @@ -3838,7 +3838,7 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) if (r) return r; - if (adev->asic_type != CHIP_ARCTURUS) { + if (adev->gfx.num_gfx_rings) { r = gfx_v9_0_cp_gfx_resume(adev); if (r) return r; @@ -3848,7 +3848,7 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) if (r) return r; - if (adev->asic_type != CHIP_ARCTURUS) { + if (adev->gfx.num_gfx_rings) { ring = &adev->gfx.gfx_ring[0]; r = amdgpu_ring_test_helper(ring); if (r) @@ -3884,7 +3884,7 @@ static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev) static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable) { - if (adev->asic_type != CHIP_ARCTURUS) + if (adev->gfx.num_gfx_rings) gfx_v9_0_cp_gfx_enable(adev, enable); gfx_v9_0_cp_compute_enable(adev, enable); } @@ -4025,7 +4025,7 @@ static int gfx_v9_0_soft_reset(void *handle) /* stop the rlc */ adev->gfx.rlc.funcs->stop(adev); - if (adev->asic_type != CHIP_ARCTURUS) + if (adev->gfx.num_gfx_rings) /* Disable GFX parsing/prefetching */ gfx_v9_0_cp_gfx_enable(adev, false); -- GitLab From d0f2f634f59d8f35e70644daf956bf04d2ff2d0c Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Sat, 21 Nov 2020 21:07:12 +0800 Subject: [PATCH 1112/1894] drm/amdgpu: remove unnecessary asic type check The number of crtc should be 0 for ASICs that don't have display engine. Remove the unnecessary asic type check then. Signed-off-by: Hawking Zhang Reviewed-by: Feifei Xu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index e1531d97f486f..e22268f9dba73 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1577,13 +1577,10 @@ static int gmc_v9_0_hw_init(void *handle) gmc_v9_0_init_golden_registers(adev); if (adev->mode_info.num_crtc) { - if (adev->asic_type != CHIP_ARCTURUS) { - /* Lockout access through VGA aperture*/ - WREG32_FIELD15(DCE, 0, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1); - - /* disable VGA render */ - WREG32_FIELD15(DCE, 0, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0); - } + /* Lockout access through VGA aperture*/ + WREG32_FIELD15(DCE, 0, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1); + /* disable VGA render */ + WREG32_FIELD15(DCE, 0, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0); } amdgpu_device_program_register_sequence(adev, -- GitLab From 462fbeb1fcfcd35e453eeaa80d6d3d26464269fd Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Sat, 21 Nov 2020 21:58:19 +0800 Subject: [PATCH 1113/1894] drm/amdgpu: check gfx pipe availability before toggling its interrupts GUI_IDLE interrupts controlled by CP_INT_CNTL_RING0 are only applicable to me0 pipe0. For ASICs that have gfx pipe removed, don't toggle those bits. Signed-off-by: Hawking Zhang Reviewed-by: Feifei Xu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index ef430f285472b..5f4805e4d04ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2633,7 +2633,14 @@ static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, bool enable) { - u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); + u32 tmp; + + /* don't toggle interrupts that are only applicable + * to me0 pipe0 on AISCs that have me0 removed */ + if (!adev->gfx.num_gfx_rings) + return; + + tmp= RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); -- GitLab From ea96b12aa4fa116aa8ff4cf8de839ea65a2bb3ef Mon Sep 17 00:00:00 2001 From: Qingqing Zhuo Date: Fri, 4 Dec 2020 10:55:13 -0500 Subject: [PATCH 1114/1894] drm/amd/display: handler not correctly checked at remove_irq_handler [why] handler is supposedly passed in as a function pointer; however, the entire struct amdgpu_dm_irq_handler_data gets from the list is used to check match. [how] use the interrupt_handler within amdgpu_dm_irq_handler_data for checking match. Signed-off-by: Qingqing Zhuo Acked-by: Bindu Ramamurthy Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c index 357778556b06c..26ed70e5538ae 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c @@ -165,7 +165,10 @@ static struct list_head *remove_irq_handler(struct amdgpu_device *adev, handler = list_entry(entry, struct amdgpu_dm_irq_handler_data, list); - if (ih == handler) { + if (handler == NULL) + continue; + + if (ih == handler->handler) { /* Found our handler. Remove it from the list. */ list_del(&handler->list); handler_removed = true; -- GitLab From 2da94e2808bd7df30ace134991ed0fbd95188acd Mon Sep 17 00:00:00 2001 From: Wesley Chalmers Date: Mon, 7 Dec 2020 11:46:08 -0500 Subject: [PATCH 1115/1894] drm/amd/display: Interfaces for hubp blank and soft reset [WHY] HUBP blanking sequence on DCN30 requires us to check if HUBP is in blank and also toggle HUBP_DISABLE, which should instead be called HUBP_SOFT_RESET for what it does in HW. Signed-off-by: Wesley Chalmers Acked-by: Bindu Ramamurthy Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c | 18 ++++++++++++++++++ .../gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h | 4 ++++ .../gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c | 2 ++ .../gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c | 2 ++ drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h | 2 ++ 5 files changed, 28 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c index 41679ad531c54..9e796dfeac204 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c @@ -1241,6 +1241,22 @@ void hubp1_vtg_sel(struct hubp *hubp, uint32_t otg_inst) REG_UPDATE(DCHUBP_CNTL, HUBP_VTG_SEL, otg_inst); } +bool hubp1_in_blank(struct hubp *hubp) +{ + uint32_t in_blank; + struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp); + + REG_GET(DCHUBP_CNTL, HUBP_IN_BLANK, &in_blank); + return in_blank ? true : false; +} + +void hubp1_soft_reset(struct hubp *hubp, bool reset) +{ + struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp); + + REG_UPDATE(DCHUBP_CNTL, HUBP_DISABLE, reset ? 1 : 0); +} + void hubp1_init(struct hubp *hubp) { //do nothing @@ -1272,6 +1288,8 @@ static const struct hubp_funcs dcn10_hubp_funcs = { .dmdata_set_attributes = NULL, .dmdata_load = NULL, + .hubp_soft_reset = hubp1_soft_reset, + .hubp_in_blank = hubp1_in_blank, }; /*****************************************/ diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h index 780af5b3c16f0..a9a6ed7f4f993 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h @@ -260,6 +260,7 @@ HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_NO_OUTSTANDING_REQ, mask_sh),\ HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_VTG_SEL, mask_sh),\ HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_DISABLE, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_IN_BLANK, mask_sh),\ HUBP_SF(HUBP0_DCSURF_ADDR_CONFIG, NUM_PIPES, mask_sh),\ HUBP_SF(HUBP0_DCSURF_ADDR_CONFIG, NUM_BANKS, mask_sh),\ HUBP_SF(HUBP0_DCSURF_ADDR_CONFIG, PIPE_INTERLEAVE, mask_sh),\ @@ -455,6 +456,7 @@ type HUBP_VTG_SEL;\ type HUBP_UNDERFLOW_STATUS;\ type HUBP_UNDERFLOW_CLEAR;\ + type HUBP_IN_BLANK;\ type NUM_PIPES;\ type NUM_BANKS;\ type PIPE_INTERLEAVE;\ @@ -772,5 +774,7 @@ void hubp1_vready_workaround(struct hubp *hubp, void hubp1_init(struct hubp *hubp); void hubp1_read_state_common(struct hubp *hubp); +bool hubp1_in_blank(struct hubp *hubp); +void hubp1_soft_reset(struct hubp *hubp, bool reset); #endif diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c index b7e44e53a3425..0df0da2e6a4d0 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c @@ -1595,6 +1595,8 @@ static struct hubp_funcs dcn20_hubp_funcs = { .hubp_set_flip_control_surface_gsl = hubp2_set_flip_control_surface_gsl, .hubp_init = hubp1_init, .validate_dml_output = hubp2_validate_dml_output, + .hubp_in_blank = hubp1_in_blank, + .hubp_soft_reset = hubp1_soft_reset, }; diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c index af462fe4260de..88ffa9ff1ed15 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c @@ -509,6 +509,8 @@ static struct hubp_funcs dcn30_hubp_funcs = { .hubp_clear_underflow = hubp2_clear_underflow, .hubp_set_flip_control_surface_gsl = hubp2_set_flip_control_surface_gsl, .hubp_init = hubp3_init, + .hubp_in_blank = hubp1_in_blank, + .hubp_soft_reset = hubp1_soft_reset, }; bool hubp3_construct( diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h index 315e3061c5924..22f3f643ed1b8 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h @@ -188,6 +188,8 @@ struct hubp_funcs { void (*set_unbounded_requesting)( struct hubp *hubp, bool enable); + bool (*hubp_in_blank)(struct hubp *hubp); + void (*hubp_soft_reset)(struct hubp *hubp, bool reset); }; -- GitLab From c2d61e309171437e042f4c859e88077fffee18e5 Mon Sep 17 00:00:00 2001 From: Martin Tsai Date: Thu, 3 Dec 2020 10:47:11 +0800 Subject: [PATCH 1116/1894] drm/amd/display: Modify the hdcp device count check condition [why] Some MST display may not report the internal panel to DEVICE_COUNT, that makes the check condition always failed. [how] To update this condition with the reported device count + 1 (because the immediate repeater's internal panel is possibly not included in DEVICE_COUNT) Signed-off-by: Martin Tsai Acked-by: Bindu Ramamurthy Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c | 8 ++++++-- .../gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c | 7 +++++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c index f244b72e74e06..73ca49f05bd32 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c @@ -128,8 +128,12 @@ static inline uint8_t get_device_count(struct mod_hdcp *hdcp) static inline enum mod_hdcp_status check_device_count(struct mod_hdcp *hdcp) { - /* device count must be greater than or equal to tracked hdcp displays */ - return (get_device_count(hdcp) < get_active_display_count(hdcp)) ? + /* Some MST display may choose to report the internal panel as an HDCP RX. + * To update this condition with 1(because the immediate repeater's internal + * panel is possibly not included in DEVICE_COUNT) + get_device_count(hdcp). + * Device count must be greater than or equal to tracked hdcp displays. + */ + return ((1 + get_device_count(hdcp)) < get_active_display_count(hdcp)) ? MOD_HDCP_STATUS_HDCP1_DEVICE_COUNT_MISMATCH_FAILURE : MOD_HDCP_STATUS_SUCCESS; } diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c index 549c113abcf7f..a0895a7efda2c 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c @@ -207,8 +207,11 @@ static inline uint8_t get_device_count(struct mod_hdcp *hdcp) static enum mod_hdcp_status check_device_count(struct mod_hdcp *hdcp) { - /* device count must be greater than or equal to tracked hdcp displays */ - return (get_device_count(hdcp) < get_active_display_count(hdcp)) ? + /* Some MST display may choose to report the internal panel as an HDCP RX. */ + /* To update this condition with 1(because the immediate repeater's internal */ + /* panel is possibly not included in DEVICE_COUNT) + get_device_count(hdcp). */ + /* Device count must be greater than or equal to tracked hdcp displays. */ + return ((1 + get_device_count(hdcp)) < get_active_display_count(hdcp)) ? MOD_HDCP_STATUS_HDCP2_DEVICE_COUNT_MISMATCH_FAILURE : MOD_HDCP_STATUS_SUCCESS; } -- GitLab From 9413b23fadad3861f5afd626ac44ef83ad8068ab Mon Sep 17 00:00:00 2001 From: Martin Tsai Date: Wed, 2 Dec 2020 20:22:13 +0800 Subject: [PATCH 1117/1894] drm/amd/display: To modify the condition in indicating branch device [why] The sink count change HPD_IRQ will be ignored if the branch device has only DP DFP. [how] To remove the port type restriction. Signed-off-by: Martin Tsai Acked-by: Bindu Ramamurthy Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 6b11d4af54af4..2fc12239b22cb 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -3173,13 +3173,7 @@ static void get_active_converter_info( } /* DPCD 0x5 bit 0 = 1, it indicate it's branch device */ - if (ds_port.fields.PORT_TYPE == DOWNSTREAM_DP) { - link->dpcd_caps.is_branch_dev = false; - } - - else { - link->dpcd_caps.is_branch_dev = ds_port.fields.PORT_PRESENT; - } + link->dpcd_caps.is_branch_dev = ds_port.fields.PORT_PRESENT; switch (ds_port.fields.PORT_TYPE) { case DOWNSTREAM_VGA: -- GitLab From e8e91f9395ef13cf054860f8ccd757333d9b6d0d Mon Sep 17 00:00:00 2001 From: Rizvi Date: Wed, 2 Dec 2020 14:52:22 -0700 Subject: [PATCH 1118/1894] drm/amd/display: gradually ramp ABM intensity [Why] Need driver to pass values of backlight ramp start and ramp reduction so that intensity can be ramped down appropriately. [How] Using abm_parameters structure to get these values from driver. Signed-off-by: Rizvi Acked-by: Bindu Ramamurthy Signed-off-by: Alex Deucher --- .../amd/display/modules/power/power_helpers.c | 35 +++++++++++++------ .../amd/display/modules/power/power_helpers.h | 1 + 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c index cc983f6621575..4fd8bce95d843 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c @@ -82,22 +82,24 @@ struct abm_parameters { unsigned char deviation_gain; unsigned char min_knee; unsigned char max_knee; + unsigned short blRampReduction; + unsigned short blRampStart; }; static const struct abm_parameters abm_settings_config0[abm_defines_max_level] = { -// min_red max_red bright_pos dark_pos brightness_gain contrast deviation min_knee max_knee - {0xff, 0xbf, 0x20, 0x00, 0xff, 0x99, 0xb3, 0x40, 0xe0}, - {0xde, 0x85, 0x20, 0x00, 0xff, 0x90, 0xa8, 0x40, 0xdf}, - {0xb0, 0x50, 0x20, 0x00, 0xc0, 0x88, 0x78, 0x70, 0xa0}, - {0x82, 0x40, 0x20, 0x00, 0x00, 0xff, 0xb3, 0x70, 0x70}, +// min_red max_red bright_pos dark_pos bright_gain contrast dev min_knee max_knee blStart blRed + {0xff, 0xbf, 0x20, 0x00, 0xff, 0x99, 0xb3, 0x40, 0xe0, 0xCCCC, 0xCCCC}, + {0xde, 0x85, 0x20, 0x00, 0xff, 0x90, 0xa8, 0x40, 0xdf, 0xCCCC, 0xCCCC}, + {0xb0, 0x50, 0x20, 0x00, 0xc0, 0x88, 0x78, 0x70, 0xa0, 0xCCCC, 0xCCCC}, + {0x82, 0x40, 0x20, 0x00, 0x00, 0xff, 0xb3, 0x70, 0x70, 0xCCCC, 0xCCCC}, }; static const struct abm_parameters abm_settings_config1[abm_defines_max_level] = { -// min_red max_red bright_pos dark_pos brightness_gain contrast deviation min_knee max_knee - {0xf0, 0xd9, 0x20, 0x00, 0x00, 0xff, 0xb3, 0x70, 0x70}, - {0xcd, 0xa5, 0x20, 0x00, 0x00, 0xff, 0xb3, 0x70, 0x70}, - {0x99, 0x65, 0x20, 0x00, 0x00, 0xff, 0xb3, 0x70, 0x70}, - {0x82, 0x4d, 0x20, 0x00, 0x00, 0xff, 0xb3, 0x70, 0x70}, +// min_red max_red bright_pos dark_pos bright_gain contrast dev min_knee max_knee blStart blRed + {0xf0, 0xd9, 0x20, 0x00, 0x00, 0xff, 0xb3, 0x70, 0x70, 0xCCCC, 0xCCCC}, + {0xcd, 0xa5, 0x20, 0x00, 0x00, 0xff, 0xb3, 0x70, 0x70, 0xCCCC, 0xCCCC}, + {0x99, 0x65, 0x20, 0x00, 0x00, 0xff, 0xb3, 0x70, 0x70, 0xCCCC, 0xCCCC}, + {0x82, 0x4d, 0x20, 0x00, 0x00, 0xff, 0xb3, 0x70, 0x70, 0xCCCC, 0xCCCC}, }; static const struct abm_parameters * const abm_settings[] = { @@ -662,6 +664,7 @@ bool dmub_init_abm_config(struct resource_pool *res_pool, { struct iram_table_v_2_2 ram_table; struct abm_config_table config; + unsigned int set = params.set; bool result = false; uint32_t i, j = 0; @@ -710,6 +713,18 @@ bool dmub_init_abm_config(struct resource_pool *res_pool, config.max_knee[i] = ram_table.max_knee[i]; } + if (params.backlight_ramping_override) { + for (i = 0; i < NUM_AGGR_LEVEL; i++) { + config.blRampReduction[i] = params.backlight_ramping_reduction; + config.blRampStart[i] = params.backlight_ramping_start; + } + } else { + for (i = 0; i < NUM_AGGR_LEVEL; i++) { + config.blRampReduction[i] = abm_settings[set][i].blRampReduction; + config.blRampStart[i] = abm_settings[set][i].blRampStart; + } + } + config.min_abm_backlight = ram_table.min_abm_backlight; #if defined(CONFIG_DRM_AMD_DC_DCN) diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h index fa4728d880920..6f2eecce6baa7 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h @@ -39,6 +39,7 @@ enum abm_defines { struct dmcu_iram_parameters { unsigned int *backlight_lut_array; unsigned int backlight_lut_array_size; + bool backlight_ramping_override; unsigned int backlight_ramping_reduction; unsigned int backlight_ramping_start; unsigned int min_abm_backlight; -- GitLab From cf7fc75523b32a9a119a466dcff325f1fda38c7d Mon Sep 17 00:00:00 2001 From: Yongqiang Sun Date: Wed, 9 Dec 2020 16:56:51 -0500 Subject: [PATCH 1119/1894] drm/amd/display: change SMU repsonse timeout to 2s. [Why] there is some garbage showing up during reboot test. Reason: SMU might handle display driver msg defered and driver will send next msg to SMU after 10ms timeout, once SMU FW handle previous msg, parameters are changed to next one, which result in a wrong value be programmed. [How] Extend timeout to 2s so SMU will have enough time to handle driver msg. Signed-off-by: Yongqiang Sun Acked-by: Bindu Ramamurthy Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c index 11a7b583d5616..7deeec9d1c7c7 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c @@ -99,7 +99,7 @@ int rn_vbios_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, unsigned /* Trigger the message transaction by writing the message ID */ REG_WRITE(MP1_SMN_C2PMSG_67, msg_id); - result = rn_smu_wait_for_response(clk_mgr, 10, 1000); + result = rn_smu_wait_for_response(clk_mgr, 10, 200000); ASSERT(result == VBIOSSMC_Result_OK || result == VBIOSSMC_Result_UnknownCmd); -- GitLab From e82632356d531dbc575377d594e85e65aa1293f9 Mon Sep 17 00:00:00 2001 From: Michael Strauss Date: Mon, 30 Nov 2020 12:14:00 -0500 Subject: [PATCH 1120/1894] drm/amd/display: Update RN/VGH active display count workaround [WHY] Virtual signals were previously counted as a workaround to S0i2 hang which is fixed on Renoir. This blocks S0i3 diags testing. [HOW] Stop counting virtual signals as S0i2 hang is fixed on Renoir. Signed-off-by: Michael Strauss Acked-by: Bindu Ramamurthy Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c | 9 +-------- .../gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c | 9 +-------- 2 files changed, 2 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c index d00b02553d62e..9aa1b63bb1619 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c @@ -75,15 +75,8 @@ int rn_get_active_display_cnt_wa( for (i = 0; i < dc->link_count; i++) { const struct dc_link *link = dc->links[i]; - /* - * Only notify active stream or virtual stream. - * Need to notify virtual stream to work around - * headless case. HPD does not fire when system is in - * S0i2. - */ /* abusing the fact that the dig and phy are coupled to see if the phy is enabled */ - if (link->connector_signal == SIGNAL_TYPE_VIRTUAL || - link->link_enc->funcs->is_dig_enabled(link->link_enc)) + if (link->link_enc->funcs->is_dig_enabled(link->link_enc)) display_count++; } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c index 9a8e66bba9c05..991b9c5beaa30 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c @@ -74,15 +74,8 @@ int vg_get_active_display_cnt_wa( for (i = 0; i < dc->link_count; i++) { const struct dc_link *link = dc->links[i]; - /* - * Only notify active stream or virtual stream. - * Need to notify virtual stream to work around - * headless case. HPD does not fire when system is in - * S0i2. - */ /* abusing the fact that the dig and phy are coupled to see if the phy is enabled */ - if (link->connector_signal == SIGNAL_TYPE_VIRTUAL || - link->link_enc->funcs->is_dig_enabled(link->link_enc)) + if (link->link_enc->funcs->is_dig_enabled(link->link_enc)) display_count++; } -- GitLab From cbac53f7fc90754b898e79ab2d5c11052ce1b640 Mon Sep 17 00:00:00 2001 From: Eryk Brol Date: Tue, 8 Dec 2020 12:52:36 -0500 Subject: [PATCH 1121/1894] drm/amd/display: Remove unnecessary NULL check [Why] new_crtc_state is already dereferenced earlier in the function [How] Remove the check Signed-off-by: Eryk Brol Acked-by: Bindu Ramamurthy Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 2c4dbdeec46a6..8fac6116591bf 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -9367,7 +9367,7 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, if (ret) goto fail; - if (dm_old_crtc_state->dsc_force_changed && new_crtc_state) + if (dm_old_crtc_state->dsc_force_changed) new_crtc_state->mode_changed = true; } -- GitLab From a71e5529d2674584fda0fa09a7de4efc8e17160d Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Thu, 10 Dec 2020 12:11:32 -0500 Subject: [PATCH 1122/1894] drm/amd/display: Multi-display underflow observed [Why] FP2 programming not happening when topology changes occur with multiple displays. [How] Ensure FP2 is programmed whenever global sync changes occur but wait for VACTIVE first to avoid underflow. Signed-off-by: Aric Cyr Acked-by: Bindu Ramamurthy Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 20 ------------------- .../drm/amd/display/dc/dcn20/dcn20_hwseq.c | 12 ++++++++--- 2 files changed, 9 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 7339d9855ec8c..58eb0d69873a6 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -2625,26 +2625,6 @@ static void commit_planes_for_stream(struct dc *dc, } } - if (update_type != UPDATE_TYPE_FAST) { - // If changing VTG FP2: wait until back in vactive to program FP2 - // Need to ensure that pipe unlock happens soon after to minimize race condition - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; - - if (pipe_ctx->top_pipe || pipe_ctx->stream != stream) - continue; - - if (!pipe_ctx->update_flags.bits.global_sync) - continue; - - pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VBLANK); - pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE); - - pipe_ctx->stream_res.tg->funcs->set_vtg_params( - pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, true); - } - } - if ((update_type != UPDATE_TYPE_FAST) && dc->hwss.interdependent_update_lock) dc->hwss.interdependent_update_lock(dc, context, false); else diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index 31a477194d3b5..cb822df21b7c5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -1586,7 +1586,10 @@ static void dcn20_program_pipe( && !pipe_ctx->top_pipe && !pipe_ctx->prev_odm_pipe) hws->funcs.blank_pixel_data(dc, pipe_ctx, !pipe_ctx->plane_state->visible); - if (pipe_ctx->update_flags.bits.global_sync) { + /* Only update TG on top pipe */ + if (pipe_ctx->update_flags.bits.global_sync && !pipe_ctx->top_pipe + && !pipe_ctx->prev_odm_pipe) { + pipe_ctx->stream_res.tg->funcs->program_global_sync( pipe_ctx->stream_res.tg, pipe_ctx->pipe_dlg_param.vready_offset, @@ -1594,8 +1597,11 @@ static void dcn20_program_pipe( pipe_ctx->pipe_dlg_param.vupdate_offset, pipe_ctx->pipe_dlg_param.vupdate_width); + pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VBLANK); + pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE); + pipe_ctx->stream_res.tg->funcs->set_vtg_params( - pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, false); + pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, true); if (hws->funcs.setup_vupdate_interrupt) hws->funcs.setup_vupdate_interrupt(dc, pipe_ctx); @@ -2570,4 +2576,4 @@ void dcn20_set_disp_pattern_generator(const struct dc *dc, { pipe_ctx->stream_res.opp->funcs->opp_set_disp_pattern_generator(pipe_ctx->stream_res.opp, test_pattern, color_space, color_depth, solid_color, width, height, offset); -} \ No newline at end of file +} -- GitLab From 73d48f0851847268482260eb955ed8d928b7f19c Mon Sep 17 00:00:00 2001 From: Sung Lee Date: Wed, 9 Dec 2020 14:58:59 -0500 Subject: [PATCH 1123/1894] drm/amd/display: Acquire DSC during split stream for ODM only if top_pipe [WHY] DSC should only be acquired per OPP. Therefore, DSC should only be acquired for the top_pipe when ODM is enabled. Not doing this check may lead to acquiring more DSC's than needed when doing MPO + ODM Combine. [HOW] Only acquire DSC if pipe is top_pipe. Signed-off-by: Sung Lee Acked-by: Bindu Ramamurthy Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index ff36db5edf6c5..e04ecf0fc0dbc 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -1933,7 +1933,7 @@ bool dcn20_split_stream_for_odm( next_odm_pipe->stream_res.opp = pool->opps[next_odm_pipe->pipe_idx]; else next_odm_pipe->stream_res.opp = next_odm_pipe->top_pipe->stream_res.opp; - if (next_odm_pipe->stream->timing.flags.DSC == 1) { + if (next_odm_pipe->stream->timing.flags.DSC == 1 && !next_odm_pipe->top_pipe) { dcn20_acquire_dsc(dc, res_ctx, &next_odm_pipe->stream_res.dsc, next_odm_pipe->pipe_idx); ASSERT(next_odm_pipe->stream_res.dsc); if (next_odm_pipe->stream_res.dsc == NULL) -- GitLab From 1e7445dcc17444569d9f0acce227aadf095ac989 Mon Sep 17 00:00:00 2001 From: Jake Wang Date: Wed, 9 Dec 2020 18:00:18 -0500 Subject: [PATCH 1124/1894] drm/amd/display: updated wm table for Renoir [Why] For certain timings, Renoir may underflow due to sr exit latency being too slow. [How] Updated wm table for renoir. Signed-off-by: Jake Wang Acked-by: Bindu Ramamurthy Signed-off-by: Alex Deucher --- .../amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c index 9aa1b63bb1619..bad30217c7b47 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c @@ -731,32 +731,32 @@ static struct wm_table ddr4_wm_table_rn = { .wm_inst = WM_A, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.72, - .sr_exit_time_us = 9.09, - .sr_enter_plus_exit_time_us = 10.14, + .sr_exit_time_us = 11.90, + .sr_enter_plus_exit_time_us = 12.80, .valid = true, }, { .wm_inst = WM_B, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.72, - .sr_exit_time_us = 11.12, - .sr_enter_plus_exit_time_us = 12.48, + .sr_exit_time_us = 13.18, + .sr_enter_plus_exit_time_us = 14.30, .valid = true, }, { .wm_inst = WM_C, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.72, - .sr_exit_time_us = 11.12, - .sr_enter_plus_exit_time_us = 12.48, + .sr_exit_time_us = 13.18, + .sr_enter_plus_exit_time_us = 14.30, .valid = true, }, { .wm_inst = WM_D, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.72, - .sr_exit_time_us = 11.12, - .sr_enter_plus_exit_time_us = 12.48, + .sr_exit_time_us = 13.18, + .sr_enter_plus_exit_time_us = 14.30, .valid = true, }, } -- GitLab From c277925cca8c534ddcf1fb0ec9b9e4ca35b1d064 Mon Sep 17 00:00:00 2001 From: Yongqiang Sun Date: Fri, 11 Dec 2020 15:34:30 -0500 Subject: [PATCH 1125/1894] drm/amd/display: [FW Promotion] Release 0.0.47 - restore lvtma_pwrseq_delay2 from vbios integrated info table - restore MVID/NVID after power up. - Enable timer wake up mask when enable timer interrupt. Signed-off-by: Yongqiang Sun Acked-by: Bindu Ramamurthy Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index f512bda96917d..249a076d6f69c 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -47,10 +47,10 @@ /* Firmware versioning. */ #ifdef DMUB_EXPOSE_VERSION -#define DMUB_FW_VERSION_GIT_HASH 0xa18e25995 +#define DMUB_FW_VERSION_GIT_HASH 0xf51b86a #define DMUB_FW_VERSION_MAJOR 0 #define DMUB_FW_VERSION_MINOR 0 -#define DMUB_FW_VERSION_REVISION 46 +#define DMUB_FW_VERSION_REVISION 47 #define DMUB_FW_VERSION_TEST 0 #define DMUB_FW_VERSION_VBIOS 0 #define DMUB_FW_VERSION_HOTFIX 0 -- GitLab From 4aa9d658d21cf192fa12227591526d06fec114e0 Mon Sep 17 00:00:00 2001 From: Jake Wang Date: Fri, 11 Dec 2020 16:53:57 -0500 Subject: [PATCH 1126/1894] drm/amd/display: always program DPPDTO unless not safe to lower [Why] We defer clock updates to after pipes have been programmed. In some instances we use DPPCLK that have been previously set to be "unused". This results in a brief window of time where underflow could occur. [How] During prepare bandwidth allow rn_update_clocks_update_dpp_dto to check each instance and compare previous clock to new clock. If new clock is higher than previous clock, program DPPDTO. Signed-off-by: Jake Wang Acked-by: Bindu Ramamurthy Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c index bad30217c7b47..01b1853b7750d 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c @@ -227,12 +227,11 @@ void rn_update_clocks(struct clk_mgr *clk_mgr_base, rn_vbios_smu_set_dppclk(clk_mgr, clk_mgr_base->clks.dppclk_khz); // always update dtos unless clock is lowered and not safe to lower - if (new_clocks->dppclk_khz >= dc->current_state->bw_ctx.bw.dcn.clk.dppclk_khz) - rn_update_clocks_update_dpp_dto( - clk_mgr, - context, - clk_mgr_base->clks.actual_dppclk_khz, - safe_to_lower); + rn_update_clocks_update_dpp_dto( + clk_mgr, + context, + clk_mgr_base->clks.actual_dppclk_khz, + safe_to_lower); } if (update_dispclk && -- GitLab From 110b055b282736e277298141c42227595408f606 Mon Sep 17 00:00:00 2001 From: Josip Pavic Date: Fri, 11 Dec 2020 00:09:11 -0500 Subject: [PATCH 1127/1894] drm/amd/display: add getter routine to retrieve mpcc mux [Why & How] Add function to identify which MPCC is providing input to a specified OPP Signed-off-by: Josip Pavic Acked-by: Bindu Ramamurthy Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c | 12 ++++++++++++ drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h | 1 + drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c | 1 + drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c | 1 + drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h | 4 ++++ 5 files changed, 19 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c index 3fcd408e91032..a46cb20596fe8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c @@ -467,6 +467,17 @@ void mpc1_cursor_lock(struct mpc *mpc, int opp_id, bool lock) REG_SET(CUR[opp_id], 0, CUR_VUPDATE_LOCK_SET, lock ? 1 : 0); } +unsigned int mpc1_get_mpc_out_mux(struct mpc *mpc, int opp_id) +{ + struct dcn10_mpc *mpc10 = TO_DCN10_MPC(mpc); + uint32_t val; + + if (opp_id < MAX_OPP && REG(MUX[opp_id])) + REG_GET(MUX[opp_id], MPC_OUT_MUX, &val); + + return val; +} + static const struct mpc_funcs dcn10_mpc_funcs = { .read_mpcc_state = mpc1_read_mpcc_state, .insert_plane = mpc1_insert_plane, @@ -483,6 +494,7 @@ static const struct mpc_funcs dcn10_mpc_funcs = { .set_denorm_clamp = NULL, .set_output_csc = NULL, .set_output_gamma = NULL, + .get_mpc_out_mux = mpc1_get_mpc_out_mux, }; void dcn10_mpc_construct(struct dcn10_mpc *mpc10, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h index 66a4719c22a0c..dbfffc6383dcb 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h @@ -200,4 +200,5 @@ void mpc1_read_mpcc_state( void mpc1_cursor_lock(struct mpc *mpc, int opp_id, bool lock); +unsigned int mpc1_get_mpc_out_mux(struct mpc *mpc, int opp_id); #endif diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c index 99cc095dc33c7..6a99fdd55e8c1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c @@ -556,6 +556,7 @@ const struct mpc_funcs dcn20_mpc_funcs = { .set_ocsc_default = mpc2_set_ocsc_default, .set_output_gamma = mpc2_set_output_gamma, .power_on_mpc_mem_pwr = mpc20_power_on_ogam_lut, + .get_mpc_out_mux = mpc1_get_mpc_out_mux, }; void dcn20_mpc_construct(struct dcn20_mpc *mpc20, diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c index d7d053fc6e91e..3e6f76096119c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c @@ -1428,6 +1428,7 @@ const struct mpc_funcs dcn30_mpc_funcs = { .program_3dlut = mpc3_program_3dlut, .release_rmu = mpcc3_release_rmu, .power_on_mpc_mem_pwr = mpc20_power_on_ogam_lut, + .get_mpc_out_mux = mpc1_get_mpc_out_mux, }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h index 879f502ae5303..75c77ad9cbfee 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h @@ -359,6 +359,10 @@ struct mpc_funcs { int (*release_rmu)(struct mpc *mpc, int mpcc_id); + unsigned int (*get_mpc_out_mux)( + struct mpc *mpc, + int opp_id); + }; #endif -- GitLab From e75a9db3c59e923f54a36870a7cc339afe9e611b Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 18 Dec 2020 12:38:50 +0800 Subject: [PATCH 1128/1894] drm/amd/pm: bump Sienna Cichlid smu_driver_if version to match latest pmfw This can suppress the annoying but unharmful prompts. Signed-off-by: Evan Quan Reviewed-by: Guchun Chen Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/inc/smu_v11_0.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h b/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h index e5aa0725147cb..13de692a42136 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h +++ b/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h @@ -30,7 +30,7 @@ #define SMU11_DRIVER_IF_VERSION_NV10 0x36 #define SMU11_DRIVER_IF_VERSION_NV12 0x36 #define SMU11_DRIVER_IF_VERSION_NV14 0x36 -#define SMU11_DRIVER_IF_VERSION_Sienna_Cichlid 0x3B +#define SMU11_DRIVER_IF_VERSION_Sienna_Cichlid 0x3D #define SMU11_DRIVER_IF_VERSION_Navy_Flounder 0xC #define SMU11_DRIVER_IF_VERSION_VANGOGH 0x02 #define SMU11_DRIVER_IF_VERSION_Dimgrey_Cavefish 0xF -- GitLab From 05211e7fbbf042dd7f51155ebe64eb2ecacb25cb Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 17 Dec 2020 12:11:36 -0500 Subject: [PATCH 1129/1894] drm/amdgpu: only set DP subconnector type on DP and eDP connectors Fixes a crash in drm_object_property_set_value() because the property is not set for internal DP ports that connect to a bridge chips (e.g., DP to VGA or DP to LVDS). Bug: https://bugzilla.kernel.org/show_bug.cgi?id=210739 Fixes: 65bf2cf95d3ade ("drm/amdgpu: utilize subconnector property for DP through atombios") Tested-By: Kris Karas Cc: Oleg Vasilev Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 5.10.x --- drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index 65d1b23d7e746..b9c11c2b2885a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -1414,10 +1414,12 @@ out: pm_runtime_put_autosuspend(connector->dev->dev); } - drm_dp_set_subconnector_property(&amdgpu_connector->base, - ret, - amdgpu_dig_connector->dpcd, - amdgpu_dig_connector->downstream_ports); + if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort || + connector->connector_type == DRM_MODE_CONNECTOR_eDP) + drm_dp_set_subconnector_property(&amdgpu_connector->base, + ret, + amdgpu_dig_connector->dpcd, + amdgpu_dig_connector->downstream_ports); return ret; } -- GitLab From 505199a3b714aeb9d13dd0a04c33db9f5d99482a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 18 Dec 2020 11:19:30 -0500 Subject: [PATCH 1130/1894] drm/amdgpu: Fix a copy-pasta comment This is not a scsi driver. Reviewed-by: Nirmoy Das Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 7d2f7a2240b89..1cb7d73f7317b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5069,8 +5069,7 @@ out: * @pdev: pointer to PCI device * * Called when the error recovery driver tells us that its - * OK to resume normal operation. Use completion to allow - * halted scsi ops to resume. + * OK to resume normal operation. */ void amdgpu_pci_resume(struct pci_dev *pdev) { -- GitLab From a135a1b4c4db1f3b8cbed9676a40ede39feb3362 Mon Sep 17 00:00:00 2001 From: Stylon Wang Date: Tue, 10 Nov 2020 15:40:06 +0800 Subject: [PATCH 1131/1894] drm/amd/display: Fix memory leaks in S3 resume EDID parsing in S3 resume pushes new display modes to probed_modes list but doesn't consolidate to actual mode list. This creates a race condition when amdgpu_dm_connector_ddc_get_modes() re-initializes the list head without walking the list and results in memory leak. Bug: https://bugzilla.kernel.org/show_bug.cgi?id=209987 Acked-by: Harry Wentland Acked-by: Alex Deucher Reviewed-by: Nicholas Kazlauskas Signed-off-by: Stylon Wang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 8fac6116591bf..519080e9a2338 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2386,7 +2386,8 @@ void amdgpu_dm_update_connector_after_detect( drm_connector_update_edid_property(connector, aconnector->edid); - drm_add_edid_modes(connector, aconnector->edid); + aconnector->num_modes = drm_add_edid_modes(connector, aconnector->edid); + drm_connector_list_update(connector); if (aconnector->dc_link->aux_mode) drm_dp_cec_set_edid(&aconnector->dm_dp_aux.aux, -- GitLab From 8450e23f142f629e40bd67afc8375c86c7fbf8f1 Mon Sep 17 00:00:00 2001 From: Noor Azura Ahmad Tarmizi Date: Wed, 23 Dec 2020 00:03:37 +0800 Subject: [PATCH 1132/1894] stmmac: intel: Add PCI IDs for TGL-H platform Add TGL-H PCI info and PCI IDs for the new TSN Controller to the list of supported devices. Signed-off-by: Noor Azura Ahmad Tarmizi Signed-off-by: Voon Weifeng Signed-off-by: Muhammad Husaini Zulkifli Link: https://lore.kernel.org/r/20201222160337.30870-1-muhammad.husaini.zulkifli@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index a2e80c89de2d4..9a6a519426a08 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -721,6 +721,8 @@ static SIMPLE_DEV_PM_OPS(intel_eth_pm_ops, intel_eth_pci_suspend, #define PCI_DEVICE_ID_INTEL_EHL_PSE1_RGMII1G_ID 0x4bb0 #define PCI_DEVICE_ID_INTEL_EHL_PSE1_SGMII1G_ID 0x4bb1 #define PCI_DEVICE_ID_INTEL_EHL_PSE1_SGMII2G5_ID 0x4bb2 +#define PCI_DEVICE_ID_INTEL_TGLH_SGMII1G_0_ID 0x43ac +#define PCI_DEVICE_ID_INTEL_TGLH_SGMII1G_1_ID 0x43a2 #define PCI_DEVICE_ID_INTEL_TGL_SGMII1G_ID 0xa0ac static const struct pci_device_id intel_eth_pci_id_table[] = { @@ -735,6 +737,8 @@ static const struct pci_device_id intel_eth_pci_id_table[] = { { PCI_DEVICE_DATA(INTEL, EHL_PSE1_SGMII1G_ID, &ehl_pse1_sgmii1g_info) }, { PCI_DEVICE_DATA(INTEL, EHL_PSE1_SGMII2G5_ID, &ehl_pse1_sgmii1g_info) }, { PCI_DEVICE_DATA(INTEL, TGL_SGMII1G_ID, &tgl_sgmii1g_info) }, + { PCI_DEVICE_DATA(INTEL, TGLH_SGMII1G_0_ID, &tgl_sgmii1g_info) }, + { PCI_DEVICE_DATA(INTEL, TGLH_SGMII1G_1_ID, &tgl_sgmii1g_info) }, {} }; MODULE_DEVICE_TABLE(pci, intel_eth_pci_id_table); -- GitLab From 94ad8f3ac6aff5acde3f6c4719997efc61e0dccf Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 22 Dec 2020 12:00:10 -0600 Subject: [PATCH 1133/1894] net: ipa: clear pending interrupts before enabling We enable the completion interrupt for channel or event ring commands only when we issue them. The interrupt is disabled after the interrupt has fired, or after we have timed out waiting for it. If we time out, the command could complete after the interrupt has been disabled, causing a state change in the channel or event ring. The interrupt associated with that state change would be delivered the next time the completion interrupt is enabled. To avoid previous command completions interfering with new commands, clear all pending completion interrupts before re-enabling them for a new command. Fixes: b4175f8731f78 ("net: ipa: only enable GSI event control IRQs when needed") Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- drivers/net/ipa/gsi.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index c4795249719d4..4aee60d62ab09 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -340,7 +340,13 @@ static int evt_ring_command(struct gsi *gsi, u32 evt_ring_id, * is issued here. Only permit *this* event ring to trigger * an interrupt, and only enable the event control IRQ type * when we expect it to occur. + * + * There's a small chance that a previous command completed + * after the interrupt was disabled, so make sure we have no + * pending interrupts before we enable them. */ + iowrite32(~0, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_CLR_OFFSET); + val = BIT(evt_ring_id); iowrite32(val, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_MSK_OFFSET); gsi_irq_type_enable(gsi, GSI_EV_CTRL); @@ -453,7 +459,13 @@ gsi_channel_command(struct gsi_channel *channel, enum gsi_ch_cmd_opcode opcode) * issued here. So we only permit *this* channel to trigger * an interrupt and only enable the channel control IRQ type * when we expect it to occur. + * + * There's a small chance that a previous command completed + * after the interrupt was disabled, so make sure we have no + * pending interrupts before we enable them. */ + iowrite32(~0, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_CLR_OFFSET); + val = BIT(channel_id); iowrite32(val, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET); gsi_irq_type_enable(gsi, GSI_CH_CTRL); -- GitLab From 6ffddf3b3d182d886d754cfafdf909ccb14f464b Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 22 Dec 2020 12:00:11 -0600 Subject: [PATCH 1134/1894] net: ipa: use state to determine channel command success The result of issuing a channel control command should be that the channel changes state. If enabled, a completion interrupt signals that the channel state has changed. This interrupt is enabled by gsi_channel_command() and disabled again after the command has completed (or we time out). There is a window of time--after the completion interrupt is disabled but before the channel state is read--during which the command could complete successfully without interrupting. This would cause the channel to transition to the desired new state. So whether a channel command ends via completion interrupt or timeout, we can consider the command successful if the channel has entered the desired state (and a failure if it has not, regardless of the cause). Fixes: d6c9e3f506ae8 ("net: ipa: only enable generic command completion IRQ when needed"); Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- drivers/net/ipa/gsi.c | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index 4aee60d62ab09..4f0e791764237 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -505,15 +505,15 @@ static int gsi_channel_alloc_command(struct gsi *gsi, u32 channel_id) ret = gsi_channel_command(channel, GSI_CH_ALLOCATE); - /* Channel state will normally have been updated */ + /* If successful the channel state will have changed */ state = gsi_channel_state(channel); - if (!ret && state != GSI_CHANNEL_STATE_ALLOCATED) { - dev_err(dev, "channel %u bad state %u after alloc\n", - channel_id, state); - ret = -EIO; - } + if (state == GSI_CHANNEL_STATE_ALLOCATED) + return 0; - return ret; + dev_err(dev, "channel %u bad state %u after alloc\n", + channel_id, state); + + return -EIO; } /* Start an ALLOCATED channel */ @@ -533,15 +533,15 @@ static int gsi_channel_start_command(struct gsi_channel *channel) ret = gsi_channel_command(channel, GSI_CH_START); - /* Channel state will normally have been updated */ + /* If successful the channel state will have changed */ state = gsi_channel_state(channel); - if (!ret && state != GSI_CHANNEL_STATE_STARTED) { - dev_err(dev, "channel %u bad state %u after start\n", - gsi_channel_id(channel), state); - ret = -EIO; - } + if (state == GSI_CHANNEL_STATE_STARTED) + return 0; - return ret; + dev_err(dev, "channel %u bad state %u after start\n", + gsi_channel_id(channel), state); + + return -EIO; } /* Stop a GSI channel in STARTED state */ @@ -568,10 +568,10 @@ static int gsi_channel_stop_command(struct gsi_channel *channel) ret = gsi_channel_command(channel, GSI_CH_STOP); - /* Channel state will normally have been updated */ + /* If successful the channel state will have changed */ state = gsi_channel_state(channel); - if (ret || state == GSI_CHANNEL_STATE_STOPPED) - return ret; + if (state == GSI_CHANNEL_STATE_STOPPED) + return 0; /* We may have to try again if stop is in progress */ if (state == GSI_CHANNEL_STATE_STOP_IN_PROC) @@ -604,9 +604,9 @@ static void gsi_channel_reset_command(struct gsi_channel *channel) ret = gsi_channel_command(channel, GSI_CH_RESET); - /* Channel state will normally have been updated */ + /* If successful the channel state will have changed */ state = gsi_channel_state(channel); - if (!ret && state != GSI_CHANNEL_STATE_ALLOCATED) + if (state != GSI_CHANNEL_STATE_ALLOCATED) dev_err(dev, "channel %u bad state %u after reset\n", gsi_channel_id(channel), state); } @@ -628,9 +628,10 @@ static void gsi_channel_de_alloc_command(struct gsi *gsi, u32 channel_id) ret = gsi_channel_command(channel, GSI_CH_DE_ALLOC); - /* Channel state will normally have been updated */ + /* If successful the channel state will have changed */ state = gsi_channel_state(channel); - if (!ret && state != GSI_CHANNEL_STATE_NOT_ALLOCATED) + + if (state != GSI_CHANNEL_STATE_NOT_ALLOCATED) dev_err(dev, "channel %u bad state %u after dealloc\n", channel_id, state); } -- GitLab From 428b448ee764a264b7a2eeed295b282755114aa7 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 22 Dec 2020 12:00:12 -0600 Subject: [PATCH 1135/1894] net: ipa: use state to determine event ring command success This patch implements the same basic fix for event rings as the previous one does for channels. The result of issuing an event ring control command should be that the event ring changes state. If enabled, a completion interrupt signals that the event ring state has changed. This interrupt is enabled by gsi_evt_ring_command() and disabled again after the command has completed (or we time out). There is a window of time during which the command could complete successfully without interrupting. This would cause the event ring to transition to the desired new state. So whether a event ring command ends via completion interrupt or timeout, we can consider the command successful if the event ring has entered the desired state (and a failure if it has not, regardless of the cause). Fixes: b4175f8731f78 ("net: ipa: only enable GSI event control IRQs when needed") Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- drivers/net/ipa/gsi.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index 4f0e791764237..579cc3e516775 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -384,13 +384,15 @@ static int gsi_evt_ring_alloc_command(struct gsi *gsi, u32 evt_ring_id) } ret = evt_ring_command(gsi, evt_ring_id, GSI_EVT_ALLOCATE); - if (!ret && evt_ring->state != GSI_EVT_RING_STATE_ALLOCATED) { - dev_err(gsi->dev, "event ring %u bad state %u after alloc\n", - evt_ring_id, evt_ring->state); - ret = -EIO; - } - return ret; + /* If successful the event ring state will have changed */ + if (evt_ring->state == GSI_EVT_RING_STATE_ALLOCATED) + return 0; + + dev_err(gsi->dev, "event ring %u bad state %u after alloc\n", + evt_ring_id, evt_ring->state); + + return -EIO; } /* Reset a GSI event ring in ALLOCATED or ERROR state. */ @@ -408,9 +410,13 @@ static void gsi_evt_ring_reset_command(struct gsi *gsi, u32 evt_ring_id) } ret = evt_ring_command(gsi, evt_ring_id, GSI_EVT_RESET); - if (!ret && evt_ring->state != GSI_EVT_RING_STATE_ALLOCATED) - dev_err(gsi->dev, "event ring %u bad state %u after reset\n", - evt_ring_id, evt_ring->state); + + /* If successful the event ring state will have changed */ + if (evt_ring->state == GSI_EVT_RING_STATE_ALLOCATED) + return; + + dev_err(gsi->dev, "event ring %u bad state %u after reset\n", + evt_ring_id, evt_ring->state); } /* Issue a hardware de-allocation request for an allocated event ring */ @@ -426,9 +432,13 @@ static void gsi_evt_ring_de_alloc_command(struct gsi *gsi, u32 evt_ring_id) } ret = evt_ring_command(gsi, evt_ring_id, GSI_EVT_DE_ALLOC); - if (!ret && evt_ring->state != GSI_EVT_RING_STATE_NOT_ALLOCATED) - dev_err(gsi->dev, "event ring %u bad state %u after dealloc\n", - evt_ring_id, evt_ring->state); + + /* If successful the event ring state will have changed */ + if (evt_ring->state == GSI_EVT_RING_STATE_NOT_ALLOCATED) + return; + + dev_err(gsi->dev, "event ring %u bad state %u after dealloc\n", + evt_ring_id, evt_ring->state); } /* Fetch the current state of a channel from hardware */ -- GitLab From 826f328e2b7e8854dd42ea44e6519cd75018e7b1 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 22 Dec 2020 22:49:44 +0100 Subject: [PATCH 1136/1894] net: dcb: Validate netlink message in DCB handler DCB uses the same handler function for both RTM_GETDCB and RTM_SETDCB messages. dcb_doit() bounces RTM_SETDCB mesasges if the user does not have the CAP_NET_ADMIN capability. However, the operation to be performed is not decided from the DCB message type, but from the DCB command. Thus DCB_CMD_*_GET commands are used for reading DCB objects, the corresponding SET and DEL commands are used for manipulation. The assumption is that set-like commands will be sent via an RTM_SETDCB message, and get-like ones via RTM_GETDCB. However, this assumption is not enforced. It is therefore possible to manipulate DCB objects without CAP_NET_ADMIN capability by sending the corresponding command in an RTM_GETDCB message. That is a bug. Fix it by validating the type of the request message against the type used for the response. Fixes: 2f90b8657ec9 ("ixgbe: this patch adds support for DCB to the kernel and ixgbe driver") Signed-off-by: Petr Machata Link: https://lore.kernel.org/r/a2a9b88418f3a58ef211b718f2970128ef9e3793.1608673640.git.me@pmachata.org Signed-off-by: Jakub Kicinski --- net/dcb/dcbnl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 084e159a12ba6..7d49b6fd6cef9 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1765,6 +1765,8 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, fn = &reply_funcs[dcb->cmd]; if (!fn->cb) return -EOPNOTSUPP; + if (fn->type != nlh->nlmsg_type) + return -EPERM; if (!tb[DCB_ATTR_IFNAME]) return -EINVAL; -- GitLab From 427c940558560bff2583d07fc119a21094675982 Mon Sep 17 00:00:00 2001 From: John Wang Date: Wed, 23 Dec 2020 13:55:23 +0800 Subject: [PATCH 1137/1894] net/ncsi: Use real net-device for response handler When aggregating ncsi interfaces and dedicated interfaces to bond interfaces, the ncsi response handler will use the wrong net device to find ncsi_dev, so that the ncsi interface will not work properly. Here, we use the original net device to fix it. Fixes: 138635cc27c9 ("net/ncsi: NCSI response packet handler") Signed-off-by: John Wang Link: https://lore.kernel.org/r/20201223055523.2069-1-wangzhiqiang.bj@bytedance.com Signed-off-by: Jakub Kicinski --- net/ncsi/ncsi-rsp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index 5b1f4ec66dd98..888ccc2d4e34b 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -1120,7 +1120,7 @@ int ncsi_rcv_rsp(struct sk_buff *skb, struct net_device *dev, int payload, i, ret; /* Find the NCSI device */ - nd = ncsi_find_dev(dev); + nd = ncsi_find_dev(orig_dev); ndp = nd ? TO_NCSI_DEV_PRIV(nd) : NULL; if (!ndp) return -ENODEV; -- GitLab From 5d41f9b7ee7a5a5138894f58846a4ffed601498a Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Wed, 23 Dec 2020 19:06:12 +0800 Subject: [PATCH 1138/1894] net: ethernet: Fix memleak in ethoc_probe When mdiobus_register() fails, priv->mdio allocated by mdiobus_alloc() has not been freed, which leads to memleak. Fixes: e7f4dc3536a4 ("mdio: Move allocation of interrupts into core") Signed-off-by: Dinghao Liu Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20201223110615.31389-1-dinghao.liu@zju.edu.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ethoc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c index 0981fe9652e50..3d9b0b161e241 100644 --- a/drivers/net/ethernet/ethoc.c +++ b/drivers/net/ethernet/ethoc.c @@ -1211,7 +1211,7 @@ static int ethoc_probe(struct platform_device *pdev) ret = mdiobus_register(priv->mdio); if (ret) { dev_err(&netdev->dev, "failed to register MDIO bus\n"); - goto free2; + goto free3; } ret = ethoc_mdio_probe(netdev); @@ -1243,6 +1243,7 @@ error2: netif_napi_del(&priv->napi); error: mdiobus_unregister(priv->mdio); +free3: mdiobus_free(priv->mdio); free2: clk_disable_unprepare(priv->clk); -- GitLab From 1f45dc22066797479072978feeada0852502e180 Mon Sep 17 00:00:00 2001 From: Lijun Pan Date: Wed, 23 Dec 2020 14:49:04 -0600 Subject: [PATCH 1139/1894] ibmvnic: continue fatal error reset after passive init Commit f9c6cea0b385 ("ibmvnic: Skip fatal error reset after passive init") says "If the passive CRQ initialization occurs before the FATAL reset task is processed, the FATAL error reset task would try to access a CRQ message queue that was freed, causing an oops. The problem may be most likely to occur during DLPAR add vNIC with a non-default MTU, because the DLPAR process will automatically issue a change MTU request. Fix this by not processing fatal error reset if CRQ is passively initialized after client-driven CRQ initialization fails." The original commit skips a specific reset condition, but that does not fix the problem it claims to fix, and misses a reset condition. The effective fix is commit 0e435befaea4 ("ibmvnic: fix NULL pointer dereference in ibmvic_reset_crq") and commit a0faaa27c716 ("ibmvnic: fix NULL pointer dereference in reset_sub_crq_queues"). With above two fixes, there are no more crashes seen as described even without the original commit, so I would like to revert the original commit. Fixes: f9c6cea0b385 ("ibmvnic: Skip fatal error reset after passive init") Signed-off-by: Lijun Pan Link: https://lore.kernel.org/r/20201223204904.12677-1-ljp@linux.ibm.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmvnic.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index bd0281020d519..91ebcde30292c 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2342,8 +2342,7 @@ static void __ibmvnic_reset(struct work_struct *work) set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(60 * HZ); } - } else if (!(rwi->reset_reason == VNIC_RESET_FATAL && - adapter->from_passive_init)) { + } else { rc = do_reset(adapter, rwi, reset_state); } kfree(rwi); -- GitLab From 808e0d8832cc81738f3e8df12dff0688352baf50 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 14 Dec 2020 13:29:32 -0600 Subject: [PATCH 1140/1894] e1000e: Only run S0ix flows if shutdown succeeded If the shutdown failed, the part will be thawed and running S0ix flows will put it into an undefined state. Reported-by: Alexander Duyck Reviewed-by: Alexander Duyck Signed-off-by: Mario Limonciello Tested-by: Yijun Shen Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/e1000e/netdev.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 128ab6898070e..6588f5d4a2be8 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -6970,13 +6970,14 @@ static __maybe_unused int e1000e_pm_suspend(struct device *dev) e1000e_pm_freeze(dev); rc = __e1000_shutdown(pdev, false); - if (rc) + if (rc) { e1000e_pm_thaw(dev); - - /* Introduce S0ix implementation */ - if (hw->mac.type >= e1000_pch_cnp && - !e1000e_check_me(hw->adapter->pdev->device)) - e1000e_s0ix_entry_flow(adapter); + } else { + /* Introduce S0ix implementation */ + if (hw->mac.type >= e1000_pch_cnp && + !e1000e_check_me(hw->adapter->pdev->device)) + e1000e_s0ix_entry_flow(adapter); + } return rc; } -- GitLab From 3cf31b1a9effd859bb3d6ff9f8b5b0d5e6cac952 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 14 Dec 2020 13:29:33 -0600 Subject: [PATCH 1141/1894] e1000e: bump up timeout to wait when ME un-configures ULP mode Per guidance from Intel ethernet architecture team, it may take up to 1 second for unconfiguring ULP mode. However in practice this seems to be taking up to 2 seconds on some Lenovo machines. Detect scenarios that take more than 1 second but less than 2.5 seconds and emit a warning on resume for those scenarios. Suggested-by: Aaron Ma Suggested-by: Sasha Netfin Suggested-by: Hans de Goede CC: Mark Pearson Fixes: f15bb6dde738cc8fa0 ("e1000e: Add support for S0ix") BugLink: https://bugs.launchpad.net/bugs/1865570 Link: https://patchwork.ozlabs.org/project/intel-wired-lan/patch/20200323191639.48826-1-aaron.ma@canonical.com/ Link: https://lkml.org/lkml/2020/12/13/15 Link: https://lkml.org/lkml/2020/12/14/708 Signed-off-by: Mario Limonciello Reviewed-by: Hans de Goede Tested-by: Yijun Shen Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/e1000e/ich8lan.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 9aa6fad8ed477..6fb46682b058a 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -1240,6 +1240,9 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force) return 0; if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID) { + struct e1000_adapter *adapter = hw->adapter; + bool firmware_bug = false; + if (force) { /* Request ME un-configure ULP mode in the PHY */ mac_reg = er32(H2ME); @@ -1248,16 +1251,24 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force) ew32(H2ME, mac_reg); } - /* Poll up to 300msec for ME to clear ULP_CFG_DONE. */ + /* Poll up to 2.5 seconds for ME to clear ULP_CFG_DONE. + * If this takes more than 1 second, show a warning indicating a + * firmware bug + */ while (er32(FWSM) & E1000_FWSM_ULP_CFG_DONE) { - if (i++ == 30) { + if (i++ == 250) { ret_val = -E1000_ERR_PHY; goto out; } + if (i > 100 && !firmware_bug) + firmware_bug = true; usleep_range(10000, 11000); } - e_dbg("ULP_CONFIG_DONE cleared after %dmsec\n", i * 10); + if (firmware_bug) + e_warn("ULP_CONFIG_DONE took %dmsec. This is a firmware bug\n", i * 10); + else + e_dbg("ULP_CONFIG_DONE cleared after %dmsec\n", i * 10); if (force) { mac_reg = er32(H2ME); -- GitLab From 6cecf02e77ab9bf97e9252f9fcb8f0738a6de12c Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 14 Dec 2020 13:29:34 -0600 Subject: [PATCH 1142/1894] Revert "e1000e: disable s0ix entry and exit flows for ME systems" commit e086ba2fccda ("e1000e: disable s0ix entry and exit flows for ME systems") disabled s0ix flows for systems that have various incarnations of the i219-LM ethernet controller. This changed caused power consumption regressions on the following shipping Dell Comet Lake based laptops: * Latitude 5310 * Latitude 5410 * Latitude 5410 * Latitude 5510 * Precision 3550 * Latitude 5411 * Latitude 5511 * Precision 3551 * Precision 7550 * Precision 7750 This commit was introduced because of some regressions on certain Thinkpad laptops. This comment was potentially caused by an earlier commit 632fbd5eb5b0e ("e1000e: fix S0ix flows for cable connected case"). or it was possibly caused by a system not meeting platform architectural requirements for low power consumption. Other changes made in the driver with extended timeouts are expected to make the driver more impervious to platform firmware behavior. Fixes: e086ba2fccda ("e1000e: disable s0ix entry and exit flows for ME systems") Reviewed-by: Alexander Duyck Signed-off-by: Mario Limonciello Reviewed-by: Hans de Goede Tested-by: Yijun Shen Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/e1000e/netdev.c | 45 +--------------------- 1 file changed, 2 insertions(+), 43 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 6588f5d4a2be8..b9800ba2006ca 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -103,45 +103,6 @@ static const struct e1000_reg_info e1000_reg_info_tbl[] = { {0, NULL} }; -struct e1000e_me_supported { - u16 device_id; /* supported device ID */ -}; - -static const struct e1000e_me_supported me_supported[] = { - {E1000_DEV_ID_PCH_LPT_I217_LM}, - {E1000_DEV_ID_PCH_LPTLP_I218_LM}, - {E1000_DEV_ID_PCH_I218_LM2}, - {E1000_DEV_ID_PCH_I218_LM3}, - {E1000_DEV_ID_PCH_SPT_I219_LM}, - {E1000_DEV_ID_PCH_SPT_I219_LM2}, - {E1000_DEV_ID_PCH_LBG_I219_LM3}, - {E1000_DEV_ID_PCH_SPT_I219_LM4}, - {E1000_DEV_ID_PCH_SPT_I219_LM5}, - {E1000_DEV_ID_PCH_CNP_I219_LM6}, - {E1000_DEV_ID_PCH_CNP_I219_LM7}, - {E1000_DEV_ID_PCH_ICP_I219_LM8}, - {E1000_DEV_ID_PCH_ICP_I219_LM9}, - {E1000_DEV_ID_PCH_CMP_I219_LM10}, - {E1000_DEV_ID_PCH_CMP_I219_LM11}, - {E1000_DEV_ID_PCH_CMP_I219_LM12}, - {E1000_DEV_ID_PCH_TGP_I219_LM13}, - {E1000_DEV_ID_PCH_TGP_I219_LM14}, - {E1000_DEV_ID_PCH_TGP_I219_LM15}, - {0} -}; - -static bool e1000e_check_me(u16 device_id) -{ - struct e1000e_me_supported *id; - - for (id = (struct e1000e_me_supported *)me_supported; - id->device_id; id++) - if (device_id == id->device_id) - return true; - - return false; -} - /** * __ew32_prepare - prepare to write to MAC CSR register on certain parts * @hw: pointer to the HW structure @@ -6974,8 +6935,7 @@ static __maybe_unused int e1000e_pm_suspend(struct device *dev) e1000e_pm_thaw(dev); } else { /* Introduce S0ix implementation */ - if (hw->mac.type >= e1000_pch_cnp && - !e1000e_check_me(hw->adapter->pdev->device)) + if (hw->mac.type >= e1000_pch_cnp) e1000e_s0ix_entry_flow(adapter); } @@ -6991,8 +6951,7 @@ static __maybe_unused int e1000e_pm_resume(struct device *dev) int rc; /* Introduce S0ix implementation */ - if (hw->mac.type >= e1000_pch_cnp && - !e1000e_check_me(hw->adapter->pdev->device)) + if (hw->mac.type >= e1000_pch_cnp) e1000e_s0ix_exit_flow(adapter); rc = __e1000_resume(pdev); -- GitLab From 3c98cbf22a96c1b12f48c1b2a4680dfe5cb280f9 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 14 Dec 2020 13:29:35 -0600 Subject: [PATCH 1143/1894] e1000e: Export S0ix flags to ethtool This flag can be used by an end user to disable S0ix flows on a buggy system or by an OEM for development purposes. If you need this flag to be persisted across reboots, it's suggested to use a udev rule to call adjust it until the kernel could have your configuration in a disallow list. Signed-off-by: Mario Limonciello Reviewed-by: Hans de Goede Tested-by: Yijun Shen Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/e1000e/e1000.h | 1 + drivers/net/ethernet/intel/e1000e/ethtool.c | 46 +++++++++++++++++++++ drivers/net/ethernet/intel/e1000e/netdev.c | 9 ++-- 3 files changed, 52 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h index ba7a0f8f69376..5b2143f4b1f85 100644 --- a/drivers/net/ethernet/intel/e1000e/e1000.h +++ b/drivers/net/ethernet/intel/e1000e/e1000.h @@ -436,6 +436,7 @@ s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca); #define FLAG2_DFLT_CRC_STRIPPING BIT(12) #define FLAG2_CHECK_RX_HWTSTAMP BIT(13) #define FLAG2_CHECK_SYSTIM_OVERFLOW BIT(14) +#define FLAG2_ENABLE_S0IX_FLOWS BIT(15) #define E1000_RX_DESC_PS(R, i) \ (&(((union e1000_rx_desc_packet_split *)((R).desc))[i])) diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c index 03215b0aee4bd..06442e6bef731 100644 --- a/drivers/net/ethernet/intel/e1000e/ethtool.c +++ b/drivers/net/ethernet/intel/e1000e/ethtool.c @@ -23,6 +23,13 @@ struct e1000_stats { int stat_offset; }; +static const char e1000e_priv_flags_strings[][ETH_GSTRING_LEN] = { +#define E1000E_PRIV_FLAGS_S0IX_ENABLED BIT(0) + "s0ix-enabled", +}; + +#define E1000E_PRIV_FLAGS_STR_LEN ARRAY_SIZE(e1000e_priv_flags_strings) + #define E1000_STAT(str, m) { \ .stat_string = str, \ .type = E1000_STATS, \ @@ -1776,6 +1783,8 @@ static int e1000e_get_sset_count(struct net_device __always_unused *netdev, return E1000_TEST_LEN; case ETH_SS_STATS: return E1000_STATS_LEN; + case ETH_SS_PRIV_FLAGS: + return E1000E_PRIV_FLAGS_STR_LEN; default: return -EOPNOTSUPP; } @@ -2097,6 +2106,10 @@ static void e1000_get_strings(struct net_device __always_unused *netdev, p += ETH_GSTRING_LEN; } break; + case ETH_SS_PRIV_FLAGS: + memcpy(data, e1000e_priv_flags_strings, + E1000E_PRIV_FLAGS_STR_LEN * ETH_GSTRING_LEN); + break; } } @@ -2305,6 +2318,37 @@ static int e1000e_get_ts_info(struct net_device *netdev, return 0; } +static u32 e1000e_get_priv_flags(struct net_device *netdev) +{ + struct e1000_adapter *adapter = netdev_priv(netdev); + u32 priv_flags = 0; + + if (adapter->flags2 & FLAG2_ENABLE_S0IX_FLOWS) + priv_flags |= E1000E_PRIV_FLAGS_S0IX_ENABLED; + + return priv_flags; +} + +static int e1000e_set_priv_flags(struct net_device *netdev, u32 priv_flags) +{ + struct e1000_adapter *adapter = netdev_priv(netdev); + unsigned int flags2 = adapter->flags2; + + flags2 &= ~FLAG2_ENABLE_S0IX_FLOWS; + if (priv_flags & E1000E_PRIV_FLAGS_S0IX_ENABLED) { + struct e1000_hw *hw = &adapter->hw; + + if (hw->mac.type < e1000_pch_cnp) + return -EINVAL; + flags2 |= FLAG2_ENABLE_S0IX_FLOWS; + } + + if (flags2 != adapter->flags2) + adapter->flags2 = flags2; + + return 0; +} + static const struct ethtool_ops e1000_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS, .get_drvinfo = e1000_get_drvinfo, @@ -2336,6 +2380,8 @@ static const struct ethtool_ops e1000_ethtool_ops = { .set_eee = e1000e_set_eee, .get_link_ksettings = e1000_get_link_ksettings, .set_link_ksettings = e1000_set_link_ksettings, + .get_priv_flags = e1000e_get_priv_flags, + .set_priv_flags = e1000e_set_priv_flags, }; void e1000e_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index b9800ba2006ca..e9b82c209c2df 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -6923,7 +6923,6 @@ static __maybe_unused int e1000e_pm_suspend(struct device *dev) struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev)); struct e1000_adapter *adapter = netdev_priv(netdev); struct pci_dev *pdev = to_pci_dev(dev); - struct e1000_hw *hw = &adapter->hw; int rc; e1000e_flush_lpic(pdev); @@ -6935,7 +6934,7 @@ static __maybe_unused int e1000e_pm_suspend(struct device *dev) e1000e_pm_thaw(dev); } else { /* Introduce S0ix implementation */ - if (hw->mac.type >= e1000_pch_cnp) + if (adapter->flags2 & FLAG2_ENABLE_S0IX_FLOWS) e1000e_s0ix_entry_flow(adapter); } @@ -6947,11 +6946,10 @@ static __maybe_unused int e1000e_pm_resume(struct device *dev) struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev)); struct e1000_adapter *adapter = netdev_priv(netdev); struct pci_dev *pdev = to_pci_dev(dev); - struct e1000_hw *hw = &adapter->hw; int rc; /* Introduce S0ix implementation */ - if (hw->mac.type >= e1000_pch_cnp) + if (adapter->flags2 & FLAG2_ENABLE_S0IX_FLOWS) e1000e_s0ix_exit_flow(adapter); rc = __e1000_resume(pdev); @@ -7615,6 +7613,9 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (!(adapter->flags & FLAG_HAS_AMT)) e1000e_get_hw_control(adapter); + if (hw->mac.type >= e1000_pch_cnp) + adapter->flags2 |= FLAG2_ENABLE_S0IX_FLOWS; + strlcpy(netdev->name, "eth%d", sizeof(netdev->name)); err = register_netdev(netdev); if (err) -- GitLab From 11b844b0b7c7c3dc8e8f4d0bbaad5e798351862c Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 23 Dec 2020 12:06:52 -0800 Subject: [PATCH 1144/1894] selftests/bpf: Work-around EBUSY errors from hashmap update/delete 20b6cc34ea74 ("bpf: Avoid hashtab deadlock with map_locked") introduced a possibility of getting EBUSY error on lock contention, which seems to happen very deterministically in test_maps when running 1024 threads on low-CPU machine. In libbpf CI case, it's a 2 CPU VM and it's hitting this 100% of the time. Work around by retrying on EBUSY (and EAGAIN, while we are at it) after a small sleep. sched_yield() is too agressive and fails even after 20 retries, so I went with usleep(1) for backoff. Also log actual error returned to make it easier to see what's going on. Fixes: 20b6cc34ea74 ("bpf: Avoid hashtab deadlock with map_locked") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20201223200652.3417075-1-andrii@kernel.org --- tools/testing/selftests/bpf/test_maps.c | 48 +++++++++++++++++++++---- 1 file changed, 42 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 0ad3e6305ff03..51adc42b2b40e 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -1312,22 +1312,58 @@ static void test_map_stress(void) #define DO_UPDATE 1 #define DO_DELETE 0 +#define MAP_RETRIES 20 + +static int map_update_retriable(int map_fd, const void *key, const void *value, + int flags, int attempts) +{ + while (bpf_map_update_elem(map_fd, key, value, flags)) { + if (!attempts || (errno != EAGAIN && errno != EBUSY)) + return -errno; + + usleep(1); + attempts--; + } + + return 0; +} + +static int map_delete_retriable(int map_fd, const void *key, int attempts) +{ + while (bpf_map_delete_elem(map_fd, key)) { + if (!attempts || (errno != EAGAIN && errno != EBUSY)) + return -errno; + + usleep(1); + attempts--; + } + + return 0; +} + static void test_update_delete(unsigned int fn, void *data) { int do_update = ((int *)data)[1]; int fd = ((int *)data)[0]; - int i, key, value; + int i, key, value, err; for (i = fn; i < MAP_SIZE; i += TASKS) { key = value = i; if (do_update) { - assert(bpf_map_update_elem(fd, &key, &value, - BPF_NOEXIST) == 0); - assert(bpf_map_update_elem(fd, &key, &value, - BPF_EXIST) == 0); + err = map_update_retriable(fd, &key, &value, BPF_NOEXIST, MAP_RETRIES); + if (err) + printf("error %d %d\n", err, errno); + assert(err == 0); + err = map_update_retriable(fd, &key, &value, BPF_EXIST, MAP_RETRIES); + if (err) + printf("error %d %d\n", err, errno); + assert(err == 0); } else { - assert(bpf_map_delete_elem(fd, &key) == 0); + err = map_delete_retriable(fd, &key, MAP_RETRIES); + if (err) + printf("error %d %d\n", err, errno); + assert(err == 0); } } } -- GitLab From 69ca310f34168eae0ada434796bfc22fb4a0fa26 Mon Sep 17 00:00:00 2001 From: Jonathan Lemon Date: Fri, 18 Dec 2020 10:50:30 -0800 Subject: [PATCH 1145/1894] bpf: Save correct stopping point in file seq iteration On some systems, some variant of the following splat is repeatedly seen. The common factor in all traces seems to be the entry point to task_file_seq_next(). With the patch, all warnings go away. rcu: INFO: rcu_sched self-detected stall on CPU rcu: \x0926-....: (20992 ticks this GP) idle=d7e/1/0x4000000000000002 softirq=81556231/81556231 fqs=4876 \x09(t=21033 jiffies g=159148529 q=223125) NMI backtrace for cpu 26 CPU: 26 PID: 2015853 Comm: bpftool Kdump: loaded Not tainted 5.6.13-0_fbk4_3876_gd8d1f9bf80bb #1 Hardware name: Quanta Twin Lakes MP/Twin Lakes Passive MP, BIOS F09_3A12 10/08/2018 Call Trace: dump_stack+0x50/0x70 nmi_cpu_backtrace.cold.6+0x13/0x50 ? lapic_can_unplug_cpu.cold.30+0x40/0x40 nmi_trigger_cpumask_backtrace+0xba/0xca rcu_dump_cpu_stacks+0x99/0xc7 rcu_sched_clock_irq.cold.90+0x1b4/0x3aa ? tick_sched_do_timer+0x60/0x60 update_process_times+0x24/0x50 tick_sched_timer+0x37/0x70 __hrtimer_run_queues+0xfe/0x270 hrtimer_interrupt+0xf4/0x210 smp_apic_timer_interrupt+0x5e/0x120 apic_timer_interrupt+0xf/0x20 RIP: 0010:get_pid_task+0x38/0x80 Code: 89 f6 48 8d 44 f7 08 48 8b 00 48 85 c0 74 2b 48 83 c6 55 48 c1 e6 04 48 29 f0 74 19 48 8d 78 20 ba 01 00 00 00 f0 0f c1 50 20 <85> d2 74 27 78 11 83 c2 01 78 0c 48 83 c4 08 c3 31 c0 48 83 c4 08 RSP: 0018:ffffc9000d293dc8 EFLAGS: 00000202 ORIG_RAX: ffffffffffffff13 RAX: ffff888637c05600 RBX: ffffc9000d293e0c RCX: 0000000000000000 RDX: 0000000000000001 RSI: 0000000000000550 RDI: ffff888637c05620 RBP: ffffffff8284eb80 R08: ffff88831341d300 R09: ffff88822ffd8248 R10: ffff88822ffd82d0 R11: 00000000003a93c0 R12: 0000000000000001 R13: 00000000ffffffff R14: ffff88831341d300 R15: 0000000000000000 ? find_ge_pid+0x1b/0x20 task_seq_get_next+0x52/0xc0 task_file_seq_get_next+0x159/0x220 task_file_seq_next+0x4f/0xa0 bpf_seq_read+0x159/0x390 vfs_read+0x8a/0x140 ksys_read+0x59/0xd0 do_syscall_64+0x42/0x110 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f95ae73e76e Code: Bad RIP value. RSP: 002b:00007ffc02c1dbf8 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 RAX: ffffffffffffffda RBX: 000000000170faa0 RCX: 00007f95ae73e76e RDX: 0000000000001000 RSI: 00007ffc02c1dc30 RDI: 0000000000000007 RBP: 00007ffc02c1ec70 R08: 0000000000000005 R09: 0000000000000006 R10: fffffffffffff20b R11: 0000000000000246 R12: 00000000019112a0 R13: 0000000000000000 R14: 0000000000000007 R15: 00000000004283c0 If unable to obtain the file structure for the current task, proceed to the next task number after the one returned from task_seq_get_next(), instead of the next task number from the original iterator. Also, save the stopping task number from task_seq_get_next() on failure in case of restarts. Fixes: eaaacd23910f ("bpf: Add task and task/file iterator targets") Signed-off-by: Jonathan Lemon Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201218185032.2464558-2-jonathan.lemon@gmail.com --- kernel/bpf/task_iter.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index 0458a40edf10a..8033ab19138a2 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -158,13 +158,14 @@ again: if (!curr_task) { info->task = NULL; info->files = NULL; + info->tid = curr_tid; return NULL; } curr_files = get_files_struct(curr_task); if (!curr_files) { put_task_struct(curr_task); - curr_tid = ++(info->tid); + curr_tid = curr_tid + 1; info->fd = 0; goto again; } -- GitLab From a61daaf351da7c8493f2586437617d60c24350b0 Mon Sep 17 00:00:00 2001 From: Jonathan Lemon Date: Fri, 18 Dec 2020 10:50:31 -0800 Subject: [PATCH 1146/1894] bpf: Use thread_group_leader() Instead of directly comparing task->tgid and task->pid, use the thread_group_leader() helper. This helps with readability, and there should be no functional change. Signed-off-by: Jonathan Lemon Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201218185032.2464558-3-jonathan.lemon@gmail.com --- kernel/bpf/task_iter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index 8033ab19138a2..dc4007f1843b2 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -37,7 +37,7 @@ retry: if (!task) { ++*tid; goto retry; - } else if (skip_if_dup_files && task->tgid != task->pid && + } else if (skip_if_dup_files && !thread_group_leader(task) && task->files == task->group_leader->files) { put_task_struct(task); task = NULL; -- GitLab From d2ee8447e1bed7def30bab1748c876b8bd4e0876 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Thu, 24 Dec 2020 12:45:18 +0100 Subject: [PATCH 1147/1894] coccinelle: update expiring email addresses Signed-off-by: Julia Lawall --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index b516bb34a8d5a..48cd0f73484a2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4324,8 +4324,8 @@ T: git git://linuxtv.org/media_tree.git F: drivers/media/pci/cobalt/ COCCINELLE/Semantic Patches (SmPL) -M: Julia Lawall -M: Gilles Muller +M: Julia Lawall +M: Gilles Muller M: Nicolas Palix M: Michal Marek L: cocci@systeme.lip6.fr (moderated for non-subscribers) -- GitLab From d8f6e5c6c83737cfdad46077e614885a3db9e809 Mon Sep 17 00:00:00 2001 From: Sumera Priyadarsini Date: Wed, 25 Nov 2020 02:02:12 +0530 Subject: [PATCH 1148/1894] scripts: coccicheck: Correct usage of make coccicheck The command "make coccicheck C=1 CHECK=scripts/coccicheck" results in the error: ./scripts/coccicheck: line 65: -1: shift count out of range This happens because every time the C variable is specified, the shell arguments need to be "shifted" in order to take only the last argument, which is the C file to test. These shell arguments mostly comprise flags that have been set in the Makefile. However, when coccicheck is specified in the make command as a rule, the number of shell arguments is zero, thus passing the invalid value -1 to the shift command, resulting in an error. Modify coccicheck to print correct usage of make coccicheck so as to avoid the error. Signed-off-by: Sumera Priyadarsini Signed-off-by: Julia Lawall --- scripts/coccicheck | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/scripts/coccicheck b/scripts/coccicheck index d7f6b7ff130ab..65fee63aeadba 100755 --- a/scripts/coccicheck +++ b/scripts/coccicheck @@ -60,6 +60,18 @@ COCCIINCLUDE=${COCCIINCLUDE// -include/ --include} if [ "$C" = "1" -o "$C" = "2" ]; then ONLINE=1 + if [[ $# -le 0 ]]; then + echo '' + echo 'Specifying both the variable "C" and rule "coccicheck" in the make +command results in a shift count error.' + echo '' + echo 'Try specifying "scripts/coccicheck" as a value for the CHECK variable instead.' + echo '' + echo 'Example: make C=2 CHECK=scripts/coccicheck drivers/net/ethernet/ethoc.o' + echo '' + exit 1 + fi + # Take only the last argument, which is the C file to test shift $(( $# - 1 )) OPTIONS="$COCCIINCLUDE $1" -- GitLab From 6e5192143ab571dbefb584edf900565098bdfd23 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 21 Dec 2020 09:03:04 -0300 Subject: [PATCH 1149/1894] tools headers UAPI: Update epoll_pwait2 affected files To pick the changes from: b0a0c2615f6f199a ("epoll: wire up syscall epoll_pwait2") That addresses these perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/asm-generic/unistd.h' differs from latest version at 'include/uapi/asm-generic/unistd.h' diff -u tools/include/uapi/asm-generic/unistd.h include/uapi/asm-generic/unistd.h Warning: Kernel ABI header at 'tools/perf/arch/x86/entry/syscalls/syscall_64.tbl' differs from latest version at 'arch/x86/entry/syscalls/syscall_64.tbl' diff -u tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: Willem de Bruijn Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/asm-generic/unistd.h | 4 +++- tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index fc48c64700eb8..728752917785e 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -859,9 +859,11 @@ __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd) __SYSCALL(__NR_faccessat2, sys_faccessat2) #define __NR_process_madvise 440 __SYSCALL(__NR_process_madvise, sys_process_madvise) +#define __NR_epoll_pwait2 441 +__SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2) #undef __NR_syscalls -#define __NR_syscalls 441 +#define __NR_syscalls 442 /* * 32 bit systems traditionally used different diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 379819244b91d..78672124d28be 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -362,6 +362,7 @@ 438 common pidfd_getfd sys_pidfd_getfd 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise +441 common epoll_pwait2 sys_epoll_pwait2 # # Due to a historical design error, certain syscalls are numbered differently -- GitLab From 7f3905f00a2025591a6883ee6880f928029b4d96 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 21 Dec 2020 09:04:54 -0300 Subject: [PATCH 1150/1894] tools headers cpufeatures: Sync with the kernel sources To pick the changes in: 69372cf01290b958 ("x86/cpu: Add VM page flush MSR availablility as a CPUID feature") e1b35da5e624f8b0 ("x86: Enumerate AVX512 FP16 CPUID feature flag") That causes only these 'perf bench' objects to rebuild: CC /tmp/build/perf/bench/mem-memcpy-x86-64-asm.o CC /tmp/build/perf/bench/mem-memset-x86-64-asm.o And addresses these perf build warnings: Warning: Kernel ABI header at 'tools/arch/x86/include/asm/cpufeatures.h' differs from latest version at 'arch/x86/include/asm/cpufeatures.h' diff -u tools/arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/cpufeatures.h Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Kyung Min Park Cc: Namhyung Kim Cc: Paolo Bonzini Cc: Tom Lendacky Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/cpufeatures.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index f5ef2d5b9231c..84b887825f126 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -237,6 +237,7 @@ #define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */ #define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */ #define X86_FEATURE_SEV_ES ( 8*32+20) /* AMD Secure Encrypted Virtualization - Encrypted State */ +#define X86_FEATURE_VM_PAGE_FLUSH ( 8*32+21) /* "" VM Page Flush MSR is supported */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ #define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ @@ -376,6 +377,7 @@ #define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */ +#define X86_FEATURE_AVX512_FP16 (18*32+23) /* AVX512 FP16 */ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */ -- GitLab From fde668244d1d8d490b5b9daf53fe4f92a6751773 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 21 Dec 2020 09:07:36 -0300 Subject: [PATCH 1151/1894] tools arch x86: Sync the msr-index.h copy with the kernel sources To pick up the changes in: Fixes: 69372cf01290b958 ("x86/cpu: Add VM page flush MSR availablility as a CPUID feature") That cause these changes in tooling: $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > before $ cp arch/x86/include/asm/msr-index.h tools/arch/x86/include/asm/msr-index.h $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > after $ diff -u before after --- before 2020-12-21 09:09:05.593005003 -0300 +++ after 2020-12-21 09:12:48.436994802 -0300 @@ -21,7 +21,7 @@ [0x0000004f] = "PPIN", [0x00000060] = "LBR_CORE_TO", [0x00000079] = "IA32_UCODE_WRITE", - [0x0000008b] = "IA32_UCODE_REV", + [0x0000008b] = "AMD64_PATCH_LEVEL", [0x0000008C] = "IA32_SGXLEPUBKEYHASH0", [0x0000008D] = "IA32_SGXLEPUBKEYHASH1", [0x0000008E] = "IA32_SGXLEPUBKEYHASH2", @@ -286,6 +286,7 @@ [0xc0010114 - x86_AMD_V_KVM_MSRs_offset] = "VM_CR", [0xc0010115 - x86_AMD_V_KVM_MSRs_offset] = "VM_IGNNE", [0xc0010117 - x86_AMD_V_KVM_MSRs_offset] = "VM_HSAVE_PA", + [0xc001011e - x86_AMD_V_KVM_MSRs_offset] = "AMD64_VM_PAGE_FLUSH", [0xc001011f - x86_AMD_V_KVM_MSRs_offset] = "AMD64_VIRT_SPEC_CTRL", [0xc0010130 - x86_AMD_V_KVM_MSRs_offset] = "AMD64_SEV_ES_GHCB", [0xc0010131 - x86_AMD_V_KVM_MSRs_offset] = "AMD64_SEV", $ The new MSR has a pattern that wasn't matched to avoid a clash with IA32_UCODE_REV, change the regex to prefer the more relevant AMD_ prefixed ones to catch this new AMD64_VM_PAGE_FLUSH MSR. Which causes these parts of tools/perf/ to be rebuilt: CC /tmp/build/perf/trace/beauty/tracepoints/x86_msr.o LD /tmp/build/perf/trace/beauty/tracepoints/perf-in.o LD /tmp/build/perf/trace/beauty/perf-in.o LD /tmp/build/perf/perf-in.o LINK /tmp/build/perf/perf This addresses this perf tools build warning: diff -u tools/arch/x86/include/asm/msr-index.h arch/x86/include/asm/msr-index.h Warning: Kernel ABI header at 'tools/arch/x86/include/asm/msr-index.h' differs from latest version at 'arch/x86/include/asm/msr-index.h' Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paolo Bonzini Cc: Tom Lendacky Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/msr-index.h | 1 + tools/perf/trace/beauty/tracepoints/x86_msr.sh | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 2b5fc9accec48..546d6ecf0a35b 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -472,6 +472,7 @@ #define MSR_AMD64_ICIBSEXTDCTL 0xc001103c #define MSR_AMD64_IBSOPDATA4 0xc001103d #define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */ +#define MSR_AMD64_VM_PAGE_FLUSH 0xc001011e #define MSR_AMD64_SEV_ES_GHCB 0xc0010130 #define MSR_AMD64_SEV 0xc0010131 #define MSR_AMD64_SEV_ENABLED_BIT 0 diff --git a/tools/perf/trace/beauty/tracepoints/x86_msr.sh b/tools/perf/trace/beauty/tracepoints/x86_msr.sh index 831c02cf0586c..27ee1ea1fe941 100755 --- a/tools/perf/trace/beauty/tracepoints/x86_msr.sh +++ b/tools/perf/trace/beauty/tracepoints/x86_msr.sh @@ -15,7 +15,7 @@ x86_msr_index=${arch_x86_header_dir}/msr-index.h printf "static const char *x86_MSRs[] = {\n" regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MSR_([[:alnum:]][[:alnum:]_]+)[[:space:]]+(0x00000[[:xdigit:]]+)[[:space:]]*.*' -egrep $regex ${x86_msr_index} | egrep -v 'MSR_(ATOM|P[46]|AMD64|IA32_TSCDEADLINE|IDT_FCR4)' | \ +egrep $regex ${x86_msr_index} | egrep -v 'MSR_(ATOM|P[46]|IA32_(TSCDEADLINE|UCODE_REV)|IDT_FCR4)' | \ sed -r "s/$regex/\2 \1/g" | sort -n | \ xargs printf "\t[%s] = \"%s\",\n" printf "};\n\n" -- GitLab From 288807fc3a5f19ed77cb8c25342323bbe58a75a1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 21 Dec 2020 09:20:52 -0300 Subject: [PATCH 1152/1894] tools headers UAPI: Sync kvm.h headers with the kernel sources To pick the changes in: fb04a1eddb1a65b6 ("KVM: X86: Implement ring-based dirty memory tracking") That result in these change in tooling: $ tools/perf/trace/beauty/kvm_ioctl.sh > before $ cp include/uapi/linux/kvm.h tools/include/uapi/linux/kvm.h $ cp arch/x86/include/uapi/asm/kvm.h tools/arch/x86/include/uapi/asm/kvm.h $ tools/perf/trace/beauty/kvm_ioctl.sh > after $ diff -u before after --- before 2020-12-21 11:55:45.229737066 -0300 +++ after 2020-12-21 11:55:56.379983393 -0300 @@ -90,6 +90,7 @@ [0xc0] = "CLEAR_DIRTY_LOG", [0xc1] = "GET_SUPPORTED_HV_CPUID", [0xc6] = "X86_SET_MSR_FILTER", + [0xc7] = "RESET_DIRTY_RINGS", [0xe0] = "CREATE_DEVICE", [0xe1] = "SET_DEVICE_ATTR", [0xe2] = "GET_DEVICE_ATTR", $ Now one can use that string in filters when tracing ioctls, etc. And silences this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/kvm.h' differs from latest version at 'include/uapi/linux/kvm.h' diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h Warning: Kernel ABI header at 'tools/arch/x86/include/uapi/asm/kvm.h' differs from latest version at 'arch/x86/include/uapi/asm/kvm.h' diff -u tools/arch/x86/include/uapi/asm/kvm.h arch/x86/include/uapi/asm/kvm.h Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paolo Bonzini Cc: Peter Xu Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/uapi/asm/kvm.h | 1 + tools/include/uapi/linux/kvm.h | 56 ++++++++++++++++++++++++++- 2 files changed, 56 insertions(+), 1 deletion(-) diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 89e5f3d1bba86..8e76d3701db3f 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -12,6 +12,7 @@ #define KVM_PIO_PAGE_OFFSET 1 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2 +#define KVM_DIRTY_LOG_PAGE_OFFSET 64 #define DE_VECTOR 0 #define DB_VECTOR 1 diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index ca41220b40b8b..886802b8ffba3 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -250,6 +250,7 @@ struct kvm_hyperv_exit { #define KVM_EXIT_ARM_NISV 28 #define KVM_EXIT_X86_RDMSR 29 #define KVM_EXIT_X86_WRMSR 30 +#define KVM_EXIT_DIRTY_RING_FULL 31 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -1053,6 +1054,8 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_X86_USER_SPACE_MSR 188 #define KVM_CAP_X86_MSR_FILTER 189 #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190 +#define KVM_CAP_SYS_HYPERV_CPUID 191 +#define KVM_CAP_DIRTY_LOG_RING 192 #ifdef KVM_CAP_IRQ_ROUTING @@ -1511,7 +1514,7 @@ struct kvm_enc_region { /* Available with KVM_CAP_MANUAL_DIRTY_LOG_PROTECT_2 */ #define KVM_CLEAR_DIRTY_LOG _IOWR(KVMIO, 0xc0, struct kvm_clear_dirty_log) -/* Available with KVM_CAP_HYPERV_CPUID */ +/* Available with KVM_CAP_HYPERV_CPUID (vcpu) / KVM_CAP_SYS_HYPERV_CPUID (system) */ #define KVM_GET_SUPPORTED_HV_CPUID _IOWR(KVMIO, 0xc1, struct kvm_cpuid2) /* Available with KVM_CAP_ARM_SVE */ @@ -1557,6 +1560,9 @@ struct kvm_pv_cmd { /* Available with KVM_CAP_X86_MSR_FILTER */ #define KVM_X86_SET_MSR_FILTER _IOW(KVMIO, 0xc6, struct kvm_msr_filter) +/* Available with KVM_CAP_DIRTY_LOG_RING */ +#define KVM_RESET_DIRTY_RINGS _IO(KVMIO, 0xc7) + /* Secure Encrypted Virtualization command */ enum sev_cmd_id { /* Guest initialization commands */ @@ -1710,4 +1716,52 @@ struct kvm_hyperv_eventfd { #define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (1 << 0) #define KVM_DIRTY_LOG_INITIALLY_SET (1 << 1) +/* + * Arch needs to define the macro after implementing the dirty ring + * feature. KVM_DIRTY_LOG_PAGE_OFFSET should be defined as the + * starting page offset of the dirty ring structures. + */ +#ifndef KVM_DIRTY_LOG_PAGE_OFFSET +#define KVM_DIRTY_LOG_PAGE_OFFSET 0 +#endif + +/* + * KVM dirty GFN flags, defined as: + * + * |---------------+---------------+--------------| + * | bit 1 (reset) | bit 0 (dirty) | Status | + * |---------------+---------------+--------------| + * | 0 | 0 | Invalid GFN | + * | 0 | 1 | Dirty GFN | + * | 1 | X | GFN to reset | + * |---------------+---------------+--------------| + * + * Lifecycle of a dirty GFN goes like: + * + * dirtied harvested reset + * 00 -----------> 01 -------------> 1X -------+ + * ^ | + * | | + * +------------------------------------------+ + * + * The userspace program is only responsible for the 01->1X state + * conversion after harvesting an entry. Also, it must not skip any + * dirty bits, so that dirty bits are always harvested in sequence. + */ +#define KVM_DIRTY_GFN_F_DIRTY BIT(0) +#define KVM_DIRTY_GFN_F_RESET BIT(1) +#define KVM_DIRTY_GFN_F_MASK 0x3 + +/* + * KVM dirty rings should be mapped at KVM_DIRTY_LOG_PAGE_OFFSET of + * per-vcpu mmaped regions as an array of struct kvm_dirty_gfn. The + * size of the gfn buffer is decided by the first argument when + * enabling KVM_CAP_DIRTY_LOG_RING. + */ +struct kvm_dirty_gfn { + __u32 flags; + __u32 slot; + __u64 offset; +}; + #endif /* __LINUX_KVM_H */ -- GitLab From cd97448db80e0238a819dc6b733da6ec0173cadd Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 21 Dec 2020 12:51:03 -0300 Subject: [PATCH 1153/1894] tools headers UAPI: Sync KVM's vmx.h header with the kernel sources To pick the changes in: bf0cd88ce363a2de ("KVM: x86: emulate wait-for-SIPI and SIPI-VMExit") That makes 'perf kvm-stat' aware of this new SIPI_SIGNAL exit reason, thus addressing the following perf build warning: Warning: Kernel ABI header at 'tools/arch/x86/include/uapi/asm/vmx.h' differs from latest version at 'arch/x86/include/uapi/asm/vmx.h' diff -u tools/arch/x86/include/uapi/asm/vmx.h arch/x86/include/uapi/asm/vmx.h Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paolo Bonzini Cc: Yadong Qi Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/uapi/asm/vmx.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h index b8ff9e8ac0d51..ada955c5ebb60 100644 --- a/tools/arch/x86/include/uapi/asm/vmx.h +++ b/tools/arch/x86/include/uapi/asm/vmx.h @@ -32,6 +32,7 @@ #define EXIT_REASON_EXTERNAL_INTERRUPT 1 #define EXIT_REASON_TRIPLE_FAULT 2 #define EXIT_REASON_INIT_SIGNAL 3 +#define EXIT_REASON_SIPI_SIGNAL 4 #define EXIT_REASON_INTERRUPT_WINDOW 7 #define EXIT_REASON_NMI_WINDOW 8 @@ -94,6 +95,7 @@ { EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \ { EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \ { EXIT_REASON_INIT_SIGNAL, "INIT_SIGNAL" }, \ + { EXIT_REASON_SIPI_SIGNAL, "SIPI_SIGNAL" }, \ { EXIT_REASON_INTERRUPT_WINDOW, "INTERRUPT_WINDOW" }, \ { EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \ { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \ -- GitLab From 9880e71cbaa8a0e826d8f144704301476b2d6cf9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 21 Dec 2020 12:53:44 -0300 Subject: [PATCH 1154/1894] tools kvm headers: Update KVM headers from the kernel sources To pick the changes from: 8d14797b53f044fd ("KVM: arm64: Move 'struct kvm_arch_memory_slot' out of uapi/") That don't causes any changes in tooling, only addresses this perf build warning: Warning: Kernel ABI header at 'tools/arch/arm64/include/uapi/asm/kvm.h' differs from latest version at 'arch/arm64/include/uapi/asm/kvm.h' diff -u tools/arch/arm64/include/uapi/asm/kvm.h arch/arm64/include/uapi/asm/kvm.h Cc: Marc Zyngier Cc: Will Deacon Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/arm64/include/uapi/asm/kvm.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index 1c17c3a24411d..24223adae150c 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -156,9 +156,6 @@ struct kvm_sync_regs { __u64 device_irq_level; }; -struct kvm_arch_memory_slot { -}; - /* * PMU filter structure. Describe a range of events with a particular * action. To be used with KVM_ARM_VCPU_PMU_V3_FILTER. -- GitLab From b71df82d05b7a38f38c4b1109c57b209b8ed43ff Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 21 Dec 2020 20:04:45 -0300 Subject: [PATCH 1155/1894] tools headers UAPI: Synch KVM's svm.h header with the kernel To pick up the changes from: d1949b93c60504b3 ("KVM: SVM: Add support for CR8 write traps for an SEV-ES guest") 5b51cb13160ae0ba ("KVM: SVM: Add support for CR4 write traps for an SEV-ES guest") f27ad38aac23263c ("KVM: SVM: Add support for CR0 write traps for an SEV-ES guest") 2985afbcdbb1957a ("KVM: SVM: Add support for EFER write traps for an SEV-ES guest") 291bd20d5d88814a ("KVM: SVM: Add initial support for a VMGEXIT VMEXIT") Picking these new SVM exit reasons: + { SVM_EXIT_EFER_WRITE_TRAP, "write_efer_trap" }, \ + { SVM_EXIT_CR0_WRITE_TRAP, "write_cr0_trap" }, \ + { SVM_EXIT_CR4_WRITE_TRAP, "write_cr4_trap" }, \ + { SVM_EXIT_CR8_WRITE_TRAP, "write_cr8_trap" }, \ + { SVM_EXIT_VMGEXIT, "vmgexit" }, \ + { SVM_VMGEXIT_MMIO_READ, "vmgexit_mmio_read" }, \ + { SVM_VMGEXIT_MMIO_WRITE, "vmgexit_mmio_write" }, \ + { SVM_VMGEXIT_NMI_COMPLETE, "vmgexit_nmi_complete" }, \ + { SVM_VMGEXIT_AP_HLT_LOOP, "vmgexit_ap_hlt_loop" }, \ + { SVM_VMGEXIT_AP_JUMP_TABLE, "vmgexit_ap_jump_table" }, \ And address this perf build warning: Warning: Kernel ABI header at 'tools/arch/x86/include/uapi/asm/svm.h' differs from latest version at 'arch/x86/include/uapi/asm/svm.h' diff -u tools/arch/x86/include/uapi/asm/svm.h arch/x86/include/uapi/asm/svm.h Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paolo Bonzini Cc: Tom Lendacky Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/uapi/asm/svm.h | 28 +++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tools/arch/x86/include/uapi/asm/svm.h b/tools/arch/x86/include/uapi/asm/svm.h index f1d8307454e0c..554f75fe013cf 100644 --- a/tools/arch/x86/include/uapi/asm/svm.h +++ b/tools/arch/x86/include/uapi/asm/svm.h @@ -77,10 +77,28 @@ #define SVM_EXIT_MWAIT_COND 0x08c #define SVM_EXIT_XSETBV 0x08d #define SVM_EXIT_RDPRU 0x08e +#define SVM_EXIT_EFER_WRITE_TRAP 0x08f +#define SVM_EXIT_CR0_WRITE_TRAP 0x090 +#define SVM_EXIT_CR1_WRITE_TRAP 0x091 +#define SVM_EXIT_CR2_WRITE_TRAP 0x092 +#define SVM_EXIT_CR3_WRITE_TRAP 0x093 +#define SVM_EXIT_CR4_WRITE_TRAP 0x094 +#define SVM_EXIT_CR5_WRITE_TRAP 0x095 +#define SVM_EXIT_CR6_WRITE_TRAP 0x096 +#define SVM_EXIT_CR7_WRITE_TRAP 0x097 +#define SVM_EXIT_CR8_WRITE_TRAP 0x098 +#define SVM_EXIT_CR9_WRITE_TRAP 0x099 +#define SVM_EXIT_CR10_WRITE_TRAP 0x09a +#define SVM_EXIT_CR11_WRITE_TRAP 0x09b +#define SVM_EXIT_CR12_WRITE_TRAP 0x09c +#define SVM_EXIT_CR13_WRITE_TRAP 0x09d +#define SVM_EXIT_CR14_WRITE_TRAP 0x09e +#define SVM_EXIT_CR15_WRITE_TRAP 0x09f #define SVM_EXIT_INVPCID 0x0a2 #define SVM_EXIT_NPF 0x400 #define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401 #define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402 +#define SVM_EXIT_VMGEXIT 0x403 /* SEV-ES software-defined VMGEXIT events */ #define SVM_VMGEXIT_MMIO_READ 0x80000001 @@ -183,10 +201,20 @@ { SVM_EXIT_MONITOR, "monitor" }, \ { SVM_EXIT_MWAIT, "mwait" }, \ { SVM_EXIT_XSETBV, "xsetbv" }, \ + { SVM_EXIT_EFER_WRITE_TRAP, "write_efer_trap" }, \ + { SVM_EXIT_CR0_WRITE_TRAP, "write_cr0_trap" }, \ + { SVM_EXIT_CR4_WRITE_TRAP, "write_cr4_trap" }, \ + { SVM_EXIT_CR8_WRITE_TRAP, "write_cr8_trap" }, \ { SVM_EXIT_INVPCID, "invpcid" }, \ { SVM_EXIT_NPF, "npf" }, \ { SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \ { SVM_EXIT_AVIC_UNACCELERATED_ACCESS, "avic_unaccelerated_access" }, \ + { SVM_EXIT_VMGEXIT, "vmgexit" }, \ + { SVM_VMGEXIT_MMIO_READ, "vmgexit_mmio_read" }, \ + { SVM_VMGEXIT_MMIO_WRITE, "vmgexit_mmio_write" }, \ + { SVM_VMGEXIT_NMI_COMPLETE, "vmgexit_nmi_complete" }, \ + { SVM_VMGEXIT_AP_HLT_LOOP, "vmgexit_ap_hlt_loop" }, \ + { SVM_VMGEXIT_AP_JUMP_TABLE, "vmgexit_ap_jump_table" }, \ { SVM_EXIT_ERR, "invalid_guest_state" } -- GitLab From 9bad32b2c63c985fc9f04b29186974ad5bb0b74c Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Fri, 18 Dec 2020 15:59:21 +0800 Subject: [PATCH 1156/1894] perf powerpc: Move syscall.tbl check to check-headers.sh It is better to check syscall.tbl for powerpc in check-headers.sh, it is similar with commit c9b51a017065 ("perf tools: Move syscall_64.tbl check into check-headers.sh"). Signed-off-by: Tiezhu Yang Reviewed-by: Naveen N. Rao Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Xuefeng Li Link: http://lore.kernel.org/lkml/1608278364-6733-2-git-send-email-yangtiezhu@loongson.cn Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/powerpc/Makefile | 7 ------- tools/perf/check-headers.sh | 1 + 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile index e58d00d62f021..840ea0e59287d 100644 --- a/tools/perf/arch/powerpc/Makefile +++ b/tools/perf/arch/powerpc/Makefile @@ -14,7 +14,6 @@ PERF_HAVE_JITDUMP := 1 out := $(OUTPUT)arch/powerpc/include/generated/asm header32 := $(out)/syscalls_32.c header64 := $(out)/syscalls_64.c -syskrn := $(srctree)/arch/powerpc/kernel/syscalls/syscall.tbl sysprf := $(srctree)/tools/perf/arch/powerpc/entry/syscalls sysdef := $(sysprf)/syscall.tbl systbl := $(sysprf)/mksyscalltbl @@ -23,15 +22,9 @@ systbl := $(sysprf)/mksyscalltbl _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') $(header64): $(sysdef) $(systbl) - @(test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \ - (diff -B $(sysdef) $(syskrn) >/dev/null) \ - || echo "Warning: Kernel ABI header at '$(sysdef)' differs from latest version at '$(syskrn)'" >&2 )) || true $(Q)$(SHELL) '$(systbl)' '64' $(sysdef) > $@ $(header32): $(sysdef) $(systbl) - @(test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \ - (diff -B $(sysdef) $(syskrn) >/dev/null) \ - || echo "Warning: Kernel ABI header at '$(sysdef)' differs from latest version at '$(syskrn)'" >&2 )) || true $(Q)$(SHELL) '$(systbl)' '32' $(sysdef) > $@ clean:: diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 15ecb1803fb99..3c96fd3a73705 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -144,6 +144,7 @@ check arch/x86/lib/insn.c '-I "^#include [\"<]\(../include/\)*asm/in # diff non-symmetric files check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl +check_2 tools/perf/arch/powerpc/entry/syscalls/syscall.tbl arch/powerpc/kernel/syscalls/syscall.tbl for i in $BEAUTY_FILES; do beauty_check $i -B -- GitLab From 22ffc3f5598d2a51e2da4ea5e07e734715bde782 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Fri, 18 Dec 2020 15:59:22 +0800 Subject: [PATCH 1157/1894] perf s390: Move syscall.tbl check into check-headers.sh It is better to check syscall.tbl for s390 in check-headers.sh, it is similar with commit c9b51a017065 ("perf tools: Move syscall_64.tbl check into check-headers.sh"). Signed-off-by: Tiezhu Yang Reviewed-by: Naveen N. Rao Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Xuefeng Li Link: http://lore.kernel.org/lkml/1608278364-6733-3-git-send-email-yangtiezhu@loongson.cn Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/s390/Makefile | 4 ---- tools/perf/check-headers.sh | 1 + 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/tools/perf/arch/s390/Makefile b/tools/perf/arch/s390/Makefile index 6ac8887be7c94..74bffbea03e20 100644 --- a/tools/perf/arch/s390/Makefile +++ b/tools/perf/arch/s390/Makefile @@ -12,7 +12,6 @@ PERF_HAVE_JITDUMP := 1 out := $(OUTPUT)arch/s390/include/generated/asm header := $(out)/syscalls_64.c -syskrn := $(srctree)/arch/s390/kernel/syscalls/syscall.tbl sysprf := $(srctree)/tools/perf/arch/s390/entry/syscalls sysdef := $(sysprf)/syscall.tbl systbl := $(sysprf)/mksyscalltbl @@ -21,9 +20,6 @@ systbl := $(sysprf)/mksyscalltbl _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') $(header): $(sysdef) $(systbl) - @(test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \ - (diff -B $(sysdef) $(syskrn) >/dev/null) \ - || echo "Warning: Kernel ABI header at '$(sysdef)' differs from latest version at '$(syskrn)'" >&2 )) || true $(Q)$(SHELL) '$(systbl)' $(sysdef) > $@ clean:: diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 3c96fd3a73705..dded93a2bc89a 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -145,6 +145,7 @@ check arch/x86/lib/insn.c '-I "^#include [\"<]\(../include/\)*asm/in # diff non-symmetric files check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl check_2 tools/perf/arch/powerpc/entry/syscalls/syscall.tbl arch/powerpc/kernel/syscalls/syscall.tbl +check_2 tools/perf/arch/s390/entry/syscalls/syscall.tbl arch/s390/kernel/syscalls/syscall.tbl for i in $BEAUTY_FILES; do beauty_check $i -B -- GitLab From c5ef52944a2d80017092cdf6aa474b2f4d596072 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Wed, 23 Dec 2020 13:13:16 -0300 Subject: [PATCH 1158/1894] perf tools: Update powerpc's syscall.tbl copy from the kernel sources This silences the following tools/perf/ build warning: Warning: Kernel ABI header at 'tools/perf/arch/powerpc/entry/syscalls/syscall.tbl' differs from latest version at 'arch/powerpc/kernel/syscalls/syscall.tbl' Just make them same: cp arch/powerpc/kernel/syscalls/syscall.tbl tools/perf/arch/powerpc/entry/syscalls/syscall.tbl Signed-off-by: Tiezhu Yang Reviewed-by: Naveen N. Rao Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Xuefeng Li Link: http://lore.kernel.org/lkml/1608278364-6733-4-git-send-email-yangtiezhu@loongson.cn [ There were updates after Tiezhu's post, so I just updated the copy ] Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/powerpc/entry/syscalls/syscall.tbl | 26 ++++++++++++++----- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index b168364ac0500..f744eb5cba887 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -9,7 +9,9 @@ # 0 nospu restart_syscall sys_restart_syscall 1 nospu exit sys_exit -2 nospu fork ppc_fork +2 32 fork ppc_fork sys_fork +2 64 fork sys_fork +2 spu fork sys_ni_syscall 3 common read sys_read 4 common write sys_write 5 common open sys_open compat_sys_open @@ -158,7 +160,9 @@ 119 32 sigreturn sys_sigreturn compat_sys_sigreturn 119 64 sigreturn sys_ni_syscall 119 spu sigreturn sys_ni_syscall -120 nospu clone ppc_clone +120 32 clone ppc_clone sys_clone +120 64 clone sys_clone +120 spu clone sys_ni_syscall 121 common setdomainname sys_setdomainname 122 common uname sys_newuname 123 common modify_ldt sys_ni_syscall @@ -240,7 +244,9 @@ 186 spu sendfile sys_sendfile64 187 common getpmsg sys_ni_syscall 188 common putpmsg sys_ni_syscall -189 nospu vfork ppc_vfork +189 32 vfork ppc_vfork sys_vfork +189 64 vfork sys_vfork +189 spu vfork sys_ni_syscall 190 common ugetrlimit sys_getrlimit compat_sys_getrlimit 191 common readahead sys_readahead compat_sys_readahead 192 32 mmap2 sys_mmap2 compat_sys_mmap2 @@ -316,8 +322,8 @@ 248 32 clock_nanosleep sys_clock_nanosleep_time32 248 64 clock_nanosleep sys_clock_nanosleep 248 spu clock_nanosleep sys_clock_nanosleep -249 32 swapcontext ppc_swapcontext ppc32_swapcontext -249 64 swapcontext ppc64_swapcontext +249 32 swapcontext ppc_swapcontext compat_sys_swapcontext +249 64 swapcontext sys_swapcontext 249 spu swapcontext sys_ni_syscall 250 common tgkill sys_tgkill 251 32 utimes sys_utimes_time32 @@ -456,7 +462,7 @@ 361 common bpf sys_bpf 362 nospu execveat sys_execveat compat_sys_execveat 363 32 switch_endian sys_ni_syscall -363 64 switch_endian ppc_switch_endian +363 64 switch_endian sys_switch_endian 363 spu switch_endian sys_ni_syscall 364 common userfaultfd sys_userfaultfd 365 common membarrier sys_membarrier @@ -516,6 +522,12 @@ 432 common fsmount sys_fsmount 433 common fspick sys_fspick 434 common pidfd_open sys_pidfd_open -435 nospu clone3 ppc_clone3 +435 32 clone3 ppc_clone3 sys_clone3 +435 64 clone3 sys_clone3 +435 spu clone3 sys_ni_syscall +436 common close_range sys_close_range 437 common openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd +439 common faccessat2 sys_faccessat2 +440 common process_madvise sys_process_madvise +441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 -- GitLab From b27d20ab1c6a1a7738c02419c28287d260ca8036 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Wed, 23 Dec 2020 13:24:35 -0300 Subject: [PATCH 1159/1894] perf tools: Update s390's syscall.tbl copy from the kernel sources This silences the following tools/perf/ build warning: Warning: Kernel ABI header at 'tools/perf/arch/s390/entry/syscalls/syscall.tbl' differs from latest version at 'arch/s390/kernel/syscalls/syscall.tbl' Just make them same: cp arch/s390/kernel/syscalls/syscall.tbl tools/perf/arch/s390/entry/syscalls/syscall.tbl Signed-off-by: Tiezhu Yang Reviewed-by: Naveen N. Rao Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Xuefeng Li Link: http://lore.kernel.org/lkml/1608278364-6733-5-git-send-email-yangtiezhu@loongson.cn [ There were updates after Tiezhu's post, so I just updated the copy ] Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/arch/s390/entry/syscalls/syscall.tbl | 396 ++++++++++-------- 1 file changed, 226 insertions(+), 170 deletions(-) diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index d2fa9647ce252..d443423495e56 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -15,86 +15,86 @@ 5 common open sys_open compat_sys_open 6 common close sys_close sys_close 7 common restart_syscall sys_restart_syscall sys_restart_syscall -8 common creat sys_creat compat_sys_creat -9 common link sys_link compat_sys_link -10 common unlink sys_unlink compat_sys_unlink +8 common creat sys_creat sys_creat +9 common link sys_link sys_link +10 common unlink sys_unlink sys_unlink 11 common execve sys_execve compat_sys_execve -12 common chdir sys_chdir compat_sys_chdir -13 32 time - compat_sys_time -14 common mknod sys_mknod compat_sys_mknod -15 common chmod sys_chmod compat_sys_chmod -16 32 lchown - compat_sys_s390_lchown16 +12 common chdir sys_chdir sys_chdir +13 32 time - sys_time32 +14 common mknod sys_mknod sys_mknod +15 common chmod sys_chmod sys_chmod +16 32 lchown - sys_lchown16 19 common lseek sys_lseek compat_sys_lseek 20 common getpid sys_getpid sys_getpid -21 common mount sys_mount -22 common umount sys_oldumount compat_sys_oldumount -23 32 setuid - compat_sys_s390_setuid16 -24 32 getuid - compat_sys_s390_getuid16 -25 32 stime - compat_sys_stime +21 common mount sys_mount sys_mount +22 common umount sys_oldumount sys_oldumount +23 32 setuid - sys_setuid16 +24 32 getuid - sys_getuid16 +25 32 stime - sys_stime32 26 common ptrace sys_ptrace compat_sys_ptrace 27 common alarm sys_alarm sys_alarm 29 common pause sys_pause sys_pause -30 common utime sys_utime compat_sys_utime -33 common access sys_access compat_sys_access +30 common utime sys_utime sys_utime32 +33 common access sys_access sys_access 34 common nice sys_nice sys_nice 36 common sync sys_sync sys_sync 37 common kill sys_kill sys_kill -38 common rename sys_rename compat_sys_rename -39 common mkdir sys_mkdir compat_sys_mkdir -40 common rmdir sys_rmdir compat_sys_rmdir +38 common rename sys_rename sys_rename +39 common mkdir sys_mkdir sys_mkdir +40 common rmdir sys_rmdir sys_rmdir 41 common dup sys_dup sys_dup -42 common pipe sys_pipe compat_sys_pipe +42 common pipe sys_pipe sys_pipe 43 common times sys_times compat_sys_times -45 common brk sys_brk compat_sys_brk -46 32 setgid - compat_sys_s390_setgid16 -47 32 getgid - compat_sys_s390_getgid16 -48 common signal sys_signal compat_sys_signal -49 32 geteuid - compat_sys_s390_geteuid16 -50 32 getegid - compat_sys_s390_getegid16 -51 common acct sys_acct compat_sys_acct -52 common umount2 sys_umount compat_sys_umount +45 common brk sys_brk sys_brk +46 32 setgid - sys_setgid16 +47 32 getgid - sys_getgid16 +48 common signal sys_signal sys_signal +49 32 geteuid - sys_geteuid16 +50 32 getegid - sys_getegid16 +51 common acct sys_acct sys_acct +52 common umount2 sys_umount sys_umount 54 common ioctl sys_ioctl compat_sys_ioctl 55 common fcntl sys_fcntl compat_sys_fcntl 57 common setpgid sys_setpgid sys_setpgid 60 common umask sys_umask sys_umask -61 common chroot sys_chroot compat_sys_chroot +61 common chroot sys_chroot sys_chroot 62 common ustat sys_ustat compat_sys_ustat 63 common dup2 sys_dup2 sys_dup2 64 common getppid sys_getppid sys_getppid 65 common getpgrp sys_getpgrp sys_getpgrp 66 common setsid sys_setsid sys_setsid 67 common sigaction sys_sigaction compat_sys_sigaction -70 32 setreuid - compat_sys_s390_setreuid16 -71 32 setregid - compat_sys_s390_setregid16 -72 common sigsuspend sys_sigsuspend compat_sys_sigsuspend +70 32 setreuid - sys_setreuid16 +71 32 setregid - sys_setregid16 +72 common sigsuspend sys_sigsuspend sys_sigsuspend 73 common sigpending sys_sigpending compat_sys_sigpending -74 common sethostname sys_sethostname compat_sys_sethostname +74 common sethostname sys_sethostname sys_sethostname 75 common setrlimit sys_setrlimit compat_sys_setrlimit 76 32 getrlimit - compat_sys_old_getrlimit 77 common getrusage sys_getrusage compat_sys_getrusage 78 common gettimeofday sys_gettimeofday compat_sys_gettimeofday 79 common settimeofday sys_settimeofday compat_sys_settimeofday -80 32 getgroups - compat_sys_s390_getgroups16 -81 32 setgroups - compat_sys_s390_setgroups16 -83 common symlink sys_symlink compat_sys_symlink -85 common readlink sys_readlink compat_sys_readlink -86 common uselib sys_uselib compat_sys_uselib -87 common swapon sys_swapon compat_sys_swapon -88 common reboot sys_reboot compat_sys_reboot +80 32 getgroups - sys_getgroups16 +81 32 setgroups - sys_setgroups16 +83 common symlink sys_symlink sys_symlink +85 common readlink sys_readlink sys_readlink +86 common uselib sys_uselib sys_uselib +87 common swapon sys_swapon sys_swapon +88 common reboot sys_reboot sys_reboot 89 common readdir - compat_sys_old_readdir 90 common mmap sys_old_mmap compat_sys_s390_old_mmap -91 common munmap sys_munmap compat_sys_munmap +91 common munmap sys_munmap sys_munmap 92 common truncate sys_truncate compat_sys_truncate 93 common ftruncate sys_ftruncate compat_sys_ftruncate 94 common fchmod sys_fchmod sys_fchmod -95 32 fchown - compat_sys_s390_fchown16 +95 32 fchown - sys_fchown16 96 common getpriority sys_getpriority sys_getpriority 97 common setpriority sys_setpriority sys_setpriority 99 common statfs sys_statfs compat_sys_statfs 100 common fstatfs sys_fstatfs compat_sys_fstatfs 101 32 ioperm - - 102 common socketcall sys_socketcall compat_sys_socketcall -103 common syslog sys_syslog compat_sys_syslog +103 common syslog sys_syslog sys_syslog 104 common setitimer sys_setitimer compat_sys_setitimer 105 common getitimer sys_getitimer compat_sys_getitimer 106 common stat sys_newstat compat_sys_newstat @@ -104,76 +104,76 @@ 111 common vhangup sys_vhangup sys_vhangup 112 common idle - - 114 common wait4 sys_wait4 compat_sys_wait4 -115 common swapoff sys_swapoff compat_sys_swapoff +115 common swapoff sys_swapoff sys_swapoff 116 common sysinfo sys_sysinfo compat_sys_sysinfo 117 common ipc sys_s390_ipc compat_sys_s390_ipc 118 common fsync sys_fsync sys_fsync 119 common sigreturn sys_sigreturn compat_sys_sigreturn -120 common clone sys_clone compat_sys_clone -121 common setdomainname sys_setdomainname compat_sys_setdomainname -122 common uname sys_newuname compat_sys_newuname -124 common adjtimex sys_adjtimex compat_sys_adjtimex -125 common mprotect sys_mprotect compat_sys_mprotect +120 common clone sys_clone sys_clone +121 common setdomainname sys_setdomainname sys_setdomainname +122 common uname sys_newuname sys_newuname +124 common adjtimex sys_adjtimex sys_adjtimex_time32 +125 common mprotect sys_mprotect sys_mprotect 126 common sigprocmask sys_sigprocmask compat_sys_sigprocmask 127 common create_module - - -128 common init_module sys_init_module compat_sys_init_module -129 common delete_module sys_delete_module compat_sys_delete_module +128 common init_module sys_init_module sys_init_module +129 common delete_module sys_delete_module sys_delete_module 130 common get_kernel_syms - - -131 common quotactl sys_quotactl compat_sys_quotactl +131 common quotactl sys_quotactl sys_quotactl 132 common getpgid sys_getpgid sys_getpgid 133 common fchdir sys_fchdir sys_fchdir -134 common bdflush sys_bdflush compat_sys_bdflush -135 common sysfs sys_sysfs compat_sys_sysfs +134 common bdflush sys_bdflush sys_bdflush +135 common sysfs sys_sysfs sys_sysfs 136 common personality sys_s390_personality sys_s390_personality 137 common afs_syscall - - -138 32 setfsuid - compat_sys_s390_setfsuid16 -139 32 setfsgid - compat_sys_s390_setfsgid16 -140 32 _llseek - compat_sys_llseek +138 32 setfsuid - sys_setfsuid16 +139 32 setfsgid - sys_setfsgid16 +140 32 _llseek - sys_llseek 141 common getdents sys_getdents compat_sys_getdents 142 32 _newselect - compat_sys_select 142 64 select sys_select - 143 common flock sys_flock sys_flock -144 common msync sys_msync compat_sys_msync -145 common readv sys_readv -146 common writev sys_writev +144 common msync sys_msync sys_msync +145 common readv sys_readv sys_readv +146 common writev sys_writev sys_writev 147 common getsid sys_getsid sys_getsid 148 common fdatasync sys_fdatasync sys_fdatasync 149 common _sysctl - - -150 common mlock sys_mlock compat_sys_mlock -151 common munlock sys_munlock compat_sys_munlock +150 common mlock sys_mlock sys_mlock +151 common munlock sys_munlock sys_munlock 152 common mlockall sys_mlockall sys_mlockall 153 common munlockall sys_munlockall sys_munlockall -154 common sched_setparam sys_sched_setparam compat_sys_sched_setparam -155 common sched_getparam sys_sched_getparam compat_sys_sched_getparam -156 common sched_setscheduler sys_sched_setscheduler compat_sys_sched_setscheduler +154 common sched_setparam sys_sched_setparam sys_sched_setparam +155 common sched_getparam sys_sched_getparam sys_sched_getparam +156 common sched_setscheduler sys_sched_setscheduler sys_sched_setscheduler 157 common sched_getscheduler sys_sched_getscheduler sys_sched_getscheduler 158 common sched_yield sys_sched_yield sys_sched_yield 159 common sched_get_priority_max sys_sched_get_priority_max sys_sched_get_priority_max 160 common sched_get_priority_min sys_sched_get_priority_min sys_sched_get_priority_min -161 common sched_rr_get_interval sys_sched_rr_get_interval compat_sys_sched_rr_get_interval -162 common nanosleep sys_nanosleep compat_sys_nanosleep -163 common mremap sys_mremap compat_sys_mremap -164 32 setresuid - compat_sys_s390_setresuid16 -165 32 getresuid - compat_sys_s390_getresuid16 +161 common sched_rr_get_interval sys_sched_rr_get_interval sys_sched_rr_get_interval_time32 +162 common nanosleep sys_nanosleep sys_nanosleep_time32 +163 common mremap sys_mremap sys_mremap +164 32 setresuid - sys_setresuid16 +165 32 getresuid - sys_getresuid16 167 common query_module - - -168 common poll sys_poll compat_sys_poll +168 common poll sys_poll sys_poll 169 common nfsservctl - - -170 32 setresgid - compat_sys_s390_setresgid16 -171 32 getresgid - compat_sys_s390_getresgid16 -172 common prctl sys_prctl compat_sys_prctl +170 32 setresgid - sys_setresgid16 +171 32 getresgid - sys_getresgid16 +172 common prctl sys_prctl sys_prctl 173 common rt_sigreturn sys_rt_sigreturn compat_sys_rt_sigreturn 174 common rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction 175 common rt_sigprocmask sys_rt_sigprocmask compat_sys_rt_sigprocmask 176 common rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending -177 common rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait +177 common rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time32 178 common rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo 179 common rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend 180 common pread64 sys_pread64 compat_sys_s390_pread64 181 common pwrite64 sys_pwrite64 compat_sys_s390_pwrite64 -182 32 chown - compat_sys_s390_chown16 -183 common getcwd sys_getcwd compat_sys_getcwd -184 common capget sys_capget compat_sys_capget -185 common capset sys_capset compat_sys_capset +182 32 chown - sys_chown16 +183 common getcwd sys_getcwd sys_getcwd +184 common capget sys_capget sys_capget +185 common capset sys_capset sys_capset 186 common sigaltstack sys_sigaltstack compat_sys_sigaltstack 187 common sendfile sys_sendfile64 compat_sys_sendfile 188 common getpmsg - - @@ -187,7 +187,7 @@ 195 32 stat64 - compat_sys_s390_stat64 196 32 lstat64 - compat_sys_s390_lstat64 197 32 fstat64 - compat_sys_s390_fstat64 -198 32 lchown32 - compat_sys_lchown +198 32 lchown32 - sys_lchown 198 64 lchown sys_lchown - 199 32 getuid32 - sys_getuid 199 64 getuid sys_getuid - @@ -201,21 +201,21 @@ 203 64 setreuid sys_setreuid - 204 32 setregid32 - sys_setregid 204 64 setregid sys_setregid - -205 32 getgroups32 - compat_sys_getgroups +205 32 getgroups32 - sys_getgroups 205 64 getgroups sys_getgroups - -206 32 setgroups32 - compat_sys_setgroups +206 32 setgroups32 - sys_setgroups 206 64 setgroups sys_setgroups - 207 32 fchown32 - sys_fchown 207 64 fchown sys_fchown - 208 32 setresuid32 - sys_setresuid 208 64 setresuid sys_setresuid - -209 32 getresuid32 - compat_sys_getresuid +209 32 getresuid32 - sys_getresuid 209 64 getresuid sys_getresuid - 210 32 setresgid32 - sys_setresgid 210 64 setresgid sys_setresgid - -211 32 getresgid32 - compat_sys_getresgid +211 32 getresgid32 - sys_getresgid 211 64 getresgid sys_getresgid - -212 32 chown32 - compat_sys_chown +212 32 chown32 - sys_chown 212 64 chown sys_chown - 213 32 setuid32 - sys_setuid 213 64 setuid sys_setuid - @@ -225,166 +225,222 @@ 215 64 setfsuid sys_setfsuid - 216 32 setfsgid32 - sys_setfsgid 216 64 setfsgid sys_setfsgid - -217 common pivot_root sys_pivot_root compat_sys_pivot_root -218 common mincore sys_mincore compat_sys_mincore -219 common madvise sys_madvise compat_sys_madvise -220 common getdents64 sys_getdents64 compat_sys_getdents64 +217 common pivot_root sys_pivot_root sys_pivot_root +218 common mincore sys_mincore sys_mincore +219 common madvise sys_madvise sys_madvise +220 common getdents64 sys_getdents64 sys_getdents64 221 32 fcntl64 - compat_sys_fcntl64 222 common readahead sys_readahead compat_sys_s390_readahead 223 32 sendfile64 - compat_sys_sendfile64 -224 common setxattr sys_setxattr compat_sys_setxattr -225 common lsetxattr sys_lsetxattr compat_sys_lsetxattr -226 common fsetxattr sys_fsetxattr compat_sys_fsetxattr -227 common getxattr sys_getxattr compat_sys_getxattr -228 common lgetxattr sys_lgetxattr compat_sys_lgetxattr -229 common fgetxattr sys_fgetxattr compat_sys_fgetxattr -230 common listxattr sys_listxattr compat_sys_listxattr -231 common llistxattr sys_llistxattr compat_sys_llistxattr -232 common flistxattr sys_flistxattr compat_sys_flistxattr -233 common removexattr sys_removexattr compat_sys_removexattr -234 common lremovexattr sys_lremovexattr compat_sys_lremovexattr -235 common fremovexattr sys_fremovexattr compat_sys_fremovexattr +224 common setxattr sys_setxattr sys_setxattr +225 common lsetxattr sys_lsetxattr sys_lsetxattr +226 common fsetxattr sys_fsetxattr sys_fsetxattr +227 common getxattr sys_getxattr sys_getxattr +228 common lgetxattr sys_lgetxattr sys_lgetxattr +229 common fgetxattr sys_fgetxattr sys_fgetxattr +230 common listxattr sys_listxattr sys_listxattr +231 common llistxattr sys_llistxattr sys_llistxattr +232 common flistxattr sys_flistxattr sys_flistxattr +233 common removexattr sys_removexattr sys_removexattr +234 common lremovexattr sys_lremovexattr sys_lremovexattr +235 common fremovexattr sys_fremovexattr sys_fremovexattr 236 common gettid sys_gettid sys_gettid 237 common tkill sys_tkill sys_tkill -238 common futex sys_futex compat_sys_futex +238 common futex sys_futex sys_futex_time32 239 common sched_setaffinity sys_sched_setaffinity compat_sys_sched_setaffinity 240 common sched_getaffinity sys_sched_getaffinity compat_sys_sched_getaffinity 241 common tgkill sys_tgkill sys_tgkill 243 common io_setup sys_io_setup compat_sys_io_setup -244 common io_destroy sys_io_destroy compat_sys_io_destroy -245 common io_getevents sys_io_getevents compat_sys_io_getevents +244 common io_destroy sys_io_destroy sys_io_destroy +245 common io_getevents sys_io_getevents sys_io_getevents_time32 246 common io_submit sys_io_submit compat_sys_io_submit -247 common io_cancel sys_io_cancel compat_sys_io_cancel +247 common io_cancel sys_io_cancel sys_io_cancel 248 common exit_group sys_exit_group sys_exit_group 249 common epoll_create sys_epoll_create sys_epoll_create -250 common epoll_ctl sys_epoll_ctl compat_sys_epoll_ctl -251 common epoll_wait sys_epoll_wait compat_sys_epoll_wait -252 common set_tid_address sys_set_tid_address compat_sys_set_tid_address +250 common epoll_ctl sys_epoll_ctl sys_epoll_ctl +251 common epoll_wait sys_epoll_wait sys_epoll_wait +252 common set_tid_address sys_set_tid_address sys_set_tid_address 253 common fadvise64 sys_fadvise64_64 compat_sys_s390_fadvise64 254 common timer_create sys_timer_create compat_sys_timer_create -255 common timer_settime sys_timer_settime compat_sys_timer_settime -256 common timer_gettime sys_timer_gettime compat_sys_timer_gettime +255 common timer_settime sys_timer_settime sys_timer_settime32 +256 common timer_gettime sys_timer_gettime sys_timer_gettime32 257 common timer_getoverrun sys_timer_getoverrun sys_timer_getoverrun 258 common timer_delete sys_timer_delete sys_timer_delete -259 common clock_settime sys_clock_settime compat_sys_clock_settime -260 common clock_gettime sys_clock_gettime compat_sys_clock_gettime -261 common clock_getres sys_clock_getres compat_sys_clock_getres -262 common clock_nanosleep sys_clock_nanosleep compat_sys_clock_nanosleep +259 common clock_settime sys_clock_settime sys_clock_settime32 +260 common clock_gettime sys_clock_gettime sys_clock_gettime32 +261 common clock_getres sys_clock_getres sys_clock_getres_time32 +262 common clock_nanosleep sys_clock_nanosleep sys_clock_nanosleep_time32 264 32 fadvise64_64 - compat_sys_s390_fadvise64_64 265 common statfs64 sys_statfs64 compat_sys_statfs64 266 common fstatfs64 sys_fstatfs64 compat_sys_fstatfs64 -267 common remap_file_pages sys_remap_file_pages compat_sys_remap_file_pages +267 common remap_file_pages sys_remap_file_pages sys_remap_file_pages 268 common mbind sys_mbind compat_sys_mbind 269 common get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy 270 common set_mempolicy sys_set_mempolicy compat_sys_set_mempolicy 271 common mq_open sys_mq_open compat_sys_mq_open -272 common mq_unlink sys_mq_unlink compat_sys_mq_unlink -273 common mq_timedsend sys_mq_timedsend compat_sys_mq_timedsend -274 common mq_timedreceive sys_mq_timedreceive compat_sys_mq_timedreceive +272 common mq_unlink sys_mq_unlink sys_mq_unlink +273 common mq_timedsend sys_mq_timedsend sys_mq_timedsend_time32 +274 common mq_timedreceive sys_mq_timedreceive sys_mq_timedreceive_time32 275 common mq_notify sys_mq_notify compat_sys_mq_notify 276 common mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr 277 common kexec_load sys_kexec_load compat_sys_kexec_load -278 common add_key sys_add_key compat_sys_add_key -279 common request_key sys_request_key compat_sys_request_key +278 common add_key sys_add_key sys_add_key +279 common request_key sys_request_key sys_request_key 280 common keyctl sys_keyctl compat_sys_keyctl 281 common waitid sys_waitid compat_sys_waitid 282 common ioprio_set sys_ioprio_set sys_ioprio_set 283 common ioprio_get sys_ioprio_get sys_ioprio_get 284 common inotify_init sys_inotify_init sys_inotify_init -285 common inotify_add_watch sys_inotify_add_watch compat_sys_inotify_add_watch +285 common inotify_add_watch sys_inotify_add_watch sys_inotify_add_watch 286 common inotify_rm_watch sys_inotify_rm_watch sys_inotify_rm_watch 287 common migrate_pages sys_migrate_pages compat_sys_migrate_pages 288 common openat sys_openat compat_sys_openat -289 common mkdirat sys_mkdirat compat_sys_mkdirat -290 common mknodat sys_mknodat compat_sys_mknodat -291 common fchownat sys_fchownat compat_sys_fchownat -292 common futimesat sys_futimesat compat_sys_futimesat +289 common mkdirat sys_mkdirat sys_mkdirat +290 common mknodat sys_mknodat sys_mknodat +291 common fchownat sys_fchownat sys_fchownat +292 common futimesat sys_futimesat sys_futimesat_time32 293 32 fstatat64 - compat_sys_s390_fstatat64 293 64 newfstatat sys_newfstatat - -294 common unlinkat sys_unlinkat compat_sys_unlinkat -295 common renameat sys_renameat compat_sys_renameat -296 common linkat sys_linkat compat_sys_linkat -297 common symlinkat sys_symlinkat compat_sys_symlinkat -298 common readlinkat sys_readlinkat compat_sys_readlinkat -299 common fchmodat sys_fchmodat compat_sys_fchmodat -300 common faccessat sys_faccessat compat_sys_faccessat -301 common pselect6 sys_pselect6 compat_sys_pselect6 -302 common ppoll sys_ppoll compat_sys_ppoll -303 common unshare sys_unshare compat_sys_unshare +294 common unlinkat sys_unlinkat sys_unlinkat +295 common renameat sys_renameat sys_renameat +296 common linkat sys_linkat sys_linkat +297 common symlinkat sys_symlinkat sys_symlinkat +298 common readlinkat sys_readlinkat sys_readlinkat +299 common fchmodat sys_fchmodat sys_fchmodat +300 common faccessat sys_faccessat sys_faccessat +301 common pselect6 sys_pselect6 compat_sys_pselect6_time32 +302 common ppoll sys_ppoll compat_sys_ppoll_time32 +303 common unshare sys_unshare sys_unshare 304 common set_robust_list sys_set_robust_list compat_sys_set_robust_list 305 common get_robust_list sys_get_robust_list compat_sys_get_robust_list -306 common splice sys_splice compat_sys_splice +306 common splice sys_splice sys_splice 307 common sync_file_range sys_sync_file_range compat_sys_s390_sync_file_range -308 common tee sys_tee compat_sys_tee +308 common tee sys_tee sys_tee 309 common vmsplice sys_vmsplice sys_vmsplice 310 common move_pages sys_move_pages compat_sys_move_pages -311 common getcpu sys_getcpu compat_sys_getcpu +311 common getcpu sys_getcpu sys_getcpu 312 common epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait -313 common utimes sys_utimes compat_sys_utimes +313 common utimes sys_utimes sys_utimes_time32 314 common fallocate sys_fallocate compat_sys_s390_fallocate -315 common utimensat sys_utimensat compat_sys_utimensat +315 common utimensat sys_utimensat sys_utimensat_time32 316 common signalfd sys_signalfd compat_sys_signalfd 317 common timerfd - - 318 common eventfd sys_eventfd sys_eventfd 319 common timerfd_create sys_timerfd_create sys_timerfd_create -320 common timerfd_settime sys_timerfd_settime compat_sys_timerfd_settime -321 common timerfd_gettime sys_timerfd_gettime compat_sys_timerfd_gettime +320 common timerfd_settime sys_timerfd_settime sys_timerfd_settime32 +321 common timerfd_gettime sys_timerfd_gettime sys_timerfd_gettime32 322 common signalfd4 sys_signalfd4 compat_sys_signalfd4 323 common eventfd2 sys_eventfd2 sys_eventfd2 324 common inotify_init1 sys_inotify_init1 sys_inotify_init1 -325 common pipe2 sys_pipe2 compat_sys_pipe2 +325 common pipe2 sys_pipe2 sys_pipe2 326 common dup3 sys_dup3 sys_dup3 327 common epoll_create1 sys_epoll_create1 sys_epoll_create1 328 common preadv sys_preadv compat_sys_preadv 329 common pwritev sys_pwritev compat_sys_pwritev 330 common rt_tgsigqueueinfo sys_rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo -331 common perf_event_open sys_perf_event_open compat_sys_perf_event_open +331 common perf_event_open sys_perf_event_open sys_perf_event_open 332 common fanotify_init sys_fanotify_init sys_fanotify_init 333 common fanotify_mark sys_fanotify_mark compat_sys_fanotify_mark -334 common prlimit64 sys_prlimit64 compat_sys_prlimit64 -335 common name_to_handle_at sys_name_to_handle_at compat_sys_name_to_handle_at +334 common prlimit64 sys_prlimit64 sys_prlimit64 +335 common name_to_handle_at sys_name_to_handle_at sys_name_to_handle_at 336 common open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at -337 common clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime +337 common clock_adjtime sys_clock_adjtime sys_clock_adjtime32 338 common syncfs sys_syncfs sys_syncfs 339 common setns sys_setns sys_setns 340 common process_vm_readv sys_process_vm_readv sys_process_vm_readv 341 common process_vm_writev sys_process_vm_writev sys_process_vm_writev 342 common s390_runtime_instr sys_s390_runtime_instr sys_s390_runtime_instr -343 common kcmp sys_kcmp compat_sys_kcmp -344 common finit_module sys_finit_module compat_sys_finit_module -345 common sched_setattr sys_sched_setattr compat_sys_sched_setattr -346 common sched_getattr sys_sched_getattr compat_sys_sched_getattr -347 common renameat2 sys_renameat2 compat_sys_renameat2 -348 common seccomp sys_seccomp compat_sys_seccomp -349 common getrandom sys_getrandom compat_sys_getrandom -350 common memfd_create sys_memfd_create compat_sys_memfd_create -351 common bpf sys_bpf compat_sys_bpf -352 common s390_pci_mmio_write sys_s390_pci_mmio_write compat_sys_s390_pci_mmio_write -353 common s390_pci_mmio_read sys_s390_pci_mmio_read compat_sys_s390_pci_mmio_read +343 common kcmp sys_kcmp sys_kcmp +344 common finit_module sys_finit_module sys_finit_module +345 common sched_setattr sys_sched_setattr sys_sched_setattr +346 common sched_getattr sys_sched_getattr sys_sched_getattr +347 common renameat2 sys_renameat2 sys_renameat2 +348 common seccomp sys_seccomp sys_seccomp +349 common getrandom sys_getrandom sys_getrandom +350 common memfd_create sys_memfd_create sys_memfd_create +351 common bpf sys_bpf sys_bpf +352 common s390_pci_mmio_write sys_s390_pci_mmio_write sys_s390_pci_mmio_write +353 common s390_pci_mmio_read sys_s390_pci_mmio_read sys_s390_pci_mmio_read 354 common execveat sys_execveat compat_sys_execveat 355 common userfaultfd sys_userfaultfd sys_userfaultfd 356 common membarrier sys_membarrier sys_membarrier -357 common recvmmsg sys_recvmmsg compat_sys_recvmmsg +357 common recvmmsg sys_recvmmsg compat_sys_recvmmsg_time32 358 common sendmmsg sys_sendmmsg compat_sys_sendmmsg 359 common socket sys_socket sys_socket -360 common socketpair sys_socketpair compat_sys_socketpair -361 common bind sys_bind compat_sys_bind -362 common connect sys_connect compat_sys_connect +360 common socketpair sys_socketpair sys_socketpair +361 common bind sys_bind sys_bind +362 common connect sys_connect sys_connect 363 common listen sys_listen sys_listen -364 common accept4 sys_accept4 compat_sys_accept4 +364 common accept4 sys_accept4 sys_accept4 365 common getsockopt sys_getsockopt sys_getsockopt 366 common setsockopt sys_setsockopt sys_setsockopt -367 common getsockname sys_getsockname compat_sys_getsockname -368 common getpeername sys_getpeername compat_sys_getpeername -369 common sendto sys_sendto compat_sys_sendto +367 common getsockname sys_getsockname sys_getsockname +368 common getpeername sys_getpeername sys_getpeername +369 common sendto sys_sendto sys_sendto 370 common sendmsg sys_sendmsg compat_sys_sendmsg 371 common recvfrom sys_recvfrom compat_sys_recvfrom 372 common recvmsg sys_recvmsg compat_sys_recvmsg 373 common shutdown sys_shutdown sys_shutdown -374 common mlock2 sys_mlock2 compat_sys_mlock2 -375 common copy_file_range sys_copy_file_range compat_sys_copy_file_range +374 common mlock2 sys_mlock2 sys_mlock2 +375 common copy_file_range sys_copy_file_range sys_copy_file_range 376 common preadv2 sys_preadv2 compat_sys_preadv2 377 common pwritev2 sys_pwritev2 compat_sys_pwritev2 -378 common s390_guarded_storage sys_s390_guarded_storage compat_sys_s390_guarded_storage -379 common statx sys_statx compat_sys_statx -380 common s390_sthyi sys_s390_sthyi compat_sys_s390_sthyi +378 common s390_guarded_storage sys_s390_guarded_storage sys_s390_guarded_storage +379 common statx sys_statx sys_statx +380 common s390_sthyi sys_s390_sthyi sys_s390_sthyi +381 common kexec_file_load sys_kexec_file_load sys_kexec_file_load +382 common io_pgetevents sys_io_pgetevents compat_sys_io_pgetevents +383 common rseq sys_rseq sys_rseq +384 common pkey_mprotect sys_pkey_mprotect sys_pkey_mprotect +385 common pkey_alloc sys_pkey_alloc sys_pkey_alloc +386 common pkey_free sys_pkey_free sys_pkey_free +# room for arch specific syscalls +392 64 semtimedop sys_semtimedop - +393 common semget sys_semget sys_semget +394 common semctl sys_semctl compat_sys_semctl +395 common shmget sys_shmget sys_shmget +396 common shmctl sys_shmctl compat_sys_shmctl +397 common shmat sys_shmat compat_sys_shmat +398 common shmdt sys_shmdt sys_shmdt +399 common msgget sys_msgget sys_msgget +400 common msgsnd sys_msgsnd compat_sys_msgsnd +401 common msgrcv sys_msgrcv compat_sys_msgrcv +402 common msgctl sys_msgctl compat_sys_msgctl +403 32 clock_gettime64 - sys_clock_gettime +404 32 clock_settime64 - sys_clock_settime +405 32 clock_adjtime64 - sys_clock_adjtime +406 32 clock_getres_time64 - sys_clock_getres +407 32 clock_nanosleep_time64 - sys_clock_nanosleep +408 32 timer_gettime64 - sys_timer_gettime +409 32 timer_settime64 - sys_timer_settime +410 32 timerfd_gettime64 - sys_timerfd_gettime +411 32 timerfd_settime64 - sys_timerfd_settime +412 32 utimensat_time64 - sys_utimensat +413 32 pselect6_time64 - compat_sys_pselect6_time64 +414 32 ppoll_time64 - compat_sys_ppoll_time64 +416 32 io_pgetevents_time64 - sys_io_pgetevents +417 32 recvmmsg_time64 - compat_sys_recvmmsg_time64 +418 32 mq_timedsend_time64 - sys_mq_timedsend +419 32 mq_timedreceive_time64 - sys_mq_timedreceive +420 32 semtimedop_time64 - sys_semtimedop +421 32 rt_sigtimedwait_time64 - compat_sys_rt_sigtimedwait_time64 +422 32 futex_time64 - sys_futex +423 32 sched_rr_get_interval_time64 - sys_sched_rr_get_interval +424 common pidfd_send_signal sys_pidfd_send_signal sys_pidfd_send_signal +425 common io_uring_setup sys_io_uring_setup sys_io_uring_setup +426 common io_uring_enter sys_io_uring_enter sys_io_uring_enter +427 common io_uring_register sys_io_uring_register sys_io_uring_register +428 common open_tree sys_open_tree sys_open_tree +429 common move_mount sys_move_mount sys_move_mount +430 common fsopen sys_fsopen sys_fsopen +431 common fsconfig sys_fsconfig sys_fsconfig +432 common fsmount sys_fsmount sys_fsmount +433 common fspick sys_fspick sys_fspick +434 common pidfd_open sys_pidfd_open sys_pidfd_open +435 common clone3 sys_clone3 sys_clone3 +436 common close_range sys_close_range sys_close_range +437 common openat2 sys_openat2 sys_openat2 +438 common pidfd_getfd sys_pidfd_getfd sys_pidfd_getfd +439 common faccessat2 sys_faccessat2 sys_faccessat2 +440 common process_madvise sys_process_madvise sys_process_madvise +441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 -- GitLab From 23331eeb731a503aaa74d167055eeedc2073ff09 Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 26 Nov 2020 16:13:17 +0200 Subject: [PATCH 1160/1894] perf tests: Improve topology test to check all aggregation types Improve the topology test to check all aggregation types. This is to lock down the behaviour before 'id' is changed into a struct in later commits. Committer testing: $ perf test topology 41: Session topology: Ok $ $ perf test -v topology 41: Session topology: --- start --- test child forked, pid 965552 templ file: /tmp/perf-test-mO7NtI Problems creating module maps, continuing anyway... CPU 0, core 0, socket 0 CPU 1, core 1, socket 0 CPU 2, core 2, socket 0 CPU 3, core 4, socket 0 CPU 4, core 5, socket 0 CPU 5, core 6, socket 0 CPU 6, core 8, socket 0 CPU 7, core 9, socket 0 CPU 8, core 10, socket 0 CPU 9, core 12, socket 0 CPU 10, core 13, socket 0 CPU 11, core 14, socket 0 CPU 12, core 0, socket 0 CPU 13, core 1, socket 0 CPU 14, core 2, socket 0 CPU 15, core 4, socket 0 CPU 16, core 5, socket 0 CPU 17, core 6, socket 0 CPU 18, core 8, socket 0 CPU 19, core 9, socket 0 CPU 20, core 10, socket 0 CPU 21, core 12, socket 0 CPU 22, core 13, socket 0 CPU 23, core 14, socket 0 test child finished with 0 ---- end ---- Session topology: Ok $ Signed-off-by: James Clark Acked-by: Namhyung Kim Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Tested-by: John Garry Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Mark Rutland Cc: Peter Zijlstra Cc: Thomas Richter Link: https://lore.kernel.org/r/20201126141328.6509-2-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/topology.c | 53 ++++++++++++++++++++++++++++++++----- 1 file changed, 46 insertions(+), 7 deletions(-) diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index 165feedc78634..f988b3109afc2 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -64,10 +64,11 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) .path = path, .mode = PERF_DATA_MODE_READ, }; - int i; + int i, id; session = perf_session__new(&data, false, NULL); TEST_ASSERT_VAL("can't get session", !IS_ERR(session)); + cpu__setup_cpunode_map(); /* On platforms with large numbers of CPUs process_cpu_topology() * might issue an error while reading the perf.data file section @@ -85,11 +86,18 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) * "socket_id number is too big. You may need to upgrade the * perf tool." * - * This is the reason why this test might be skipped. + * This is the reason why this test might be skipped. aarch64 and + * s390 always write this part of the header, even when the above + * condition is true (see do_core_id_test in header.c). So always + * run this test on those platforms. */ - if (!session->header.env.cpu) + if (!session->header.env.cpu + && strncmp(session->header.env.arch, "s390", 4) + && strncmp(session->header.env.arch, "aarch64", 7)) return TEST_SKIP; + TEST_ASSERT_VAL("Session header CPU map not set", session->header.env.cpu); + for (i = 0; i < session->header.env.nr_cpus_avail; i++) { if (!cpu_map__has(map, i)) continue; @@ -98,14 +106,45 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) session->header.env.cpu[i].socket_id); } + // Test that core ID contains socket, die and core + for (i = 0; i < map->nr; i++) { + id = cpu_map__get_core(map, i, NULL); + TEST_ASSERT_VAL("Core map - Core ID doesn't match", + session->header.env.cpu[map->map[i]].core_id == cpu_map__id_to_cpu(id)); + + TEST_ASSERT_VAL("Core map - Socket ID doesn't match", + session->header.env.cpu[map->map[i]].socket_id == + cpu_map__id_to_socket(id)); + + TEST_ASSERT_VAL("Core map - Die ID doesn't match", + session->header.env.cpu[map->map[i]].die_id == cpu_map__id_to_die(id)); + } + + // Test that die ID contains socket and die for (i = 0; i < map->nr; i++) { - TEST_ASSERT_VAL("Core ID doesn't match", - (session->header.env.cpu[map->map[i]].core_id == (cpu_map__get_core(map, i, NULL) & 0xffff))); + id = cpu_map__get_die(map, i, NULL); + TEST_ASSERT_VAL("Die map - Socket ID doesn't match", + session->header.env.cpu[map->map[i]].socket_id == + cpu_map__id_to_socket(id << 16)); + + TEST_ASSERT_VAL("Die map - Die ID doesn't match", + session->header.env.cpu[map->map[i]].die_id == + cpu_map__id_to_die(id << 16)); + } - TEST_ASSERT_VAL("Socket ID doesn't match", - (session->header.env.cpu[map->map[i]].socket_id == cpu_map__get_socket(map, i, NULL))); + // Test that socket ID contains only socket + for (i = 0; i < map->nr; i++) { + id = cpu_map__get_socket(map, i, NULL); + TEST_ASSERT_VAL("Socket map - Socket ID doesn't match", + session->header.env.cpu[map->map[i]].socket_id == id); } + // Test that node ID contains only node + for (i = 0; i < map->nr; i++) { + id = cpu_map__get_node(map, i, NULL); + TEST_ASSERT_VAL("Node map - Node ID doesn't match", + cpu__get_node(map->map[i]) == id); + } perf_session__delete(session); return 0; -- GitLab From 91585846f105ef2e3f479a5124a264ebb770f6ab Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 26 Nov 2020 16:13:18 +0200 Subject: [PATCH 1161/1894] perf cpumap: Use existing allocator to avoid using malloc Use the existing allocator for perf_cpu_map to avoid use of raw malloc. This could cause an issue in later commits where the size of perf_cpu_map is changed. No functional changes. Signed-off-by: James Clark Acked-by: Namhyung Kim Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Tested-by: John Garry Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Mark Rutland Cc: Peter Zijlstra Cc: Thomas Richter Link: https://lore.kernel.org/r/20201126141328.6509-3-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cpumap.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index dc5c5e6fc502d..20e3a75953fce 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -132,15 +132,16 @@ int cpu_map__build_map(struct perf_cpu_map *cpus, struct perf_cpu_map **res, int (*f)(struct perf_cpu_map *map, int cpu, void *data), void *data) { - struct perf_cpu_map *c; int nr = cpus->nr; + struct perf_cpu_map *c = perf_cpu_map__empty_new(nr); int cpu, s1, s2; - /* allocate as much as possible */ - c = calloc(1, sizeof(*c) + nr * sizeof(int)); if (!c) return -1; + /* Reset size as it may only be partially filled */ + c->nr = 0; + for (cpu = 0; cpu < nr; cpu++) { s1 = f(cpus, cpu, data); for (s2 = 0; s2 < c->nr; s2++) { @@ -155,7 +156,6 @@ int cpu_map__build_map(struct perf_cpu_map *cpus, struct perf_cpu_map **res, /* ensure we process id in increasing order */ qsort(c->map, c->nr, sizeof(int), cmp_ids); - refcount_set(&c->refcnt, 1); *res = c; return 0; } -- GitLab From fa265e59b81a09fa3d88f3322b1e44d583cac9b0 Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 26 Nov 2020 16:13:19 +0200 Subject: [PATCH 1162/1894] perf cpumap: Add new struct for cpu aggregation This struct currently has only a single int member so that it can be used as a drop in replacement for the existing behaviour. Comparison and constructor functions have also been added that will replace usages of '==' and '= -1'. No functional changes. Signed-off-by: James Clark Acked-by: Namhyung Kim Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Tested-by: John Garry Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Mark Rutland Cc: Peter Zijlstra Cc: Thomas Richter Link: https://lore.kernel.org/r/20201126141328.6509-4-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cpumap.c | 18 ++++++++++++++++++ tools/perf/util/cpumap.h | 8 ++++++++ 2 files changed, 26 insertions(+) diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 20e3a75953fce..8624948b4f1df 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -586,3 +586,21 @@ const struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */ return online; } + +bool cpu_map__compare_aggr_cpu_id(struct aggr_cpu_id a, struct aggr_cpu_id b) +{ + return a.id == b.id; +} + +bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id a) +{ + return a.id == -1; +} + +struct aggr_cpu_id cpu_map__empty_aggr_cpu_id(void) +{ + struct aggr_cpu_id ret = { + .id = -1 + }; + return ret; +} diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 3a442f0214684..1cdccc69cd4b1 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -7,6 +7,10 @@ #include #include +struct aggr_cpu_id { + int id; +}; + struct perf_record_cpu_map_data; struct perf_cpu_map *perf_cpu_map__empty_new(int nr); @@ -64,4 +68,8 @@ int cpu_map__build_map(struct perf_cpu_map *cpus, struct perf_cpu_map **res, int cpu_map__cpu(struct perf_cpu_map *cpus, int idx); bool cpu_map__has(struct perf_cpu_map *cpus, int cpu); +bool cpu_map__compare_aggr_cpu_id(struct aggr_cpu_id a, struct aggr_cpu_id b); +bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id a); +struct aggr_cpu_id cpu_map__empty_aggr_cpu_id(void); + #endif /* __PERF_CPUMAP_H */ -- GitLab From 2760f5a14fe7aa466e38bbb92d0284fffc0e4da0 Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 26 Nov 2020 16:13:20 +0200 Subject: [PATCH 1163/1894] perf stat: Replace aggregation ID with a struct Replace all occurences of the usage of int with the new struct cpu_aggr_id. No functional changes. Signed-off-by: James Clark Acked-by: Namhyung Kim Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Tested-by: John Garry Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Mark Rutland Cc: Peter Zijlstra Cc: Thomas Richter Link: https://lore.kernel.org/r/20201126141328.6509-5-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 76 +++++++++++++---------- tools/perf/tests/topology.c | 17 +++--- tools/perf/util/cpumap.c | 82 ++++++++++++++----------- tools/perf/util/cpumap.h | 10 +-- tools/perf/util/stat-display.c | 108 +++++++++++++++++++-------------- tools/perf/util/stat.c | 2 +- tools/perf/util/stat.h | 5 +- 7 files changed, 173 insertions(+), 127 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 89c32692f40c7..1ff76d36d1c92 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1186,65 +1186,67 @@ static struct option stat_options[] = { OPT_END() }; -static int perf_stat__get_socket(struct perf_stat_config *config __maybe_unused, +static struct aggr_cpu_id perf_stat__get_socket(struct perf_stat_config *config __maybe_unused, struct perf_cpu_map *map, int cpu) { return cpu_map__get_socket(map, cpu, NULL); } -static int perf_stat__get_die(struct perf_stat_config *config __maybe_unused, +static struct aggr_cpu_id perf_stat__get_die(struct perf_stat_config *config __maybe_unused, struct perf_cpu_map *map, int cpu) { return cpu_map__get_die(map, cpu, NULL); } -static int perf_stat__get_core(struct perf_stat_config *config __maybe_unused, +static struct aggr_cpu_id perf_stat__get_core(struct perf_stat_config *config __maybe_unused, struct perf_cpu_map *map, int cpu) { return cpu_map__get_core(map, cpu, NULL); } -static int perf_stat__get_node(struct perf_stat_config *config __maybe_unused, +static struct aggr_cpu_id perf_stat__get_node(struct perf_stat_config *config __maybe_unused, struct perf_cpu_map *map, int cpu) { return cpu_map__get_node(map, cpu, NULL); } -static int perf_stat__get_aggr(struct perf_stat_config *config, +static struct aggr_cpu_id perf_stat__get_aggr(struct perf_stat_config *config, aggr_get_id_t get_id, struct perf_cpu_map *map, int idx) { int cpu; + struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); if (idx >= map->nr) - return -1; + return id; cpu = map->map[idx]; if (config->cpus_aggr_map->map[cpu] == -1) - config->cpus_aggr_map->map[cpu] = get_id(config, map, idx); + config->cpus_aggr_map->map[cpu] = get_id(config, map, idx).id; - return config->cpus_aggr_map->map[cpu]; + id.id = config->cpus_aggr_map->map[cpu]; + return id; } -static int perf_stat__get_socket_cached(struct perf_stat_config *config, +static struct aggr_cpu_id perf_stat__get_socket_cached(struct perf_stat_config *config, struct perf_cpu_map *map, int idx) { return perf_stat__get_aggr(config, perf_stat__get_socket, map, idx); } -static int perf_stat__get_die_cached(struct perf_stat_config *config, +static struct aggr_cpu_id perf_stat__get_die_cached(struct perf_stat_config *config, struct perf_cpu_map *map, int idx) { return perf_stat__get_aggr(config, perf_stat__get_die, map, idx); } -static int perf_stat__get_core_cached(struct perf_stat_config *config, +static struct aggr_cpu_id perf_stat__get_core_cached(struct perf_stat_config *config, struct perf_cpu_map *map, int idx) { return perf_stat__get_aggr(config, perf_stat__get_core, map, idx); } -static int perf_stat__get_node_cached(struct perf_stat_config *config, +static struct aggr_cpu_id perf_stat__get_node_cached(struct perf_stat_config *config, struct perf_cpu_map *map, int idx) { return perf_stat__get_aggr(config, perf_stat__get_node, map, idx); @@ -1345,18 +1347,23 @@ static inline int perf_env__get_cpu(struct perf_env *env, struct perf_cpu_map *m return cpu; } -static int perf_env__get_socket(struct perf_cpu_map *map, int idx, void *data) +static struct aggr_cpu_id perf_env__get_socket(struct perf_cpu_map *map, int idx, void *data) { struct perf_env *env = data; int cpu = perf_env__get_cpu(env, map, idx); + struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); + + if (cpu != -1) + id.id = env->cpu[cpu].socket_id; - return cpu == -1 ? -1 : env->cpu[cpu].socket_id; + return id; } -static int perf_env__get_die(struct perf_cpu_map *map, int idx, void *data) +static struct aggr_cpu_id perf_env__get_die(struct perf_cpu_map *map, int idx, void *data) { struct perf_env *env = data; - int die_id = -1, cpu = perf_env__get_cpu(env, map, idx); + struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); + int cpu = perf_env__get_cpu(env, map, idx); if (cpu != -1) { /* @@ -1366,21 +1373,22 @@ static int perf_env__get_die(struct perf_cpu_map *map, int idx, void *data) * socket + die id */ if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n")) - return -1; + return cpu_map__empty_aggr_cpu_id(); if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n")) - return -1; + return cpu_map__empty_aggr_cpu_id(); - die_id = (env->cpu[cpu].socket_id << 8) | (env->cpu[cpu].die_id & 0xff); + id.id = (env->cpu[cpu].socket_id << 8) | (env->cpu[cpu].die_id & 0xff); } - return die_id; + return id; } -static int perf_env__get_core(struct perf_cpu_map *map, int idx, void *data) +static struct aggr_cpu_id perf_env__get_core(struct perf_cpu_map *map, int idx, void *data) { struct perf_env *env = data; - int core = -1, cpu = perf_env__get_cpu(env, map, idx); + struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); + int cpu = perf_env__get_cpu(env, map, idx); if (cpu != -1) { /* @@ -1391,27 +1399,29 @@ static int perf_env__get_core(struct perf_cpu_map *map, int idx, void *data) * socket + die id + core id */ if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n")) - return -1; + return cpu_map__empty_aggr_cpu_id(); if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n")) - return -1; + return cpu_map__empty_aggr_cpu_id(); if (WARN_ONCE(env->cpu[cpu].core_id >> 16, "The core id number is too big.\n")) - return -1; + return cpu_map__empty_aggr_cpu_id(); - core = (env->cpu[cpu].socket_id << 24) | + id.id = (env->cpu[cpu].socket_id << 24) | (env->cpu[cpu].die_id << 16) | (env->cpu[cpu].core_id & 0xffff); } - return core; + return id; } -static int perf_env__get_node(struct perf_cpu_map *map, int idx, void *data) +static struct aggr_cpu_id perf_env__get_node(struct perf_cpu_map *map, int idx, void *data) { int cpu = perf_env__get_cpu(data, map, idx); + struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); - return perf_env__numa_node(data, cpu); + id.id = perf_env__numa_node(data, cpu); + return id; } static int perf_env__build_socket_map(struct perf_env *env, struct perf_cpu_map *cpus, @@ -1438,24 +1448,24 @@ static int perf_env__build_node_map(struct perf_env *env, struct perf_cpu_map *c return cpu_map__build_map(cpus, nodep, perf_env__get_node, env); } -static int perf_stat__get_socket_file(struct perf_stat_config *config __maybe_unused, +static struct aggr_cpu_id perf_stat__get_socket_file(struct perf_stat_config *config __maybe_unused, struct perf_cpu_map *map, int idx) { return perf_env__get_socket(map, idx, &perf_stat.session->header.env); } -static int perf_stat__get_die_file(struct perf_stat_config *config __maybe_unused, +static struct aggr_cpu_id perf_stat__get_die_file(struct perf_stat_config *config __maybe_unused, struct perf_cpu_map *map, int idx) { return perf_env__get_die(map, idx, &perf_stat.session->header.env); } -static int perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused, +static struct aggr_cpu_id perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused, struct perf_cpu_map *map, int idx) { return perf_env__get_core(map, idx, &perf_stat.session->header.env); } -static int perf_stat__get_node_file(struct perf_stat_config *config __maybe_unused, +static struct aggr_cpu_id perf_stat__get_node_file(struct perf_stat_config *config __maybe_unused, struct perf_cpu_map *map, int idx) { return perf_env__get_node(map, idx, &perf_stat.session->header.env); diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index f988b3109afc2..7a7d16b3950a3 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -64,7 +64,8 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) .path = path, .mode = PERF_DATA_MODE_READ, }; - int i, id; + int i; + struct aggr_cpu_id id; session = perf_session__new(&data, false, NULL); TEST_ASSERT_VAL("can't get session", !IS_ERR(session)); @@ -110,14 +111,14 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) for (i = 0; i < map->nr; i++) { id = cpu_map__get_core(map, i, NULL); TEST_ASSERT_VAL("Core map - Core ID doesn't match", - session->header.env.cpu[map->map[i]].core_id == cpu_map__id_to_cpu(id)); + session->header.env.cpu[map->map[i]].core_id == cpu_map__id_to_cpu(id.id)); TEST_ASSERT_VAL("Core map - Socket ID doesn't match", session->header.env.cpu[map->map[i]].socket_id == - cpu_map__id_to_socket(id)); + cpu_map__id_to_socket(id.id)); TEST_ASSERT_VAL("Core map - Die ID doesn't match", - session->header.env.cpu[map->map[i]].die_id == cpu_map__id_to_die(id)); + session->header.env.cpu[map->map[i]].die_id == cpu_map__id_to_die(id.id)); } // Test that die ID contains socket and die @@ -125,25 +126,25 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) id = cpu_map__get_die(map, i, NULL); TEST_ASSERT_VAL("Die map - Socket ID doesn't match", session->header.env.cpu[map->map[i]].socket_id == - cpu_map__id_to_socket(id << 16)); + cpu_map__id_to_socket(id.id << 16)); TEST_ASSERT_VAL("Die map - Die ID doesn't match", session->header.env.cpu[map->map[i]].die_id == - cpu_map__id_to_die(id << 16)); + cpu_map__id_to_die(id.id << 16)); } // Test that socket ID contains only socket for (i = 0; i < map->nr; i++) { id = cpu_map__get_socket(map, i, NULL); TEST_ASSERT_VAL("Socket map - Socket ID doesn't match", - session->header.env.cpu[map->map[i]].socket_id == id); + session->header.env.cpu[map->map[i]].socket_id == id.id); } // Test that node ID contains only node for (i = 0; i < map->nr; i++) { id = cpu_map__get_node(map, i, NULL); TEST_ASSERT_VAL("Node map - Node ID doesn't match", - cpu__get_node(map->map[i]) == id); + cpu__get_node(map->map[i]) == id.id); } perf_session__delete(session); diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 8624948b4f1df..e05a12bde0731 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -111,30 +111,37 @@ int cpu_map__get_socket_id(int cpu) return ret ?: value; } -int cpu_map__get_socket(struct perf_cpu_map *map, int idx, void *data __maybe_unused) +struct aggr_cpu_id cpu_map__get_socket(struct perf_cpu_map *map, int idx, + void *data __maybe_unused) { int cpu; + struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); if (idx > map->nr) - return -1; + return id; cpu = map->map[idx]; - return cpu_map__get_socket_id(cpu); + id.id = cpu_map__get_socket_id(cpu); + return id; } -static int cmp_ids(const void *a, const void *b) +static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer) { - return *(int *)a - *(int *)b; + struct aggr_cpu_id *a = (struct aggr_cpu_id *)a_pointer; + struct aggr_cpu_id *b = (struct aggr_cpu_id *)b_pointer; + + return a->id - b->id; } int cpu_map__build_map(struct perf_cpu_map *cpus, struct perf_cpu_map **res, - int (*f)(struct perf_cpu_map *map, int cpu, void *data), + struct aggr_cpu_id (*f)(struct perf_cpu_map *map, int cpu, void *data), void *data) { int nr = cpus->nr; struct perf_cpu_map *c = perf_cpu_map__empty_new(nr); - int cpu, s1, s2; + int cpu, s2; + struct aggr_cpu_id s1; if (!c) return -1; @@ -145,16 +152,16 @@ int cpu_map__build_map(struct perf_cpu_map *cpus, struct perf_cpu_map **res, for (cpu = 0; cpu < nr; cpu++) { s1 = f(cpus, cpu, data); for (s2 = 0; s2 < c->nr; s2++) { - if (s1 == c->map[s2]) + if (s1.id == c->map[s2]) break; } if (s2 == c->nr) { - c->map[c->nr] = s1; + c->map[c->nr] = s1.id; c->nr++; } } /* ensure we process id in increasing order */ - qsort(c->map, c->nr, sizeof(int), cmp_ids); + qsort(c->map, c->nr, sizeof(struct aggr_cpu_id), cmp_aggr_cpu_id); *res = c; return 0; @@ -167,23 +174,24 @@ int cpu_map__get_die_id(int cpu) return ret ?: value; } -int cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data) +struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data) { - int cpu, die_id, s; + int cpu, s; + struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); if (idx > map->nr) - return -1; + return id; cpu = map->map[idx]; - die_id = cpu_map__get_die_id(cpu); + id.id = cpu_map__get_die_id(cpu); /* There is no die_id on legacy system. */ - if (die_id == -1) - die_id = 0; + if (id.id == -1) + id.id = 0; - s = cpu_map__get_socket(map, idx, data); + s = cpu_map__get_socket(map, idx, data).id; if (s == -1) - return -1; + return cpu_map__empty_aggr_cpu_id(); /* * Encode socket in bit range 15:8 @@ -191,13 +199,14 @@ int cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data) * we need a global id. So we combine * socket + die id */ - if (WARN_ONCE(die_id >> 8, "The die id number is too big.\n")) - return -1; + if (WARN_ONCE(id.id >> 8, "The die id number is too big.\n")) + return cpu_map__empty_aggr_cpu_id(); if (WARN_ONCE(s >> 8, "The socket id number is too big.\n")) - return -1; + return cpu_map__empty_aggr_cpu_id(); - return (s << 8) | (die_id & 0xff); + id.id = (s << 8) | (id.id & 0xff); + return id; } int cpu_map__get_core_id(int cpu) @@ -211,21 +220,22 @@ int cpu_map__get_node_id(int cpu) return cpu__get_node(cpu); } -int cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data) +struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data) { - int cpu, s_die; + int cpu; + struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); if (idx > map->nr) - return -1; + return id; cpu = map->map[idx]; cpu = cpu_map__get_core_id(cpu); - /* s_die is the combination of socket + die id */ - s_die = cpu_map__get_die(map, idx, data); - if (s_die == -1) - return -1; + /* cpu_map__get_die returns the combination of socket + die id */ + id = cpu_map__get_die(map, idx, data); + if (cpu_map__aggr_cpu_id_is_empty(id)) + return id; /* * encode socket in bit range 31:24 @@ -235,17 +245,21 @@ int cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data) * socket + die id + core id */ if (WARN_ONCE(cpu >> 16, "The core id number is too big.\n")) - return -1; + return cpu_map__empty_aggr_cpu_id(); - return (s_die << 16) | (cpu & 0xffff); + id.id = (id.id << 16) | (cpu & 0xffff); + return id; } -int cpu_map__get_node(struct perf_cpu_map *map, int idx, void *data __maybe_unused) +struct aggr_cpu_id cpu_map__get_node(struct perf_cpu_map *map, int idx, void *data __maybe_unused) { + struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); + if (idx < 0 || idx >= map->nr) - return -1; + return id; - return cpu_map__get_node_id(map->map[idx]); + id.id = cpu_map__get_node_id(map->map[idx]); + return id; } int cpu_map__build_socket_map(struct perf_cpu_map *cpus, struct perf_cpu_map **sockp) diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 1cdccc69cd4b1..b8c2288a3f6dd 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -19,13 +19,13 @@ size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size); size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size); size_t cpu_map__fprintf(struct perf_cpu_map *map, FILE *fp); int cpu_map__get_socket_id(int cpu); -int cpu_map__get_socket(struct perf_cpu_map *map, int idx, void *data); +struct aggr_cpu_id cpu_map__get_socket(struct perf_cpu_map *map, int idx, void *data); int cpu_map__get_die_id(int cpu); -int cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data); +struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data); int cpu_map__get_core_id(int cpu); -int cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data); +struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data); int cpu_map__get_node_id(int cpu); -int cpu_map__get_node(struct perf_cpu_map *map, int idx, void *data); +struct aggr_cpu_id cpu_map__get_node(struct perf_cpu_map *map, int idx, void *data); int cpu_map__build_socket_map(struct perf_cpu_map *cpus, struct perf_cpu_map **sockp); int cpu_map__build_die_map(struct perf_cpu_map *cpus, struct perf_cpu_map **diep); int cpu_map__build_core_map(struct perf_cpu_map *cpus, struct perf_cpu_map **corep); @@ -62,7 +62,7 @@ int cpu__max_present_cpu(void); int cpu__get_node(int cpu); int cpu_map__build_map(struct perf_cpu_map *cpus, struct perf_cpu_map **res, - int (*f)(struct perf_cpu_map *map, int cpu, void *data), + struct aggr_cpu_id (*f)(struct perf_cpu_map *map, int cpu, void *data), void *data); int cpu_map__cpu(struct perf_cpu_map *cpus, int idx); diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index fee7543843a8d..8f4ee3621863e 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -68,15 +68,15 @@ static void print_cgroup(struct perf_stat_config *config, struct evsel *evsel) static void aggr_printout(struct perf_stat_config *config, - struct evsel *evsel, int id, int nr) + struct evsel *evsel, struct aggr_cpu_id id, int nr) { switch (config->aggr_mode) { case AGGR_CORE: fprintf(config->output, "S%d-D%d-C%*d%s%*d%s", - cpu_map__id_to_socket(id), - cpu_map__id_to_die(id), + cpu_map__id_to_socket(id.id), + cpu_map__id_to_die(id.id), config->csv_output ? 0 : -8, - cpu_map__id_to_cpu(id), + cpu_map__id_to_cpu(id.id), config->csv_sep, config->csv_output ? 0 : 4, nr, @@ -84,9 +84,9 @@ static void aggr_printout(struct perf_stat_config *config, break; case AGGR_DIE: fprintf(config->output, "S%d-D%*d%s%*d%s", - cpu_map__id_to_socket(id << 16), + cpu_map__id_to_socket(id.id << 16), config->csv_output ? 0 : -8, - cpu_map__id_to_die(id << 16), + cpu_map__id_to_die(id.id << 16), config->csv_sep, config->csv_output ? 0 : 4, nr, @@ -95,7 +95,7 @@ static void aggr_printout(struct perf_stat_config *config, case AGGR_SOCKET: fprintf(config->output, "S%*d%s%*d%s", config->csv_output ? 0 : -5, - id, + id.id, config->csv_sep, config->csv_output ? 0 : 4, nr, @@ -104,7 +104,7 @@ static void aggr_printout(struct perf_stat_config *config, case AGGR_NODE: fprintf(config->output, "N%*d%s%*d%s", config->csv_output ? 0 : -5, - id, + id.id, config->csv_sep, config->csv_output ? 0 : 4, nr, @@ -113,23 +113,23 @@ static void aggr_printout(struct perf_stat_config *config, case AGGR_NONE: if (evsel->percore && !config->percore_show_thread) { fprintf(config->output, "S%d-D%d-C%*d%s", - cpu_map__id_to_socket(id), - cpu_map__id_to_die(id), + cpu_map__id_to_socket(id.id), + cpu_map__id_to_die(id.id), config->csv_output ? 0 : -3, - cpu_map__id_to_cpu(id), config->csv_sep); - } else if (id > -1) { + cpu_map__id_to_cpu(id.id), config->csv_sep); + } else if (id.id > -1) { fprintf(config->output, "CPU%*d%s", config->csv_output ? 0 : -7, - evsel__cpus(evsel)->map[id], + evsel__cpus(evsel)->map[id.id], config->csv_sep); } break; case AGGR_THREAD: fprintf(config->output, "%*s-%*d%s", config->csv_output ? 0 : 16, - perf_thread_map__comm(evsel->core.threads, id), + perf_thread_map__comm(evsel->core.threads, id.id), config->csv_output ? 0 : -8, - perf_thread_map__pid(evsel->core.threads, id), + perf_thread_map__pid(evsel->core.threads, id.id), config->csv_sep); break; case AGGR_GLOBAL: @@ -144,7 +144,8 @@ struct outstate { bool newline; const char *prefix; int nfields; - int id, nr; + int nr; + struct aggr_cpu_id id; struct evsel *evsel; }; @@ -319,13 +320,13 @@ static void print_metric_header(struct perf_stat_config *config, } static int first_shadow_cpu(struct perf_stat_config *config, - struct evsel *evsel, int id) + struct evsel *evsel, struct aggr_cpu_id id) { struct evlist *evlist = evsel->evlist; int i; if (config->aggr_mode == AGGR_NONE) - return id; + return id.id; if (!config->aggr_get_id) return 0; @@ -333,14 +334,17 @@ static int first_shadow_cpu(struct perf_stat_config *config, for (i = 0; i < evsel__nr_cpus(evsel); i++) { int cpu2 = evsel__cpus(evsel)->map[i]; - if (config->aggr_get_id(config, evlist->core.cpus, cpu2) == id) + if (cpu_map__compare_aggr_cpu_id( + config->aggr_get_id(config, evlist->core.cpus, cpu2), + id)) { return cpu2; + } } return 0; } static void abs_printout(struct perf_stat_config *config, - int id, int nr, struct evsel *evsel, double avg) + struct aggr_cpu_id id, int nr, struct evsel *evsel, double avg) { FILE *output = config->output; double sc = evsel->scale; @@ -393,7 +397,7 @@ static bool is_mixed_hw_group(struct evsel *counter) return false; } -static void printout(struct perf_stat_config *config, int id, int nr, +static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int nr, struct evsel *counter, double uval, char *prefix, u64 run, u64 ena, double noise, struct runtime_stat *st) @@ -496,17 +500,18 @@ static void printout(struct perf_stat_config *config, int id, int nr, static void aggr_update_shadow(struct perf_stat_config *config, struct evlist *evlist) { - int cpu, s2, id, s; + int cpu, s; + struct aggr_cpu_id s2, id; u64 val; struct evsel *counter; for (s = 0; s < config->aggr_map->nr; s++) { - id = config->aggr_map->map[s]; + id.id = config->aggr_map->map[s]; evlist__for_each_entry(evlist, counter) { val = 0; for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) { s2 = config->aggr_get_id(config, evlist->core.cpus, cpu); - if (s2 != id) + if (!cpu_map__compare_aggr_cpu_id(s2, id)) continue; val += perf_counts(counter->counts, cpu, 0)->val; } @@ -584,7 +589,7 @@ static bool collect_data(struct perf_stat_config *config, struct evsel *counter, struct aggr_data { u64 ena, run, val; - int id; + struct aggr_cpu_id id; int nr; int cpu; }; @@ -593,13 +598,14 @@ static void aggr_cb(struct perf_stat_config *config, struct evsel *counter, void *data, bool first) { struct aggr_data *ad = data; - int cpu, s2; + int cpu; + struct aggr_cpu_id s2; for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) { struct perf_counts_values *counts; s2 = config->aggr_get_id(config, evsel__cpus(counter), cpu); - if (s2 != ad->id) + if (!cpu_map__compare_aggr_cpu_id(s2, ad->id)) continue; if (first) ad->nr++; @@ -628,10 +634,11 @@ static void print_counter_aggrdata(struct perf_stat_config *config, struct aggr_data ad; FILE *output = config->output; u64 ena, run, val; - int id, nr; + int nr; + struct aggr_cpu_id id; double uval; - ad.id = id = config->aggr_map->map[s]; + ad.id.id = id.id = config->aggr_map->map[s]; ad.val = ad.ena = ad.run = 0; ad.nr = 0; if (!collect_data(config, counter, aggr_cb, &ad)) @@ -649,8 +656,12 @@ static void print_counter_aggrdata(struct perf_stat_config *config, fprintf(output, "%s", prefix); uval = val * counter->scale; - printout(config, cpu != -1 ? cpu : id, nr, counter, uval, prefix, - run, ena, 1.0, &rt_stat); + if (cpu != -1) { + id = cpu_map__empty_aggr_cpu_id(); + id.id = cpu; + } + printout(config, id, nr, counter, uval, + prefix, run, ena, 1.0, &rt_stat); if (!metric_only) fputc('\n', output); } @@ -728,7 +739,8 @@ static struct perf_aggr_thread_value *sort_aggr_thread( continue; buf[i].counter = counter; - buf[i].id = thread; + buf[i].id = cpu_map__empty_aggr_cpu_id(); + buf[i].id.id = thread; buf[i].uval = uval; buf[i].val = val; buf[i].run = run; @@ -751,7 +763,8 @@ static void print_aggr_thread(struct perf_stat_config *config, FILE *output = config->output; int nthreads = perf_thread_map__nr(counter->core.threads); int ncpus = perf_cpu_map__nr(counter->core.cpus); - int thread, sorted_threads, id; + int thread, sorted_threads; + struct aggr_cpu_id id; struct perf_aggr_thread_value *buf; buf = sort_aggr_thread(counter, nthreads, ncpus, &sorted_threads, _target); @@ -768,7 +781,7 @@ static void print_aggr_thread(struct perf_stat_config *config, if (config->stats) printout(config, id, 0, buf[thread].counter, buf[thread].uval, prefix, buf[thread].run, buf[thread].ena, 1.0, - &config->stats[id]); + &config->stats[id.id]); else printout(config, id, 0, buf[thread].counter, buf[thread].uval, prefix, buf[thread].run, buf[thread].ena, 1.0, @@ -814,8 +827,8 @@ static void print_counter_aggr(struct perf_stat_config *config, fprintf(output, "%s", prefix); uval = cd.avg * counter->scale; - printout(config, -1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled, - cd.avg, &rt_stat); + printout(config, cpu_map__empty_aggr_cpu_id(), 0, counter, uval, prefix, cd.avg_running, + cd.avg_enabled, cd.avg, &rt_stat); if (!metric_only) fprintf(output, "\n"); } @@ -842,6 +855,7 @@ static void print_counter(struct perf_stat_config *config, u64 ena, run, val; double uval; int cpu; + struct aggr_cpu_id id; for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) { struct aggr_data ad = { .cpu = cpu }; @@ -856,8 +870,10 @@ static void print_counter(struct perf_stat_config *config, fprintf(output, "%s", prefix); uval = val * counter->scale; - printout(config, cpu, 0, counter, uval, prefix, run, ena, 1.0, - &rt_stat); + id = cpu_map__empty_aggr_cpu_id(); + id.id = cpu; + printout(config, id, 0, counter, uval, prefix, + run, ena, 1.0, &rt_stat); fputc('\n', output); } @@ -872,6 +888,7 @@ static void print_no_aggr_metric(struct perf_stat_config *config, struct evsel *counter; u64 ena, run, val; double uval; + struct aggr_cpu_id id; nrcpus = evlist->core.cpus->nr; for (cpu = 0; cpu < nrcpus; cpu++) { @@ -880,8 +897,10 @@ static void print_no_aggr_metric(struct perf_stat_config *config, if (prefix) fputs(prefix, config->output); evlist__for_each_entry(evlist, counter) { + id = cpu_map__empty_aggr_cpu_id(); + id.id = cpu; if (first) { - aggr_printout(config, counter, cpu, 0); + aggr_printout(config, counter, id, 0); first = false; } val = perf_counts(counter->counts, cpu, 0)->val; @@ -889,8 +908,8 @@ static void print_no_aggr_metric(struct perf_stat_config *config, run = perf_counts(counter->counts, cpu, 0)->run; uval = val * counter->scale; - printout(config, cpu, 0, counter, uval, prefix, run, ena, 1.0, - &rt_stat); + printout(config, id, 0, counter, uval, prefix, + run, ena, 1.0, &rt_stat); } fputc('\n', config->output); } @@ -1140,14 +1159,15 @@ static void print_footer(struct perf_stat_config *config) static void print_percore_thread(struct perf_stat_config *config, struct evsel *counter, char *prefix) { - int s, s2, id; + int s; + struct aggr_cpu_id s2, id; bool first = true; for (int i = 0; i < evsel__nr_cpus(counter); i++) { s2 = config->aggr_get_id(config, evsel__cpus(counter), i); for (s = 0; s < config->aggr_map->nr; s++) { - id = config->aggr_map->map[s]; - if (s2 == id) + id.id = config->aggr_map->map[s]; + if (cpu_map__compare_aggr_cpu_id(s2, id)) break; } diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 1e125e39ff84e..1d5c9f98396b7 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -313,7 +313,7 @@ static int check_per_pkg(struct evsel *counter, if (!(vals->run && vals->ena)) return 0; - s = cpu_map__get_socket(cpus, cpu, NULL); + s = cpu_map__get_socket(cpus, cpu, NULL).id; if (s < 0) return -1; diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 9979b4b100f21..87522ffb2db2b 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -6,6 +6,7 @@ #include #include #include +#include "cpumap.h" #include "rblist.h" struct perf_cpu_map; @@ -99,7 +100,7 @@ struct runtime_stat { struct rblist value_list; }; -typedef int (*aggr_get_id_t)(struct perf_stat_config *config, +typedef struct aggr_cpu_id (*aggr_get_id_t)(struct perf_stat_config *config, struct perf_cpu_map *m, int cpu); struct perf_stat_config { @@ -170,7 +171,7 @@ struct evlist; struct perf_aggr_thread_value { struct evsel *counter; - int id; + struct aggr_cpu_id id; double uval; u64 val; u64 run; -- GitLab From cea6575fdccfc0624ca42f656e16e6b4d9bb48a5 Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 26 Nov 2020 16:13:21 +0200 Subject: [PATCH 1164/1894] perf cpumap: Add new map type for aggregation Currently this is a duplicate of perf_cpu_map so that it can be used as a drop in replacement. In a later commit it will be changed from a map of ints to use the new cpu_aggr_id struct. No functional changes. Signed-off-by: James Clark Acked-by: Namhyung Kim Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Tested-by: John Garry Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Mark Rutland Cc: Peter Zijlstra Cc: Thomas Richter Link: https://lore.kernel.org/r/20201126141328.6509-6-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cpumap.c | 17 +++++++++++++++++ tools/perf/util/cpumap.h | 8 ++++++++ 2 files changed, 25 insertions(+) diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index e05a12bde0731..b18e53506656d 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -95,6 +95,23 @@ struct perf_cpu_map *perf_cpu_map__empty_new(int nr) return cpus; } +struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr) +{ + struct cpu_aggr_map *cpus = malloc(sizeof(*cpus) + sizeof(int) * nr); + + if (cpus != NULL) { + int i; + + cpus->nr = nr; + for (i = 0; i < nr; i++) + cpus->map[i] = -1; + + refcount_set(&cpus->refcnt, 1); + } + + return cpus; +} + static int cpu__get_topology_int(int cpu, const char *name, int *value) { char path[PATH_MAX]; diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index b8c2288a3f6dd..ebd65c4f431b0 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -11,9 +11,17 @@ struct aggr_cpu_id { int id; }; +struct cpu_aggr_map { + refcount_t refcnt; + int nr; + int map[]; +}; + struct perf_record_cpu_map_data; struct perf_cpu_map *perf_cpu_map__empty_new(int nr); +struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr); + struct perf_cpu_map *cpu_map__new_data(struct perf_record_cpu_map_data *data); size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size); size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size); -- GitLab From d526e1a033e03ec4515b1800f99d99a35c7ea790 Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 26 Nov 2020 16:13:22 +0200 Subject: [PATCH 1165/1894] perf cpumap: Drop in cpu_aggr_map struct Replace usages of perf_cpu_map with cpu_aggr map in places that are involved with 'perf stat' aggregation. This will then later be changed to be a map of cpu_aggr_id rather than an int so that more data can be stored. No functional changes. Signed-off-by: James Clark Acked-by: Namhyung Kim Acked-by: Jiri Olsa Tested-by: John Garry Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Mark Rutland Cc: Alexander Shishkin Cc: Thomas Richter Link: https://lore.kernel.org/r/20201126141328.6509-7-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 29 ++++++++++++++++++++++------- tools/perf/util/cpumap.c | 12 ++++++------ tools/perf/util/cpumap.h | 10 +++++----- tools/perf/util/stat.h | 4 ++-- 4 files changed, 35 insertions(+), 20 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 1ff76d36d1c92..cc017c7dcee9d 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1320,14 +1320,29 @@ static int perf_stat_init_aggr_mode(void) * the aggregation translate cpumap. */ nr = perf_cpu_map__max(evsel_list->core.cpus); - stat_config.cpus_aggr_map = perf_cpu_map__empty_new(nr + 1); + stat_config.cpus_aggr_map = cpu_aggr_map__empty_new(nr + 1); return stat_config.cpus_aggr_map ? 0 : -ENOMEM; } +static void cpu_aggr_map__delete(struct cpu_aggr_map *map) +{ + if (map) { + WARN_ONCE(refcount_read(&map->refcnt) != 0, + "cpu_aggr_map refcnt unbalanced\n"); + free(map); + } +} + +static void cpu_aggr_map__put(struct cpu_aggr_map *map) +{ + if (map && refcount_dec_and_test(&map->refcnt)) + cpu_aggr_map__delete(map); +} + static void perf_stat__exit_aggr_mode(void) { - perf_cpu_map__put(stat_config.aggr_map); - perf_cpu_map__put(stat_config.cpus_aggr_map); + cpu_aggr_map__put(stat_config.aggr_map); + cpu_aggr_map__put(stat_config.cpus_aggr_map); stat_config.aggr_map = NULL; stat_config.cpus_aggr_map = NULL; } @@ -1425,25 +1440,25 @@ static struct aggr_cpu_id perf_env__get_node(struct perf_cpu_map *map, int idx, } static int perf_env__build_socket_map(struct perf_env *env, struct perf_cpu_map *cpus, - struct perf_cpu_map **sockp) + struct cpu_aggr_map **sockp) { return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env); } static int perf_env__build_die_map(struct perf_env *env, struct perf_cpu_map *cpus, - struct perf_cpu_map **diep) + struct cpu_aggr_map **diep) { return cpu_map__build_map(cpus, diep, perf_env__get_die, env); } static int perf_env__build_core_map(struct perf_env *env, struct perf_cpu_map *cpus, - struct perf_cpu_map **corep) + struct cpu_aggr_map **corep) { return cpu_map__build_map(cpus, corep, perf_env__get_core, env); } static int perf_env__build_node_map(struct perf_env *env, struct perf_cpu_map *cpus, - struct perf_cpu_map **nodep) + struct cpu_aggr_map **nodep) { return cpu_map__build_map(cpus, nodep, perf_env__get_node, env); } diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index b18e53506656d..ea81586305f4e 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -151,12 +151,12 @@ static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer) return a->id - b->id; } -int cpu_map__build_map(struct perf_cpu_map *cpus, struct perf_cpu_map **res, +int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res, struct aggr_cpu_id (*f)(struct perf_cpu_map *map, int cpu, void *data), void *data) { int nr = cpus->nr; - struct perf_cpu_map *c = perf_cpu_map__empty_new(nr); + struct cpu_aggr_map *c = cpu_aggr_map__empty_new(nr); int cpu, s2; struct aggr_cpu_id s1; @@ -279,22 +279,22 @@ struct aggr_cpu_id cpu_map__get_node(struct perf_cpu_map *map, int idx, void *da return id; } -int cpu_map__build_socket_map(struct perf_cpu_map *cpus, struct perf_cpu_map **sockp) +int cpu_map__build_socket_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **sockp) { return cpu_map__build_map(cpus, sockp, cpu_map__get_socket, NULL); } -int cpu_map__build_die_map(struct perf_cpu_map *cpus, struct perf_cpu_map **diep) +int cpu_map__build_die_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **diep) { return cpu_map__build_map(cpus, diep, cpu_map__get_die, NULL); } -int cpu_map__build_core_map(struct perf_cpu_map *cpus, struct perf_cpu_map **corep) +int cpu_map__build_core_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **corep) { return cpu_map__build_map(cpus, corep, cpu_map__get_core, NULL); } -int cpu_map__build_node_map(struct perf_cpu_map *cpus, struct perf_cpu_map **numap) +int cpu_map__build_node_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **numap) { return cpu_map__build_map(cpus, numap, cpu_map__get_node, NULL); } diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index ebd65c4f431b0..b112069038be2 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -34,10 +34,10 @@ int cpu_map__get_core_id(int cpu); struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data); int cpu_map__get_node_id(int cpu); struct aggr_cpu_id cpu_map__get_node(struct perf_cpu_map *map, int idx, void *data); -int cpu_map__build_socket_map(struct perf_cpu_map *cpus, struct perf_cpu_map **sockp); -int cpu_map__build_die_map(struct perf_cpu_map *cpus, struct perf_cpu_map **diep); -int cpu_map__build_core_map(struct perf_cpu_map *cpus, struct perf_cpu_map **corep); -int cpu_map__build_node_map(struct perf_cpu_map *cpus, struct perf_cpu_map **nodep); +int cpu_map__build_socket_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **sockp); +int cpu_map__build_die_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **diep); +int cpu_map__build_core_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **corep); +int cpu_map__build_node_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **nodep); const struct perf_cpu_map *cpu_map__online(void); /* thread unsafe */ static inline int cpu_map__socket(struct perf_cpu_map *sock, int s) @@ -69,7 +69,7 @@ int cpu__max_cpu(void); int cpu__max_present_cpu(void); int cpu__get_node(int cpu); -int cpu_map__build_map(struct perf_cpu_map *cpus, struct perf_cpu_map **res, +int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res, struct aggr_cpu_id (*f)(struct perf_cpu_map *map, int cpu, void *data), void *data); diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 87522ffb2db2b..b5369730b4a2a 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -139,9 +139,9 @@ struct perf_stat_config { const char *csv_sep; struct stats *walltime_nsecs_stats; struct rusage ru_data; - struct perf_cpu_map *aggr_map; + struct cpu_aggr_map *aggr_map; aggr_get_id_t aggr_get_id; - struct perf_cpu_map *cpus_aggr_map; + struct cpu_aggr_map *cpus_aggr_map; u64 *walltime_run; struct rblist metric_events; int ctl_fd; -- GitLab From ff5232956e074994a66656f709c3ad1ee3d8a550 Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 26 Nov 2020 16:13:23 +0200 Subject: [PATCH 1166/1894] perf stat aggregation: Start using cpu_aggr_id in map Use the new cpu_aggr_id struct in the cpu map instead of int so that it can store more data. No functional changes. Signed-off-by: James Clark Acked-by: Namhyung Kim Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Tested-by: John Garry Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Mark Rutland Cc: Alexander Shishkin Cc: Thomas Richter Link: https://lore.kernel.org/r/20201126141328.6509-8-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 6 +++--- tools/perf/util/cpumap.c | 8 ++++---- tools/perf/util/cpumap.h | 2 +- tools/perf/util/stat-display.c | 6 +++--- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index cc017c7dcee9d..3fec1b7e93b5f 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1221,10 +1221,10 @@ static struct aggr_cpu_id perf_stat__get_aggr(struct perf_stat_config *config, cpu = map->map[idx]; - if (config->cpus_aggr_map->map[cpu] == -1) - config->cpus_aggr_map->map[cpu] = get_id(config, map, idx).id; + if (cpu_map__aggr_cpu_id_is_empty(config->cpus_aggr_map->map[cpu])) + config->cpus_aggr_map->map[cpu] = get_id(config, map, idx); - id.id = config->cpus_aggr_map->map[cpu]; + id = config->cpus_aggr_map->map[cpu]; return id; } diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index ea81586305f4e..b50609b9a585e 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -97,14 +97,14 @@ struct perf_cpu_map *perf_cpu_map__empty_new(int nr) struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr) { - struct cpu_aggr_map *cpus = malloc(sizeof(*cpus) + sizeof(int) * nr); + struct cpu_aggr_map *cpus = malloc(sizeof(*cpus) + sizeof(struct aggr_cpu_id) * nr); if (cpus != NULL) { int i; cpus->nr = nr; for (i = 0; i < nr; i++) - cpus->map[i] = -1; + cpus->map[i] = cpu_map__empty_aggr_cpu_id(); refcount_set(&cpus->refcnt, 1); } @@ -169,11 +169,11 @@ int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res, for (cpu = 0; cpu < nr; cpu++) { s1 = f(cpus, cpu, data); for (s2 = 0; s2 < c->nr; s2++) { - if (s1.id == c->map[s2]) + if (cpu_map__compare_aggr_cpu_id(s1, c->map[s2])) break; } if (s2 == c->nr) { - c->map[c->nr] = s1.id; + c->map[c->nr] = s1; c->nr++; } } diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index b112069038be2..d8fc265bc7627 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -14,7 +14,7 @@ struct aggr_cpu_id { struct cpu_aggr_map { refcount_t refcnt; int nr; - int map[]; + struct aggr_cpu_id map[]; }; struct perf_record_cpu_map_data; diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 8f4ee3621863e..a6309cedb37b4 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -506,7 +506,7 @@ static void aggr_update_shadow(struct perf_stat_config *config, struct evsel *counter; for (s = 0; s < config->aggr_map->nr; s++) { - id.id = config->aggr_map->map[s]; + id = config->aggr_map->map[s]; evlist__for_each_entry(evlist, counter) { val = 0; for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) { @@ -638,7 +638,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config, struct aggr_cpu_id id; double uval; - ad.id.id = id.id = config->aggr_map->map[s]; + ad.id = id = config->aggr_map->map[s]; ad.val = ad.ena = ad.run = 0; ad.nr = 0; if (!collect_data(config, counter, aggr_cb, &ad)) @@ -1166,7 +1166,7 @@ static void print_percore_thread(struct perf_stat_config *config, for (int i = 0; i < evsel__nr_cpus(counter); i++) { s2 = config->aggr_get_id(config, evsel__cpus(counter), i); for (s = 0; s < config->aggr_map->nr; s++) { - id.id = config->aggr_map->map[s]; + id = config->aggr_map->map[s]; if (cpu_map__compare_aggr_cpu_id(s2, id)) break; } -- GitLab From fcd83a35dd93b89d3f48cfcd33c31b112cc96180 Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 26 Nov 2020 16:13:24 +0200 Subject: [PATCH 1167/1894] perf stat aggregation: Add separate node member Add node as a separate member so that it doesn't have to be packed into the int value. Signed-off-by: James Clark Acked-by: Namhyung Kim Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Tested-by: John Garry Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Mark Rutland Cc: Peter Zijlstra Cc: Thomas Richter Link: https://lore.kernel.org/r/20201126141328.6509-9-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 2 +- tools/perf/tests/topology.c | 8 +++++++- tools/perf/util/cpumap.c | 16 +++++++++++----- tools/perf/util/cpumap.h | 1 + tools/perf/util/stat-display.c | 2 +- 5 files changed, 21 insertions(+), 8 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 3fec1b7e93b5f..e51c5469d712d 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1435,7 +1435,7 @@ static struct aggr_cpu_id perf_env__get_node(struct perf_cpu_map *map, int idx, int cpu = perf_env__get_cpu(data, map, idx); struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); - id.id = perf_env__numa_node(data, cpu); + id.node = perf_env__numa_node(data, cpu); return id; } diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index 7a7d16b3950a3..ee272d45a4459 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -119,6 +119,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) TEST_ASSERT_VAL("Core map - Die ID doesn't match", session->header.env.cpu[map->map[i]].die_id == cpu_map__id_to_die(id.id)); + TEST_ASSERT_VAL("Core map - Node ID is set", id.node == -1); } // Test that die ID contains socket and die @@ -131,6 +132,8 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) TEST_ASSERT_VAL("Die map - Die ID doesn't match", session->header.env.cpu[map->map[i]].die_id == cpu_map__id_to_die(id.id << 16)); + + TEST_ASSERT_VAL("Die map - Node ID is set", id.node == -1); } // Test that socket ID contains only socket @@ -138,13 +141,16 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) id = cpu_map__get_socket(map, i, NULL); TEST_ASSERT_VAL("Socket map - Socket ID doesn't match", session->header.env.cpu[map->map[i]].socket_id == id.id); + + TEST_ASSERT_VAL("Socket map - Node ID is set", id.node == -1); } // Test that node ID contains only node for (i = 0; i < map->nr; i++) { id = cpu_map__get_node(map, i, NULL); TEST_ASSERT_VAL("Node map - Node ID doesn't match", - cpu__get_node(map->map[i]) == id.id); + cpu__get_node(map->map[i]) == id.node); + TEST_ASSERT_VAL("Node map - ID is set", id.id == -1); } perf_session__delete(session); diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index b50609b9a585e..5f9e98ddbe343 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -148,7 +148,10 @@ static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer) struct aggr_cpu_id *a = (struct aggr_cpu_id *)a_pointer; struct aggr_cpu_id *b = (struct aggr_cpu_id *)b_pointer; - return a->id - b->id; + if (a->id != b->id) + return a->id - b->id; + else + return a->node - b->node; } int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res, @@ -275,7 +278,7 @@ struct aggr_cpu_id cpu_map__get_node(struct perf_cpu_map *map, int idx, void *da if (idx < 0 || idx >= map->nr) return id; - id.id = cpu_map__get_node_id(map->map[idx]); + id.node = cpu_map__get_node_id(map->map[idx]); return id; } @@ -620,18 +623,21 @@ const struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */ bool cpu_map__compare_aggr_cpu_id(struct aggr_cpu_id a, struct aggr_cpu_id b) { - return a.id == b.id; + return a.id == b.id && + a.node == b.node; } bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id a) { - return a.id == -1; + return a.id == -1 && + a.node == -1; } struct aggr_cpu_id cpu_map__empty_aggr_cpu_id(void) { struct aggr_cpu_id ret = { - .id = -1 + .id = -1, + .node = -1 }; return ret; } diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index d8fc265bc7627..f79e92603024e 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -9,6 +9,7 @@ struct aggr_cpu_id { int id; + int node; }; struct cpu_aggr_map { diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index a6309cedb37b4..7903922470260 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -104,7 +104,7 @@ static void aggr_printout(struct perf_stat_config *config, case AGGR_NODE: fprintf(config->output, "N%*d%s%*d%s", config->csv_output ? 0 : -5, - id.id, + id.node, config->csv_sep, config->csv_output ? 0 : 4, nr, -- GitLab From 1a270cb6b3cc18663f7fd165aa691c48d68739f2 Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 26 Nov 2020 16:13:25 +0200 Subject: [PATCH 1168/1894] perf stat aggregation: Add separate socket member Add socket as a separate member so that it doesn't have to be packed into the int value. When the socket ID was larger than 8 bits the output appeared corrupted or incomplete. For example, here on ThunderX2 'perf stat' reports a socket of -1 and an invalid die number: ./perf stat -a --per-die The socket id number is too big. Performance counter stats for 'system wide': S-1-D255 128 687.99 msec cpu-clock # 57.240 CPUs utilized ... S36-D0 128 842.34 msec cpu-clock # 70.081 CPUs utilized ... And with --per-core there is an entry with an invalid core ID: ./perf stat record -a --per-core The socket id number is too big. Performance counter stats for 'system wide': S-1-D255-C65535 128 671.04 msec cpu-clock # 54.112 CPUs utilized ... S36-D0-C0 4 28.27 msec cpu-clock # 2.279 CPUs utilized ... This fixes the "Session topology" self test on ThunderX2. After this fix the output contains the correct socket and die IDs and no longer prints a warning about the size of the socket ID: ./perf stat --per-die -a Performance counter stats for 'system wide': S36-D0 128 169,869.39 msec cpu-clock # 127.501 CPUs utilized ... S3612-D0 128 169,733.05 msec cpu-clock # 127.398 CPUs utilized Signed-off-by: James Clark Acked-by: Namhyung Kim Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Tested-by: John Garry Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Mark Rutland Cc: Peter Zijlstra Cc: Thomas Richter Link: https://lore.kernel.org/r/20201126141328.6509-10-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 22 +++++++---------- tools/perf/tests/topology.c | 10 ++++---- tools/perf/util/cpumap.c | 44 +++++++++++++++++----------------- tools/perf/util/cpumap.h | 6 +---- tools/perf/util/stat-display.c | 8 +++---- tools/perf/util/stat.c | 2 +- 6 files changed, 41 insertions(+), 51 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index e51c5469d712d..6248baa0f6122 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1369,7 +1369,7 @@ static struct aggr_cpu_id perf_env__get_socket(struct perf_cpu_map *map, int idx struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); if (cpu != -1) - id.id = env->cpu[cpu].socket_id; + id.socket = env->cpu[cpu].socket_id; return id; } @@ -1382,18 +1382,16 @@ static struct aggr_cpu_id perf_env__get_die(struct perf_cpu_map *map, int idx, v if (cpu != -1) { /* - * Encode socket in bit range 15:8 - * die_id is relative to socket, - * we need a global id. So we combine - * socket + die id + * die_id is relative to socket, so start + * with the socket ID and then add die to + * make a unique ID. */ - if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n")) - return cpu_map__empty_aggr_cpu_id(); + id.socket = env->cpu[cpu].socket_id; if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n")) return cpu_map__empty_aggr_cpu_id(); - id.id = (env->cpu[cpu].socket_id << 8) | (env->cpu[cpu].die_id & 0xff); + id.id = env->cpu[cpu].die_id & 0xff; } return id; @@ -1407,23 +1405,19 @@ static struct aggr_cpu_id perf_env__get_core(struct perf_cpu_map *map, int idx, if (cpu != -1) { /* - * Encode socket in bit range 31:24 * encode die id in bit range 23:16 * core_id is relative to socket and die, * we need a global id. So we combine * socket + die id + core id */ - if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n")) - return cpu_map__empty_aggr_cpu_id(); - if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n")) return cpu_map__empty_aggr_cpu_id(); if (WARN_ONCE(env->cpu[cpu].core_id >> 16, "The core id number is too big.\n")) return cpu_map__empty_aggr_cpu_id(); - id.id = (env->cpu[cpu].socket_id << 24) | - (env->cpu[cpu].die_id << 16) | + id.socket = env->cpu[cpu].socket_id; + id.id = (env->cpu[cpu].die_id << 16) | (env->cpu[cpu].core_id & 0xffff); } diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index ee272d45a4459..777dd8291bcc7 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -114,8 +114,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) session->header.env.cpu[map->map[i]].core_id == cpu_map__id_to_cpu(id.id)); TEST_ASSERT_VAL("Core map - Socket ID doesn't match", - session->header.env.cpu[map->map[i]].socket_id == - cpu_map__id_to_socket(id.id)); + session->header.env.cpu[map->map[i]].socket_id == id.socket); TEST_ASSERT_VAL("Core map - Die ID doesn't match", session->header.env.cpu[map->map[i]].die_id == cpu_map__id_to_die(id.id)); @@ -126,8 +125,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) for (i = 0; i < map->nr; i++) { id = cpu_map__get_die(map, i, NULL); TEST_ASSERT_VAL("Die map - Socket ID doesn't match", - session->header.env.cpu[map->map[i]].socket_id == - cpu_map__id_to_socket(id.id << 16)); + session->header.env.cpu[map->map[i]].socket_id == id.socket); TEST_ASSERT_VAL("Die map - Die ID doesn't match", session->header.env.cpu[map->map[i]].die_id == @@ -140,9 +138,10 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) for (i = 0; i < map->nr; i++) { id = cpu_map__get_socket(map, i, NULL); TEST_ASSERT_VAL("Socket map - Socket ID doesn't match", - session->header.env.cpu[map->map[i]].socket_id == id.id); + session->header.env.cpu[map->map[i]].socket_id == id.socket); TEST_ASSERT_VAL("Socket map - Node ID is set", id.node == -1); + TEST_ASSERT_VAL("Socket map - ID is set", id.id == -1); } // Test that node ID contains only node @@ -151,6 +150,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) TEST_ASSERT_VAL("Node map - Node ID doesn't match", cpu__get_node(map->map[i]) == id.node); TEST_ASSERT_VAL("Node map - ID is set", id.id == -1); + TEST_ASSERT_VAL("Node map - Socket is set", id.socket == -1); } perf_session__delete(session); diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 5f9e98ddbe343..d2630f03f6827 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -139,7 +139,7 @@ struct aggr_cpu_id cpu_map__get_socket(struct perf_cpu_map *map, int idx, cpu = map->map[idx]; - id.id = cpu_map__get_socket_id(cpu); + id.socket = cpu_map__get_socket_id(cpu); return id; } @@ -150,8 +150,10 @@ static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer) if (a->id != b->id) return a->id - b->id; - else + else if (a->node != b->node) return a->node - b->node; + else + return a->socket - b->socket; } int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res, @@ -196,7 +198,7 @@ int cpu_map__get_die_id(int cpu) struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data) { - int cpu, s; + int cpu, die; struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); if (idx > map->nr) @@ -204,28 +206,24 @@ struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *dat cpu = map->map[idx]; - id.id = cpu_map__get_die_id(cpu); + die = cpu_map__get_die_id(cpu); /* There is no die_id on legacy system. */ - if (id.id == -1) - id.id = 0; - - s = cpu_map__get_socket(map, idx, data).id; - if (s == -1) - return cpu_map__empty_aggr_cpu_id(); + if (die == -1) + die = 0; /* - * Encode socket in bit range 15:8 - * die_id is relative to socket, and - * we need a global id. So we combine - * socket + die id + * die_id is relative to socket, so start + * with the socket ID and then add die to + * make a unique ID. */ - if (WARN_ONCE(id.id >> 8, "The die id number is too big.\n")) - return cpu_map__empty_aggr_cpu_id(); + id = cpu_map__get_socket(map, idx, data); + if (cpu_map__aggr_cpu_id_is_empty(id)) + return id; - if (WARN_ONCE(s >> 8, "The socket id number is too big.\n")) + if (WARN_ONCE(die >> 8, "The die id number is too big.\n")) return cpu_map__empty_aggr_cpu_id(); - id.id = (s << 8) | (id.id & 0xff); + id.id = (die & 0xff); return id; } @@ -258,7 +256,6 @@ struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *da return id; /* - * encode socket in bit range 31:24 * encode die id in bit range 23:16 * core_id is relative to socket and die, * we need a global id. So we combine @@ -624,20 +621,23 @@ const struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */ bool cpu_map__compare_aggr_cpu_id(struct aggr_cpu_id a, struct aggr_cpu_id b) { return a.id == b.id && - a.node == b.node; + a.node == b.node && + a.socket == b.socket; } bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id a) { return a.id == -1 && - a.node == -1; + a.node == -1 && + a.socket == -1; } struct aggr_cpu_id cpu_map__empty_aggr_cpu_id(void) { struct aggr_cpu_id ret = { .id = -1, - .node = -1 + .node = -1, + .socket = -1 }; return ret; } diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index f79e92603024e..0123ecc90694f 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -10,6 +10,7 @@ struct aggr_cpu_id { int id; int node; + int socket; }; struct cpu_aggr_map { @@ -48,11 +49,6 @@ static inline int cpu_map__socket(struct perf_cpu_map *sock, int s) return sock->map[s]; } -static inline int cpu_map__id_to_socket(int id) -{ - return id >> 24; -} - static inline int cpu_map__id_to_die(int id) { return (id >> 16) & 0xff; diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 7903922470260..5a756c88c124a 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -73,7 +73,7 @@ static void aggr_printout(struct perf_stat_config *config, switch (config->aggr_mode) { case AGGR_CORE: fprintf(config->output, "S%d-D%d-C%*d%s%*d%s", - cpu_map__id_to_socket(id.id), + id.socket, cpu_map__id_to_die(id.id), config->csv_output ? 0 : -8, cpu_map__id_to_cpu(id.id), @@ -84,7 +84,7 @@ static void aggr_printout(struct perf_stat_config *config, break; case AGGR_DIE: fprintf(config->output, "S%d-D%*d%s%*d%s", - cpu_map__id_to_socket(id.id << 16), + id.socket, config->csv_output ? 0 : -8, cpu_map__id_to_die(id.id << 16), config->csv_sep, @@ -95,7 +95,7 @@ static void aggr_printout(struct perf_stat_config *config, case AGGR_SOCKET: fprintf(config->output, "S%*d%s%*d%s", config->csv_output ? 0 : -5, - id.id, + id.socket, config->csv_sep, config->csv_output ? 0 : 4, nr, @@ -113,7 +113,7 @@ static void aggr_printout(struct perf_stat_config *config, case AGGR_NONE: if (evsel->percore && !config->percore_show_thread) { fprintf(config->output, "S%d-D%d-C%*d%s", - cpu_map__id_to_socket(id.id), + id.socket, cpu_map__id_to_die(id.id), config->csv_output ? 0 : -3, cpu_map__id_to_cpu(id.id), config->csv_sep); diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 1d5c9f98396b7..8ce1479c98f03 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -313,7 +313,7 @@ static int check_per_pkg(struct evsel *counter, if (!(vals->run && vals->ena)) return 0; - s = cpu_map__get_socket(cpus, cpu, NULL).id; + s = cpu_map__get_socket(cpus, cpu, NULL).socket; if (s < 0) return -1; -- GitLab From ba2ee166d92b201078cb941956547ab9828989d3 Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 26 Nov 2020 16:13:26 +0200 Subject: [PATCH 1169/1894] perf stat aggregation: Add separate die member Add die as a separate member so that it doesn't have to be packed into the int value. Signed-off-by: James Clark Acked-by: Namhyung Kim Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Tested-by: John Garry Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Mark Rutland Cc: Peter Zijlstra Cc: Thomas Richter Link: https://lore.kernel.org/r/20201126141328.6509-11-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 14 +++----------- tools/perf/tests/topology.c | 8 +++++--- tools/perf/util/cpumap.c | 28 ++++++++++++++-------------- tools/perf/util/cpumap.h | 6 +----- tools/perf/util/stat-display.c | 6 +++--- 5 files changed, 26 insertions(+), 36 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 6248baa0f6122..bac37fe9373c8 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1387,11 +1387,7 @@ static struct aggr_cpu_id perf_env__get_die(struct perf_cpu_map *map, int idx, v * make a unique ID. */ id.socket = env->cpu[cpu].socket_id; - - if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n")) - return cpu_map__empty_aggr_cpu_id(); - - id.id = env->cpu[cpu].die_id & 0xff; + id.die = env->cpu[cpu].die_id; } return id; @@ -1405,20 +1401,16 @@ static struct aggr_cpu_id perf_env__get_core(struct perf_cpu_map *map, int idx, if (cpu != -1) { /* - * encode die id in bit range 23:16 * core_id is relative to socket and die, * we need a global id. So we combine * socket + die id + core id */ - if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n")) - return cpu_map__empty_aggr_cpu_id(); - if (WARN_ONCE(env->cpu[cpu].core_id >> 16, "The core id number is too big.\n")) return cpu_map__empty_aggr_cpu_id(); id.socket = env->cpu[cpu].socket_id; - id.id = (env->cpu[cpu].die_id << 16) | - (env->cpu[cpu].core_id & 0xffff); + id.die = env->cpu[cpu].die_id; + id.id = env->cpu[cpu].core_id & 0xffff; } return id; diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index 777dd8291bcc7..e3f822890a847 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -117,7 +117,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) session->header.env.cpu[map->map[i]].socket_id == id.socket); TEST_ASSERT_VAL("Core map - Die ID doesn't match", - session->header.env.cpu[map->map[i]].die_id == cpu_map__id_to_die(id.id)); + session->header.env.cpu[map->map[i]].die_id == id.die); TEST_ASSERT_VAL("Core map - Node ID is set", id.node == -1); } @@ -128,10 +128,10 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) session->header.env.cpu[map->map[i]].socket_id == id.socket); TEST_ASSERT_VAL("Die map - Die ID doesn't match", - session->header.env.cpu[map->map[i]].die_id == - cpu_map__id_to_die(id.id << 16)); + session->header.env.cpu[map->map[i]].die_id == id.die); TEST_ASSERT_VAL("Die map - Node ID is set", id.node == -1); + TEST_ASSERT_VAL("Die map - ID is set", id.id == -1); } // Test that socket ID contains only socket @@ -141,6 +141,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) session->header.env.cpu[map->map[i]].socket_id == id.socket); TEST_ASSERT_VAL("Socket map - Node ID is set", id.node == -1); + TEST_ASSERT_VAL("Socket map - Die ID is set", id.die == -1); TEST_ASSERT_VAL("Socket map - ID is set", id.id == -1); } @@ -151,6 +152,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) cpu__get_node(map->map[i]) == id.node); TEST_ASSERT_VAL("Node map - ID is set", id.id == -1); TEST_ASSERT_VAL("Node map - Socket is set", id.socket == -1); + TEST_ASSERT_VAL("Node map - Die ID is set", id.die == -1); } perf_session__delete(session); diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index d2630f03f6827..10a52058d8383 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -152,8 +152,10 @@ static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer) return a->id - b->id; else if (a->node != b->node) return a->node - b->node; - else + else if (a->socket != b->socket) return a->socket - b->socket; + else + return a->die - b->die; } int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res, @@ -220,10 +222,7 @@ struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *dat if (cpu_map__aggr_cpu_id_is_empty(id)) return id; - if (WARN_ONCE(die >> 8, "The die id number is too big.\n")) - return cpu_map__empty_aggr_cpu_id(); - - id.id = (die & 0xff); + id.die = die; return id; } @@ -250,21 +249,19 @@ struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *da cpu = cpu_map__get_core_id(cpu); - /* cpu_map__get_die returns the combination of socket + die id */ + /* cpu_map__get_die returns a struct with socket and die set*/ id = cpu_map__get_die(map, idx, data); if (cpu_map__aggr_cpu_id_is_empty(id)) return id; /* - * encode die id in bit range 23:16 - * core_id is relative to socket and die, - * we need a global id. So we combine - * socket + die id + core id + * core_id is relative to socket and die, we need a global id. + * So we combine the result from cpu_map__get_die with the core id */ if (WARN_ONCE(cpu >> 16, "The core id number is too big.\n")) return cpu_map__empty_aggr_cpu_id(); - id.id = (id.id << 16) | (cpu & 0xffff); + id.id = (cpu & 0xffff); return id; } @@ -622,14 +619,16 @@ bool cpu_map__compare_aggr_cpu_id(struct aggr_cpu_id a, struct aggr_cpu_id b) { return a.id == b.id && a.node == b.node && - a.socket == b.socket; + a.socket == b.socket && + a.die == b.die; } bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id a) { return a.id == -1 && a.node == -1 && - a.socket == -1; + a.socket == -1 && + a.die == -1; } struct aggr_cpu_id cpu_map__empty_aggr_cpu_id(void) @@ -637,7 +636,8 @@ struct aggr_cpu_id cpu_map__empty_aggr_cpu_id(void) struct aggr_cpu_id ret = { .id = -1, .node = -1, - .socket = -1 + .socket = -1, + .die = -1 }; return ret; } diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 0123ecc90694f..51bbe1eca3f40 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -11,6 +11,7 @@ struct aggr_cpu_id { int id; int node; int socket; + int die; }; struct cpu_aggr_map { @@ -49,11 +50,6 @@ static inline int cpu_map__socket(struct perf_cpu_map *sock, int s) return sock->map[s]; } -static inline int cpu_map__id_to_die(int id) -{ - return (id >> 16) & 0xff; -} - static inline int cpu_map__id_to_cpu(int id) { return id & 0xffff; diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 5a756c88c124a..dcce753f351dc 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -74,7 +74,7 @@ static void aggr_printout(struct perf_stat_config *config, case AGGR_CORE: fprintf(config->output, "S%d-D%d-C%*d%s%*d%s", id.socket, - cpu_map__id_to_die(id.id), + id.die, config->csv_output ? 0 : -8, cpu_map__id_to_cpu(id.id), config->csv_sep, @@ -86,7 +86,7 @@ static void aggr_printout(struct perf_stat_config *config, fprintf(config->output, "S%d-D%*d%s%*d%s", id.socket, config->csv_output ? 0 : -8, - cpu_map__id_to_die(id.id << 16), + id.die, config->csv_sep, config->csv_output ? 0 : 4, nr, @@ -114,7 +114,7 @@ static void aggr_printout(struct perf_stat_config *config, if (evsel->percore && !config->percore_show_thread) { fprintf(config->output, "S%d-D%d-C%*d%s", id.socket, - cpu_map__id_to_die(id.id), + id.die, config->csv_output ? 0 : -3, cpu_map__id_to_cpu(id.id), config->csv_sep); } else if (id.id > -1) { -- GitLab From b993381779da406ca9ca0ae1e1b3968e9075ce77 Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 26 Nov 2020 16:13:27 +0200 Subject: [PATCH 1170/1894] perf stat aggregation: Add separate core member Add core as a separate member so that it doesn't have to be packed into the int value. Signed-off-by: James Clark Acked-by: Namhyung Kim Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Tested-by: John Garry Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Mark Rutland Cc: Peter Zijlstra Cc: Thomas Richter Link: https://lore.kernel.org/r/20201126141328.6509-12-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 9 +++------ tools/perf/tests/topology.c | 6 +++++- tools/perf/util/cpumap.c | 18 ++++++++++-------- tools/perf/util/cpumap.h | 6 +----- tools/perf/util/stat-display.c | 16 ++++++++-------- 5 files changed, 27 insertions(+), 28 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index bac37fe9373c8..8cc24967bc273 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1402,15 +1402,12 @@ static struct aggr_cpu_id perf_env__get_core(struct perf_cpu_map *map, int idx, if (cpu != -1) { /* * core_id is relative to socket and die, - * we need a global id. So we combine - * socket + die id + core id + * we need a global id. So we set + * socket, die id and core id */ - if (WARN_ONCE(env->cpu[cpu].core_id >> 16, "The core id number is too big.\n")) - return cpu_map__empty_aggr_cpu_id(); - id.socket = env->cpu[cpu].socket_id; id.die = env->cpu[cpu].die_id; - id.id = env->cpu[cpu].core_id & 0xffff; + id.core = env->cpu[cpu].core_id; } return id; diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index e3f822890a847..a6e289f2c6dbd 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -111,7 +111,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) for (i = 0; i < map->nr; i++) { id = cpu_map__get_core(map, i, NULL); TEST_ASSERT_VAL("Core map - Core ID doesn't match", - session->header.env.cpu[map->map[i]].core_id == cpu_map__id_to_cpu(id.id)); + session->header.env.cpu[map->map[i]].core_id == id.core); TEST_ASSERT_VAL("Core map - Socket ID doesn't match", session->header.env.cpu[map->map[i]].socket_id == id.socket); @@ -119,6 +119,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) TEST_ASSERT_VAL("Core map - Die ID doesn't match", session->header.env.cpu[map->map[i]].die_id == id.die); TEST_ASSERT_VAL("Core map - Node ID is set", id.node == -1); + TEST_ASSERT_VAL("Core map - ID is set", id.id == -1); } // Test that die ID contains socket and die @@ -132,6 +133,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) TEST_ASSERT_VAL("Die map - Node ID is set", id.node == -1); TEST_ASSERT_VAL("Die map - ID is set", id.id == -1); + TEST_ASSERT_VAL("Die map - Core is set", id.core == -1); } // Test that socket ID contains only socket @@ -143,6 +145,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) TEST_ASSERT_VAL("Socket map - Node ID is set", id.node == -1); TEST_ASSERT_VAL("Socket map - Die ID is set", id.die == -1); TEST_ASSERT_VAL("Socket map - ID is set", id.id == -1); + TEST_ASSERT_VAL("Socket map - Core is set", id.core == -1); } // Test that node ID contains only node @@ -153,6 +156,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) TEST_ASSERT_VAL("Node map - ID is set", id.id == -1); TEST_ASSERT_VAL("Node map - Socket is set", id.socket == -1); TEST_ASSERT_VAL("Node map - Die ID is set", id.die == -1); + TEST_ASSERT_VAL("Node map - Core is set", id.core == -1); } perf_session__delete(session); diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 10a52058d8383..d164f7bd1ac71 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -154,8 +154,10 @@ static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer) return a->node - b->node; else if (a->socket != b->socket) return a->socket - b->socket; - else + else if (a->die != b->die) return a->die - b->die; + else + return a->core - b->core; } int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res, @@ -258,10 +260,7 @@ struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *da * core_id is relative to socket and die, we need a global id. * So we combine the result from cpu_map__get_die with the core id */ - if (WARN_ONCE(cpu >> 16, "The core id number is too big.\n")) - return cpu_map__empty_aggr_cpu_id(); - - id.id = (cpu & 0xffff); + id.core = cpu; return id; } @@ -620,7 +619,8 @@ bool cpu_map__compare_aggr_cpu_id(struct aggr_cpu_id a, struct aggr_cpu_id b) return a.id == b.id && a.node == b.node && a.socket == b.socket && - a.die == b.die; + a.die == b.die && + a.core == b.core; } bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id a) @@ -628,7 +628,8 @@ bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id a) return a.id == -1 && a.node == -1 && a.socket == -1 && - a.die == -1; + a.die == -1 && + a.core == -1; } struct aggr_cpu_id cpu_map__empty_aggr_cpu_id(void) @@ -637,7 +638,8 @@ struct aggr_cpu_id cpu_map__empty_aggr_cpu_id(void) .id = -1, .node = -1, .socket = -1, - .die = -1 + .die = -1, + .core = -1 }; return ret; } diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 51bbe1eca3f40..1bb8f7d472065 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -12,6 +12,7 @@ struct aggr_cpu_id { int node; int socket; int die; + int core; }; struct cpu_aggr_map { @@ -50,11 +51,6 @@ static inline int cpu_map__socket(struct perf_cpu_map *sock, int s) return sock->map[s]; } -static inline int cpu_map__id_to_cpu(int id) -{ - return id & 0xffff; -} - int cpu__setup_cpunode_map(void); int cpu__max_node(void); diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index dcce753f351dc..2b3842f6080d8 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -76,7 +76,7 @@ static void aggr_printout(struct perf_stat_config *config, id.socket, id.die, config->csv_output ? 0 : -8, - cpu_map__id_to_cpu(id.id), + id.core, config->csv_sep, config->csv_output ? 0 : 4, nr, @@ -116,11 +116,11 @@ static void aggr_printout(struct perf_stat_config *config, id.socket, id.die, config->csv_output ? 0 : -3, - cpu_map__id_to_cpu(id.id), config->csv_sep); - } else if (id.id > -1) { + id.core, config->csv_sep); + } else if (id.core > -1) { fprintf(config->output, "CPU%*d%s", config->csv_output ? 0 : -7, - evsel__cpus(evsel)->map[id.id], + evsel__cpus(evsel)->map[id.core], config->csv_sep); } break; @@ -326,7 +326,7 @@ static int first_shadow_cpu(struct perf_stat_config *config, int i; if (config->aggr_mode == AGGR_NONE) - return id.id; + return id.core; if (!config->aggr_get_id) return 0; @@ -658,7 +658,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config, uval = val * counter->scale; if (cpu != -1) { id = cpu_map__empty_aggr_cpu_id(); - id.id = cpu; + id.core = cpu; } printout(config, id, nr, counter, uval, prefix, run, ena, 1.0, &rt_stat); @@ -871,7 +871,7 @@ static void print_counter(struct perf_stat_config *config, uval = val * counter->scale; id = cpu_map__empty_aggr_cpu_id(); - id.id = cpu; + id.core = cpu; printout(config, id, 0, counter, uval, prefix, run, ena, 1.0, &rt_stat); @@ -898,7 +898,7 @@ static void print_no_aggr_metric(struct perf_stat_config *config, fputs(prefix, config->output); evlist__for_each_entry(evlist, counter) { id = cpu_map__empty_aggr_cpu_id(); - id.id = cpu; + id.core = cpu; if (first) { aggr_printout(config, counter, id, 0); first = false; -- GitLab From 8d4852b468c38168c4e1e1652602b4a6c6c080b3 Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 26 Nov 2020 16:13:28 +0200 Subject: [PATCH 1171/1894] perf stat aggregation: Add separate thread member A separate field isn't strictly required. The core field could be re-used for thread IDs as a single field was used previously. But separating them will avoid confusion and catch potential errors where core IDs are read as thread IDs and vice versa. Also remove the placeholder id field which is now no longer used. Signed-off-by: James Clark Acked-by: Namhyung Kim Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Tested-by: John Garry Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Mark Rutland Cc: Peter Zijlstra Cc: Thomas Richter Link: https://lore.kernel.org/r/20201126141328.6509-13-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/topology.c | 8 ++++---- tools/perf/util/cpumap.c | 14 +++++++------- tools/perf/util/cpumap.h | 2 +- tools/perf/util/stat-display.c | 8 ++++---- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index a6e289f2c6dbd..74748ed75b2c6 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -119,7 +119,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) TEST_ASSERT_VAL("Core map - Die ID doesn't match", session->header.env.cpu[map->map[i]].die_id == id.die); TEST_ASSERT_VAL("Core map - Node ID is set", id.node == -1); - TEST_ASSERT_VAL("Core map - ID is set", id.id == -1); + TEST_ASSERT_VAL("Core map - Thread is set", id.thread == -1); } // Test that die ID contains socket and die @@ -132,8 +132,8 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) session->header.env.cpu[map->map[i]].die_id == id.die); TEST_ASSERT_VAL("Die map - Node ID is set", id.node == -1); - TEST_ASSERT_VAL("Die map - ID is set", id.id == -1); TEST_ASSERT_VAL("Die map - Core is set", id.core == -1); + TEST_ASSERT_VAL("Die map - Thread is set", id.thread == -1); } // Test that socket ID contains only socket @@ -144,8 +144,8 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) TEST_ASSERT_VAL("Socket map - Node ID is set", id.node == -1); TEST_ASSERT_VAL("Socket map - Die ID is set", id.die == -1); - TEST_ASSERT_VAL("Socket map - ID is set", id.id == -1); TEST_ASSERT_VAL("Socket map - Core is set", id.core == -1); + TEST_ASSERT_VAL("Socket map - Thread is set", id.thread == -1); } // Test that node ID contains only node @@ -153,10 +153,10 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) id = cpu_map__get_node(map, i, NULL); TEST_ASSERT_VAL("Node map - Node ID doesn't match", cpu__get_node(map->map[i]) == id.node); - TEST_ASSERT_VAL("Node map - ID is set", id.id == -1); TEST_ASSERT_VAL("Node map - Socket is set", id.socket == -1); TEST_ASSERT_VAL("Node map - Die ID is set", id.die == -1); TEST_ASSERT_VAL("Node map - Core is set", id.core == -1); + TEST_ASSERT_VAL("Node map - Thread is set", id.thread == -1); } perf_session__delete(session); diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index d164f7bd1ac71..87d3eca9b872d 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -148,16 +148,16 @@ static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer) struct aggr_cpu_id *a = (struct aggr_cpu_id *)a_pointer; struct aggr_cpu_id *b = (struct aggr_cpu_id *)b_pointer; - if (a->id != b->id) - return a->id - b->id; - else if (a->node != b->node) + if (a->node != b->node) return a->node - b->node; else if (a->socket != b->socket) return a->socket - b->socket; else if (a->die != b->die) return a->die - b->die; - else + else if (a->core != b->core) return a->core - b->core; + else + return a->thread - b->thread; } int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res, @@ -616,7 +616,7 @@ const struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */ bool cpu_map__compare_aggr_cpu_id(struct aggr_cpu_id a, struct aggr_cpu_id b) { - return a.id == b.id && + return a.thread == b.thread && a.node == b.node && a.socket == b.socket && a.die == b.die && @@ -625,7 +625,7 @@ bool cpu_map__compare_aggr_cpu_id(struct aggr_cpu_id a, struct aggr_cpu_id b) bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id a) { - return a.id == -1 && + return a.thread == -1 && a.node == -1 && a.socket == -1 && a.die == -1 && @@ -635,7 +635,7 @@ bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id a) struct aggr_cpu_id cpu_map__empty_aggr_cpu_id(void) { struct aggr_cpu_id ret = { - .id = -1, + .thread = -1, .node = -1, .socket = -1, .die = -1, diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 1bb8f7d472065..a27eeaf086e8c 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -8,7 +8,7 @@ #include struct aggr_cpu_id { - int id; + int thread; int node; int socket; int die; diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 2b3842f6080d8..583ae4f09c5d1 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -127,9 +127,9 @@ static void aggr_printout(struct perf_stat_config *config, case AGGR_THREAD: fprintf(config->output, "%*s-%*d%s", config->csv_output ? 0 : 16, - perf_thread_map__comm(evsel->core.threads, id.id), + perf_thread_map__comm(evsel->core.threads, id.thread), config->csv_output ? 0 : -8, - perf_thread_map__pid(evsel->core.threads, id.id), + perf_thread_map__pid(evsel->core.threads, id.thread), config->csv_sep); break; case AGGR_GLOBAL: @@ -740,7 +740,7 @@ static struct perf_aggr_thread_value *sort_aggr_thread( buf[i].counter = counter; buf[i].id = cpu_map__empty_aggr_cpu_id(); - buf[i].id.id = thread; + buf[i].id.thread = thread; buf[i].uval = uval; buf[i].val = val; buf[i].run = run; @@ -781,7 +781,7 @@ static void print_aggr_thread(struct perf_stat_config *config, if (config->stats) printout(config, id, 0, buf[thread].counter, buf[thread].uval, prefix, buf[thread].run, buf[thread].ena, 1.0, - &config->stats[id.id]); + &config->stats[id.thread]); else printout(config, id, 0, buf[thread].counter, buf[thread].uval, prefix, buf[thread].run, buf[thread].ena, 1.0, -- GitLab From 5149303fdfe5c67ddb51c911e23262f781cd75eb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 24 Dec 2020 10:52:10 -0300 Subject: [PATCH 1172/1894] perf probe: Fix memory leak when synthesizing SDT probes The argv_split() function must be paired with argv_free(), else we must keep a reference to the argv array received or do the freeing ourselves, in synthesize_sdt_probe_command() we were simply leaking that argv[] array. Fixes: 3b1f8311f6963cd1 ("perf probe: Add sdt probes arguments into the uprobe cmd string") Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexandre Truong Cc: Alexis Berlemont Cc: He Zhe Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sumanth Korikkar Cc: Thomas Richter Cc: Will Deacon Link: https://lore.kernel.org/r/20201224135139.GF477817@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-file.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index 064b63a6a3f31..bbecb449ea944 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -791,7 +791,7 @@ static char *synthesize_sdt_probe_command(struct sdt_note *note, const char *sdtgrp) { struct strbuf buf; - char *ret = NULL, **args; + char *ret = NULL; int i, args_count, err; unsigned long long ref_ctr_offset; @@ -813,12 +813,19 @@ static char *synthesize_sdt_probe_command(struct sdt_note *note, goto out; if (note->args) { - args = argv_split(note->args, &args_count); + char **args = argv_split(note->args, &args_count); + + if (args == NULL) + goto error; for (i = 0; i < args_count; ++i) { - if (synthesize_sdt_probe_arg(&buf, i, args[i]) < 0) + if (synthesize_sdt_probe_arg(&buf, i, args[i]) < 0) { + argv_free(args); goto error; + } } + + argv_free(args); } out: -- GitLab From 6268d7da4d192af339f4d688942b9ccb45a65e04 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 18 Dec 2020 18:41:41 -0800 Subject: [PATCH 1173/1894] device-dax: Fix range release There are multiple locations that open-code the release of the last range in a device-dax instance. Consolidate this into a new dev_dax_trim_range() helper. This also addresses a kmemleak report: # cat /sys/kernel/debug/kmemleak [..] unreferenced object 0xffff976bd46f6240 (size 64): comm "ndctl", pid 23556, jiffies 4299514316 (age 5406.733s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 20 c3 37 00 00 00 .......... .7... ff ff ff 7f 38 00 00 00 00 00 00 00 00 00 00 00 ....8........... backtrace: [<00000000064003cf>] __kmalloc_track_caller+0x136/0x379 [<00000000d85e3c52>] krealloc+0x67/0x92 [<00000000d7d3ba8a>] __alloc_dev_dax_range+0x73/0x25c [<0000000027d58626>] devm_create_dev_dax+0x27d/0x416 [<00000000434abd43>] __dax_pmem_probe+0x1c9/0x1000 [dax_pmem_core] [<0000000083726c1c>] dax_pmem_probe+0x10/0x1f [dax_pmem] [<00000000b5f2319c>] nvdimm_bus_probe+0x9d/0x340 [libnvdimm] [<00000000c055e544>] really_probe+0x230/0x48d [<000000006cabd38e>] driver_probe_device+0x122/0x13b [<0000000029c7b95a>] device_driver_attach+0x5b/0x60 [<0000000053e5659b>] bind_store+0xb7/0xc3 [<00000000d3bdaadc>] drv_attr_store+0x27/0x31 [<00000000949069c5>] sysfs_kf_write+0x4a/0x57 [<000000004a8b5adf>] kernfs_fop_write+0x150/0x1e5 [<00000000bded60f0>] __vfs_write+0x1b/0x34 [<00000000b92900f0>] vfs_write+0xd8/0x1d1 Reported-by: Jane Chu Cc: Zhen Lei Link: https://lore.kernel.org/r/160834570161.1791850.14911670304441510419.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/dax/bus.c | 44 +++++++++++++++++++++----------------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c index 9761cb40d4bbe..720cd140209f2 100644 --- a/drivers/dax/bus.c +++ b/drivers/dax/bus.c @@ -367,19 +367,28 @@ void kill_dev_dax(struct dev_dax *dev_dax) } EXPORT_SYMBOL_GPL(kill_dev_dax); -static void free_dev_dax_ranges(struct dev_dax *dev_dax) +static void trim_dev_dax_range(struct dev_dax *dev_dax) { + int i = dev_dax->nr_range - 1; + struct range *range = &dev_dax->ranges[i].range; struct dax_region *dax_region = dev_dax->region; - int i; device_lock_assert(dax_region->dev); - for (i = 0; i < dev_dax->nr_range; i++) { - struct range *range = &dev_dax->ranges[i].range; - - __release_region(&dax_region->res, range->start, - range_len(range)); + dev_dbg(&dev_dax->dev, "delete range[%d]: %#llx:%#llx\n", i, + (unsigned long long)range->start, + (unsigned long long)range->end); + + __release_region(&dax_region->res, range->start, range_len(range)); + if (--dev_dax->nr_range == 0) { + kfree(dev_dax->ranges); + dev_dax->ranges = NULL; } - dev_dax->nr_range = 0; +} + +static void free_dev_dax_ranges(struct dev_dax *dev_dax) +{ + while (dev_dax->nr_range) + trim_dev_dax_range(dev_dax); } static void unregister_dev_dax(void *dev) @@ -804,15 +813,10 @@ static int alloc_dev_dax_range(struct dev_dax *dev_dax, u64 start, return 0; rc = devm_register_dax_mapping(dev_dax, dev_dax->nr_range - 1); - if (rc) { - dev_dbg(dev, "delete range[%d]: %pa:%pa\n", dev_dax->nr_range - 1, - &alloc->start, &alloc->end); - dev_dax->nr_range--; - __release_region(res, alloc->start, resource_size(alloc)); - return rc; - } + if (rc) + trim_dev_dax_range(dev_dax); - return 0; + return rc; } static int adjust_dev_dax_range(struct dev_dax *dev_dax, struct resource *res, resource_size_t size) @@ -885,12 +889,7 @@ static int dev_dax_shrink(struct dev_dax *dev_dax, resource_size_t size) if (shrink >= range_len(range)) { devm_release_action(dax_region->dev, unregister_dax_mapping, &mapping->dev); - __release_region(&dax_region->res, range->start, - range_len(range)); - dev_dax->nr_range--; - dev_dbg(dev, "delete range[%d]: %#llx:%#llx\n", i, - (unsigned long long) range->start, - (unsigned long long) range->end); + trim_dev_dax_range(dev_dax); to_shrink -= shrink; if (!to_shrink) break; @@ -1267,7 +1266,6 @@ static void dev_dax_release(struct device *dev) put_dax(dax_dev); free_dev_dax_id(dev_dax); dax_region_put(dax_region); - kfree(dev_dax->ranges); kfree(dev_dax->pgmap); kfree(dev_dax); } -- GitLab From ff8da37d3d8d438ded5a4841d979899269b94d0d Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Sat, 19 Dec 2020 16:18:40 +0800 Subject: [PATCH 1174/1894] device-dax: Avoid an unnecessary check in alloc_dev_dax_range() Swap the calling sequence of krealloc() and __request_region(), call the latter first. In this way, the value of dev_dax->nr_range does not need to be considered when __request_region() failed. Signed-off-by: Zhen Lei Link: https://lore.kernel.org/r/20201219081840.1149-2-thunder.leizhen@huawei.com Signed-off-by: Dan Williams --- drivers/dax/bus.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c index 720cd140209f2..737b207c9e30d 100644 --- a/drivers/dax/bus.c +++ b/drivers/dax/bus.c @@ -772,22 +772,14 @@ static int alloc_dev_dax_range(struct dev_dax *dev_dax, u64 start, return 0; } - ranges = krealloc(dev_dax->ranges, sizeof(*ranges) - * (dev_dax->nr_range + 1), GFP_KERNEL); - if (!ranges) + alloc = __request_region(res, start, size, dev_name(dev), 0); + if (!alloc) return -ENOMEM; - alloc = __request_region(res, start, size, dev_name(dev), 0); - if (!alloc) { - /* - * If this was an empty set of ranges nothing else - * will release @ranges, so do it now. - */ - if (!dev_dax->nr_range) { - kfree(ranges); - ranges = NULL; - } - dev_dax->ranges = ranges; + ranges = krealloc(dev_dax->ranges, sizeof(*ranges) + * (dev_dax->nr_range + 1), GFP_KERNEL); + if (!ranges) { + __release_region(res, alloc->start, resource_size(alloc)); return -ENOMEM; } -- GitLab From 11cc92eb747aace5aa2b54b65b5cb8325a8981de Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 25 Dec 2020 22:30:58 +1100 Subject: [PATCH 1175/1894] genirq: Fix export of irq_to_desc() for powerpc KVM Commit 64a1b95bb9fe ("genirq: Restrict export of irq_to_desc()") removed the export of irq_to_desc() unless powerpc KVM is being built, because there is still a use of irq_to_desc() in modular code there. However it used: #ifdef CONFIG_KVM_BOOK3S_64_HV Which doesn't work when that symbol is =m, leading to a build failure: ERROR: modpost: "irq_to_desc" [arch/powerpc/kvm/kvm-hv.ko] undefined! Fix it by checking for the definedness of the correct symbol which is CONFIG_KVM_BOOK3S_64_HV_MODULE. Fixes: 64a1b95bb9fe ("genirq: Restrict export of irq_to_desc()") Signed-off-by: Michael Ellerman Signed-off-by: Linus Torvalds --- kernel/irq/irqdesc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 3d0bc38a0bcfc..cc1a09406c6e4 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -352,7 +352,7 @@ struct irq_desc *irq_to_desc(unsigned int irq) { return radix_tree_lookup(&irq_desc_tree, irq); } -#ifdef CONFIG_KVM_BOOK3S_64_HV +#ifdef CONFIG_KVM_BOOK3S_64_HV_MODULE EXPORT_SYMBOL_GPL(irq_to_desc); #endif -- GitLab From 61d791365b72a89062fbbea69aa61479476da946 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 25 Dec 2020 15:41:13 -0800 Subject: [PATCH 1176/1894] drm/amd/display: avoid uninitialized variable warning clang (quite rightly) complains fairly loudly about the newly added mpc1_get_mpc_out_mux() function returning an uninitialized value if the 'opp_id' checks don't pass. This may not happen in practice, but the code really shouldn't return garbage if the sanity checks don't pass. So just initialize 'val' to zero to avoid the issue. Fixes: 110b055b2827 ("drm/amd/display: add getter routine to retrieve mpcc mux") Cc: Josip Pavic Cc: Bindu Ramamurthy Cc: Alex Deucher Signed-off-by: Linus Torvalds --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c index a46cb20596fe8..100ce0e28fd5a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c @@ -470,7 +470,7 @@ void mpc1_cursor_lock(struct mpc *mpc, int opp_id, bool lock) unsigned int mpc1_get_mpc_out_mux(struct mpc *mpc, int opp_id) { struct dcn10_mpc *mpc10 = TO_DCN10_MPC(mpc); - uint32_t val; + uint32_t val = 0; if (opp_id < MAX_OPP && REG(MUX[opp_id])) REG_GET(MUX[opp_id], MPC_OUT_MUX, &val); -- GitLab From 275e88b06a277ccf89d9c471a777e9b4f8c552b0 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 18 Dec 2020 08:39:05 -0600 Subject: [PATCH 1177/1894] PCI: tegra: Fix host link initialization Commit b9ac0f9dc8ea ("PCI: dwc: Move dw_pcie_setup_rc() to DWC common code") broke enumeration of downstream devices on Tegra: In non-working case (next-20201211): 0001:00:00.0 PCI bridge: NVIDIA Corporation Device 1ad2 (rev a1) 0001:01:00.0 SATA controller: Marvell Technology Group Ltd. Device 9171 (rev 13) 0005:00:00.0 PCI bridge: NVIDIA Corporation Device 1ad0 (rev a1) In working case (v5.10-rc7): 0001:00:00.0 PCI bridge: Molex Incorporated Device 1ad2 (rev a1) 0001:01:00.0 SATA controller: Marvell Technology Group Ltd. Device 9171 (rev 13) 0005:00:00.0 PCI bridge: Molex Incorporated Device 1ad0 (rev a1) 0005:01:00.0 PCI bridge: PLX Technology, Inc. Device 3380 (rev ab) 0005:02:02.0 PCI bridge: PLX Technology, Inc. Device 3380 (rev ab) 0005:03:00.0 USB controller: PLX Technology, Inc. Device 3380 (rev ab) The problem seems to be dw_pcie_setup_rc() is now called twice before and after the link up handling. The fix is to move Tegra's link up handling to .start_link() function like other DWC drivers. Tegra is a bit more complicated than others as it re-inits the whole DWC controller to retry the link. With this, the initialization ordering is restored to match the prior sequence. Fixes: b9ac0f9dc8ea ("PCI: dwc: Move dw_pcie_setup_rc() to DWC common code") Link: https://lore.kernel.org/r/20201218143905.1614098-1-robh@kernel.org Reported-by: Mian Yousaf Kaukab Tested-by: Mian Yousaf Kaukab Signed-off-by: Rob Herring Signed-off-by: Bjorn Helgaas Cc: Lorenzo Pieralisi Cc: Thierry Reding Cc: Jonathan Hunter Cc: Vidya Sagar --- drivers/pci/controller/dwc/pcie-tegra194.c | 55 ++++++++++++---------- 1 file changed, 29 insertions(+), 26 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-tegra194.c b/drivers/pci/controller/dwc/pcie-tegra194.c index 5597b2a495989..6fa216e52d142 100644 --- a/drivers/pci/controller/dwc/pcie-tegra194.c +++ b/drivers/pci/controller/dwc/pcie-tegra194.c @@ -853,12 +853,14 @@ static void config_gen3_gen4_eq_presets(struct tegra_pcie_dw *pcie) dw_pcie_writel_dbi(pci, GEN3_RELATED_OFF, val); } -static void tegra_pcie_prepare_host(struct pcie_port *pp) +static int tegra_pcie_dw_host_init(struct pcie_port *pp) { struct dw_pcie *pci = to_dw_pcie_from_pp(pp); struct tegra_pcie_dw *pcie = to_tegra_pcie(pci); u32 val; + pp->bridge->ops = &tegra_pci_ops; + if (!pcie->pcie_cap_base) pcie->pcie_cap_base = dw_pcie_find_capability(&pcie->pci, PCI_CAP_ID_EXP); @@ -907,10 +909,24 @@ static void tegra_pcie_prepare_host(struct pcie_port *pp) dw_pcie_writel_dbi(pci, CFG_TIMER_CTRL_MAX_FUNC_NUM_OFF, val); } - dw_pcie_setup_rc(pp); - clk_set_rate(pcie->core_clk, GEN4_CORE_CLK_FREQ); + return 0; +} + +static int tegra_pcie_dw_start_link(struct dw_pcie *pci) +{ + u32 val, offset, speed, tmp; + struct tegra_pcie_dw *pcie = to_tegra_pcie(pci); + struct pcie_port *pp = &pci->pp; + bool retry = true; + + if (pcie->mode == DW_PCIE_EP_TYPE) { + enable_irq(pcie->pex_rst_irq); + return 0; + } + +retry_link: /* Assert RST */ val = appl_readl(pcie, APPL_PINMUX); val &= ~APPL_PINMUX_PEX_RST; @@ -929,19 +945,10 @@ static void tegra_pcie_prepare_host(struct pcie_port *pp) appl_writel(pcie, val, APPL_PINMUX); msleep(100); -} - -static int tegra_pcie_dw_host_init(struct pcie_port *pp) -{ - struct dw_pcie *pci = to_dw_pcie_from_pp(pp); - struct tegra_pcie_dw *pcie = to_tegra_pcie(pci); - u32 val, tmp, offset, speed; - - pp->bridge->ops = &tegra_pci_ops; - - tegra_pcie_prepare_host(pp); if (dw_pcie_wait_for_link(pci)) { + if (!retry) + return 0; /* * There are some endpoints which can't get the link up if * root port has Data Link Feature (DLF) enabled. @@ -975,10 +982,11 @@ static int tegra_pcie_dw_host_init(struct pcie_port *pp) val &= ~PCI_DLF_EXCHANGE_ENABLE; dw_pcie_writel_dbi(pci, offset, val); - tegra_pcie_prepare_host(pp); + tegra_pcie_dw_host_init(pp); + dw_pcie_setup_rc(pp); - if (dw_pcie_wait_for_link(pci)) - return 0; + retry = false; + goto retry_link; } speed = dw_pcie_readw_dbi(pci, pcie->pcie_cap_base + PCI_EXP_LNKSTA) & @@ -998,15 +1006,6 @@ static int tegra_pcie_dw_link_up(struct dw_pcie *pci) return !!(val & PCI_EXP_LNKSTA_DLLLA); } -static int tegra_pcie_dw_start_link(struct dw_pcie *pci) -{ - struct tegra_pcie_dw *pcie = to_tegra_pcie(pci); - - enable_irq(pcie->pex_rst_irq); - - return 0; -} - static void tegra_pcie_dw_stop_link(struct dw_pcie *pci) { struct tegra_pcie_dw *pcie = to_tegra_pcie(pci); @@ -2215,6 +2214,10 @@ static int tegra_pcie_dw_resume_noirq(struct device *dev) goto fail_host_init; } + ret = tegra_pcie_dw_start_link(&pcie->pci); + if (ret < 0) + goto fail_host_init; + /* Restore MSI interrupt vector */ dw_pcie_writel_dbi(&pcie->pci, PORT_LOGIC_MSI_CTRL_INT_0_EN, pcie->msi_ctrl_int); -- GitLab From 99e629f14b471d852d28ecf554093c4730ed0927 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 22 Dec 2020 15:07:43 +0000 Subject: [PATCH 1178/1894] PCI: dwc: Fix inverted condition of DMA mask setup warning Commit 660c486590aa ("PCI: dwc: Set 32-bit DMA mask for MSI target address allocation") added dma_mask_set() call to explicitly set 32-bit DMA mask for MSI message mapping, but for now it throws a warning on ret == 0, while dma_set_mask() returns 0 in case of success. Fix this by inverting the condition. [bhelgaas: join string to make it greppable] Fixes: 660c486590aa ("PCI: dwc: Set 32-bit DMA mask for MSI target address allocation") Link: https://lore.kernel.org/r/20201222150708.67983-1-alobakin@pm.me Signed-off-by: Alexander Lobakin Signed-off-by: Bjorn Helgaas --- drivers/pci/controller/dwc/pcie-designware-host.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c index 516b151e0ef32..8a84c005f32bd 100644 --- a/drivers/pci/controller/dwc/pcie-designware-host.c +++ b/drivers/pci/controller/dwc/pcie-designware-host.c @@ -397,12 +397,8 @@ int dw_pcie_host_init(struct pcie_port *pp) pp); ret = dma_set_mask(pci->dev, DMA_BIT_MASK(32)); - if (!ret) { - dev_warn(pci->dev, - "Failed to set DMA mask to 32-bit. " - "Devices with only 32-bit MSI support" - " may not work properly\n"); - } + if (ret) + dev_warn(pci->dev, "Failed to set DMA mask to 32-bit. Devices with only 32-bit MSI support may not work properly\n"); pp->msi_data = dma_map_single_attrs(pci->dev, &pp->msi_msg, sizeof(pp->msi_msg), -- GitLab From c9a3c4e637ac2dce534f7e9e5a80aed93410ccad Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 25 Dec 2020 18:35:49 -0700 Subject: [PATCH 1179/1894] mfd: ab8500-debugfs: Remove extraneous curly brace Clang errors: drivers/mfd/ab8500-debugfs.c:1526:2: error: non-void function does not return a value [-Werror,-Wreturn-type] } ^ drivers/mfd/ab8500-debugfs.c:1528:2: error: expected identifier or '(' return 0; ^ drivers/mfd/ab8500-debugfs.c:1529:1: error: extraneous closing brace ('}') } ^ 3 errors generated. The cleanup in ab8500_interrupts_show left a curly brace around, remove it to fix the error. Fixes: 886c8121659d ("mfd: ab8500-debugfs: Remove the racy fiddling with irq_desc") Signed-off-by: Nathan Chancellor Signed-off-by: Linus Torvalds --- drivers/mfd/ab8500-debugfs.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/mfd/ab8500-debugfs.c b/drivers/mfd/ab8500-debugfs.c index a32039366a93a..1cf84562351e4 100644 --- a/drivers/mfd/ab8500-debugfs.c +++ b/drivers/mfd/ab8500-debugfs.c @@ -1521,7 +1521,6 @@ static int ab8500_interrupts_show(struct seq_file *s, void *p) line + irq_first, num_interrupts[line], num_wake_interrupts[line]); - } seq_putc(s, '\n'); } -- GitLab From f838f8d2b694cf9d524dc4423e9dd2db13892f3f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 26 Dec 2020 09:19:49 -0800 Subject: [PATCH 1180/1894] mfd: ab8500-debugfs: Remove extraneous seq_putc Commit c9a3c4e637ac ("mfd: ab8500-debugfs: Remove extraneous curly brace") removed a left-over curly brace that caused build failures, but Joe Perches points out that the subsequent 'seq_putc()' should also be removed, because the commit that caused all these problems already added the final '\n' to the seq_printf() above it. Reported-by: Joe Perches Fixes: 886c8121659d ("mfd: ab8500-debugfs: Remove the racy fiddling with irq_desc") Cc: Thomas Gleixner Cc: Nathan Chancellor Signed-off-by: Linus Torvalds --- drivers/mfd/ab8500-debugfs.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/mfd/ab8500-debugfs.c b/drivers/mfd/ab8500-debugfs.c index 1cf84562351e4..e43dea89b0947 100644 --- a/drivers/mfd/ab8500-debugfs.c +++ b/drivers/mfd/ab8500-debugfs.c @@ -1521,7 +1521,6 @@ static int ab8500_interrupts_show(struct seq_file *s, void *p) line + irq_first, num_interrupts[line], num_wake_interrupts[line]); - seq_putc(s, '\n'); } return 0; -- GitLab From e13a6915a03ffc3ce332d28c141a335e25187fa3 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Wed, 23 Dec 2020 15:36:38 +0100 Subject: [PATCH 1181/1894] vhost/vsock: add IOTLB API support This patch enables the IOTLB API support for vhost-vsock devices, allowing the userspace to emulate an IOMMU for the guest. These changes were made following vhost-net, in details this patch: - exposes VIRTIO_F_ACCESS_PLATFORM feature and inits the iotlb device if the feature is acked - implements VHOST_GET_BACKEND_FEATURES and VHOST_SET_BACKEND_FEATURES ioctls - calls vq_meta_prefetch() before vq processing to prefetch vq metadata address in IOTLB - provides .read_iter, .write_iter, and .poll callbacks for the chardev; they are used by the userspace to exchange IOTLB messages This patch was tested specifying "intel_iommu=strict" in the guest kernel command line. I used QEMU with a patch applied [1] to fix a simple issue (that patch was merged in QEMU v5.2.0): $ qemu -M q35,accel=kvm,kernel-irqchip=split \ -drive file=fedora.qcow2,format=qcow2,if=virtio \ -device intel-iommu,intremap=on,device-iotlb=on \ -device vhost-vsock-pci,guest-cid=3,iommu_platform=on,ats=on [1] https://lists.gnu.org/archive/html/qemu-devel/2020-10/msg09077.html Reviewed-by: Stefan Hajnoczi Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20201223143638.123417-1-sgarzare@redhat.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vhost/vsock.c | 68 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 65 insertions(+), 3 deletions(-) diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index a483cec31d5cb..5e78fb719602d 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -30,7 +30,12 @@ #define VHOST_VSOCK_PKT_WEIGHT 256 enum { - VHOST_VSOCK_FEATURES = VHOST_FEATURES, + VHOST_VSOCK_FEATURES = VHOST_FEATURES | + (1ULL << VIRTIO_F_ACCESS_PLATFORM) +}; + +enum { + VHOST_VSOCK_BACKEND_FEATURES = (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) }; /* Used to track all the vhost_vsock instances on the system. */ @@ -94,6 +99,9 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, if (!vhost_vq_get_backend(vq)) goto out; + if (!vq_meta_prefetch(vq)) + goto out; + /* Avoid further vmexits, we're already processing the virtqueue */ vhost_disable_notify(&vsock->dev, vq); @@ -449,6 +457,9 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work) if (!vhost_vq_get_backend(vq)) goto out; + if (!vq_meta_prefetch(vq)) + goto out; + vhost_disable_notify(&vsock->dev, vq); do { u32 len; @@ -766,8 +777,12 @@ static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features) mutex_lock(&vsock->dev.mutex); if ((features & (1 << VHOST_F_LOG_ALL)) && !vhost_log_access_ok(&vsock->dev)) { - mutex_unlock(&vsock->dev.mutex); - return -EFAULT; + goto err; + } + + if ((features & (1ULL << VIRTIO_F_ACCESS_PLATFORM))) { + if (vhost_init_device_iotlb(&vsock->dev, true)) + goto err; } for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { @@ -778,6 +793,10 @@ static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features) } mutex_unlock(&vsock->dev.mutex); return 0; + +err: + mutex_unlock(&vsock->dev.mutex); + return -EFAULT; } static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl, @@ -811,6 +830,18 @@ static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl, if (copy_from_user(&features, argp, sizeof(features))) return -EFAULT; return vhost_vsock_set_features(vsock, features); + case VHOST_GET_BACKEND_FEATURES: + features = VHOST_VSOCK_BACKEND_FEATURES; + if (copy_to_user(argp, &features, sizeof(features))) + return -EFAULT; + return 0; + case VHOST_SET_BACKEND_FEATURES: + if (copy_from_user(&features, argp, sizeof(features))) + return -EFAULT; + if (features & ~VHOST_VSOCK_BACKEND_FEATURES) + return -EOPNOTSUPP; + vhost_set_backend_features(&vsock->dev, features); + return 0; default: mutex_lock(&vsock->dev.mutex); r = vhost_dev_ioctl(&vsock->dev, ioctl, argp); @@ -823,6 +854,34 @@ static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl, } } +static ssize_t vhost_vsock_chr_read_iter(struct kiocb *iocb, struct iov_iter *to) +{ + struct file *file = iocb->ki_filp; + struct vhost_vsock *vsock = file->private_data; + struct vhost_dev *dev = &vsock->dev; + int noblock = file->f_flags & O_NONBLOCK; + + return vhost_chr_read_iter(dev, to, noblock); +} + +static ssize_t vhost_vsock_chr_write_iter(struct kiocb *iocb, + struct iov_iter *from) +{ + struct file *file = iocb->ki_filp; + struct vhost_vsock *vsock = file->private_data; + struct vhost_dev *dev = &vsock->dev; + + return vhost_chr_write_iter(dev, from); +} + +static __poll_t vhost_vsock_chr_poll(struct file *file, poll_table *wait) +{ + struct vhost_vsock *vsock = file->private_data; + struct vhost_dev *dev = &vsock->dev; + + return vhost_chr_poll(file, dev, wait); +} + static const struct file_operations vhost_vsock_fops = { .owner = THIS_MODULE, .open = vhost_vsock_dev_open, @@ -830,6 +889,9 @@ static const struct file_operations vhost_vsock_fops = { .llseek = noop_llseek, .unlocked_ioctl = vhost_vsock_dev_ioctl, .compat_ioctl = compat_ptr_ioctl, + .read_iter = vhost_vsock_chr_read_iter, + .write_iter = vhost_vsock_chr_write_iter, + .poll = vhost_vsock_chr_poll, }; static struct miscdevice vhost_vsock_misc = { -- GitLab From 6cb56218ad9e580e519dcd23bfb3db08d8692e5a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 22 Dec 2020 23:23:56 +0100 Subject: [PATCH 1182/1894] netfilter: xt_RATEEST: reject non-null terminated string from userspace syzbot reports: detected buffer overflow in strlen [..] Call Trace: strlen include/linux/string.h:325 [inline] strlcpy include/linux/string.h:348 [inline] xt_rateest_tg_checkentry+0x2a5/0x6b0 net/netfilter/xt_RATEEST.c:143 strlcpy assumes src is a c-string. Check info->name before its used. Reported-by: syzbot+e86f7c428c8c50db65b4@syzkaller.appspotmail.com Fixes: 5859034d7eb8793 ("[NETFILTER]: x_tables: add RATEEST target") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_RATEEST.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c index 37253d399c6b8..0d5c422f87452 100644 --- a/net/netfilter/xt_RATEEST.c +++ b/net/netfilter/xt_RATEEST.c @@ -115,6 +115,9 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par) } cfg; int ret; + if (strnlen(info->name, sizeof(est->name)) >= sizeof(est->name)) + return -ENAMETOOLONG; + net_get_random_once(&jhash_rnd, sizeof(jhash_rnd)); mutex_lock(&xn->hash_lock); -- GitLab From 9e5c23b9bd71d00b07720b2a8037b019d356e9df Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Tue, 1 Dec 2020 15:01:56 +0000 Subject: [PATCH 1183/1894] KVM: arm64: Update comment in kvm_vgic_map_resources() vgic_v3_map_resources() returns -EBUSY if the VGIC isn't initialized, update the comment to kvm_vgic_map_resources() to match what the function does. Signed-off-by: Alexandru Elisei Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201201150157.223625-5-alexandru.elisei@arm.com --- arch/arm64/kvm/vgic/vgic-init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index a2f4d1c85f001..5b54787a9ad57 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -419,7 +419,8 @@ int vgic_lazy_init(struct kvm *kvm) * Map the MMIO regions depending on the VGIC model exposed to the guest * called on the first VCPU run. * Also map the virtual CPU interface into the VM. - * v2/v3 derivatives call vgic_init if not already done. + * v2 calls vgic_init() if not already done. + * v3 and derivatives return an error if the VGIC is not initialized. * vgic_ready() returns true if this function has succeeded. * @kvm: kvm struct pointer */ -- GitLab From 282ff80135717cc43f1e33ddd4b0cd9e760d060b Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Tue, 1 Dec 2020 15:01:57 +0000 Subject: [PATCH 1184/1894] KVM: arm64: Remove redundant call to kvm_pmu_vcpu_reset() KVM_ARM_VCPU_INIT ioctl calls kvm_reset_vcpu(), which in turn resets the PMU with a call to kvm_pmu_vcpu_reset(). The function zeroes the PMU chained counters bitmap and stops all the counters with a perf event attached. Because it is called before the VCPU has had the chance to run, no perf events are in use and none are released. kvm_arm_pmu_v3_enable(), called by kvm_vcpu_first_run_init() only if the VCPU has been initialized, also resets the PMU. kvm_pmu_vcpu_reset() in this case does the exact same thing as the previous call, so remove it. Signed-off-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201201150157.223625-6-alexandru.elisei@arm.com --- arch/arm64/kvm/pmu-emul.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 398f6df1bbe40..4ad66a532e38b 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -850,8 +850,6 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu) return -EINVAL; } - kvm_pmu_vcpu_reset(vcpu); - return 0; } -- GitLab From 101068b566ef227b605d807aad9e72efd8b6bc5b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 27 Dec 2020 14:28:34 +0000 Subject: [PATCH 1185/1894] KVM: arm64: Consolidate dist->ready setting into kvm_vgic_map_resources() dist->ready setting is pointlessly spread across the two vgic backends, while it could be consolidated in kvm_vgic_map_resources(). Move it there, and slightly simplify the flows in both backends. Suggested-by: Eric Auger Signed-off-by: Marc Zyngier --- arch/arm64/kvm/vgic/vgic-init.c | 2 ++ arch/arm64/kvm/vgic/vgic-v2.c | 17 ++++++----------- arch/arm64/kvm/vgic/vgic-v3.c | 18 ++++++------------ 3 files changed, 14 insertions(+), 23 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 5b54787a9ad57..052917deb1495 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -446,6 +446,8 @@ int kvm_vgic_map_resources(struct kvm *kvm) if (ret) __kvm_vgic_destroy(kvm); + else + dist->ready = true; out: mutex_unlock(&kvm->lock); diff --git a/arch/arm64/kvm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c index 7f38c1a936395..11934c2af2f42 100644 --- a/arch/arm64/kvm/vgic/vgic-v2.c +++ b/arch/arm64/kvm/vgic/vgic-v2.c @@ -309,14 +309,12 @@ int vgic_v2_map_resources(struct kvm *kvm) if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) || IS_VGIC_ADDR_UNDEF(dist->vgic_cpu_base)) { kvm_err("Need to set vgic cpu and dist addresses first\n"); - ret = -ENXIO; - goto out; + return -ENXIO; } if (!vgic_v2_check_base(dist->vgic_dist_base, dist->vgic_cpu_base)) { kvm_err("VGIC CPU and dist frames overlap\n"); - ret = -EINVAL; - goto out; + return -EINVAL; } /* @@ -326,13 +324,13 @@ int vgic_v2_map_resources(struct kvm *kvm) ret = vgic_init(kvm); if (ret) { kvm_err("Unable to initialize VGIC dynamic data structures\n"); - goto out; + return ret; } ret = vgic_register_dist_iodev(kvm, dist->vgic_dist_base, VGIC_V2); if (ret) { kvm_err("Unable to register VGIC MMIO regions\n"); - goto out; + return ret; } if (!static_branch_unlikely(&vgic_v2_cpuif_trap)) { @@ -341,14 +339,11 @@ int vgic_v2_map_resources(struct kvm *kvm) KVM_VGIC_V2_CPU_SIZE, true); if (ret) { kvm_err("Unable to remap VGIC CPU to VCPU\n"); - goto out; + return ret; } } - dist->ready = true; - -out: - return ret; + return 0; } DEFINE_STATIC_KEY_FALSE(vgic_v2_cpuif_trap); diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index 35029c5cb0f12..52915b3423514 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -505,21 +505,18 @@ int vgic_v3_map_resources(struct kvm *kvm) if (IS_VGIC_ADDR_UNDEF(vgic_cpu->rd_iodev.base_addr)) { kvm_debug("vcpu %d redistributor base not set\n", c); - ret = -ENXIO; - goto out; + return -ENXIO; } } if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base)) { kvm_err("Need to set vgic distributor addresses first\n"); - ret = -ENXIO; - goto out; + return -ENXIO; } if (!vgic_v3_check_base(kvm)) { kvm_err("VGIC redist and dist frames overlap\n"); - ret = -EINVAL; - goto out; + return -EINVAL; } /* @@ -527,22 +524,19 @@ int vgic_v3_map_resources(struct kvm *kvm) * the VGIC before we need to use it. */ if (!vgic_initialized(kvm)) { - ret = -EBUSY; - goto out; + return -EBUSY; } ret = vgic_register_dist_iodev(kvm, dist->vgic_dist_base, VGIC_V3); if (ret) { kvm_err("Unable to register VGICv3 dist MMIO regions\n"); - goto out; + return ret; } if (kvm_vgic_global_state.has_gicv4_1) vgic_v4_configure_vsgis(kvm); - dist->ready = true; -out: - return ret; + return 0; } DEFINE_STATIC_KEY_FALSE(vgic_v3_cpuif_trap); -- GitLab From 14e3e989f6a5d9646b6cf60690499cc8bdc11f7d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 27 Dec 2020 10:56:33 -0800 Subject: [PATCH 1186/1894] proc mountinfo: make splice available again MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 36e2c7421f02 ("fs: don't allow splice read/write without explicit ops") we've required that file operation structures explicitly enable splice support, rather than falling back to the default handlers. Most /proc files use the indirect 'struct proc_ops' to describe their file operations, and were fixed up to support splice earlier in commits 40be821d627c..b24c30c67863, but the mountinfo files interact with the VFS directly using their own 'struct file_operations' and got missed as a result. This adds the necessary support for splice to work for /proc/*/mountinfo and friends. Reported-by: Joan Bruguera Micó Reported-by: Jussi Kivilinna Link: https://bugzilla.kernel.org/show_bug.cgi?id=209971 Cc: Greg Kroah-Hartman Cc: Christoph Hellwig Signed-off-by: Linus Torvalds --- fs/proc_namespace.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c index e59d4bb3a89e4..eafb75755fa37 100644 --- a/fs/proc_namespace.c +++ b/fs/proc_namespace.c @@ -320,7 +320,8 @@ static int mountstats_open(struct inode *inode, struct file *file) const struct file_operations proc_mounts_operations = { .open = mounts_open, - .read = seq_read, + .read_iter = seq_read_iter, + .splice_read = generic_file_splice_read, .llseek = seq_lseek, .release = mounts_release, .poll = mounts_poll, @@ -328,7 +329,8 @@ const struct file_operations proc_mounts_operations = { const struct file_operations proc_mountinfo_operations = { .open = mountinfo_open, - .read = seq_read, + .read_iter = seq_read_iter, + .splice_read = generic_file_splice_read, .llseek = seq_lseek, .release = mounts_release, .poll = mounts_poll, @@ -336,7 +338,8 @@ const struct file_operations proc_mountinfo_operations = { const struct file_operations proc_mountstats_operations = { .open = mountstats_open, - .read = seq_read, + .read_iter = seq_read_iter, + .splice_read = generic_file_splice_read, .llseek = seq_lseek, .release = mounts_release, }; -- GitLab From 5c8fe583cce542aa0b84adc939ce85293de36e5e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 27 Dec 2020 15:30:22 -0800 Subject: [PATCH 1187/1894] Linux 5.11-rc1 --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 29c3eecd7b308..3d328b7ab2003 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 -PATCHLEVEL = 10 +PATCHLEVEL = 11 SUBLEVEL = 0 -EXTRAVERSION = +EXTRAVERSION = -rc1 NAME = Kleptomaniac Octopus # *DOCUMENTATION* -- GitLab From 976509bb310b913d30577f15b58bdd30effb0542 Mon Sep 17 00:00:00 2001 From: Quanyang Wang Date: Thu, 24 Dec 2020 18:49:27 +0800 Subject: [PATCH 1188/1894] opp: fix memory leak in _allocate_opp_table In function _allocate_opp_table, opp_dev is allocated and referenced by opp_table via _add_opp_dev. But in the case that the subsequent calls return -EPROBE_DEFER, it will jump to err label and opp_table will be freed. Then opp_dev becomes an unreferenced object to cause memory leak. So let's call _remove_opp_dev to do the cleanup. This fixes the following kmemleak report: unreferenced object 0xffff000801524a00 (size 128): comm "swapper/0", pid 1, jiffies 4294892465 (age 84.616s) hex dump (first 32 bytes): 40 00 56 01 08 00 ff ff 40 00 56 01 08 00 ff ff @.V.....@.V..... b8 52 77 7f 08 00 ff ff 00 3c 4c 00 08 00 ff ff .Rw......] kmemleak_alloc+0x30/0x40 [<0000000056da48f0>] kmem_cache_alloc+0x3d4/0x588 [<00000000a84b3b0e>] _add_opp_dev+0x2c/0x88 [<0000000062a380cd>] _add_opp_table_indexed+0x124/0x268 [<000000008b4c8f1f>] dev_pm_opp_of_add_table+0x20/0x1d8 [<00000000e5316798>] dev_pm_opp_of_cpumask_add_table+0x48/0xf0 [<00000000db0a8ec2>] dt_cpufreq_probe+0x20c/0x448 [<0000000030a3a26c>] platform_probe+0x68/0xd8 [<00000000c618e78d>] really_probe+0xd0/0x3a0 [<00000000642e856f>] driver_probe_device+0x58/0xb8 [<00000000f10f5307>] device_driver_attach+0x74/0x80 [<0000000004f254b8>] __driver_attach+0x58/0xe0 [<0000000009d5d19e>] bus_for_each_dev+0x70/0xc8 [<0000000000d22e1c>] driver_attach+0x24/0x30 [<0000000001d4e952>] bus_add_driver+0x14c/0x1f0 [<0000000089928aaa>] driver_register+0x64/0x120 Cc: v5.10 # v5.10 Fixes: dd461cd9183f ("opp: Allow dev_pm_opp_get_opp_table() to return -EPROBE_DEFER") Signed-off-by: Quanyang Wang [ Viresh: Added the stable tag ] Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 4268eb3599152..c9e50836b4c29 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -1092,7 +1092,7 @@ static struct opp_table *_allocate_opp_table(struct device *dev, int index) if (IS_ERR(opp_table->clk)) { ret = PTR_ERR(opp_table->clk); if (ret == -EPROBE_DEFER) - goto err; + goto remove_opp_dev; dev_dbg(dev, "%s: Couldn't find clock: %d\n", __func__, ret); } @@ -1101,7 +1101,7 @@ static struct opp_table *_allocate_opp_table(struct device *dev, int index) ret = dev_pm_opp_of_find_icc_paths(dev, opp_table); if (ret) { if (ret == -EPROBE_DEFER) - goto err; + goto remove_opp_dev; dev_warn(dev, "%s: Error finding interconnect paths: %d\n", __func__, ret); @@ -1113,6 +1113,8 @@ static struct opp_table *_allocate_opp_table(struct device *dev, int index) return opp_table; +remove_opp_dev: + _remove_opp_dev(opp_dev, opp_table); err: kfree(opp_table); return ERR_PTR(ret); -- GitLab From 0e1d9ca1766f5d95fb881f57b6c4a1ffa63d4648 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 28 Dec 2020 10:51:04 +0530 Subject: [PATCH 1189/1894] opp: Call the missing clk_put() on error Fix the clock reference counting by calling the missing clk_put() in the error path. Cc: v5.10 # v5.10 Fixes: dd461cd9183f ("opp: Allow dev_pm_opp_get_opp_table() to return -EPROBE_DEFER") Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index c9e50836b4c29..8c905aabacc01 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -1101,7 +1101,7 @@ static struct opp_table *_allocate_opp_table(struct device *dev, int index) ret = dev_pm_opp_of_find_icc_paths(dev, opp_table); if (ret) { if (ret == -EPROBE_DEFER) - goto remove_opp_dev; + goto put_clk; dev_warn(dev, "%s: Error finding interconnect paths: %d\n", __func__, ret); @@ -1113,6 +1113,9 @@ static struct opp_table *_allocate_opp_table(struct device *dev, int index) return opp_table; +put_clk: + if (!IS_ERR(opp_table->clk)) + clk_put(opp_table->clk); remove_opp_dev: _remove_opp_dev(opp_dev, opp_table); err: -- GitLab From 105b5ca9b1e38a8db8446a493ca062eea98171eb Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Mon, 30 Nov 2020 16:36:30 +0200 Subject: [PATCH 1190/1894] habanalabs: Fix a missing-braces warning Fix a compilation "missing braces around initializer" warning. Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/habanalabs_ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index 32e6af1db4e35..a9927a06c5ea1 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -406,7 +406,7 @@ static int total_energy_consumption_info(struct hl_fpriv *hpriv, static int pll_frequency_info(struct hl_fpriv *hpriv, struct hl_info_args *args) { struct hl_device *hdev = hpriv->hdev; - struct hl_pll_frequency_info freq_info = {0}; + struct hl_pll_frequency_info freq_info = {}; u32 max_size = args->return_size; void __user *out = (void __user *) (uintptr_t) args->return_pointer; int rc; -- GitLab From 429f1571e8f0b14ec42b8fb14efcfc0576b2788f Mon Sep 17 00:00:00 2001 From: Alon Mizrahi Date: Tue, 1 Dec 2020 18:44:11 +0200 Subject: [PATCH 1191/1894] habanalabs: add comment for pll frequency ioctl opcode Forgot to add the comment for the opcode when it was added. Signed-off-by: Alon Mizrahi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- include/uapi/misc/habanalabs.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 8c15a7d336a0d..dc8bcec195ccf 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -279,6 +279,7 @@ enum hl_device_status { * HL_INFO_CLK_THROTTLE_REASON - Retrieve clock throttling reason * HL_INFO_SYNC_MANAGER - Retrieve sync manager info per dcore * HL_INFO_TOTAL_ENERGY - Retrieve total energy consumption + * HL_INFO_PLL_FREQUENCY - Retrieve PLL frequency */ #define HL_INFO_HW_IP_INFO 0 #define HL_INFO_HW_EVENTS 1 -- GitLab From 4783489951b78525a6e61b43936cbbd88b7938af Mon Sep 17 00:00:00 2001 From: Alon Mizrahi Date: Thu, 26 Nov 2020 13:05:20 +0200 Subject: [PATCH 1192/1894] habanalabs: fetch PSOC PLL frequency from F/W in goya When the F/W security is enabled, goya needs to fetch the PSOC pll frequency through a dedicated interface Signed-off-by: Alon Mizrahi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/goya/goya.c | 61 ++++++++++++++++++----------- 1 file changed, 38 insertions(+), 23 deletions(-) diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 3e5eb9e3d7bd8..b66fd55accb50 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -694,32 +694,47 @@ static void goya_qman0_set_security(struct hl_device *hdev, bool secure) static void goya_fetch_psoc_frequency(struct hl_device *hdev) { struct asic_fixed_properties *prop = &hdev->asic_prop; - u32 trace_freq = 0; - u32 pll_clk = 0; - u32 div_fctr = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1); - u32 div_sel = RREG32(mmPSOC_PCI_PLL_DIV_SEL_1); - u32 nr = RREG32(mmPSOC_PCI_PLL_NR); - u32 nf = RREG32(mmPSOC_PCI_PLL_NF); - u32 od = RREG32(mmPSOC_PCI_PLL_OD); - - if (div_sel == DIV_SEL_REF_CLK || div_sel == DIV_SEL_DIVIDED_REF) { - if (div_sel == DIV_SEL_REF_CLK) - trace_freq = PLL_REF_CLK; - else - trace_freq = PLL_REF_CLK / (div_fctr + 1); - } else if (div_sel == DIV_SEL_PLL_CLK || - div_sel == DIV_SEL_DIVIDED_PLL) { - pll_clk = PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1)); - if (div_sel == DIV_SEL_PLL_CLK) - trace_freq = pll_clk; - else - trace_freq = pll_clk / (div_fctr + 1); + u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel; + u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq; + int rc; + + if (hdev->asic_prop.fw_security_disabled) { + div_fctr = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1); + div_sel = RREG32(mmPSOC_PCI_PLL_DIV_SEL_1); + nr = RREG32(mmPSOC_PCI_PLL_NR); + nf = RREG32(mmPSOC_PCI_PLL_NF); + od = RREG32(mmPSOC_PCI_PLL_OD); + + if (div_sel == DIV_SEL_REF_CLK || + div_sel == DIV_SEL_DIVIDED_REF) { + if (div_sel == DIV_SEL_REF_CLK) + freq = PLL_REF_CLK; + else + freq = PLL_REF_CLK / (div_fctr + 1); + } else if (div_sel == DIV_SEL_PLL_CLK || + div_sel == DIV_SEL_DIVIDED_PLL) { + pll_clk = PLL_REF_CLK * (nf + 1) / + ((nr + 1) * (od + 1)); + if (div_sel == DIV_SEL_PLL_CLK) + freq = pll_clk; + else + freq = pll_clk / (div_fctr + 1); + } else { + dev_warn(hdev->dev, + "Received invalid div select value: %d", + div_sel); + freq = 0; + } } else { - dev_warn(hdev->dev, - "Received invalid div select value: %d", div_sel); + rc = hl_fw_cpucp_pll_info_get(hdev, PCI_PLL, pll_freq_arr); + + if (rc) + return; + + freq = pll_freq_arr[1]; } - prop->psoc_timestamp_frequency = trace_freq; + prop->psoc_timestamp_frequency = freq; prop->psoc_pci_pll_nr = nr; prop->psoc_pci_pll_nf = nf; prop->psoc_pci_pll_od = od; -- GitLab From 6585489e808d9964dbde9dad89ac8e792e1185fc Mon Sep 17 00:00:00 2001 From: Alon Mizrahi Date: Thu, 19 Nov 2020 16:34:19 +0200 Subject: [PATCH 1193/1894] habanalabs: remove generic gaudi get_pll_freq function As we only fetch the CPU_PLL frequency in gaudi, we don't need a generic get_pll_frequency function which takes a pll index as input Signed-off-by: Alon Mizrahi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../misc/habanalabs/common/habanalabs_ioctl.c | 2 +- drivers/misc/habanalabs/gaudi/gaudi.c | 126 ++++++------------ drivers/misc/habanalabs/gaudi/gaudiP.h | 7 - 3 files changed, 41 insertions(+), 94 deletions(-) diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index a9927a06c5ea1..a0c0d20f6f8ff 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -406,7 +406,7 @@ static int total_energy_consumption_info(struct hl_fpriv *hpriv, static int pll_frequency_info(struct hl_fpriv *hpriv, struct hl_info_args *args) { struct hl_device *hdev = hpriv->hdev; - struct hl_pll_frequency_info freq_info = {}; + struct hl_pll_frequency_info freq_info = { {0} }; u32 max_size = args->return_size; void __user *out = (void __user *) (uintptr_t) args->return_pointer; int rc; diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 1f1926607c5e7..278c4de98e22e 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -151,19 +151,6 @@ static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = { [PACKET_LOAD_AND_EXE] = sizeof(struct packet_load_and_exe) }; -static const u32 gaudi_pll_base_addresses[GAUDI_PLL_MAX] = { - [CPU_PLL] = mmPSOC_CPU_PLL_NR, - [PCI_PLL] = mmPSOC_PCI_PLL_NR, - [SRAM_PLL] = mmSRAM_W_PLL_NR, - [HBM_PLL] = mmPSOC_HBM_PLL_NR, - [NIC_PLL] = mmNIC0_PLL_NR, - [DMA_PLL] = mmDMA_W_PLL_NR, - [MESH_PLL] = mmMESH_W_PLL_NR, - [MME_PLL] = mmPSOC_MME_PLL_NR, - [TPC_PLL] = mmPSOC_TPC_PLL_NR, - [IF_PLL] = mmIF_W_PLL_NR -}; - static inline bool validate_packet_id(enum packet_id id) { switch (id) { @@ -703,93 +690,60 @@ static int gaudi_early_fini(struct hl_device *hdev) } /** - * gaudi_fetch_pll_frequency - Fetch PLL frequency values + * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values * * @hdev: pointer to hl_device structure - * @pll_index: index of the pll to fetch frequency from - * @pll_freq: pointer to store the pll frequency in MHz in each of the available - * outputs. if a certain output is not available a 0 will be set * */ -static int gaudi_fetch_pll_frequency(struct hl_device *hdev, - enum gaudi_pll_index pll_index, - u16 *pll_freq_arr) +static int gaudi_fetch_psoc_frequency(struct hl_device *hdev) { - u32 nr = 0, nf = 0, od = 0, pll_clk = 0, div_fctr, div_sel, - pll_base_addr = gaudi_pll_base_addresses[pll_index]; - u16 freq = 0; - int i, rc; - - if (hdev->asic_prop.fw_security_status_valid && - (hdev->asic_prop.fw_app_security_map & - CPU_BOOT_DEV_STS0_PLL_INFO_EN)) { - rc = hl_fw_cpucp_pll_info_get(hdev, pll_index, pll_freq_arr); + struct asic_fixed_properties *prop = &hdev->asic_prop; + u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel; + u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq; + int rc; - if (rc) - return rc; - } else if (hdev->asic_prop.fw_security_disabled) { + if (hdev->asic_prop.fw_security_disabled) { /* Backward compatibility */ - nr = RREG32(pll_base_addr + PLL_NR_OFFSET); - nf = RREG32(pll_base_addr + PLL_NF_OFFSET); - od = RREG32(pll_base_addr + PLL_OD_OFFSET); - - for (i = 0; i < HL_PLL_NUM_OUTPUTS; i++) { - div_fctr = RREG32(pll_base_addr + - PLL_DIV_FACTOR_0_OFFSET + i * 4); - div_sel = RREG32(pll_base_addr + - PLL_DIV_SEL_0_OFFSET + i * 4); + div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2); + div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2); + nr = RREG32(mmPSOC_CPU_PLL_NR); + nf = RREG32(mmPSOC_CPU_PLL_NF); + od = RREG32(mmPSOC_CPU_PLL_OD); - if (div_sel == DIV_SEL_REF_CLK || + if (div_sel == DIV_SEL_REF_CLK || div_sel == DIV_SEL_DIVIDED_REF) { - if (div_sel == DIV_SEL_REF_CLK) - freq = PLL_REF_CLK; - else - freq = PLL_REF_CLK / (div_fctr + 1); - } else if (div_sel == DIV_SEL_PLL_CLK || - div_sel == DIV_SEL_DIVIDED_PLL) { - pll_clk = PLL_REF_CLK * (nf + 1) / - ((nr + 1) * (od + 1)); - if (div_sel == DIV_SEL_PLL_CLK) - freq = pll_clk; - else - freq = pll_clk / (div_fctr + 1); - } else { - dev_warn(hdev->dev, - "Received invalid div select value: %d", - div_sel); - } - - pll_freq_arr[i] = freq; + if (div_sel == DIV_SEL_REF_CLK) + freq = PLL_REF_CLK; + else + freq = PLL_REF_CLK / (div_fctr + 1); + } else if (div_sel == DIV_SEL_PLL_CLK || + div_sel == DIV_SEL_DIVIDED_PLL) { + pll_clk = PLL_REF_CLK * (nf + 1) / + ((nr + 1) * (od + 1)); + if (div_sel == DIV_SEL_PLL_CLK) + freq = pll_clk; + else + freq = pll_clk / (div_fctr + 1); + } else { + dev_warn(hdev->dev, + "Received invalid div select value: %d", + div_sel); + freq = 0; } } else { - dev_err(hdev->dev, "Failed to fetch PLL frequency values\n"); - return -EIO; - } - - return 0; -} + rc = hl_fw_cpucp_pll_info_get(hdev, CPU_PLL, pll_freq_arr); -/** - * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values - * - * @hdev: pointer to hl_device structure - * - */ -static int gaudi_fetch_psoc_frequency(struct hl_device *hdev) -{ - struct asic_fixed_properties *prop = &hdev->asic_prop; - u16 pll_freq[HL_PLL_NUM_OUTPUTS]; - int rc; + if (rc) + return rc; - rc = gaudi_fetch_pll_frequency(hdev, CPU_PLL, pll_freq); - if (rc) - return rc; + freq = pll_freq_arr[2]; + } - prop->psoc_timestamp_frequency = pll_freq[2]; - prop->psoc_pci_pll_nr = 0; - prop->psoc_pci_pll_nf = 0; - prop->psoc_pci_pll_od = 0; - prop->psoc_pci_pll_div_factor = 0; + prop->psoc_timestamp_frequency = freq; + prop->psoc_pci_pll_nr = nr; + prop->psoc_pci_pll_nf = nf; + prop->psoc_pci_pll_od = od; + prop->psoc_pci_pll_div_factor = div_fctr; return 0; } diff --git a/drivers/misc/habanalabs/gaudi/gaudiP.h b/drivers/misc/habanalabs/gaudi/gaudiP.h index f2d91f4fcffea..a7ab2d7e57d44 100644 --- a/drivers/misc/habanalabs/gaudi/gaudiP.h +++ b/drivers/misc/habanalabs/gaudi/gaudiP.h @@ -105,13 +105,6 @@ #define MME_ACC_OFFSET (mmMME1_ACC_BASE - mmMME0_ACC_BASE) #define SRAM_BANK_OFFSET (mmSRAM_Y0_X1_RTR_BASE - mmSRAM_Y0_X0_RTR_BASE) -#define PLL_NR_OFFSET 0 -#define PLL_NF_OFFSET (mmPSOC_CPU_PLL_NF - mmPSOC_CPU_PLL_NR) -#define PLL_OD_OFFSET (mmPSOC_CPU_PLL_OD - mmPSOC_CPU_PLL_NR) -#define PLL_DIV_FACTOR_0_OFFSET (mmPSOC_CPU_PLL_DIV_FACTOR_0 - \ - mmPSOC_CPU_PLL_NR) -#define PLL_DIV_SEL_0_OFFSET (mmPSOC_CPU_PLL_DIV_SEL_0 - mmPSOC_CPU_PLL_NR) - #define NUM_OF_SOB_IN_BLOCK \ (((mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_2047 - \ mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0) + 4) >> 2) -- GitLab From 9c9013cbd8338ff8eac732d115c9005bc512cbc5 Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Tue, 1 Dec 2020 10:39:54 +0200 Subject: [PATCH 1194/1894] habanalabs: preboot hard reset support FW hard reset capability indication is now moved to preboot stage. Driver will check if HW is dirty only after it validated preboot is up. If HW is dirty, driver will perform a hard reset according to the FW capability. In addition, FW defines a new message which driver need to send in order to initiate a hard reset. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 25 ++++++++++--------- drivers/misc/habanalabs/gaudi/gaudi.c | 17 +++++++------ drivers/misc/habanalabs/goya/goya.c | 12 ++++----- .../habanalabs/include/common/hl_boot_if.h | 2 ++ 4 files changed, 31 insertions(+), 25 deletions(-) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 0e1c629e9800a..c970bfc6db66c 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -627,7 +627,9 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg, security_status = RREG32(cpu_security_boot_status_reg); /* We read security status multiple times during boot: - * 1. preboot - we check if fw security feature is supported + * 1. preboot - a. Check whether the security status bits are valid + * b. Check whether fw security is enabled + * c. Check whether hard reset is done by fw * 2. boot cpu - we get boot cpu security status * 3. FW application - we get FW application security status * @@ -637,13 +639,20 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg, */ if (security_status & CPU_BOOT_DEV_STS0_ENABLED) { hdev->asic_prop.fw_security_status_valid = 1; - prop->fw_security_disabled = - !(security_status & CPU_BOOT_DEV_STS0_SECURITY_EN); + + if (!(security_status & CPU_BOOT_DEV_STS0_SECURITY_EN)) + prop->fw_security_disabled = true; + + if (security_status & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN) + hdev->asic_prop.hard_reset_done_by_fw = true; } else { hdev->asic_prop.fw_security_status_valid = 0; prop->fw_security_disabled = true; } + dev_dbg(hdev->dev, "Firmware hard-reset is %s\n", + hdev->asic_prop.hard_reset_done_by_fw ? "enabled" : "disabled"); + dev_info(hdev->dev, "firmware-level security is %s\n", prop->fw_security_disabled ? "disabled" : "enabled"); @@ -797,18 +806,10 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, } /* Read FW application security bits */ - if (hdev->asic_prop.fw_security_status_valid) { + if (hdev->asic_prop.fw_security_status_valid) hdev->asic_prop.fw_app_security_map = RREG32(cpu_security_boot_status_reg); - if (hdev->asic_prop.fw_app_security_map & - CPU_BOOT_DEV_STS0_FW_HARD_RST_EN) - hdev->asic_prop.hard_reset_done_by_fw = true; - } - - dev_dbg(hdev->dev, "Firmware hard-reset is %s\n", - hdev->asic_prop.hard_reset_done_by_fw ? "enabled" : "disabled"); - dev_info(hdev->dev, "Successfully loaded firmware to device\n"); out: diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 278c4de98e22e..e465c158eaeb8 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -654,12 +654,6 @@ static int gaudi_early_init(struct hl_device *hdev) if (rc) goto free_queue_props; - if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) { - dev_info(hdev->dev, - "H/W state is dirty, must reset before initializing\n"); - hdev->asic_funcs->hw_fini(hdev, true); - } - /* Before continuing in the initialization, we need to read the preboot * version to determine whether we run with a security-enabled firmware */ @@ -672,6 +666,12 @@ static int gaudi_early_init(struct hl_device *hdev) goto pci_fini; } + if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) { + dev_info(hdev->dev, + "H/W state is dirty, must reset before initializing\n"); + hdev->asic_funcs->hw_fini(hdev, true); + } + return 0; pci_fini: @@ -3881,7 +3881,10 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset) /* I don't know what is the state of the CPU so make sure it is * stopped in any means necessary */ - WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE); + if (hdev->asic_prop.hard_reset_done_by_fw) + WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_RST_DEV); + else + WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE); WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_HALT_MACHINE); diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index b66fd55accb50..d61177bf36a5a 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -613,12 +613,6 @@ static int goya_early_init(struct hl_device *hdev) if (rc) goto free_queue_props; - if (goya_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) { - dev_info(hdev->dev, - "H/W state is dirty, must reset before initializing\n"); - hdev->asic_funcs->hw_fini(hdev, true); - } - /* Before continuing in the initialization, we need to read the preboot * version to determine whether we run with a security-enabled firmware */ @@ -631,6 +625,12 @@ static int goya_early_init(struct hl_device *hdev) goto pci_fini; } + if (goya_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) { + dev_info(hdev->dev, + "H/W state is dirty, must reset before initializing\n"); + hdev->asic_funcs->hw_fini(hdev, true); + } + if (!hdev->pldm) { val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS); if (val & PSOC_GLOBAL_CONF_BOOT_STRAP_PINS_SRIOV_EN_MASK) diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h index e5801ecf0cb23..755c4800f0028 100644 --- a/drivers/misc/habanalabs/include/common/hl_boot_if.h +++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h @@ -204,6 +204,8 @@ enum kmd_msg { KMD_MSG_GOTO_WFE, KMD_MSG_FIT_RDY, KMD_MSG_SKIP_BMC, + RESERVED, + KMD_MSG_RST_DEV, }; enum cpu_msg_status { -- GitLab From 72ab9ca52de6856380c26b2045aa826ae4308b76 Mon Sep 17 00:00:00 2001 From: Alon Mizrahi Date: Wed, 2 Dec 2020 19:55:30 +0200 Subject: [PATCH 1195/1894] habanalabs/gaudi: do not set EB in collective slave queues We don't need to set EB on signal packets from collective slave queues as it degrades performance. Because the slaves are the network queues, the engine barrier doesn't actually guarantee that the packet has been sent. Signed-off-by: Alon Mizrahi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/habanalabs.h | 2 +- drivers/misc/habanalabs/common/hw_queue.c | 5 ++++- drivers/misc/habanalabs/gaudi/gaudi.c | 8 ++++---- drivers/misc/habanalabs/goya/goya.c | 2 +- 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 571eda6ef5ab0..70b778a0d60e2 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -944,7 +944,7 @@ struct hl_asic_funcs { u32 (*get_signal_cb_size)(struct hl_device *hdev); u32 (*get_wait_cb_size)(struct hl_device *hdev); u32 (*gen_signal_cb)(struct hl_device *hdev, void *data, u16 sob_id, - u32 size); + u32 size, bool eb); u32 (*gen_wait_cb)(struct hl_device *hdev, struct hl_gen_wait_properties *prop); void (*reset_sob)(struct hl_device *hdev, void *data); diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c index 7caf868d1585c..76217258780a4 100644 --- a/drivers/misc/habanalabs/common/hw_queue.c +++ b/drivers/misc/habanalabs/common/hw_queue.c @@ -418,8 +418,11 @@ static void init_signal_cs(struct hl_device *hdev, "generate signal CB, sob_id: %d, sob val: 0x%x, q_idx: %d\n", cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx); + /* we set an EB since we must make sure all oeprations are done + * when sending the signal + */ hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb, - cs_cmpl->hw_sob->sob_id, 0); + cs_cmpl->hw_sob->sob_id, 0, true); kref_get(&hw_sob->kref); diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index e465c158eaeb8..65895ba075fe2 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -361,7 +361,7 @@ static int gaudi_cpucp_info_get(struct hl_device *hdev); static void gaudi_disable_clock_gating(struct hl_device *hdev); static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid); static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, - u32 size); + u32 size, bool eb); static u32 gaudi_gen_wait_cb(struct hl_device *hdev, struct hl_gen_wait_properties *prop); @@ -1064,7 +1064,7 @@ static void gaudi_collective_slave_init_job(struct hl_device *hdev, prop->collective_sob_id, queue_id); cb_size += gaudi_gen_signal_cb(hdev, job->user_cb, - prop->collective_sob_id, cb_size); + prop->collective_sob_id, cb_size, false); } static void gaudi_collective_wait_init_cs(struct hl_cs *cs) @@ -7893,7 +7893,7 @@ static u32 gaudi_get_wait_cb_size(struct hl_device *hdev) } static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, - u32 size) + u32 size, bool eb) { struct hl_cb *cb = (struct hl_cb *) data; struct packet_msg_short *pkt; @@ -7910,7 +7910,7 @@ static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */ ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */ ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); - ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 1); + ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, eb); ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1); ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1); diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index d61177bf36a5a..b8b4aa636b7cb 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -5339,7 +5339,7 @@ static u32 goya_get_wait_cb_size(struct hl_device *hdev) } static u32 goya_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, - u32 size) + u32 size, bool eb) { return 0; } -- GitLab From 7a585dfc32110a106f70474c6fa822d912a92c7e Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Mon, 30 Nov 2020 14:56:06 +0200 Subject: [PATCH 1196/1894] habanalabs: Revise comment to align with mirror list name hw_queues_mirror was renamed to cs_mirror, so revise accordingly a comment that refers to this list. Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/command_submission.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index beb482310a586..92c1c516b65f3 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -562,7 +562,7 @@ void hl_cs_rollback_all(struct hl_device *hdev) for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) flush_workqueue(hdev->cq_wq[i]); - /* Make sure we don't have leftovers in the H/W queues mirror list */ + /* Make sure we don't have leftovers in the CS mirror list */ list_for_each_entry_safe(cs, tmp, &hdev->cs_mirror_list, mirror_node) { cs_get(cs); cs->aborted = true; -- GitLab From 0024c094851f718ccb0b797255292bdce850a01f Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sat, 5 Dec 2020 22:55:09 +0200 Subject: [PATCH 1197/1894] habanalabs/gaudi: disable CGM at HW initialization In case the clock gating was enabled in preboot we need to disable it at the H/W initialization stage before touching the MME/TPC registers. Otherwise, the ASIC can get stuck. If the security is enabled in the firmware level, the CGM is always disabled and the driver can't enable it. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi/gaudi.c | 14 +++++++++++--- .../misc/habanalabs/include/common/hl_boot_if.h | 5 +++++ 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 65895ba075fe2..f316b898e8e05 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -2403,8 +2403,6 @@ static void gaudi_init_golden_registers(struct hl_device *hdev) gaudi_init_e2e(hdev); gaudi_init_hbm_cred(hdev); - hdev->asic_funcs->disable_clock_gating(hdev); - for (tpc_id = 0, tpc_offset = 0; tpc_id < TPC_NUMBER_OF_ENGINES; tpc_id++, tpc_offset += TPC_CFG_OFFSET) { @@ -3416,6 +3414,9 @@ static void gaudi_set_clock_gating(struct hl_device *hdev) if (hdev->in_debug) return; + if (!hdev->asic_prop.fw_security_disabled) + return; + for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) { enable = !!(hdev->clock_gating_mask & (BIT_ULL(gaudi_dma_assignment[i]))); @@ -3467,7 +3468,7 @@ static void gaudi_disable_clock_gating(struct hl_device *hdev) u32 qman_offset; int i; - if (!(gaudi->hw_cap_initialized & HW_CAP_CLK_GATE)) + if (!hdev->asic_prop.fw_security_disabled) return; for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) { @@ -3801,6 +3802,13 @@ static int gaudi_hw_init(struct hl_device *hdev) return rc; } + /* In case the clock gating was enabled in preboot we need to disable + * it here before touching the MME/TPC registers. + * There is no need to take clk gating mutex because when this function + * runs, no other relevant code can run + */ + hdev->asic_funcs->disable_clock_gating(hdev); + /* SRAM scrambler must be initialized after CPU is running from HBM */ gaudi_init_scrambler_sram(hdev); diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h index 755c4800f0028..7cb5f2d3e5654 100644 --- a/drivers/misc/habanalabs/include/common/hl_boot_if.h +++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h @@ -150,6 +150,10 @@ * CPU_BOOT_DEV_STS0_PLL_INFO_EN FW retrieval of PLL info is enabled. * Initialized in: linux * + * CPU_BOOT_DEV_STS0_CLK_GATE_EN Clock Gating enabled. + * FW initialized Clock Gating. + * Initialized in: preboot + * * CPU_BOOT_DEV_STS0_ENABLED Device status register enabled. * This is a main indication that the * running FW populates the device status @@ -171,6 +175,7 @@ #define CPU_BOOT_DEV_STS0_DRAM_SCR_EN (1 << 9) #define CPU_BOOT_DEV_STS0_FW_HARD_RST_EN (1 << 10) #define CPU_BOOT_DEV_STS0_PLL_INFO_EN (1 << 11) +#define CPU_BOOT_DEV_STS0_CLK_GATE_EN (1 << 13) #define CPU_BOOT_DEV_STS0_ENABLED (1 << 31) enum cpu_boot_status { -- GitLab From 6bbb77b9e6f0bd5595724b7c0cb1189afdd133d3 Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Sun, 6 Dec 2020 14:00:35 +0200 Subject: [PATCH 1198/1894] habanalabs: full FW hard reset support Driver must fetch FW hard reset capability at every FW boot stage: preboot, CPU boot, CPU application. If hard reset is triggered, driver will take into consideration only the last capability received. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 55 +++++++++++++++----- 1 file changed, 41 insertions(+), 14 deletions(-) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index c970bfc6db66c..20f77f58edef5 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -629,32 +629,36 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg, /* We read security status multiple times during boot: * 1. preboot - a. Check whether the security status bits are valid * b. Check whether fw security is enabled - * c. Check whether hard reset is done by fw - * 2. boot cpu - we get boot cpu security status - * 3. FW application - we get FW application security status + * c. Check whether hard reset is done by preboot + * 2. boot cpu - a. Fetch boot cpu security status + * b. Check whether hard reset is done by boot cpu + * 3. FW application - a. Fetch fw application security status + * b. Check whether hard reset is done by fw app * * Preboot: * Check security status bit (CPU_BOOT_DEV_STS0_ENABLED), if it is set * check security enabled bit (CPU_BOOT_DEV_STS0_SECURITY_EN) */ if (security_status & CPU_BOOT_DEV_STS0_ENABLED) { - hdev->asic_prop.fw_security_status_valid = 1; + prop->fw_security_status_valid = 1; - if (!(security_status & CPU_BOOT_DEV_STS0_SECURITY_EN)) + if (security_status & CPU_BOOT_DEV_STS0_SECURITY_EN) + prop->fw_security_disabled = false; + else prop->fw_security_disabled = true; if (security_status & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN) - hdev->asic_prop.hard_reset_done_by_fw = true; + prop->hard_reset_done_by_fw = true; } else { - hdev->asic_prop.fw_security_status_valid = 0; + prop->fw_security_status_valid = 0; prop->fw_security_disabled = true; } - dev_dbg(hdev->dev, "Firmware hard-reset is %s\n", - hdev->asic_prop.hard_reset_done_by_fw ? "enabled" : "disabled"); + dev_dbg(hdev->dev, "Firmware preboot hard-reset is %s\n", + prop->hard_reset_done_by_fw ? "enabled" : "disabled"); dev_info(hdev->dev, "firmware-level security is %s\n", - prop->fw_security_disabled ? "disabled" : "enabled"); + prop->fw_security_disabled ? "disabled" : "enabled"); return 0; } @@ -664,6 +668,7 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, u32 cpu_security_boot_status_reg, u32 boot_err0_reg, bool skip_bmc, u32 cpu_timeout, u32 boot_fit_timeout) { + struct asic_fixed_properties *prop = &hdev->asic_prop; u32 status; int rc; @@ -732,11 +737,22 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, /* Read U-Boot version now in case we will later fail */ hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_UBOOT); + /* Clear reset status since we need to read it again from boot CPU */ + prop->hard_reset_done_by_fw = false; + /* Read boot_cpu security bits */ - if (hdev->asic_prop.fw_security_status_valid) - hdev->asic_prop.fw_boot_cpu_security_map = + if (prop->fw_security_status_valid) { + prop->fw_boot_cpu_security_map = RREG32(cpu_security_boot_status_reg); + if (prop->fw_boot_cpu_security_map & + CPU_BOOT_DEV_STS0_FW_HARD_RST_EN) + prop->hard_reset_done_by_fw = true; + } + + dev_dbg(hdev->dev, "Firmware boot CPU hard-reset is %s\n", + prop->hard_reset_done_by_fw ? "enabled" : "disabled"); + if (rc) { detect_cpu_boot_status(hdev, status); rc = -EIO; @@ -805,11 +821,22 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, goto out; } + /* Clear reset status since we need to read again from app */ + prop->hard_reset_done_by_fw = false; + /* Read FW application security bits */ - if (hdev->asic_prop.fw_security_status_valid) - hdev->asic_prop.fw_app_security_map = + if (prop->fw_security_status_valid) { + prop->fw_app_security_map = RREG32(cpu_security_boot_status_reg); + if (prop->fw_app_security_map & + CPU_BOOT_DEV_STS0_FW_HARD_RST_EN) + prop->hard_reset_done_by_fw = true; + } + + dev_dbg(hdev->dev, "Firmware application CPU hard-reset is %s\n", + prop->hard_reset_done_by_fw ? "enabled" : "disabled"); + dev_info(hdev->dev, "Successfully loaded firmware to device\n"); out: -- GitLab From 13d0ee10b55ecec01fd3c91e086e4f3ba75a7911 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sun, 6 Dec 2020 23:48:45 +0200 Subject: [PATCH 1199/1894] habanalabs/gaudi: enhance reset message Print the initiator who performs the hard-reset for easier debugging. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi/gaudi.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index f316b898e8e05..b0528883a9959 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -3936,11 +3936,15 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset) WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST, 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT); - } - dev_info(hdev->dev, - "Issued HARD reset command, going to wait %dms\n", - reset_timeout_ms); + dev_info(hdev->dev, + "Issued HARD reset command, going to wait %dms\n", + reset_timeout_ms); + } else { + dev_info(hdev->dev, + "Firmware performs HARD reset, going to wait %dms\n", + reset_timeout_ms); + } /* * After hard reset, we can't poll the BTM_FSM register because the PSOC -- GitLab From 90ffe170a390d5a620f8fe66758514e369e85d24 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Tue, 8 Dec 2020 16:52:48 +0200 Subject: [PATCH 1200/1894] habanalabs: update comment in hl_boot_if.h Hard-reset flag is updated in many stages of the boot sequence of the firmware. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/include/common/hl_boot_if.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h index 7cb5f2d3e5654..b637dfd69f6e0 100644 --- a/drivers/misc/habanalabs/include/common/hl_boot_if.h +++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h @@ -145,7 +145,7 @@ * implemented. This means that FW will * perform hard reset procedure on * receiving the halt-machine event. - * Initialized in: linux + * Initialized in: preboot, u-boot, linux * * CPU_BOOT_DEV_STS0_PLL_INFO_EN FW retrieval of PLL info is enabled. * Initialized in: linux -- GitLab From 377182a3cc5ae6cc17fb04d06864c975f9f71c18 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Wed, 9 Dec 2020 19:50:46 +0200 Subject: [PATCH 1201/1894] habanalabs: adjust pci controller init to new firmware When the firmware security is enabled, the pcie_aux_dbi_reg_addr register in the PCI controller is blocked. Therefore, ignore the result of writing to this register and assume it worked. Also remove the prints on errors in the internal ELBI write function. If the security is enabled, the firmware is responsible for setting this register correctly so we won't have any problem. If the security is disabled, the write will work (unless something is totally broken at the PCI level and then the whole sequence will fail). In addition, remove a write to register pcie_aux_dbi_reg_addr+4, which was never actually needed. Moreover, PCIE_DBI registers are blocked to access from host when firmware security is enabled. Use a different register to flush the writes. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/pci.c | 28 +++++++++++-------- drivers/misc/habanalabs/gaudi/gaudi.c | 4 +-- .../misc/habanalabs/gaudi/gaudi_coresight.c | 3 +- 3 files changed, 21 insertions(+), 14 deletions(-) diff --git a/drivers/misc/habanalabs/common/pci.c b/drivers/misc/habanalabs/common/pci.c index 923b2606e29fe..b4725e6101f6c 100644 --- a/drivers/misc/habanalabs/common/pci.c +++ b/drivers/misc/habanalabs/common/pci.c @@ -130,10 +130,8 @@ static int hl_pci_elbi_write(struct hl_device *hdev, u64 addr, u32 data) if ((val & PCI_CONFIG_ELBI_STS_MASK) == PCI_CONFIG_ELBI_STS_DONE) return 0; - if (val & PCI_CONFIG_ELBI_STS_ERR) { - dev_err(hdev->dev, "Error writing to ELBI\n"); + if (val & PCI_CONFIG_ELBI_STS_ERR) return -EIO; - } if (!(val & PCI_CONFIG_ELBI_STS_MASK)) { dev_err(hdev->dev, "ELBI write didn't finish in time\n"); @@ -160,8 +158,12 @@ int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data) dbi_offset = addr & 0xFFF; - rc = hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0x00300000); - rc |= hl_pci_elbi_write(hdev, prop->pcie_dbi_base_address + dbi_offset, + /* Ignore result of writing to pcie_aux_dbi_reg_addr as it could fail + * in case the firmware security is enabled + */ + hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0x00300000); + + rc = hl_pci_elbi_write(hdev, prop->pcie_dbi_base_address + dbi_offset, data); if (rc) @@ -244,9 +246,11 @@ int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region, rc |= hl_pci_iatu_write(hdev, offset + 0x4, ctrl_reg_val); - /* Return the DBI window to the default location */ - rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0); - rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0); + /* Return the DBI window to the default location + * Ignore result of writing to pcie_aux_dbi_reg_addr as it could fail + * in case the firmware security is enabled + */ + hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0); if (rc) dev_err(hdev->dev, "failed to map bar %u to 0x%08llx\n", @@ -294,9 +298,11 @@ int hl_pci_set_outbound_region(struct hl_device *hdev, /* Enable */ rc |= hl_pci_iatu_write(hdev, 0x004, 0x80000000); - /* Return the DBI window to the default location */ - rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0); - rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0); + /* Return the DBI window to the default location + * Ignore result of writing to pcie_aux_dbi_reg_addr as it could fail + * in case the firmware security is enabled + */ + hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0); return rc; } diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index b0528883a9959..88d0e4356d594 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -3761,7 +3761,7 @@ static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout) static void gaudi_pre_hw_init(struct hl_device *hdev) { /* Perform read from the device to make sure device is up */ - RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG); + RREG32(mmHW_STATE); if (hdev->asic_prop.fw_security_disabled) { /* Set the access through PCI bars (Linux driver only) as @@ -3847,7 +3847,7 @@ static int gaudi_hw_init(struct hl_device *hdev) } /* Perform read from the device to flush all configuration */ - RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG); + RREG32(mmHW_STATE); return 0; diff --git a/drivers/misc/habanalabs/gaudi/gaudi_coresight.c b/drivers/misc/habanalabs/gaudi/gaudi_coresight.c index 2e3612e1ee28d..88a09d42e111c 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi_coresight.c +++ b/drivers/misc/habanalabs/gaudi/gaudi_coresight.c @@ -9,6 +9,7 @@ #include "../include/gaudi/gaudi_coresight.h" #include "../include/gaudi/asic_reg/gaudi_regs.h" #include "../include/gaudi/gaudi_masks.h" +#include "../include/gaudi/gaudi_reg_map.h" #include #define SPMU_SECTION_SIZE MME0_ACC_SPMU_MAX_OFFSET @@ -874,7 +875,7 @@ int gaudi_debug_coresight(struct hl_device *hdev, void *data) } /* Perform read from the device to flush all configuration */ - RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG); + RREG32(mmHW_STATE); return rc; } -- GitLab From 98e8781f008372057bd5cb059ca6b507371e473d Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Wed, 9 Dec 2020 23:07:58 +0200 Subject: [PATCH 1202/1894] habanalabs/gaudi: retry loading TPC f/w on -EINTR If loading the firmware file for the TPC f/w was interrupted, try to do it again, up to 5 times. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi/gaudi.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 88d0e4356d594..8c09e4466af8c 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -838,11 +838,17 @@ static int gaudi_init_tpc_mem(struct hl_device *hdev) size_t fw_size; void *cpu_addr; dma_addr_t dma_handle; - int rc; + int rc, count = 5; +again: rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev); + if (rc == -EINTR && count-- > 0) { + msleep(50); + goto again; + } + if (rc) { - dev_err(hdev->dev, "Firmware file %s is not found!\n", + dev_err(hdev->dev, "Failed to load firmware file %s\n", GAUDI_TPC_FW_FILE); goto out; } -- GitLab From a3fd28306329e8e82efab973aafe81e9001dcf6f Mon Sep 17 00:00:00 2001 From: Alon Mizrahi Date: Tue, 8 Dec 2020 16:14:01 +0200 Subject: [PATCH 1203/1894] habanalabs: add validation cs counter, fix misplaced counters Up until now validation errors were counted in the parsing field of the cs_counters struct, so we added a new counter and increased it when needed. In addition, there were some locations where only one of the counters was updated (ctx or aggregate) so add the second one to be updated as well. Signed-off-by: Alon Mizrahi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../habanalabs/common/command_submission.c | 75 ++++++++++++++----- drivers/misc/habanalabs/common/habanalabs.h | 2 + .../misc/habanalabs/common/habanalabs_ioctl.c | 5 ++ include/uapi/misc/habanalabs.h | 4 + 4 files changed, 68 insertions(+), 18 deletions(-) diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index 92c1c516b65f3..b2b3d2b0f808a 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -472,8 +472,11 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, cntr = &hdev->aggregated_cs_counters; cs = kzalloc(sizeof(*cs), GFP_ATOMIC); - if (!cs) + if (!cs) { + atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); + atomic64_inc(&cntr->out_of_mem_drop_cnt); return -ENOMEM; + } cs->ctx = ctx; cs->submitted = false; @@ -486,6 +489,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_ATOMIC); if (!cs_cmpl) { + atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); + atomic64_inc(&cntr->out_of_mem_drop_cnt); rc = -ENOMEM; goto free_cs; } @@ -513,6 +518,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues, sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC); if (!cs->jobs_in_queue_cnt) { + atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); + atomic64_inc(&cntr->out_of_mem_drop_cnt); rc = -ENOMEM; goto free_fence; } @@ -764,11 +771,14 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args) static int hl_cs_copy_chunk_array(struct hl_device *hdev, struct hl_cs_chunk **cs_chunk_array, - void __user *chunks, u32 num_chunks) + void __user *chunks, u32 num_chunks, + struct hl_ctx *ctx) { u32 size_to_copy; if (num_chunks > HL_MAX_JOBS_PER_CS) { + atomic64_inc(&ctx->cs_counters.validation_drop_cnt); + atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt); dev_err(hdev->dev, "Number of chunks can NOT be larger than %d\n", HL_MAX_JOBS_PER_CS); @@ -777,11 +787,16 @@ static int hl_cs_copy_chunk_array(struct hl_device *hdev, *cs_chunk_array = kmalloc_array(num_chunks, sizeof(**cs_chunk_array), GFP_ATOMIC); - if (!*cs_chunk_array) + if (!*cs_chunk_array) { + atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); + atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt); return -ENOMEM; + } size_to_copy = num_chunks * sizeof(struct hl_cs_chunk); if (copy_from_user(*cs_chunk_array, chunks, size_to_copy)) { + atomic64_inc(&ctx->cs_counters.validation_drop_cnt); + atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt); dev_err(hdev->dev, "Failed to copy cs chunk array from user\n"); kfree(*cs_chunk_array); return -EFAULT; @@ -797,6 +812,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, struct hl_device *hdev = hpriv->hdev; struct hl_cs_chunk *cs_chunk_array; struct hl_cs_counters_atomic *cntr; + struct hl_ctx *ctx = hpriv->ctx; struct hl_cs_job *job; struct hl_cs *cs; struct hl_cb *cb; @@ -805,7 +821,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, cntr = &hdev->aggregated_cs_counters; *cs_seq = ULLONG_MAX; - rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks); + rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks, + hpriv->ctx); if (rc) goto out; @@ -832,8 +849,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, rc = validate_queue_index(hdev, chunk, &queue_type, &is_kernel_allocated_cb); if (rc) { - atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt); - atomic64_inc(&cntr->parsing_drop_cnt); + atomic64_inc(&ctx->cs_counters.validation_drop_cnt); + atomic64_inc(&cntr->validation_drop_cnt); goto free_cs_object; } @@ -841,8 +858,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk); if (!cb) { atomic64_inc( - &hpriv->ctx->cs_counters.parsing_drop_cnt); - atomic64_inc(&cntr->parsing_drop_cnt); + &ctx->cs_counters.validation_drop_cnt); + atomic64_inc(&cntr->validation_drop_cnt); rc = -EINVAL; goto free_cs_object; } @@ -856,8 +873,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, job = hl_cs_allocate_job(hdev, queue_type, is_kernel_allocated_cb); if (!job) { - atomic64_inc( - &hpriv->ctx->cs_counters.out_of_mem_drop_cnt); + atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); atomic64_inc(&cntr->out_of_mem_drop_cnt); dev_err(hdev->dev, "Failed to allocate a new job\n"); rc = -ENOMEM; @@ -891,7 +907,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, rc = cs_parser(hpriv, job); if (rc) { - atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt); + atomic64_inc(&ctx->cs_counters.parsing_drop_cnt); atomic64_inc(&cntr->parsing_drop_cnt); dev_err(hdev->dev, "Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n", @@ -901,8 +917,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, } if (int_queues_only) { - atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt); - atomic64_inc(&cntr->parsing_drop_cnt); + atomic64_inc(&ctx->cs_counters.validation_drop_cnt); + atomic64_inc(&cntr->validation_drop_cnt); dev_err(hdev->dev, "Reject CS %d.%llu because only internal queues jobs are present\n", cs->ctx->asid, cs->sequence); @@ -1042,7 +1058,7 @@ out: } static int cs_ioctl_extract_signal_seq(struct hl_device *hdev, - struct hl_cs_chunk *chunk, u64 *signal_seq) + struct hl_cs_chunk *chunk, u64 *signal_seq, struct hl_ctx *ctx) { u64 *signal_seq_arr = NULL; u32 size_to_copy, signal_seq_arr_len; @@ -1052,6 +1068,8 @@ static int cs_ioctl_extract_signal_seq(struct hl_device *hdev, /* currently only one signal seq is supported */ if (signal_seq_arr_len != 1) { + atomic64_inc(&ctx->cs_counters.validation_drop_cnt); + atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt); dev_err(hdev->dev, "Wait for signal CS supports only one signal CS seq\n"); return -EINVAL; @@ -1060,13 +1078,18 @@ static int cs_ioctl_extract_signal_seq(struct hl_device *hdev, signal_seq_arr = kmalloc_array(signal_seq_arr_len, sizeof(*signal_seq_arr), GFP_ATOMIC); - if (!signal_seq_arr) + if (!signal_seq_arr) { + atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); + atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt); return -ENOMEM; + } size_to_copy = chunk->num_signal_seq_arr * sizeof(*signal_seq_arr); if (copy_from_user(signal_seq_arr, u64_to_user_ptr(chunk->signal_seq_arr), size_to_copy)) { + atomic64_inc(&ctx->cs_counters.validation_drop_cnt); + atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt); dev_err(hdev->dev, "Failed to copy signal seq array from user\n"); rc = -EFAULT; @@ -1153,6 +1176,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, struct hl_device *hdev = hpriv->hdev; struct hl_cs_compl *sig_waitcs_cmpl; u32 q_idx, collective_engine_id = 0; + struct hl_cs_counters_atomic *cntr; struct hl_fence *sig_fence = NULL; struct hl_ctx *ctx = hpriv->ctx; enum hl_queue_type q_type; @@ -1160,9 +1184,11 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, u64 signal_seq; int rc; + cntr = &hdev->aggregated_cs_counters; *cs_seq = ULLONG_MAX; - rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks); + rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks, + ctx); if (rc) goto out; @@ -1170,6 +1196,8 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, chunk = &cs_chunk_array[0]; if (chunk->queue_index >= hdev->asic_prop.max_queues) { + atomic64_inc(&ctx->cs_counters.validation_drop_cnt); + atomic64_inc(&cntr->validation_drop_cnt); dev_err(hdev->dev, "Queue index %d is invalid\n", chunk->queue_index); rc = -EINVAL; @@ -1181,6 +1209,8 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, q_type = hw_queue_prop->type; if (!hw_queue_prop->supports_sync_stream) { + atomic64_inc(&ctx->cs_counters.validation_drop_cnt); + atomic64_inc(&cntr->validation_drop_cnt); dev_err(hdev->dev, "Queue index %d does not support sync stream operations\n", q_idx); @@ -1190,6 +1220,8 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, if (cs_type == CS_TYPE_COLLECTIVE_WAIT) { if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) { + atomic64_inc(&ctx->cs_counters.validation_drop_cnt); + atomic64_inc(&cntr->validation_drop_cnt); dev_err(hdev->dev, "Queue index %d is invalid\n", q_idx); rc = -EINVAL; @@ -1200,12 +1232,14 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, } if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_COLLECTIVE_WAIT) { - rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq); + rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq, ctx); if (rc) goto free_cs_chunk_array; sig_fence = hl_ctx_get_fence(ctx, signal_seq); if (IS_ERR(sig_fence)) { + atomic64_inc(&ctx->cs_counters.validation_drop_cnt); + atomic64_inc(&cntr->validation_drop_cnt); dev_err(hdev->dev, "Failed to get signal CS with seq 0x%llx\n", signal_seq); @@ -1223,6 +1257,8 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, container_of(sig_fence, struct hl_cs_compl, base_fence); if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL) { + atomic64_inc(&ctx->cs_counters.validation_drop_cnt); + atomic64_inc(&cntr->validation_drop_cnt); dev_err(hdev->dev, "CS seq 0x%llx is not of a signal CS\n", signal_seq); @@ -1270,8 +1306,11 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, else if (cs_type == CS_TYPE_COLLECTIVE_WAIT) rc = hdev->asic_funcs->collective_wait_create_jobs(hdev, ctx, cs, q_idx, collective_engine_id); - else + else { + atomic64_inc(&ctx->cs_counters.validation_drop_cnt); + atomic64_inc(&cntr->validation_drop_cnt); rc = -EINVAL; + } if (rc) goto free_cs_object; diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 70b778a0d60e2..e0d7f5fbaa5c3 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -1000,6 +1000,7 @@ struct hl_va_range { * @queue_full_drop_cnt: dropped due to queue full * @device_in_reset_drop_cnt: dropped due to device in reset * @max_cs_in_flight_drop_cnt: dropped due to maximum CS in-flight + * @validation_drop_cnt: dropped due to error in validation */ struct hl_cs_counters_atomic { atomic64_t out_of_mem_drop_cnt; @@ -1007,6 +1008,7 @@ struct hl_cs_counters_atomic { atomic64_t queue_full_drop_cnt; atomic64_t device_in_reset_drop_cnt; atomic64_t max_cs_in_flight_drop_cnt; + atomic64_t validation_drop_cnt; }; /** diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index a0c0d20f6f8ff..12efbd9d2e3a0 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -335,6 +335,8 @@ static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args) atomic64_read(&cntr->device_in_reset_drop_cnt); cs_counters.total_max_cs_in_flight_drop_cnt = atomic64_read(&cntr->max_cs_in_flight_drop_cnt); + cs_counters.total_validation_drop_cnt = + atomic64_read(&cntr->validation_drop_cnt); if (hpriv->ctx) { cs_counters.ctx_out_of_mem_drop_cnt = @@ -352,6 +354,9 @@ static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args) cs_counters.ctx_max_cs_in_flight_drop_cnt = atomic64_read( &hpriv->ctx->cs_counters.max_cs_in_flight_drop_cnt); + cs_counters.ctx_validation_drop_cnt = + atomic64_read( + &hpriv->ctx->cs_counters.validation_drop_cnt); } return copy_to_user(out, &cs_counters, diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index dc8bcec195ccf..dba3827c43ca4 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -426,6 +426,8 @@ struct hl_info_sync_manager { * @ctx_device_in_reset_drop_cnt: context dropped due to device in reset * @total_max_cs_in_flight_drop_cnt: total dropped due to maximum CS in-flight * @ctx_max_cs_in_flight_drop_cnt: context dropped due to maximum CS in-flight + * @total_validation_drop_cnt: total dropped due to validation error + * @ctx_validation_drop_cnt: context dropped due to validation error */ struct hl_info_cs_counters { __u64 total_out_of_mem_drop_cnt; @@ -438,6 +440,8 @@ struct hl_info_cs_counters { __u64 ctx_device_in_reset_drop_cnt; __u64 total_max_cs_in_flight_drop_cnt; __u64 ctx_max_cs_in_flight_drop_cnt; + __u64 total_validation_drop_cnt; + __u64 ctx_validation_drop_cnt; }; enum gaudi_dcores { -- GitLab From fcaebc7354188b0d708c79df4390fbabd4d9799d Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Mon, 14 Dec 2020 12:52:06 +0200 Subject: [PATCH 1204/1894] habanalabs: register to pci shutdown callback We need to make sure our device is idle when rebooting a virtual machine. This is done in the driver level. The firmware will later handle FLR but we want to be extra safe and stop the devices until the FLR is handled. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/habanalabs_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c index 6bbb6bca68600..032d114f01ea5 100644 --- a/drivers/misc/habanalabs/common/habanalabs_drv.c +++ b/drivers/misc/habanalabs/common/habanalabs_drv.c @@ -544,6 +544,7 @@ static struct pci_driver hl_pci_driver = { .id_table = ids, .probe = hl_pci_probe, .remove = hl_pci_remove, + .shutdown = hl_pci_remove, .driver.pm = &hl_pm_ops, .err_handler = &hl_pci_err_handler, }; -- GitLab From 097c62b6f0ec2bdadf86afbe80df03856338724d Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Tue, 22 Dec 2020 15:21:07 +0200 Subject: [PATCH 1205/1894] habanalabs: fix order of status check When the device is in reset or needs to be reset, the disabled property is don't-care. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 5871162a84425..0749c92cbcf63 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -17,12 +17,12 @@ enum hl_device_status hl_device_status(struct hl_device *hdev) { enum hl_device_status status; - if (hdev->disabled) - status = HL_DEVICE_STATUS_MALFUNCTION; - else if (atomic_read(&hdev->in_reset)) + if (atomic_read(&hdev->in_reset)) status = HL_DEVICE_STATUS_IN_RESET; else if (hdev->needs_reset) status = HL_DEVICE_STATUS_NEEDS_RESET; + else if (hdev->disabled) + status = HL_DEVICE_STATUS_MALFUNCTION; else status = HL_DEVICE_STATUS_OPERATIONAL; -- GitLab From 95cd4bca7b1f4a25810f3ddfc5e767fb46931789 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 27 Dec 2020 12:33:44 +0100 Subject: [PATCH 1206/1894] netfilter: nft_dynset: report EOPNOTSUPP on missing set feature If userspace requests a feature which is not available the original set definition, then bail out with EOPNOTSUPP. If userspace sends unsupported dynset flags (new feature not supported by this kernel), then report EOPNOTSUPP to userspace. EINVAL should be only used to report malformed netlink messages from userspace. Fixes: 22fe54d5fefc ("netfilter: nf_tables: add support for dynamic set updates") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_dynset.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 983a1d5ca3ab5..f35df221a6334 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -177,7 +177,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx, u32 flags = ntohl(nla_get_be32(tb[NFTA_DYNSET_FLAGS])); if (flags & ~NFT_DYNSET_F_INV) - return -EINVAL; + return -EOPNOTSUPP; if (flags & NFT_DYNSET_F_INV) priv->invert = true; } @@ -210,7 +210,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx, timeout = 0; if (tb[NFTA_DYNSET_TIMEOUT] != NULL) { if (!(set->flags & NFT_SET_TIMEOUT)) - return -EINVAL; + return -EOPNOTSUPP; err = nf_msecs_to_jiffies64(tb[NFTA_DYNSET_TIMEOUT], &timeout); if (err) @@ -224,7 +224,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx, if (tb[NFTA_DYNSET_SREG_DATA] != NULL) { if (!(set->flags & NFT_SET_MAP)) - return -EINVAL; + return -EOPNOTSUPP; if (set->dtype == NFT_DATA_VERDICT) return -EOPNOTSUPP; -- GitLab From b4e70d8dd9ea6bd5d5fb3122586f652326ca09cd Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 27 Dec 2020 12:35:43 +0100 Subject: [PATCH 1207/1894] netfilter: nftables: add set expression flags The set flag NFT_SET_EXPR provides a hint to the kernel that userspace supports for multiple expressions per set element. In the same direction, NFT_DYNSET_F_EXPR specifies that dynset expression defines multiple expressions per set element. This allows new userspace software with old kernels to bail out with EOPNOTSUPP. This update is similar to ef516e8625dd ("netfilter: nf_tables: reintroduce the NFT_SET_CONCAT flag"). The NFT_SET_EXPR flag needs to be set on when the NFTA_SET_EXPRESSIONS attribute is specified. The NFT_SET_EXPR flag is not set on with NFTA_SET_EXPR to retain backward compatibility in old userspace binaries. Fixes: 48b0ae046ee9 ("netfilter: nftables: netlink support for several set element expressions") Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 3 +++ net/netfilter/nf_tables_api.c | 6 +++++- net/netfilter/nft_dynset.c | 9 +++++++-- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 28b6ee53305f4..b1633e7ba5296 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -293,6 +293,7 @@ enum nft_rule_compat_attributes { * @NFT_SET_EVAL: set can be updated from the evaluation path * @NFT_SET_OBJECT: set contains stateful objects * @NFT_SET_CONCAT: set contains a concatenation + * @NFT_SET_EXPR: set contains expressions */ enum nft_set_flags { NFT_SET_ANONYMOUS = 0x1, @@ -303,6 +304,7 @@ enum nft_set_flags { NFT_SET_EVAL = 0x20, NFT_SET_OBJECT = 0x40, NFT_SET_CONCAT = 0x80, + NFT_SET_EXPR = 0x100, }; /** @@ -706,6 +708,7 @@ enum nft_dynset_ops { enum nft_dynset_flags { NFT_DYNSET_F_INV = (1 << 0), + NFT_DYNSET_F_EXPR = (1 << 1), }; /** diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 4186b1e52d584..15c467f1a9dd9 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4162,7 +4162,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, if (flags & ~(NFT_SET_ANONYMOUS | NFT_SET_CONSTANT | NFT_SET_INTERVAL | NFT_SET_TIMEOUT | NFT_SET_MAP | NFT_SET_EVAL | - NFT_SET_OBJECT | NFT_SET_CONCAT)) + NFT_SET_OBJECT | NFT_SET_CONCAT | NFT_SET_EXPR)) return -EOPNOTSUPP; /* Only one of these operations is supported */ if ((flags & (NFT_SET_MAP | NFT_SET_OBJECT)) == @@ -4304,6 +4304,10 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, struct nlattr *tmp; int left; + if (!(flags & NFT_SET_EXPR)) { + err = -EINVAL; + goto err_set_alloc_name; + } i = 0; nla_for_each_nested(tmp, nla[NFTA_SET_EXPRESSIONS], left) { if (i == NFT_SET_EXPR_MAX) { diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index f35df221a6334..0b053f75cd604 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -19,6 +19,7 @@ struct nft_dynset { enum nft_registers sreg_key:8; enum nft_registers sreg_data:8; bool invert; + bool expr; u8 num_exprs; u64 timeout; struct nft_expr *expr_array[NFT_SET_EXPR_MAX]; @@ -175,11 +176,12 @@ static int nft_dynset_init(const struct nft_ctx *ctx, if (tb[NFTA_DYNSET_FLAGS]) { u32 flags = ntohl(nla_get_be32(tb[NFTA_DYNSET_FLAGS])); - - if (flags & ~NFT_DYNSET_F_INV) + if (flags & ~(NFT_DYNSET_F_INV | NFT_DYNSET_F_EXPR)) return -EOPNOTSUPP; if (flags & NFT_DYNSET_F_INV) priv->invert = true; + if (flags & NFT_DYNSET_F_EXPR) + priv->expr = true; } set = nft_set_lookup_global(ctx->net, ctx->table, @@ -261,6 +263,9 @@ static int nft_dynset_init(const struct nft_ctx *ctx, struct nlattr *tmp; int left; + if (!priv->expr) + return -EINVAL; + i = 0; nla_for_each_nested(tmp, tb[NFTA_DYNSET_EXPRESSIONS], left) { if (i == NFT_SET_EXPR_MAX) { -- GitLab From 2ca408d9c749c32288bc28725f9f12ba30299e8f Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 30 Nov 2020 17:30:59 -0500 Subject: [PATCH 1208/1894] fanotify: Fix sys_fanotify_mark() on native x86-32 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 121b32a58a3a ("x86/entry/32: Use IA32-specific wrappers for syscalls taking 64-bit arguments") converted native x86-32 which take 64-bit arguments to use the compat handlers to allow conversion to passing args via pt_regs. sys_fanotify_mark() was however missed, as it has a general compat handler. Add a config option that will use the syscall wrapper that takes the split args for native 32-bit. [ bp: Fix typo in Kconfig help text. ] Fixes: 121b32a58a3a ("x86/entry/32: Use IA32-specific wrappers for syscalls taking 64-bit arguments") Reported-by: Paweł Jasiak Signed-off-by: Brian Gerst Signed-off-by: Borislav Petkov Acked-by: Jan Kara Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20201130223059.101286-1-brgerst@gmail.com --- arch/Kconfig | 6 ++++++ arch/x86/Kconfig | 1 + fs/notify/fanotify/fanotify_user.c | 17 +++++++---------- include/linux/syscalls.h | 24 ++++++++++++++++++++++++ 4 files changed, 38 insertions(+), 10 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 78c6f05b10f91..24862d15f3a36 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1105,6 +1105,12 @@ config HAVE_ARCH_PFN_VALID config ARCH_SUPPORTS_DEBUG_PAGEALLOC bool +config ARCH_SPLIT_ARG64 + bool + help + If a 32-bit architecture requires 64-bit arguments to be split into + pairs of 32-bit arguments, select this option. + source "kernel/gcov/Kconfig" source "scripts/gcc-plugins/Kconfig" diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 7b6dd10b162ac..21f851179ff08 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -19,6 +19,7 @@ config X86_32 select KMAP_LOCAL select MODULES_USE_ELF_REL select OLD_SIGACTION + select ARCH_SPLIT_ARG64 config X86_64 def_bool y diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 3e01d8f2ab906..dcab112e1f001 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -1285,26 +1285,23 @@ fput_and_out: return ret; } +#ifndef CONFIG_ARCH_SPLIT_ARG64 SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags, __u64, mask, int, dfd, const char __user *, pathname) { return do_fanotify_mark(fanotify_fd, flags, mask, dfd, pathname); } +#endif -#ifdef CONFIG_COMPAT -COMPAT_SYSCALL_DEFINE6(fanotify_mark, +#if defined(CONFIG_ARCH_SPLIT_ARG64) || defined(CONFIG_COMPAT) +SYSCALL32_DEFINE6(fanotify_mark, int, fanotify_fd, unsigned int, flags, - __u32, mask0, __u32, mask1, int, dfd, + SC_ARG64(mask), int, dfd, const char __user *, pathname) { - return do_fanotify_mark(fanotify_fd, flags, -#ifdef __BIG_ENDIAN - ((__u64)mask0 << 32) | mask1, -#else - ((__u64)mask1 << 32) | mask0, -#endif - dfd, pathname); + return do_fanotify_mark(fanotify_fd, flags, SC_VAL64(__u64, mask), + dfd, pathname); } #endif diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index f3929aff39cf2..7688bc983de54 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -251,6 +251,30 @@ static inline int is_syscall_trace_event(struct trace_event_call *tp_event) static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) #endif /* __SYSCALL_DEFINEx */ +/* For split 64-bit arguments on 32-bit architectures */ +#ifdef __LITTLE_ENDIAN +#define SC_ARG64(name) u32, name##_lo, u32, name##_hi +#else +#define SC_ARG64(name) u32, name##_hi, u32, name##_lo +#endif +#define SC_VAL64(type, name) ((type) name##_hi << 32 | name##_lo) + +#ifdef CONFIG_COMPAT +#define SYSCALL32_DEFINE1 COMPAT_SYSCALL_DEFINE1 +#define SYSCALL32_DEFINE2 COMPAT_SYSCALL_DEFINE2 +#define SYSCALL32_DEFINE3 COMPAT_SYSCALL_DEFINE3 +#define SYSCALL32_DEFINE4 COMPAT_SYSCALL_DEFINE4 +#define SYSCALL32_DEFINE5 COMPAT_SYSCALL_DEFINE5 +#define SYSCALL32_DEFINE6 COMPAT_SYSCALL_DEFINE6 +#else +#define SYSCALL32_DEFINE1 SYSCALL_DEFINE1 +#define SYSCALL32_DEFINE2 SYSCALL_DEFINE2 +#define SYSCALL32_DEFINE3 SYSCALL_DEFINE3 +#define SYSCALL32_DEFINE4 SYSCALL_DEFINE4 +#define SYSCALL32_DEFINE5 SYSCALL_DEFINE5 +#define SYSCALL32_DEFINE6 SYSCALL_DEFINE6 +#endif + /* * Called before coming back to user-mode. Returning to user-mode with an * address limit different than USER_DS can allow to overwrite kernel memory. -- GitLab From 512d4a26abdbd11c6ffa03032740e5ab3c62c55b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 28 Dec 2020 14:03:02 +0200 Subject: [PATCH 1209/1894] interconnect: qcom: fix rpmh link failures When CONFIG_COMPILE_TEST is set, it is possible to build some of the interconnect drivers into the kernel while their dependencies are loadable modules, which is bad: arm-linux-gnueabi-ld: drivers/interconnect/qcom/bcm-voter.o: in function `qcom_icc_bcm_voter_commit': (.text+0x1f8): undefined reference to `rpmh_invalidate' arm-linux-gnueabi-ld: (.text+0x20c): undefined reference to `rpmh_write_batch' arm-linux-gnueabi-ld: (.text+0x2b0): undefined reference to `rpmh_write_batch' arm-linux-gnueabi-ld: (.text+0x2e8): undefined reference to `rpmh_write_batch' arm-linux-gnueabi-ld: drivers/interconnect/qcom/icc-rpmh.o: in function `qcom_icc_bcm_init': (.text+0x2ac): undefined reference to `cmd_db_read_addr' arm-linux-gnueabi-ld: (.text+0x2c8): undefined reference to `cmd_db_read_aux_data' The exact dependencies are a bit complicated, so split them out into a hidden Kconfig symbol that all drivers can in turn depend on to get it right. Fixes: 976daac4a1c5 ("interconnect: qcom: Consolidate interconnect RPMh support") Signed-off-by: Arnd Bergmann Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20201204165030.3747484-1-arnd@kernel.org Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/Kconfig | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/interconnect/qcom/Kconfig b/drivers/interconnect/qcom/Kconfig index a8f93ba265f81..b3fb5b02bcf1e 100644 --- a/drivers/interconnect/qcom/Kconfig +++ b/drivers/interconnect/qcom/Kconfig @@ -42,13 +42,23 @@ config INTERCONNECT_QCOM_QCS404 This is a driver for the Qualcomm Network-on-Chip on qcs404-based platforms. +config INTERCONNECT_QCOM_RPMH_POSSIBLE + tristate + default INTERCONNECT_QCOM + depends on QCOM_RPMH || (COMPILE_TEST && !QCOM_RPMH) + depends on QCOM_COMMAND_DB || (COMPILE_TEST && !QCOM_COMMAND_DB) + depends on OF || COMPILE_TEST + help + Compile-testing RPMH drivers is possible on other platforms, + but in order to avoid link failures, drivers must not be built-in + when QCOM_RPMH or QCOM_COMMAND_DB are loadable modules + config INTERCONNECT_QCOM_RPMH tristate config INTERCONNECT_QCOM_SC7180 tristate "Qualcomm SC7180 interconnect driver" - depends on INTERCONNECT_QCOM - depends on (QCOM_RPMH && QCOM_COMMAND_DB && OF) || COMPILE_TEST + depends on INTERCONNECT_QCOM_RPMH_POSSIBLE select INTERCONNECT_QCOM_RPMH select INTERCONNECT_QCOM_BCM_VOTER help @@ -57,8 +67,7 @@ config INTERCONNECT_QCOM_SC7180 config INTERCONNECT_QCOM_SDM845 tristate "Qualcomm SDM845 interconnect driver" - depends on INTERCONNECT_QCOM - depends on (QCOM_RPMH && QCOM_COMMAND_DB && OF) || COMPILE_TEST + depends on INTERCONNECT_QCOM_RPMH_POSSIBLE select INTERCONNECT_QCOM_RPMH select INTERCONNECT_QCOM_BCM_VOTER help @@ -67,8 +76,7 @@ config INTERCONNECT_QCOM_SDM845 config INTERCONNECT_QCOM_SM8150 tristate "Qualcomm SM8150 interconnect driver" - depends on INTERCONNECT_QCOM - depends on (QCOM_RPMH && QCOM_COMMAND_DB && OF) || COMPILE_TEST + depends on INTERCONNECT_QCOM_RPMH_POSSIBLE select INTERCONNECT_QCOM_RPMH select INTERCONNECT_QCOM_BCM_VOTER help @@ -77,8 +85,7 @@ config INTERCONNECT_QCOM_SM8150 config INTERCONNECT_QCOM_SM8250 tristate "Qualcomm SM8250 interconnect driver" - depends on INTERCONNECT_QCOM - depends on (QCOM_RPMH && QCOM_COMMAND_DB && OF) || COMPILE_TEST + depends on INTERCONNECT_QCOM_RPMH_POSSIBLE select INTERCONNECT_QCOM_RPMH select INTERCONNECT_QCOM_BCM_VOTER help -- GitLab From c6174c0e058fc0a54e0b9787c44cb24b0a8d0217 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 28 Dec 2020 14:03:02 +0200 Subject: [PATCH 1210/1894] interconnect: imx: Add a missing of_node_put after of_device_is_available Add an 'of_node_put()' call when a tested device node is not available. Fixes: f0d8048525d7 ("interconnect: Add imx core driver") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/20201206121304.29381-1-christophe.jaillet@wanadoo.fr Signed-off-by: Georgi Djakov --- drivers/interconnect/imx/imx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/interconnect/imx/imx.c b/drivers/interconnect/imx/imx.c index 41dba7090c2ae..e398ebf1dbbab 100644 --- a/drivers/interconnect/imx/imx.c +++ b/drivers/interconnect/imx/imx.c @@ -99,6 +99,7 @@ static int imx_icc_node_init_qos(struct icc_provider *provider, if (!dn || !of_device_is_available(dn)) { dev_warn(dev, "Missing property %s, skip scaling %s\n", adj->phandle_name, node->name); + of_node_put(dn); return 0; } -- GitLab From 6414b79d02c426b7dd7d942fc19fb38220ea44ec Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 28 Dec 2020 14:03:02 +0200 Subject: [PATCH 1211/1894] interconnect: imx: Remove a useless test 'dn' can't be NULL here, it is tested just the line above. Remove this useless test. Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/20201206121322.29434-1-christophe.jaillet@wanadoo.fr Signed-off-by: Georgi Djakov --- drivers/interconnect/imx/imx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/interconnect/imx/imx.c b/drivers/interconnect/imx/imx.c index e398ebf1dbbab..c770951a909c9 100644 --- a/drivers/interconnect/imx/imx.c +++ b/drivers/interconnect/imx/imx.c @@ -96,7 +96,7 @@ static int imx_icc_node_init_qos(struct icc_provider *provider, return -ENODEV; } /* Allow scaling to be disabled on a per-node basis */ - if (!dn || !of_device_is_available(dn)) { + if (!of_device_is_available(dn)) { dev_warn(dev, "Missing property %s, skip scaling %s\n", adj->phandle_name, node->name); of_node_put(dn); -- GitLab From 67288f74d4837b82ef937170da3389b0779c17be Mon Sep 17 00:00:00 2001 From: Martin Kepplinger Date: Mon, 28 Dec 2020 14:03:02 +0200 Subject: [PATCH 1212/1894] interconnect: imx8mq: Use icc_sync_state Add the icc_sync_state callback to notify the framework when consumers are probed and the bandwidth doesn't have to be kept at maximum anymore. Signed-off-by: Martin Kepplinger Suggested-by: Georgi Djakov Fixes: 7d3b0b0d8184 ("interconnect: qcom: Use icc_sync_state") Link: https://lore.kernel.org/r/20201210100906.18205-6-martin.kepplinger@puri.sm Signed-off-by: Georgi Djakov --- drivers/interconnect/imx/imx8mq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/interconnect/imx/imx8mq.c b/drivers/interconnect/imx/imx8mq.c index ba43a15aefec0..d7768d3c6d8aa 100644 --- a/drivers/interconnect/imx/imx8mq.c +++ b/drivers/interconnect/imx/imx8mq.c @@ -7,6 +7,7 @@ #include #include +#include #include #include "imx.h" @@ -94,6 +95,7 @@ static struct platform_driver imx8mq_icc_driver = { .remove = imx8mq_icc_remove, .driver = { .name = "imx8mq-interconnect", + .sync_state = icc_sync_state, }, }; -- GitLab From 12b38ea040b3bb2a30eb9cd488376df5be7ea81f Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 13 Dec 2020 16:11:05 +0100 Subject: [PATCH 1213/1894] staging: spmi: hisi-spmi-controller: Fix some error handling paths IN the probe function, if an error occurs after calling 'spmi_controller_alloc()', it must be undone by a corresponding 'spmi_controller_put() call. In the remove function, use 'spmi_controller_put(ctrl)' instead of 'kfree(ctrl)'. While a it fix an error message (s/spmi_add_controller/spmi_controller_add/) Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/20201213151105.137731-1-christophe.jaillet@wanadoo.fr Signed-off-by: Greg Kroah-Hartman --- .../staging/hikey9xx/hisi-spmi-controller.c | 21 +++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/staging/hikey9xx/hisi-spmi-controller.c b/drivers/staging/hikey9xx/hisi-spmi-controller.c index 861aedd5de484..0d42bc65f39bc 100644 --- a/drivers/staging/hikey9xx/hisi-spmi-controller.c +++ b/drivers/staging/hikey9xx/hisi-spmi-controller.c @@ -278,21 +278,24 @@ static int spmi_controller_probe(struct platform_device *pdev) iores = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!iores) { dev_err(&pdev->dev, "can not get resource!\n"); - return -EINVAL; + ret = -EINVAL; + goto err_put_controller; } spmi_controller->base = devm_ioremap(&pdev->dev, iores->start, resource_size(iores)); if (!spmi_controller->base) { dev_err(&pdev->dev, "can not remap base addr!\n"); - return -EADDRNOTAVAIL; + ret = -EADDRNOTAVAIL; + goto err_put_controller; } ret = of_property_read_u32(pdev->dev.of_node, "spmi-channel", &spmi_controller->channel); if (ret) { dev_err(&pdev->dev, "can not get channel\n"); - return -ENODEV; + ret = -ENODEV; + goto err_put_controller; } platform_set_drvdata(pdev, spmi_controller); @@ -309,9 +312,15 @@ static int spmi_controller_probe(struct platform_device *pdev) ctrl->write_cmd = spmi_write_cmd; ret = spmi_controller_add(ctrl); - if (ret) - dev_err(&pdev->dev, "spmi_add_controller failed with error %d!\n", ret); + if (ret) { + dev_err(&pdev->dev, "spmi_controller_add failed with error %d!\n", ret); + goto err_put_controller; + } + + return 0; +err_put_controller: + spmi_controller_put(ctrl); return ret; } @@ -320,7 +329,7 @@ static int spmi_del_controller(struct platform_device *pdev) struct spmi_controller *ctrl = platform_get_drvdata(pdev); spmi_controller_remove(ctrl); - kfree(ctrl); + spmi_controller_put(ctrl); return 0; } -- GitLab From cab36da4bf1a35739b091b73714a39a1bbd02b05 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 2 Dec 2020 09:43:49 +0300 Subject: [PATCH 1214/1894] Staging: comedi: Return -EFAULT if copy_to_user() fails Return -EFAULT on error instead of the number of bytes remaining to be copied. Fixes: bac42fb21259 ("comedi: get rid of compat_alloc_user_space() mess in COMEDI_CMD{,TEST} compat") Signed-off-by: Dan Carpenter Cc: stable Link: https://lore.kernel.org/r/X8c3pfwFy2jpy4BP@mwanda Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/comedi_fops.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c index d99231c737fbf..80d74cce2a010 100644 --- a/drivers/staging/comedi/comedi_fops.c +++ b/drivers/staging/comedi/comedi_fops.c @@ -2987,7 +2987,9 @@ static int put_compat_cmd(struct comedi32_cmd_struct __user *cmd32, v32.chanlist_len = cmd->chanlist_len; v32.data = ptr_to_compat(cmd->data); v32.data_len = cmd->data_len; - return copy_to_user(cmd32, &v32, sizeof(v32)); + if (copy_to_user(cmd32, &v32, sizeof(v32))) + return -EFAULT; + return 0; } /* Handle 32-bit COMEDI_CMD ioctl. */ -- GitLab From d887d6104adeb94d1b926936ea21f07367f0ff9f Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 13 Dec 2020 16:35:13 +0100 Subject: [PATCH 1215/1894] staging: mt7621-dma: Fix a resource leak in an error handling path If an error occurs after calling 'mtk_hsdma_init()', it must be undone by a corresponding call to 'mtk_hsdma_uninit()' as already done in the remove function. Fixes: 0853c7a53eb3 ("staging: mt7621-dma: ralink: add rt2880 dma engine") Signed-off-by: Christophe JAILLET Cc: stable Link: https://lore.kernel.org/r/20201213153513.138723-1-christophe.jaillet@wanadoo.fr Signed-off-by: Greg Kroah-Hartman --- drivers/staging/mt7621-dma/mtk-hsdma.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/staging/mt7621-dma/mtk-hsdma.c b/drivers/staging/mt7621-dma/mtk-hsdma.c index d241349214e71..bc4bb43743131 100644 --- a/drivers/staging/mt7621-dma/mtk-hsdma.c +++ b/drivers/staging/mt7621-dma/mtk-hsdma.c @@ -712,7 +712,7 @@ static int mtk_hsdma_probe(struct platform_device *pdev) ret = dma_async_device_register(dd); if (ret) { dev_err(&pdev->dev, "failed to register dma device\n"); - return ret; + goto err_uninit_hsdma; } ret = of_dma_controller_register(pdev->dev.of_node, @@ -728,6 +728,8 @@ static int mtk_hsdma_probe(struct platform_device *pdev) err_unregister: dma_async_device_unregister(dd); +err_uninit_hsdma: + mtk_hsdma_uninit(hsdma); return ret; } -- GitLab From 275565997ade6fc32be9cd49a910ba996bcb4797 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Sun, 27 Dec 2020 17:40:37 +0100 Subject: [PATCH 1216/1894] ASoC: AMD Renoir - add DMI entry for Lenovo ThinkPad E14 Gen 2 The ThinkPad E14 Gen 2 latop does not have the internal digital microphone connected to the AMD's ACP bridge, but it's advertised via BIOS. The internal microphone is connected to the HDA codec. Use DMI to block the microphone PCM device for this platform. Reported-by: Eliot Blennerhassett Signed-off-by: Jaroslav Kysela Link: https://lore.kernel.org/r/20201227164037.269893-1-perex@perex.cz Signed-off-by: Mark Brown --- sound/soc/amd/renoir/rn-pci-acp3x.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/renoir/rn-pci-acp3x.c b/sound/soc/amd/renoir/rn-pci-acp3x.c index fa169bf09886f..17ec35be73ac4 100644 --- a/sound/soc/amd/renoir/rn-pci-acp3x.c +++ b/sound/soc/amd/renoir/rn-pci-acp3x.c @@ -171,6 +171,13 @@ static const struct dmi_system_id rn_acp_quirk_table[] = { DMI_EXACT_MATCH(DMI_BOARD_NAME, "LNVNB161216"), } }, + { + /* Lenovo ThinkPad E14 Gen 2 */ + .matches = { + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_EXACT_MATCH(DMI_BOARD_NAME, "20T6CTO1WW"), + } + }, {} }; -- GitLab From a523e1538fdd5f00ea3289cc0b3c6c1785b89814 Mon Sep 17 00:00:00 2001 From: Ravulapati Vishnu vardhan rao Date: Tue, 22 Dec 2020 17:29:18 +0530 Subject: [PATCH 1217/1894] ASoC: amd: Replacing MSI with Legacy IRQ model When we try to play and capture simultaneously we see that interrupts are genrated but our handler is not being acknowledged, After investigating further more in detail on this issue we found that IRQ delivery via MSI from the ACP IP is unreliable and so sometimes interrupt generated will not be acknowledged so MSI model shouldn't be used and using legacy IRQs will resolve interrupt handling issue. This patch replaces MSI interrupt handling with legacy IRQ model. Issue can be reproduced easily by running below python script: import subprocess import time import threading def do2(): cmd = 'aplay -f dat -D hw:2,1 /dev/zero -d 1' subprocess.call(cmd, stdin=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) print('Play Done') def run(): for i in range(1000): do2() def do(i): cmd = 'arecord -f dat -D hw:2,2 /dev/null -d 1' subprocess.call(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) print(datetime.datetime.now(), i) t = threading.Thread(target=run) t.start() for i in range(1000): do(i) t.join() After applying this patch issue is resolved. Signed-off-by: Ravulapati Vishnu vardhan rao Link: https://lore.kernel.org/r/20201222115929.11222-1-Vishnuvardhanrao.Ravulapati@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/raven/pci-acp3x.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/sound/soc/amd/raven/pci-acp3x.c b/sound/soc/amd/raven/pci-acp3x.c index 8c138e490f0c5..d3536fd6a1240 100644 --- a/sound/soc/amd/raven/pci-acp3x.c +++ b/sound/soc/amd/raven/pci-acp3x.c @@ -140,21 +140,14 @@ static int snd_acp3x_probe(struct pci_dev *pci, goto release_regions; } - /* check for msi interrupt support */ - ret = pci_enable_msi(pci); - if (ret) - /* msi is not enabled */ - irqflags = IRQF_SHARED; - else - /* msi is enabled */ - irqflags = 0; + irqflags = IRQF_SHARED; addr = pci_resource_start(pci, 0); adata->acp3x_base = devm_ioremap(&pci->dev, addr, pci_resource_len(pci, 0)); if (!adata->acp3x_base) { ret = -ENOMEM; - goto disable_msi; + goto release_regions; } pci_set_master(pci); pci_set_drvdata(pci, adata); @@ -162,7 +155,7 @@ static int snd_acp3x_probe(struct pci_dev *pci, adata->pme_en = rv_readl(adata->acp3x_base + mmACP_PME_EN); ret = acp3x_init(adata); if (ret) - goto disable_msi; + goto release_regions; val = rv_readl(adata->acp3x_base + mmACP_I2S_PIN_CONFIG); switch (val) { @@ -251,8 +244,6 @@ unregister_devs: de_init: if (acp3x_deinit(adata->acp3x_base)) dev_err(&pci->dev, "ACP de-init failed\n"); -disable_msi: - pci_disable_msi(pci); release_regions: pci_release_regions(pci); disable_pci: @@ -311,7 +302,6 @@ static void snd_acp3x_remove(struct pci_dev *pci) dev_err(&pci->dev, "ACP de-init failed\n"); pm_runtime_forbid(&pci->dev); pm_runtime_get_noresume(&pci->dev); - pci_disable_msi(pci); pci_release_regions(pci); pci_disable_device(pci); } -- GitLab From 1f092d1c8819679d78a7d9c62a46d4939d217a9d Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Sun, 27 Dec 2020 17:41:09 +0100 Subject: [PATCH 1218/1894] ASoC: AMD Renoir - add DMI entry for Lenovo ThinkPad X395 The ThinkPad X395 latop does not have the internal digital microphone connected to the AMD's ACP bridge, but it's advertised via BIOS. The internal microphone is connected to the HDA codec. Use DMI to block the microphone PCM device for this platform. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1892115 Cc: Signed-off-by: Jaroslav Kysela Link: https://lore.kernel.org/r/20201227164109.269973-1-perex@perex.cz Signed-off-by: Mark Brown --- sound/soc/amd/renoir/rn-pci-acp3x.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/renoir/rn-pci-acp3x.c b/sound/soc/amd/renoir/rn-pci-acp3x.c index 17ec35be73ac4..deca8c7a0e878 100644 --- a/sound/soc/amd/renoir/rn-pci-acp3x.c +++ b/sound/soc/amd/renoir/rn-pci-acp3x.c @@ -178,6 +178,13 @@ static const struct dmi_system_id rn_acp_quirk_table[] = { DMI_EXACT_MATCH(DMI_BOARD_NAME, "20T6CTO1WW"), } }, + { + /* Lenovo ThinkPad X395 */ + .matches = { + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_EXACT_MATCH(DMI_BOARD_NAME, "20NLCTO1WW"), + } + }, {} }; -- GitLab From 0ffc76539e6e8d28114f95ac25c167c37b5191b3 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Sun, 27 Dec 2020 13:45:02 +0000 Subject: [PATCH 1219/1894] USB: cdc-acm: blacklist another IR Droid device This device is supported by the IR Toy driver. Reported-by: Georgi Bakalski Signed-off-by: Sean Young Acked-by: Oliver Neukum Cc: stable Link: https://lore.kernel.org/r/20201227134502.4548-2-sean@mess.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index f52f1bc0559f9..781905745812e 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1895,6 +1895,10 @@ static const struct usb_device_id acm_ids[] = { { USB_DEVICE(0x04d8, 0xfd08), .driver_info = IGNORE_DEVICE, }, + + { USB_DEVICE(0x04d8, 0xf58b), + .driver_info = IGNORE_DEVICE, + }, #endif /*Samsung phone in firmware update mode */ -- GitLab From 421da9413a6a5ec4334cade5092370cf2c8c8add Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Fri, 18 Dec 2020 12:57:36 +0200 Subject: [PATCH 1220/1894] MAINTAINERS: Update address for Cadence USB3 driver Updates my email address for Cadence USB3 driver. Signed-off-by: Roger Quadros Link: https://lore.kernel.org/r/20201218105736.17667-1-rogerq@ti.com Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 546aa66428c9f..afe3a5c66bc90 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3883,7 +3883,7 @@ F: drivers/mtd/nand/raw/cadence-nand-controller.c CADENCE USB3 DRD IP DRIVER M: Peter Chen M: Pawel Laszczak -M: Roger Quadros +R: Roger Quadros R: Aswath Govindraju L: linux-usb@vger.kernel.org S: Maintained -- GitLab From 88ebce92806e5dff3549e1a8cacb53978104d3b4 Mon Sep 17 00:00:00 2001 From: Aswath Govindraju Date: Tue, 15 Dec 2020 09:55:49 +0530 Subject: [PATCH 1221/1894] dt-bindings: usb: Add new compatible string for AM64 SoC Add compatible string in j721e-usb binding file as the same USB subsystem is present in AM64. Reviewed-by: Rob Herring Signed-off-by: Aswath Govindraju Link: https://lore.kernel.org/r/20201215042549.7956-1-a-govindraju@ti.com Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml b/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml index 388245b91a55b..148b3fb4ceaf8 100644 --- a/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml +++ b/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml @@ -11,8 +11,12 @@ maintainers: properties: compatible: - items: + oneOf: - const: ti,j721e-usb + - const: ti,am64-usb + - items: + - const: ti,j721e-usb + - const: ti,am64-usb reg: description: module registers -- GitLab From a390bef7db1f192cc5b588dbcf8ed113406ec130 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Thu, 10 Dec 2020 18:04:13 -0300 Subject: [PATCH 1222/1894] usb: gadget: fsl_mxc_udc: Remove the driver Since 5.10-rc1 i.MX is a devicetree-only platform, and this driver was only used by the old non-DT i.MX devices. Remove the driver as it has no users left. Signed-off-by: Fabio Estevam Acked-by: Peter Chen Link: https://lore.kernel.org/r/20201210210413.15262-1-festevam@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/Kconfig | 2 +- drivers/usb/gadget/udc/Makefile | 1 - drivers/usb/gadget/udc/fsl_mxc_udc.c | 122 --------------------------- 3 files changed, 1 insertion(+), 124 deletions(-) delete mode 100644 drivers/usb/gadget/udc/fsl_mxc_udc.c diff --git a/drivers/usb/gadget/udc/Kconfig b/drivers/usb/gadget/udc/Kconfig index 1a12aab208b46..8c614bb86c665 100644 --- a/drivers/usb/gadget/udc/Kconfig +++ b/drivers/usb/gadget/udc/Kconfig @@ -90,7 +90,7 @@ config USB_BCM63XX_UDC config USB_FSL_USB2 tristate "Freescale Highspeed USB DR Peripheral Controller" - depends on FSL_SOC || ARCH_MXC + depends on FSL_SOC help Some of Freescale PowerPC and i.MX processors have a High Speed Dual-Role(DR) USB controller, which supports device mode. diff --git a/drivers/usb/gadget/udc/Makefile b/drivers/usb/gadget/udc/Makefile index f5a7ce28aecdf..a21f2224e7eb7 100644 --- a/drivers/usb/gadget/udc/Makefile +++ b/drivers/usb/gadget/udc/Makefile @@ -23,7 +23,6 @@ obj-$(CONFIG_USB_ATMEL_USBA) += atmel_usba_udc.o obj-$(CONFIG_USB_BCM63XX_UDC) += bcm63xx_udc.o obj-$(CONFIG_USB_FSL_USB2) += fsl_usb2_udc.o fsl_usb2_udc-y := fsl_udc_core.o -fsl_usb2_udc-$(CONFIG_ARCH_MXC) += fsl_mxc_udc.o obj-$(CONFIG_USB_TEGRA_XUDC) += tegra-xudc.o obj-$(CONFIG_USB_M66592) += m66592-udc.o obj-$(CONFIG_USB_R8A66597) += r8a66597-udc.o diff --git a/drivers/usb/gadget/udc/fsl_mxc_udc.c b/drivers/usb/gadget/udc/fsl_mxc_udc.c deleted file mode 100644 index 5a321992decc8..0000000000000 --- a/drivers/usb/gadget/udc/fsl_mxc_udc.c +++ /dev/null @@ -1,122 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * Copyright (C) 2009 - * Guennadi Liakhovetski, DENX Software Engineering, - * - * Description: - * Helper routines for i.MX3x SoCs from Freescale, needed by the fsl_usb2_udc.c - * driver to function correctly on these systems. - */ -#include -#include -#include -#include -#include -#include -#include - -#include "fsl_usb2_udc.h" - -static struct clk *mxc_ahb_clk; -static struct clk *mxc_per_clk; -static struct clk *mxc_ipg_clk; - -/* workaround ENGcm09152 for i.MX35 */ -#define MX35_USBPHYCTRL_OFFSET 0x600 -#define USBPHYCTRL_OTGBASE_OFFSET 0x8 -#define USBPHYCTRL_EVDO (1 << 23) - -int fsl_udc_clk_init(struct platform_device *pdev) -{ - struct fsl_usb2_platform_data *pdata; - unsigned long freq; - int ret; - - pdata = dev_get_platdata(&pdev->dev); - - mxc_ipg_clk = devm_clk_get(&pdev->dev, "ipg"); - if (IS_ERR(mxc_ipg_clk)) { - dev_err(&pdev->dev, "clk_get(\"ipg\") failed\n"); - return PTR_ERR(mxc_ipg_clk); - } - - mxc_ahb_clk = devm_clk_get(&pdev->dev, "ahb"); - if (IS_ERR(mxc_ahb_clk)) { - dev_err(&pdev->dev, "clk_get(\"ahb\") failed\n"); - return PTR_ERR(mxc_ahb_clk); - } - - mxc_per_clk = devm_clk_get(&pdev->dev, "per"); - if (IS_ERR(mxc_per_clk)) { - dev_err(&pdev->dev, "clk_get(\"per\") failed\n"); - return PTR_ERR(mxc_per_clk); - } - - clk_prepare_enable(mxc_ipg_clk); - clk_prepare_enable(mxc_ahb_clk); - clk_prepare_enable(mxc_per_clk); - - /* make sure USB_CLK is running at 60 MHz +/- 1000 Hz */ - if (!strcmp(pdev->id_entry->name, "imx-udc-mx27")) { - freq = clk_get_rate(mxc_per_clk); - if (pdata->phy_mode != FSL_USB2_PHY_ULPI && - (freq < 59999000 || freq > 60001000)) { - dev_err(&pdev->dev, "USB_CLK=%lu, should be 60MHz\n", freq); - ret = -EINVAL; - goto eclkrate; - } - } - - return 0; - -eclkrate: - clk_disable_unprepare(mxc_ipg_clk); - clk_disable_unprepare(mxc_ahb_clk); - clk_disable_unprepare(mxc_per_clk); - mxc_per_clk = NULL; - return ret; -} - -int fsl_udc_clk_finalize(struct platform_device *pdev) -{ - struct fsl_usb2_platform_data *pdata = dev_get_platdata(&pdev->dev); - int ret = 0; - - /* workaround ENGcm09152 for i.MX35 */ - if (pdata->workaround & FLS_USB2_WORKAROUND_ENGCM09152) { - unsigned int v; - struct resource *res = platform_get_resource - (pdev, IORESOURCE_MEM, 0); - void __iomem *phy_regs = ioremap(res->start + - MX35_USBPHYCTRL_OFFSET, 512); - if (!phy_regs) { - dev_err(&pdev->dev, "ioremap for phy address fails\n"); - ret = -EINVAL; - goto ioremap_err; - } - - v = readl(phy_regs + USBPHYCTRL_OTGBASE_OFFSET); - writel(v | USBPHYCTRL_EVDO, - phy_regs + USBPHYCTRL_OTGBASE_OFFSET); - - iounmap(phy_regs); - } - - -ioremap_err: - /* ULPI transceivers don't need usbpll */ - if (pdata->phy_mode == FSL_USB2_PHY_ULPI) { - clk_disable_unprepare(mxc_per_clk); - mxc_per_clk = NULL; - } - - return ret; -} - -void fsl_udc_clk_release(void) -{ - if (mxc_per_clk) - clk_disable_unprepare(mxc_per_clk); - clk_disable_unprepare(mxc_ahb_clk); - clk_disable_unprepare(mxc_ipg_clk); -} -- GitLab From 5e5ff0b4b6bcb4d17b7a26ec8bcfc7dd4651684f Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sun, 20 Dec 2020 00:25:53 +0900 Subject: [PATCH 1223/1894] USB: cdc-wdm: Fix use after free in service_outstanding_interrupt(). syzbot is reporting UAF at usb_submit_urb() [1], for service_outstanding_interrupt() is not checking WDM_DISCONNECTING before calling usb_submit_urb(). Close the race by doing same checks wdm_read() does upon retry. Also, while wdm_read() checks WDM_DISCONNECTING with desc->rlock held, service_interrupt_work() does not hold desc->rlock. Thus, it is possible that usb_submit_urb() is called from service_outstanding_interrupt() from service_interrupt_work() after WDM_DISCONNECTING was set and kill_urbs() from wdm_disconnect() completed. Thus, move kill_urbs() in wdm_disconnect() to after cancel_work_sync() (which makes sure that service_interrupt_work() is no longer running) completed. Although it seems to be safe to dereference desc->intf->dev in service_outstanding_interrupt() even if WDM_DISCONNECTING was already set because desc->rlock or cancel_work_sync() prevents wdm_disconnect() from reaching list_del() before service_outstanding_interrupt() completes, let's not emit error message if WDM_DISCONNECTING is set by wdm_disconnect() while usb_submit_urb() is in progress. [1] https://syzkaller.appspot.com/bug?extid=9e04e2df4a32fb661daf Reported-by: syzbot Signed-off-by: Tetsuo Handa Cc: stable Link: https://lore.kernel.org/r/620e2ee0-b9a3-dbda-a25b-a93e0ed03ec5@i-love.sakura.ne.jp Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-wdm.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index 02d0cfd23bb29..508b1c3f8b731 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -465,13 +465,23 @@ static int service_outstanding_interrupt(struct wdm_device *desc) if (!desc->resp_count || !--desc->resp_count) goto out; + if (test_bit(WDM_DISCONNECTING, &desc->flags)) { + rv = -ENODEV; + goto out; + } + if (test_bit(WDM_RESETTING, &desc->flags)) { + rv = -EIO; + goto out; + } + set_bit(WDM_RESPONDING, &desc->flags); spin_unlock_irq(&desc->iuspin); rv = usb_submit_urb(desc->response, GFP_KERNEL); spin_lock_irq(&desc->iuspin); if (rv) { - dev_err(&desc->intf->dev, - "usb_submit_urb failed with result %d\n", rv); + if (!test_bit(WDM_DISCONNECTING, &desc->flags)) + dev_err(&desc->intf->dev, + "usb_submit_urb failed with result %d\n", rv); /* make sure the next notification trigger a submit */ clear_bit(WDM_RESPONDING, &desc->flags); @@ -1027,9 +1037,9 @@ static void wdm_disconnect(struct usb_interface *intf) wake_up_all(&desc->wait); mutex_lock(&desc->rlock); mutex_lock(&desc->wlock); - kill_urbs(desc); cancel_work_sync(&desc->rxwork); cancel_work_sync(&desc->service_outs_intr); + kill_urbs(desc); mutex_unlock(&desc->wlock); mutex_unlock(&desc->rlock); -- GitLab From 0f041b8592daaaea46e91a8ebb3b47e6e0171fd8 Mon Sep 17 00:00:00 2001 From: Madhusudanarao Amara Date: Wed, 16 Dec 2020 19:39:18 +0530 Subject: [PATCH 1224/1894] usb: typec: intel_pmc_mux: Configure HPD first for HPD+IRQ request Warm reboot scenarios some times type C Mux driver gets Mux configuration request as HPD=1,IRQ=1. In that scenario typeC Mux driver need to configure Mux as follows as per IOM requirement: (1). Confgiure Mux HPD = 1, IRQ = 0 (2). Configure Mux with HPD = 1, IRQ = 1 IOM expects TypeC Mux configuration as follows: (1). HPD=1, IRQ=0 (2). HPD=1, IRQ=1 if IOM gets mux config request (2) without configuring (1), it will ignore the request. The impact of this is there is no DP_alt mode display. Fixes: 43d596e32276 ("usb: typec: intel_pmc_mux: Check the port status before connect") Cc: stable@vger.kernel.org Reviewed-by: Heikki Krogerus Signed-off-by: Madhusudanarao Amara Link: https://lore.kernel.org/r/20201216140918.49197-1-madhusudanarao.amara@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/mux/intel_pmc_mux.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/usb/typec/mux/intel_pmc_mux.c b/drivers/usb/typec/mux/intel_pmc_mux.c index cf37a59ce1304..46a25b8db72e5 100644 --- a/drivers/usb/typec/mux/intel_pmc_mux.c +++ b/drivers/usb/typec/mux/intel_pmc_mux.c @@ -207,10 +207,21 @@ static int pmc_usb_mux_dp_hpd(struct pmc_usb_port *port, struct typec_displayport_data *dp) { u8 msg[2] = { }; + int ret; msg[0] = PMC_USB_DP_HPD; msg[0] |= port->usb3_port << PMC_USB_MSG_USB3_PORT_SHIFT; + /* Configure HPD first if HPD,IRQ comes together */ + if (!IOM_PORT_HPD_ASSERTED(port->iom_status) && + dp->status & DP_STATUS_IRQ_HPD && + dp->status & DP_STATUS_HPD_STATE) { + msg[1] = PMC_USB_DP_HPD_LVL; + ret = pmc_usb_command(port, msg, sizeof(msg)); + if (ret) + return ret; + } + if (dp->status & DP_STATUS_IRQ_HPD) msg[1] = PMC_USB_DP_HPD_IRQ; -- GitLab From 5d5323a6f3625f101dbfa94ba3ef7706cce38760 Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Tue, 15 Dec 2020 20:31:47 +0100 Subject: [PATCH 1225/1894] USB: xhci: fix U1/U2 handling for hardware with XHCI_INTEL_HOST quirk set The commit 0472bf06c6fd ("xhci: Prevent U1/U2 link pm states if exit latency is too long") was constraining the xhci code not to allow U1/U2 sleep states if the latency to wake up from the U-states reached the service interval of an periodic endpoint. This fix was not taking into account that in case the quirk XHCI_INTEL_HOST is set, the wakeup time will be calculated and configured differently. It checks for u1_params.mel/u2_params.mel as a limit. But the code could decide to write another MEL into the hardware. This leads to broken cases where not enough bandwidth is available for other devices: usb 1-2: can't set config #1, error -28 This patch is fixing that case by checking for timeout_ns after the wakeup time was calculated depending on the quirks. Fixes: 0472bf06c6fd ("xhci: Prevent U1/U2 link pm states if exit latency is too long") Signed-off-by: Michael Grzeschik Cc: stable Link: https://lore.kernel.org/r/20201215193147.11738-1-m.grzeschik@pengutronix.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 91ab81c3fc79a..e86940571b4cf 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -4770,19 +4770,19 @@ static u16 xhci_calculate_u1_timeout(struct xhci_hcd *xhci, { unsigned long long timeout_ns; + if (xhci->quirks & XHCI_INTEL_HOST) + timeout_ns = xhci_calculate_intel_u1_timeout(udev, desc); + else + timeout_ns = udev->u1_params.sel; + /* Prevent U1 if service interval is shorter than U1 exit latency */ if (usb_endpoint_xfer_int(desc) || usb_endpoint_xfer_isoc(desc)) { - if (xhci_service_interval_to_ns(desc) <= udev->u1_params.mel) { + if (xhci_service_interval_to_ns(desc) <= timeout_ns) { dev_dbg(&udev->dev, "Disable U1, ESIT shorter than exit latency\n"); return USB3_LPM_DISABLED; } } - if (xhci->quirks & XHCI_INTEL_HOST) - timeout_ns = xhci_calculate_intel_u1_timeout(udev, desc); - else - timeout_ns = udev->u1_params.sel; - /* The U1 timeout is encoded in 1us intervals. * Don't return a timeout of zero, because that's USB3_LPM_DISABLED. */ @@ -4834,19 +4834,19 @@ static u16 xhci_calculate_u2_timeout(struct xhci_hcd *xhci, { unsigned long long timeout_ns; + if (xhci->quirks & XHCI_INTEL_HOST) + timeout_ns = xhci_calculate_intel_u2_timeout(udev, desc); + else + timeout_ns = udev->u2_params.sel; + /* Prevent U2 if service interval is shorter than U2 exit latency */ if (usb_endpoint_xfer_int(desc) || usb_endpoint_xfer_isoc(desc)) { - if (xhci_service_interval_to_ns(desc) <= udev->u2_params.mel) { + if (xhci_service_interval_to_ns(desc) <= timeout_ns) { dev_dbg(&udev->dev, "Disable U2, ESIT shorter than exit latency\n"); return USB3_LPM_DISABLED; } } - if (xhci->quirks & XHCI_INTEL_HOST) - timeout_ns = xhci_calculate_intel_u2_timeout(udev, desc); - else - timeout_ns = udev->u2_params.sel; - /* The U2 timeout is encoded in 256us intervals */ timeout_ns = DIV_ROUND_UP_ULL(timeout_ns, 256 * 1000); /* If the necessary timeout value is bigger than what we can set in the -- GitLab From a5ada3dfe6a20f41f91448b9034a1ef8da3dc87d Mon Sep 17 00:00:00 2001 From: Zheng Zengkai Date: Tue, 15 Dec 2020 10:54:59 +0800 Subject: [PATCH 1226/1894] usb: dwc3: meson-g12a: disable clk on error handling path in probe dwc3_meson_g12a_probe() does not invoke clk_bulk_disable_unprepare() on one error handling path. This patch fixes that. Fixes: 347052e3bf1b ("usb: dwc3: meson-g12a: fix USB2 PHY initialization on G12A and A1 SoCs") Reported-by: Hulk Robot Signed-off-by: Zheng Zengkai Cc: stable Reviewed-by: Martin Blumenstingl Link: https://lore.kernel.org/r/20201215025459.91794-1-zhengzengkai@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-meson-g12a.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/dwc3-meson-g12a.c b/drivers/usb/dwc3/dwc3-meson-g12a.c index 417e05381b5d0..bdf1f98dfad8c 100644 --- a/drivers/usb/dwc3/dwc3-meson-g12a.c +++ b/drivers/usb/dwc3/dwc3-meson-g12a.c @@ -754,7 +754,7 @@ static int dwc3_meson_g12a_probe(struct platform_device *pdev) ret = priv->drvdata->setup_regmaps(priv, base); if (ret) - return ret; + goto err_disable_clks; if (priv->vbus) { ret = regulator_enable(priv->vbus); -- GitLab From 2cc332e4ee4febcbb685e2962ad323fe4b3b750a Mon Sep 17 00:00:00 2001 From: Zqiang Date: Thu, 10 Dec 2020 10:01:48 +0800 Subject: [PATCH 1227/1894] usb: gadget: function: printer: Fix a memory leak for interface descriptor When printer driver is loaded, the printer_func_bind function is called, in this function, the interface descriptor be allocated memory, if after that, the error occurred, the interface descriptor memory need to be free. Reviewed-by: Peter Chen Cc: Signed-off-by: Zqiang Link: https://lore.kernel.org/r/20201210020148.6691-1-qiang.zhang@windriver.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_printer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/function/f_printer.c b/drivers/usb/gadget/function/f_printer.c index 64a4112068fc8..2f1eb2e81d306 100644 --- a/drivers/usb/gadget/function/f_printer.c +++ b/drivers/usb/gadget/function/f_printer.c @@ -1162,6 +1162,7 @@ fail_tx_reqs: printer_req_free(dev->in_ep, req); } + usb_free_all_descriptors(f); return ret; } -- GitLab From 5cc35c224a80aa5a5a539510ef049faf0d6ed181 Mon Sep 17 00:00:00 2001 From: Sriharsha Allenki Date: Wed, 2 Dec 2020 18:32:20 +0530 Subject: [PATCH 1228/1894] usb: gadget: Fix spinlock lockup on usb_function_deactivate There is a spinlock lockup as part of composite_disconnect when it tries to acquire cdev->lock as part of usb_gadget_deactivate. This is because the usb_gadget_deactivate is called from usb_function_deactivate with the same spinlock held. This would result in the below call stack and leads to stall. rcu: INFO: rcu_preempt detected stalls on CPUs/tasks: rcu: 3-...0: (1 GPs behind) idle=162/1/0x4000000000000000 softirq=10819/10819 fqs=2356 (detected by 2, t=5252 jiffies, g=20129, q=3770) Task dump for CPU 3: task:uvc-gadget_wlhe state:R running task stack: 0 pid: 674 ppid: 636 flags:0x00000202 Call trace: __switch_to+0xc0/0x170 _raw_spin_lock_irqsave+0x84/0xb0 composite_disconnect+0x28/0x78 configfs_composite_disconnect+0x68/0x70 usb_gadget_disconnect+0x10c/0x128 usb_gadget_deactivate+0xd4/0x108 usb_function_deactivate+0x6c/0x80 uvc_function_disconnect+0x20/0x58 uvc_v4l2_release+0x30/0x88 v4l2_release+0xbc/0xf0 __fput+0x7c/0x230 ____fput+0x14/0x20 task_work_run+0x88/0x140 do_notify_resume+0x240/0x6f0 work_pending+0x8/0x200 Fix this by doing an unlock on cdev->lock before the usb_gadget_deactivate call from usb_function_deactivate. The same lockup can happen in the usb_gadget_activate path. Fix that path as well. Reported-by: Peter Chen Link: https://lore.kernel.org/linux-usb/20201102094936.GA29581@b29397-desktop/ Tested-by: Peter Chen Signed-off-by: Sriharsha Allenki Cc: stable Link: https://lore.kernel.org/r/20201202130220.24926-1-sallenki@codeaurora.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/composite.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index c6d455f2bb928..1a556a628971f 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -392,8 +392,11 @@ int usb_function_deactivate(struct usb_function *function) spin_lock_irqsave(&cdev->lock, flags); - if (cdev->deactivations == 0) + if (cdev->deactivations == 0) { + spin_unlock_irqrestore(&cdev->lock, flags); status = usb_gadget_deactivate(cdev->gadget); + spin_lock_irqsave(&cdev->lock, flags); + } if (status == 0) cdev->deactivations++; @@ -424,8 +427,11 @@ int usb_function_activate(struct usb_function *function) status = -EINVAL; else { cdev->deactivations--; - if (cdev->deactivations == 0) + if (cdev->deactivations == 0) { + spin_unlock_irqrestore(&cdev->lock, flags); status = usb_gadget_activate(cdev->gadget); + spin_lock_irqsave(&cdev->lock, flags); + } } spin_unlock_irqrestore(&cdev->lock, flags); -- GitLab From c91d3a6bcaa031f551ba29a496a8027b31289464 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 17 Nov 2020 17:29:55 +0800 Subject: [PATCH 1229/1894] USB: gadget: legacy: fix return error code in acm_ms_bind() If usb_otg_descriptor_alloc() failed, it need return ENOMEM. Fixes: 578aa8a2b12c ("usb: gadget: acm_ms: allocate and init otg descriptor by otg capabilities") Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Cc: stable Link: https://lore.kernel.org/r/20201117092955.4102785-1-yangyingliang@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/legacy/acm_ms.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/legacy/acm_ms.c b/drivers/usb/gadget/legacy/acm_ms.c index 59be2d8417c9c..e8033e5f0c18e 100644 --- a/drivers/usb/gadget/legacy/acm_ms.c +++ b/drivers/usb/gadget/legacy/acm_ms.c @@ -200,8 +200,10 @@ static int acm_ms_bind(struct usb_composite_dev *cdev) struct usb_descriptor_header *usb_desc; usb_desc = usb_otg_descriptor_alloc(gadget); - if (!usb_desc) + if (!usb_desc) { + status = -ENOMEM; goto fail_string_ids; + } usb_otg_descriptor_init(gadget, usb_desc); otg_desc[0] = usb_desc; otg_desc[1] = NULL; -- GitLab From 0a88fa221ce911c331bf700d2214c5b2f77414d3 Mon Sep 17 00:00:00 2001 From: Manish Narani Date: Tue, 17 Nov 2020 12:43:35 +0530 Subject: [PATCH 1230/1894] usb: gadget: u_ether: Fix MTU size mismatch with RX packet size Fix the MTU size issue with RX packet size as the host sends the packet with extra bytes containing ethernet header. This causes failure when user sets the MTU size to the maximum i.e. 15412. In this case the ethernet packet received will be of length 15412 plus the ethernet header length. This patch fixes the issue where there is a check that RX packet length must not be more than max packet length. Fixes: bba787a860fa ("usb: gadget: ether: Allow jumbo frames") Signed-off-by: Manish Narani Cc: stable Link: https://lore.kernel.org/r/1605597215-122027-1-git-send-email-manish.narani@xilinx.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/u_ether.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c index 31ea76adcc0db..c019f2b0c0af3 100644 --- a/drivers/usb/gadget/function/u_ether.c +++ b/drivers/usb/gadget/function/u_ether.c @@ -45,9 +45,10 @@ #define UETH__VERSION "29-May-2008" /* Experiments show that both Linux and Windows hosts allow up to 16k - * frame sizes. Set the max size to 15k+52 to prevent allocating 32k + * frame sizes. Set the max MTU size to 15k+52 to prevent allocating 32k * blocks and still have efficient handling. */ -#define GETHER_MAX_ETH_FRAME_LEN 15412 +#define GETHER_MAX_MTU_SIZE 15412 +#define GETHER_MAX_ETH_FRAME_LEN (GETHER_MAX_MTU_SIZE + ETH_HLEN) struct eth_dev { /* lock is held while accessing port_usb @@ -786,7 +787,7 @@ struct eth_dev *gether_setup_name(struct usb_gadget *g, /* MTU range: 14 - 15412 */ net->min_mtu = ETH_HLEN; - net->max_mtu = GETHER_MAX_ETH_FRAME_LEN; + net->max_mtu = GETHER_MAX_MTU_SIZE; dev->gadget = g; SET_NETDEV_DEV(net, &g->dev); @@ -848,7 +849,7 @@ struct net_device *gether_setup_name_default(const char *netname) /* MTU range: 14 - 15412 */ net->min_mtu = ETH_HLEN; - net->max_mtu = GETHER_MAX_ETH_FRAME_LEN; + net->max_mtu = GETHER_MAX_MTU_SIZE; return net; } -- GitLab From 83a43ff80a566de8718dfc6565545a0080ec1fb5 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Tue, 17 Nov 2020 09:14:30 +0800 Subject: [PATCH 1231/1894] usb: chipidea: ci_hdrc_imx: add missing put_device() call in usbmisc_get_init_data() if of_find_device_by_node() succeed, usbmisc_get_init_data() doesn't have a corresponding put_device(). Thus add put_device() to fix the exception handling for this function implementation. Fixes: ef12da914ed6 ("usb: chipidea: imx: properly check for usbmisc") Signed-off-by: Yu Kuai Cc: stable Link: https://lore.kernel.org/r/20201117011430.642589-1-yukuai3@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/ci_hdrc_imx.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/chipidea/ci_hdrc_imx.c b/drivers/usb/chipidea/ci_hdrc_imx.c index 9e12152ea46bc..8b7bc10b6e8b4 100644 --- a/drivers/usb/chipidea/ci_hdrc_imx.c +++ b/drivers/usb/chipidea/ci_hdrc_imx.c @@ -139,9 +139,13 @@ static struct imx_usbmisc_data *usbmisc_get_init_data(struct device *dev) misc_pdev = of_find_device_by_node(args.np); of_node_put(args.np); - if (!misc_pdev || !platform_get_drvdata(misc_pdev)) + if (!misc_pdev) return ERR_PTR(-EPROBE_DEFER); + if (!platform_get_drvdata(misc_pdev)) { + put_device(&misc_pdev->dev); + return ERR_PTR(-EPROBE_DEFER); + } data->dev = &misc_pdev->dev; /* -- GitLab From 372c93131998c0622304bed118322d2a04489e63 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Dec 2020 11:30:53 +0100 Subject: [PATCH 1232/1894] USB: yurex: fix control-URB timeout handling Make sure to always cancel the control URB in write() so that it can be reused after a timeout or spurious CMD_ACK. Currently any further write requests after a timeout would fail after triggering a WARN() in usb_submit_urb() when attempting to submit the already active URB. Reported-by: syzbot+e87ebe0f7913f71f2ea5@syzkaller.appspotmail.com Fixes: 6bc235a2e24a ("USB: add driver for Meywa-Denki & Kayac YUREX") Cc: stable # 2.6.37 Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/yurex.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/misc/yurex.c b/drivers/usb/misc/yurex.c index 73ebfa6e9715e..c640f98d20c54 100644 --- a/drivers/usb/misc/yurex.c +++ b/drivers/usb/misc/yurex.c @@ -496,6 +496,9 @@ static ssize_t yurex_write(struct file *file, const char __user *user_buffer, timeout = schedule_timeout(YUREX_WRITE_TIMEOUT); finish_wait(&dev->waitq, &wait); + /* make sure URB is idle after timeout or (spurious) CMD_ACK */ + usb_kill_urb(dev->cntl_urb); + mutex_unlock(&dev->io_mutex); if (retval < 0) { -- GitLab From ce722da66d3e9384aa2de9d33d584ee154e5e157 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Thu, 10 Dec 2020 11:50:06 +0300 Subject: [PATCH 1233/1894] usb: dwc3: ulpi: Use VStsDone to detect PHY regs access completion In accordance with [1] the DWC_usb3 core sets the GUSB2PHYACCn.VStsDone bit when the PHY vendor control access is done and clears it when the application initiates a new transaction. The doc doesn't say anything about the GUSB2PHYACCn.VStsBsy flag serving for the same purpose. Moreover we've discovered that the VStsBsy flag can be cleared before the VStsDone bit. So using the former as a signal of the PHY control registers completion might be dangerous. Let's have the VStsDone flag utilized instead then. [1] Synopsys DesignWare Cores SuperSpeed USB 3.0 xHCI Host Controller Databook, 2.70a, December 2013, p.388 Fixes: 88bc9d194ff6 ("usb: dwc3: add ULPI interface support") Acked-by: Heikki Krogerus Signed-off-by: Serge Semin Link: https://lore.kernel.org/r/20201210085008.13264-2-Sergey.Semin@baikalelectronics.ru Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.h | 1 + drivers/usb/dwc3/ulpi.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index 2f95f08ca5119..1b241f937d8f4 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -285,6 +285,7 @@ /* Global USB2 PHY Vendor Control Register */ #define DWC3_GUSB2PHYACC_NEWREGREQ BIT(25) +#define DWC3_GUSB2PHYACC_DONE BIT(24) #define DWC3_GUSB2PHYACC_BUSY BIT(23) #define DWC3_GUSB2PHYACC_WRITE BIT(22) #define DWC3_GUSB2PHYACC_ADDR(n) (n << 16) diff --git a/drivers/usb/dwc3/ulpi.c b/drivers/usb/dwc3/ulpi.c index aa213c9815f67..3cc4f4970c050 100644 --- a/drivers/usb/dwc3/ulpi.c +++ b/drivers/usb/dwc3/ulpi.c @@ -24,7 +24,7 @@ static int dwc3_ulpi_busyloop(struct dwc3 *dwc) while (count--) { reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYACC(0)); - if (!(reg & DWC3_GUSB2PHYACC_BUSY)) + if (reg & DWC3_GUSB2PHYACC_DONE) return 0; cpu_relax(); } -- GitLab From fca3f138105727c3a22edda32d02f91ce1bf11c9 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Thu, 10 Dec 2020 11:50:07 +0300 Subject: [PATCH 1234/1894] usb: dwc3: ulpi: Replace CPU-based busyloop with Protocol-based one Originally the procedure of the ULPI transaction finish detection has been developed as a simple busy-loop with just decrementing counter and no delays. It's wrong since on different systems the loop will take a different time to complete. So if the system bus and CPU are fast enough to overtake the ULPI bus and the companion PHY reaction, then we'll get to take a false timeout error. Fix this by converting the busy-loop procedure to take the standard bus speed, address value and the registers access mode into account for the busy-loop delay calculation. Here is the way the fix works. It's known that the ULPI bus is clocked with 60MHz signal. In accordance with [1] the ULPI bus protocol is created so to spend 5 and 6 clock periods for immediate register write and read operations respectively, and 6 and 7 clock periods - for the extended register writes and reads. Based on that we can easily pre-calculate the time which will be needed for the controller to perform a requested IO operation. Note we'll still preserve the attempts counter in case if the DWC USB3 controller has got some internals delays. [1] UTMI+ Low Pin Interface (ULPI) Specification, Revision 1.1, October 20, 2004, pp. 30 - 36. Fixes: 88bc9d194ff6 ("usb: dwc3: add ULPI interface support") Acked-by: Heikki Krogerus Signed-off-by: Serge Semin Link: https://lore.kernel.org/r/20201210085008.13264-3-Sergey.Semin@baikalelectronics.ru Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/ulpi.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/usb/dwc3/ulpi.c b/drivers/usb/dwc3/ulpi.c index 3cc4f4970c050..54c877f7b51db 100644 --- a/drivers/usb/dwc3/ulpi.c +++ b/drivers/usb/dwc3/ulpi.c @@ -7,6 +7,8 @@ * Author: Heikki Krogerus */ +#include +#include #include #include "core.h" @@ -17,12 +19,22 @@ DWC3_GUSB2PHYACC_ADDR(ULPI_ACCESS_EXTENDED) | \ DWC3_GUSB2PHYACC_EXTEND_ADDR(a) : DWC3_GUSB2PHYACC_ADDR(a)) -static int dwc3_ulpi_busyloop(struct dwc3 *dwc) +#define DWC3_ULPI_BASE_DELAY DIV_ROUND_UP(NSEC_PER_SEC, 60000000L) + +static int dwc3_ulpi_busyloop(struct dwc3 *dwc, u8 addr, bool read) { + unsigned long ns = 5L * DWC3_ULPI_BASE_DELAY; unsigned int count = 1000; u32 reg; + if (addr >= ULPI_EXT_VENDOR_SPECIFIC) + ns += DWC3_ULPI_BASE_DELAY; + + if (read) + ns += DWC3_ULPI_BASE_DELAY; + while (count--) { + ndelay(ns); reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYACC(0)); if (reg & DWC3_GUSB2PHYACC_DONE) return 0; @@ -47,7 +59,7 @@ static int dwc3_ulpi_read(struct device *dev, u8 addr) reg = DWC3_GUSB2PHYACC_NEWREGREQ | DWC3_ULPI_ADDR(addr); dwc3_writel(dwc->regs, DWC3_GUSB2PHYACC(0), reg); - ret = dwc3_ulpi_busyloop(dwc); + ret = dwc3_ulpi_busyloop(dwc, addr, true); if (ret) return ret; @@ -71,7 +83,7 @@ static int dwc3_ulpi_write(struct device *dev, u8 addr, u8 val) reg |= DWC3_GUSB2PHYACC_WRITE | val; dwc3_writel(dwc->regs, DWC3_GUSB2PHYACC(0), reg); - return dwc3_ulpi_busyloop(dwc); + return dwc3_ulpi_busyloop(dwc, addr, false); } static const struct ulpi_ops dwc3_ulpi_ops = { -- GitLab From e5f4ca3fce90a37b23a77bfcc86800d484a80514 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Thu, 10 Dec 2020 11:50:08 +0300 Subject: [PATCH 1235/1894] usb: dwc3: ulpi: Fix USB2.0 HS/FS/LS PHY suspend regression First of all the commit e0082698b689 ("usb: dwc3: ulpi: conditionally resume ULPI PHY") introduced the Suspend USB2.0 HS/FS/LS PHY regression, as by design of the fix any attempt to read/write from/to the PHY control registers will completely disable the PHY suspension, which consequently will increase the USB bus power consumption. Secondly the fix won't work well for the very first attempt of the ULPI PHY control registers IO, because after disabling the USB2.0 PHY suspension functionality it will still take some time for the bus to resume from the sleep state if one has been reached before it. So the very first PHY register read/write operation will take more time than the busy-loop provides and the IO timeout error might be returned anyway. Here we suggest to fix the denoted problems in the following way. First of all let's not disable the Suspend USB2.0 HS/FS/LS PHY functionality so to make the controller and the USB2.0 bus more power efficient. Secondly instead of that we'll extend the PHY IO op wait procedure with 1 - 1.2 ms sleep if the PHY suspension is enabled (1ms should be enough as by LPM specification it is at most how long it takes for the USB2.0 bus to resume from L1 (Sleep) state). Finally in case if the USB2.0 PHY suspension functionality has been disabled on the DWC USB3 controller setup procedure we'll compensate the USB bus resume process latency by extending the busy-loop attempts counter. Fixes: e0082698b689 ("usb: dwc3: ulpi: conditionally resume ULPI PHY") Acked-by: Heikki Krogerus Signed-off-by: Serge Semin Link: https://lore.kernel.org/r/20201210085008.13264-4-Sergey.Semin@baikalelectronics.ru Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/ulpi.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/drivers/usb/dwc3/ulpi.c b/drivers/usb/dwc3/ulpi.c index 54c877f7b51db..f23f4c9a557e9 100644 --- a/drivers/usb/dwc3/ulpi.c +++ b/drivers/usb/dwc3/ulpi.c @@ -24,7 +24,7 @@ static int dwc3_ulpi_busyloop(struct dwc3 *dwc, u8 addr, bool read) { unsigned long ns = 5L * DWC3_ULPI_BASE_DELAY; - unsigned int count = 1000; + unsigned int count = 10000; u32 reg; if (addr >= ULPI_EXT_VENDOR_SPECIFIC) @@ -33,6 +33,10 @@ static int dwc3_ulpi_busyloop(struct dwc3 *dwc, u8 addr, bool read) if (read) ns += DWC3_ULPI_BASE_DELAY; + reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)); + if (reg & DWC3_GUSB2PHYCFG_SUSPHY) + usleep_range(1000, 1200); + while (count--) { ndelay(ns); reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYACC(0)); @@ -50,12 +54,6 @@ static int dwc3_ulpi_read(struct device *dev, u8 addr) u32 reg; int ret; - reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)); - if (reg & DWC3_GUSB2PHYCFG_SUSPHY) { - reg &= ~DWC3_GUSB2PHYCFG_SUSPHY; - dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg); - } - reg = DWC3_GUSB2PHYACC_NEWREGREQ | DWC3_ULPI_ADDR(addr); dwc3_writel(dwc->regs, DWC3_GUSB2PHYACC(0), reg); @@ -73,12 +71,6 @@ static int dwc3_ulpi_write(struct device *dev, u8 addr, u8 val) struct dwc3 *dwc = dev_get_drvdata(dev); u32 reg; - reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)); - if (reg & DWC3_GUSB2PHYCFG_SUSPHY) { - reg &= ~DWC3_GUSB2PHYCFG_SUSPHY; - dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg); - } - reg = DWC3_GUSB2PHYACC_NEWREGREQ | DWC3_ULPI_ADDR(addr); reg |= DWC3_GUSB2PHYACC_WRITE | val; dwc3_writel(dwc->regs, DWC3_GUSB2PHYACC(0), reg); -- GitLab From 9389044f27081d6ec77730c36d5bf9a1288bcda2 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Mon, 21 Dec 2020 18:35:28 +0100 Subject: [PATCH 1236/1894] usb: gadget: f_uac2: reset wMaxPacketSize With commit 913e4a90b6f9 ("usb: gadget: f_uac2: finalize wMaxPacketSize according to bandwidth") wMaxPacketSize is computed dynamically but the value is never reset. Because of this, the actual maximum packet size can only decrease each time the audio gadget is instantiated. Reset the endpoint maximum packet size and mark wMaxPacketSize as dynamic to solve the problem. Fixes: 913e4a90b6f9 ("usb: gadget: f_uac2: finalize wMaxPacketSize according to bandwidth") Signed-off-by: Jerome Brunet Cc: stable Link: https://lore.kernel.org/r/20201221173531.215169-2-jbrunet@baylibre.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_uac2.c | 69 ++++++++++++++++++++++------ 1 file changed, 55 insertions(+), 14 deletions(-) diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c index 3633df6d7610f..5d960b6603b6f 100644 --- a/drivers/usb/gadget/function/f_uac2.c +++ b/drivers/usb/gadget/function/f_uac2.c @@ -271,7 +271,7 @@ static struct usb_endpoint_descriptor fs_epout_desc = { .bEndpointAddress = USB_DIR_OUT, .bmAttributes = USB_ENDPOINT_XFER_ISOC | USB_ENDPOINT_SYNC_ASYNC, - .wMaxPacketSize = cpu_to_le16(1023), + /* .wMaxPacketSize = DYNAMIC */ .bInterval = 1, }; @@ -280,7 +280,7 @@ static struct usb_endpoint_descriptor hs_epout_desc = { .bDescriptorType = USB_DT_ENDPOINT, .bmAttributes = USB_ENDPOINT_XFER_ISOC | USB_ENDPOINT_SYNC_ASYNC, - .wMaxPacketSize = cpu_to_le16(1024), + /* .wMaxPacketSize = DYNAMIC */ .bInterval = 4, }; @@ -348,7 +348,7 @@ static struct usb_endpoint_descriptor fs_epin_desc = { .bEndpointAddress = USB_DIR_IN, .bmAttributes = USB_ENDPOINT_XFER_ISOC | USB_ENDPOINT_SYNC_ASYNC, - .wMaxPacketSize = cpu_to_le16(1023), + /* .wMaxPacketSize = DYNAMIC */ .bInterval = 1, }; @@ -357,7 +357,7 @@ static struct usb_endpoint_descriptor hs_epin_desc = { .bDescriptorType = USB_DT_ENDPOINT, .bmAttributes = USB_ENDPOINT_XFER_ISOC | USB_ENDPOINT_SYNC_ASYNC, - .wMaxPacketSize = cpu_to_le16(1024), + /* .wMaxPacketSize = DYNAMIC */ .bInterval = 4, }; @@ -444,12 +444,28 @@ struct cntrl_range_lay3 { __le32 dRES; } __packed; -static void set_ep_max_packet_size(const struct f_uac2_opts *uac2_opts, +static int set_ep_max_packet_size(const struct f_uac2_opts *uac2_opts, struct usb_endpoint_descriptor *ep_desc, - unsigned int factor, bool is_playback) + enum usb_device_speed speed, bool is_playback) { int chmask, srate, ssize; - u16 max_packet_size; + u16 max_size_bw, max_size_ep; + unsigned int factor; + + switch (speed) { + case USB_SPEED_FULL: + max_size_ep = 1023; + factor = 1000; + break; + + case USB_SPEED_HIGH: + max_size_ep = 1024; + factor = 8000; + break; + + default: + return -EINVAL; + } if (is_playback) { chmask = uac2_opts->p_chmask; @@ -461,10 +477,12 @@ static void set_ep_max_packet_size(const struct f_uac2_opts *uac2_opts, ssize = uac2_opts->c_ssize; } - max_packet_size = num_channels(chmask) * ssize * + max_size_bw = num_channels(chmask) * ssize * DIV_ROUND_UP(srate, factor / (1 << (ep_desc->bInterval - 1))); - ep_desc->wMaxPacketSize = cpu_to_le16(min_t(u16, max_packet_size, - le16_to_cpu(ep_desc->wMaxPacketSize))); + ep_desc->wMaxPacketSize = cpu_to_le16(min_t(u16, max_size_bw, + max_size_ep)); + + return 0; } /* Use macro to overcome line length limitation */ @@ -670,10 +688,33 @@ afunc_bind(struct usb_configuration *cfg, struct usb_function *fn) } /* Calculate wMaxPacketSize according to audio bandwidth */ - set_ep_max_packet_size(uac2_opts, &fs_epin_desc, 1000, true); - set_ep_max_packet_size(uac2_opts, &fs_epout_desc, 1000, false); - set_ep_max_packet_size(uac2_opts, &hs_epin_desc, 8000, true); - set_ep_max_packet_size(uac2_opts, &hs_epout_desc, 8000, false); + ret = set_ep_max_packet_size(uac2_opts, &fs_epin_desc, USB_SPEED_FULL, + true); + if (ret < 0) { + dev_err(dev, "%s:%d Error!\n", __func__, __LINE__); + return ret; + } + + ret = set_ep_max_packet_size(uac2_opts, &fs_epout_desc, USB_SPEED_FULL, + false); + if (ret < 0) { + dev_err(dev, "%s:%d Error!\n", __func__, __LINE__); + return ret; + } + + ret = set_ep_max_packet_size(uac2_opts, &hs_epin_desc, USB_SPEED_HIGH, + true); + if (ret < 0) { + dev_err(dev, "%s:%d Error!\n", __func__, __LINE__); + return ret; + } + + ret = set_ep_max_packet_size(uac2_opts, &hs_epout_desc, USB_SPEED_HIGH, + false); + if (ret < 0) { + dev_err(dev, "%s:%d Error!\n", __func__, __LINE__); + return ret; + } if (EPOUT_EN(uac2_opts)) { agdev->out_ep = usb_ep_autoconfig(gadget, &fs_epout_desc); -- GitLab From 60267ba35c744d851dcd2d22ebaa240ca6aaa15f Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 16 Dec 2020 17:19:58 +0100 Subject: [PATCH 1237/1894] ceph: reencode gid_list when reconnecting On reconnect, cap and dentry releases are dropped and the fields that follow must be reencoded into the freed space. Currently these are timestamp and gid_list, but gid_list isn't reencoded. This results in failed to decode message of type 24 v4: End of buffer errors on the MDS. While at it, make a change to encode gid_list unconditionally, without regard to what head/which version was used as a result of checking whether CEPH_FEATURE_FS_BTIME is supported or not. URL: https://tracker.ceph.com/issues/48618 Fixes: 4f1ddb1ea874 ("ceph: implement updated ceph_mds_request_head structure") Signed-off-by: Ilya Dryomov Reviewed-by: Jeff Layton --- fs/ceph/mds_client.c | 53 ++++++++++++++++++-------------------------- 1 file changed, 22 insertions(+), 31 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 98c15ff2e599a..840587037b59b 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2475,6 +2475,22 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry, return r; } +static void encode_timestamp_and_gids(void **p, + const struct ceph_mds_request *req) +{ + struct ceph_timespec ts; + int i; + + ceph_encode_timespec64(&ts, &req->r_stamp); + ceph_encode_copy(p, &ts, sizeof(ts)); + + /* gid_list */ + ceph_encode_32(p, req->r_cred->group_info->ngroups); + for (i = 0; i < req->r_cred->group_info->ngroups; i++) + ceph_encode_64(p, from_kgid(&init_user_ns, + req->r_cred->group_info->gid[i])); +} + /* * called under mdsc->mutex */ @@ -2491,7 +2507,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, u64 ino1 = 0, ino2 = 0; int pathlen1 = 0, pathlen2 = 0; bool freepath1 = false, freepath2 = false; - int len, i; + int len; u16 releases; void *p, *end; int ret; @@ -2517,17 +2533,10 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, goto out_free1; } - if (legacy) { - /* Old style */ - len = sizeof(*head); - } else { - /* New style: add gid_list and any later fields */ - len = sizeof(struct ceph_mds_request_head) + sizeof(u32) + - (sizeof(u64) * req->r_cred->group_info->ngroups); - } - + len = legacy ? sizeof(*head) : sizeof(struct ceph_mds_request_head); len += pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) + sizeof(struct ceph_timespec); + len += sizeof(u32) + (sizeof(u64) * req->r_cred->group_info->ngroups); /* calculate (max) length for cap releases */ len += sizeof(struct ceph_mds_request_release) * @@ -2548,7 +2557,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, msg->hdr.tid = cpu_to_le64(req->r_tid); /* - * The old ceph_mds_request_header didn't contain a version field, and + * The old ceph_mds_request_head didn't contain a version field, and * one was added when we moved the message version from 3->4. */ if (legacy) { @@ -2609,20 +2618,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, head->num_releases = cpu_to_le16(releases); - /* time stamp */ - { - struct ceph_timespec ts; - ceph_encode_timespec64(&ts, &req->r_stamp); - ceph_encode_copy(&p, &ts, sizeof(ts)); - } - - /* gid list */ - if (!legacy) { - ceph_encode_32(&p, req->r_cred->group_info->ngroups); - for (i = 0; i < req->r_cred->group_info->ngroups; i++) - ceph_encode_64(&p, from_kgid(&init_user_ns, - req->r_cred->group_info->gid[i])); - } + encode_timestamp_and_gids(&p, req); if (WARN_ON_ONCE(p > end)) { ceph_msg_put(msg); @@ -2730,13 +2726,8 @@ static int __prepare_send_request(struct ceph_mds_session *session, /* remove cap/dentry releases from message */ rhead->num_releases = 0; - /* time stamp */ p = msg->front.iov_base + req->r_request_release_offset; - { - struct ceph_timespec ts; - ceph_encode_timespec64(&ts, &req->r_stamp); - ceph_encode_copy(&p, &ts, sizeof(ts)); - } + encode_timestamp_and_gids(&p, req); msg->front.iov_len = p - msg->front.iov_base; msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); -- GitLab From ad32fe8801c38f7b1a8b3814bd1f006cb2b5e781 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 15 Dec 2020 16:40:59 +0100 Subject: [PATCH 1238/1894] libceph: fix auth_signature buffer allocation in secure mode auth_signature frame is 68 bytes in plain mode and 96 bytes in secure mode but we are requesting 68 bytes in both modes. By luck, this doesn't actually result in any invalid memory accesses because the allocation is satisfied out of kmalloc-96 slab and so exactly 96 bytes are allocated, but KASAN rightfully complains. Fixes: cd1a677cad99 ("libceph, ceph: implement msgr2.1 protocol (crc and secure modes)") Reported-by: Luis Henriques Signed-off-by: Ilya Dryomov --- net/ceph/messenger_v2.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c index c1ebb2aa08b5c..4f938fc8deaf5 100644 --- a/net/ceph/messenger_v2.c +++ b/net/ceph/messenger_v2.c @@ -1333,7 +1333,8 @@ static int prepare_auth_signature(struct ceph_connection *con) void *buf; int ret; - buf = alloc_conn_buf(con, head_onwire_len(SHA256_DIGEST_SIZE, false)); + buf = alloc_conn_buf(con, head_onwire_len(SHA256_DIGEST_SIZE, + con_secure(con))); if (!buf) return -ENOMEM; -- GitLab From f5f2c9a0e3073debc6bc0ecc855ced0158526ee8 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 15 Dec 2020 16:49:07 +0100 Subject: [PATCH 1239/1894] libceph: align session_key and con_secret to 16 bytes crypto_shash_setkey() and crypto_aead_setkey() will do a (small) GFP_ATOMIC allocation to align the key if it isn't suitably aligned. It's not a big deal, but at the same time easy to avoid. The actual alignment requirement is dynamic, queryable with crypto_shash_alignmask() and crypto_aead_alignmask(), but shouldn't be stricter than 16 bytes for our algorithms. Fixes: cd1a677cad99 ("libceph, ceph: implement msgr2.1 protocol (crc and secure modes)") Signed-off-by: Ilya Dryomov --- net/ceph/messenger_v2.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c index 4f938fc8deaf5..c38d8de938363 100644 --- a/net/ceph/messenger_v2.c +++ b/net/ceph/messenger_v2.c @@ -2033,10 +2033,18 @@ bad: return -EINVAL; } +/* + * Align session_key and con_secret to avoid GFP_ATOMIC allocation + * inside crypto_shash_setkey() and crypto_aead_setkey() called from + * setup_crypto(). __aligned(16) isn't guaranteed to work for stack + * objects, so do it by hand. + */ static int process_auth_done(struct ceph_connection *con, void *p, void *end) { - u8 session_key[CEPH_KEY_LEN]; - u8 con_secret[CEPH_MAX_CON_SECRET_LEN]; + u8 session_key_buf[CEPH_KEY_LEN + 16]; + u8 con_secret_buf[CEPH_MAX_CON_SECRET_LEN + 16]; + u8 *session_key = PTR_ALIGN(&session_key_buf[0], 16); + u8 *con_secret = PTR_ALIGN(&con_secret_buf[0], 16); int session_key_len, con_secret_len; int payload_len; u64 global_id; -- GitLab From 664f1e259a982bf213f0cd8eea7616c89546585c Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 16 Dec 2020 18:40:05 +0100 Subject: [PATCH 1240/1894] libceph: add __maybe_unused to DEFINE_MSGR2_FEATURE Avoid -Wunused-const-variable warnings for "make W=1". Reported-by: Jeff Layton Signed-off-by: Ilya Dryomov --- include/linux/ceph/msgr.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/ceph/msgr.h b/include/linux/ceph/msgr.h index f5e02f6c06555..3989dcb94d3d1 100644 --- a/include/linux/ceph/msgr.h +++ b/include/linux/ceph/msgr.h @@ -33,8 +33,8 @@ #define CEPH_MSGR2_INCARNATION_1 (0ull) #define DEFINE_MSGR2_FEATURE(bit, incarnation, name) \ - static const uint64_t CEPH_MSGR2_FEATURE_##name = (1ULL << bit); \ - static const uint64_t CEPH_MSGR2_FEATUREMASK_##name = \ + static const uint64_t __maybe_unused CEPH_MSGR2_FEATURE_##name = (1ULL << bit); \ + static const uint64_t __maybe_unused CEPH_MSGR2_FEATUREMASK_##name = \ (1ULL << bit | CEPH_MSGR2_INCARNATION_##incarnation); #define HAVE_MSGR2_FEATURE(x, name) \ -- GitLab From 48b0777cd93dbd800d3966b6f5c34714aad5c203 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 28 Dec 2020 16:13:52 -0500 Subject: [PATCH 1241/1894] Revert "dm crypt: export sysfs of kcryptd workqueue" This reverts commit a2b8b2d975673b1a50ab0bcce5d146b9335edfad. WQ_SYSFS breaks the ability to reload a DM table due to sysfs kobject collision (due to active and inactive table). Given lack of demonstrated need for exposing this workqueue via sysfs: revert exposing it. Reported-by: Ignat Korchagin Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 5f9f9b3a226d7..53791138d78bf 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -3166,12 +3166,11 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) } if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags)) - cc->crypt_queue = alloc_workqueue("kcryptd-%s", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, + cc->crypt_queue = alloc_workqueue("kcryptd/%s", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 1, devname); else - cc->crypt_queue = alloc_workqueue("kcryptd-%s", - WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | - WQ_UNBOUND | WQ_SYSFS, + cc->crypt_queue = alloc_workqueue("kcryptd/%s", + WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND, num_online_cpus(), devname); if (!cc->crypt_queue) { ti->error = "Couldn't create kcryptd queue"; -- GitLab From 59b4a8fa27f5a895582ada1ae5034af7c94a57b5 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Wed, 23 Dec 2020 19:21:16 -0800 Subject: [PATCH 1242/1894] CDC-NCM: remove "connected" log message The cdc_ncm driver passes network connection notifications up to usbnet_link_change(), which is the right place for any logging. Remove the netdev_info() duplicating this from the driver itself. This stops devices such as my "TRENDnet USB 10/100/1G/2.5G LAN" (ID 20f4:e02b) adapter from spamming the kernel log with cdc_ncm 2-2:2.0 enp0s2u2c2: network connection: connected messages every 60 msec or so. Signed-off-by: Roland Dreier Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20201224032116.2453938-1-roland@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/usb/cdc_ncm.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 2bac57d5e8d50..3b816a4731f29 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -1863,9 +1863,6 @@ static void cdc_ncm_status(struct usbnet *dev, struct urb *urb) * USB_CDC_NOTIFY_NETWORK_CONNECTION notification shall be * sent by device after USB_CDC_NOTIFY_SPEED_CHANGE. */ - netif_info(dev, link, dev->net, - "network connection: %sconnected\n", - !!event->wValue ? "" : "dis"); usbnet_link_change(dev, !!event->wValue, 0); break; -- GitLab From 1ad58225dba3f2f598d2c6daed4323f24547168f Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Wed, 23 Dec 2020 22:23:20 +0100 Subject: [PATCH 1243/1894] net-sysfs: take the rtnl lock when storing xps_cpus Two race conditions can be triggered when storing xps cpus, resulting in various oops and invalid memory accesses: 1. Calling netdev_set_num_tc while netif_set_xps_queue: - netif_set_xps_queue uses dev->tc_num as one of the parameters to compute the size of new_dev_maps when allocating it. dev->tc_num is also used to access the map, and the compiler may generate code to retrieve this field multiple times in the function. - netdev_set_num_tc sets dev->tc_num. If new_dev_maps is allocated using dev->tc_num and then dev->tc_num is set to a higher value through netdev_set_num_tc, later accesses to new_dev_maps in netif_set_xps_queue could lead to accessing memory outside of new_dev_maps; triggering an oops. 2. Calling netif_set_xps_queue while netdev_set_num_tc is running: 2.1. netdev_set_num_tc starts by resetting the xps queues, dev->tc_num isn't updated yet. 2.2. netif_set_xps_queue is called, setting up the map with the *old* dev->num_tc. 2.3. netdev_set_num_tc updates dev->tc_num. 2.4. Later accesses to the map lead to out of bound accesses and oops. A similar issue can be found with netdev_reset_tc. One way of triggering this is to set an iface up (for which the driver uses netdev_set_num_tc in the open path, such as bnx2x) and writing to xps_cpus in a concurrent thread. With the right timing an oops is triggered. Both issues have the same fix: netif_set_xps_queue, netdev_set_num_tc and netdev_reset_tc should be mutually exclusive. We do that by taking the rtnl lock in xps_cpus_store. Fixes: 184c449f91fe ("net: Add support for XPS with QoS via traffic classes") Signed-off-by: Antoine Tenart Reviewed-by: Alexander Duyck Signed-off-by: Jakub Kicinski --- net/core/net-sysfs.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 999b70c59761d..7cc15dec1717c 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1396,7 +1396,13 @@ static ssize_t xps_cpus_store(struct netdev_queue *queue, return err; } + if (!rtnl_trylock()) { + free_cpumask_var(mask); + return restart_syscall(); + } + err = netif_set_xps_queue(dev, mask, index); + rtnl_unlock(); free_cpumask_var(mask); -- GitLab From fb25038586d0064123e393cadf1fadd70a9df97a Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Wed, 23 Dec 2020 22:23:21 +0100 Subject: [PATCH 1244/1894] net-sysfs: take the rtnl lock when accessing xps_cpus_map and num_tc Accesses to dev->xps_cpus_map (when using dev->num_tc) should be protected by the rtnl lock, like we do for netif_set_xps_queue. I didn't see an actual bug being triggered, but let's be safe here and take the rtnl lock while accessing the map in sysfs. Fixes: 184c449f91fe ("net: Add support for XPS with QoS via traffic classes") Signed-off-by: Antoine Tenart Reviewed-by: Alexander Duyck Signed-off-by: Jakub Kicinski --- net/core/net-sysfs.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 7cc15dec1717c..65886bfbf8223 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1317,8 +1317,8 @@ static const struct attribute_group dql_group = { static ssize_t xps_cpus_show(struct netdev_queue *queue, char *buf) { + int cpu, len, ret, num_tc = 1, tc = 0; struct net_device *dev = queue->dev; - int cpu, len, num_tc = 1, tc = 0; struct xps_dev_maps *dev_maps; cpumask_var_t mask; unsigned long index; @@ -1328,22 +1328,31 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue, index = get_netdev_queue_index(queue); + if (!rtnl_trylock()) + return restart_syscall(); + if (dev->num_tc) { /* Do not allow XPS on subordinate device directly */ num_tc = dev->num_tc; - if (num_tc < 0) - return -EINVAL; + if (num_tc < 0) { + ret = -EINVAL; + goto err_rtnl_unlock; + } /* If queue belongs to subordinate dev use its map */ dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev; tc = netdev_txq_to_tc(dev, index); - if (tc < 0) - return -EINVAL; + if (tc < 0) { + ret = -EINVAL; + goto err_rtnl_unlock; + } } - if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) - return -ENOMEM; + if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) { + ret = -ENOMEM; + goto err_rtnl_unlock; + } rcu_read_lock(); dev_maps = rcu_dereference(dev->xps_cpus_map); @@ -1366,9 +1375,15 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue, } rcu_read_unlock(); + rtnl_unlock(); + len = snprintf(buf, PAGE_SIZE, "%*pb\n", cpumask_pr_args(mask)); free_cpumask_var(mask); return len < PAGE_SIZE ? len : -EINVAL; + +err_rtnl_unlock: + rtnl_unlock(); + return ret; } static ssize_t xps_cpus_store(struct netdev_queue *queue, -- GitLab From 2d57b4f142e0b03e854612b8e28978935414bced Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Wed, 23 Dec 2020 22:23:22 +0100 Subject: [PATCH 1245/1894] net-sysfs: take the rtnl lock when storing xps_rxqs Two race conditions can be triggered when storing xps rxqs, resulting in various oops and invalid memory accesses: 1. Calling netdev_set_num_tc while netif_set_xps_queue: - netif_set_xps_queue uses dev->tc_num as one of the parameters to compute the size of new_dev_maps when allocating it. dev->tc_num is also used to access the map, and the compiler may generate code to retrieve this field multiple times in the function. - netdev_set_num_tc sets dev->tc_num. If new_dev_maps is allocated using dev->tc_num and then dev->tc_num is set to a higher value through netdev_set_num_tc, later accesses to new_dev_maps in netif_set_xps_queue could lead to accessing memory outside of new_dev_maps; triggering an oops. 2. Calling netif_set_xps_queue while netdev_set_num_tc is running: 2.1. netdev_set_num_tc starts by resetting the xps queues, dev->tc_num isn't updated yet. 2.2. netif_set_xps_queue is called, setting up the map with the *old* dev->num_tc. 2.3. netdev_set_num_tc updates dev->tc_num. 2.4. Later accesses to the map lead to out of bound accesses and oops. A similar issue can be found with netdev_reset_tc. One way of triggering this is to set an iface up (for which the driver uses netdev_set_num_tc in the open path, such as bnx2x) and writing to xps_rxqs in a concurrent thread. With the right timing an oops is triggered. Both issues have the same fix: netif_set_xps_queue, netdev_set_num_tc and netdev_reset_tc should be mutually exclusive. We do that by taking the rtnl lock in xps_rxqs_store. Fixes: 8af2c06ff4b1 ("net-sysfs: Add interface for Rx queue(s) map per Tx queue") Signed-off-by: Antoine Tenart Reviewed-by: Alexander Duyck Signed-off-by: Jakub Kicinski --- net/core/net-sysfs.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 65886bfbf8223..62ca2f2c0ee61 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1499,10 +1499,17 @@ static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf, return err; } + if (!rtnl_trylock()) { + bitmap_free(mask); + return restart_syscall(); + } + cpus_read_lock(); err = __netif_set_xps_queue(dev, mask, index, true); cpus_read_unlock(); + rtnl_unlock(); + bitmap_free(mask); return err ? : len; } -- GitLab From 4ae2bb81649dc03dfc95875f02126b14b773f7ab Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Wed, 23 Dec 2020 22:23:23 +0100 Subject: [PATCH 1246/1894] net-sysfs: take the rtnl lock when accessing xps_rxqs_map and num_tc Accesses to dev->xps_rxqs_map (when using dev->num_tc) should be protected by the rtnl lock, like we do for netif_set_xps_queue. I didn't see an actual bug being triggered, but let's be safe here and take the rtnl lock while accessing the map in sysfs. Fixes: 8af2c06ff4b1 ("net-sysfs: Add interface for Rx queue(s) map per Tx queue") Signed-off-by: Antoine Tenart Reviewed-by: Alexander Duyck Signed-off-by: Jakub Kicinski --- net/core/net-sysfs.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 62ca2f2c0ee61..daf502c13d6da 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1429,22 +1429,29 @@ static struct netdev_queue_attribute xps_cpus_attribute __ro_after_init static ssize_t xps_rxqs_show(struct netdev_queue *queue, char *buf) { + int j, len, ret, num_tc = 1, tc = 0; struct net_device *dev = queue->dev; struct xps_dev_maps *dev_maps; unsigned long *mask, index; - int j, len, num_tc = 1, tc = 0; index = get_netdev_queue_index(queue); + if (!rtnl_trylock()) + return restart_syscall(); + if (dev->num_tc) { num_tc = dev->num_tc; tc = netdev_txq_to_tc(dev, index); - if (tc < 0) - return -EINVAL; + if (tc < 0) { + ret = -EINVAL; + goto err_rtnl_unlock; + } } mask = bitmap_zalloc(dev->num_rx_queues, GFP_KERNEL); - if (!mask) - return -ENOMEM; + if (!mask) { + ret = -ENOMEM; + goto err_rtnl_unlock; + } rcu_read_lock(); dev_maps = rcu_dereference(dev->xps_rxqs_map); @@ -1470,10 +1477,16 @@ static ssize_t xps_rxqs_show(struct netdev_queue *queue, char *buf) out_no_maps: rcu_read_unlock(); + rtnl_unlock(); + len = bitmap_print_to_pagebuf(false, buf, mask, dev->num_rx_queues); bitmap_free(mask); return len < PAGE_SIZE ? len : -EINVAL; + +err_rtnl_unlock: + rtnl_unlock(); + return ret; } static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf, -- GitLab From 4614792eebcbf81c60ad3604c1aeeb2b0899cea4 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Thu, 24 Dec 2020 18:24:05 +0200 Subject: [PATCH 1247/1894] net: ethernet: ti: cpts: fix ethtool output when no ptp_clock registered The CPTS driver registers PTP PHC clock when first netif is going up and unregister it when all netif are down. Now ethtool will show: - PTP PHC clock index 0 after boot until first netif is up; - the last assigned PTP PHC clock index even if PTP PHC clock is not registered any more after all netifs are down. This patch ensures that -1 is returned by ethtool when PTP PHC clock is not registered any more. Fixes: 8a2c9a5ab4b9 ("net: ethernet: ti: cpts: rework initialization/deinitialization") Signed-off-by: Grygorii Strashko Acked-by: Richard Cochran Link: https://lore.kernel.org/r/20201224162405.28032-1-grygorii.strashko@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/cpts.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c index d1fc7955d4227..43222a34cba06 100644 --- a/drivers/net/ethernet/ti/cpts.c +++ b/drivers/net/ethernet/ti/cpts.c @@ -599,6 +599,7 @@ void cpts_unregister(struct cpts *cpts) ptp_clock_unregister(cpts->clock); cpts->clock = NULL; + cpts->phc_index = -1; cpts_write32(cpts, 0, int_enable); cpts_write32(cpts, 0, control); @@ -784,6 +785,7 @@ struct cpts *cpts_create(struct device *dev, void __iomem *regs, cpts->cc.read = cpts_systim_read; cpts->cc.mask = CLOCKSOURCE_MASK(32); cpts->info = cpts_info; + cpts->phc_index = -1; if (n_ext_ts) cpts->info.n_ext_ts = n_ext_ts; -- GitLab From 950271d7cc0b4546af3549d8143c4132d6e1f138 Mon Sep 17 00:00:00 2001 From: Yunjian Wang Date: Fri, 25 Dec 2020 10:52:16 +0800 Subject: [PATCH 1248/1894] tun: fix return value when the number of iovs exceeds MAX_SKB_FRAGS Currently the tun_napi_alloc_frags() function returns -ENOMEM when the number of iovs exceeds MAX_SKB_FRAGS + 1. However this is inappropriate, we should use -EMSGSIZE instead of -ENOMEM. The following distinctions are matters: 1. the caller need to drop the bad packet when -EMSGSIZE is returned, which means meeting a persistent failure. 2. the caller can try again when -ENOMEM is returned, which means meeting a transient failure. Fixes: 90e33d459407 ("tun: enable napi_gro_frags() for TUN/TAP driver") Signed-off-by: Yunjian Wang Acked-by: Willem de Bruijn Acked-by: Jason Wang Acked-by: Michael S. Tsirkin Link: https://lore.kernel.org/r/1608864736-24332-1-git-send-email-wangyunjian@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/tun.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index fbed05ae7b0f6..978ac0981d160 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1365,7 +1365,7 @@ static struct sk_buff *tun_napi_alloc_frags(struct tun_file *tfile, int i; if (it->nr_segs > MAX_SKB_FRAGS + 1) - return ERR_PTR(-ENOMEM); + return ERR_PTR(-EMSGSIZE); local_bh_disable(); skb = napi_get_frags(&tfile->napi); -- GitLab From e7579d5d5b3298f7e888ed07ac16bfb7174c135a Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Mon, 21 Dec 2020 22:07:25 +0100 Subject: [PATCH 1249/1894] net: mptcp: cap forward allocation to 1M the following syzkaller reproducer: r0 = socket$inet_mptcp(0x2, 0x1, 0x106) bind$inet(r0, &(0x7f0000000080)={0x2, 0x4e24, @multicast2}, 0x10) connect$inet(r0, &(0x7f0000000480)={0x2, 0x4e24, @local}, 0x10) sendto$inet(r0, &(0x7f0000000100)="f6", 0xffffffe7, 0xc000, 0x0, 0x0) systematically triggers the following warning: WARNING: CPU: 2 PID: 8618 at net/core/stream.c:208 sk_stream_kill_queues+0x3fa/0x580 Modules linked in: CPU: 2 PID: 8618 Comm: syz-executor Not tainted 5.10.0+ #334 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.1-4.module+el8.1.0+4066+0f1aadab 04/04 RIP: 0010:sk_stream_kill_queues+0x3fa/0x580 Code: df 48 c1 ea 03 0f b6 04 02 84 c0 74 04 3c 03 7e 40 8b ab 20 02 00 00 e9 64 ff ff ff e8 df f0 81 2 RSP: 0018:ffffc9000290fcb0 EFLAGS: 00010293 RAX: ffff888011cb8000 RBX: 0000000000000000 RCX: ffffffff86eecf0e RDX: 0000000000000000 RSI: ffffffff86eecf6a RDI: 0000000000000005 RBP: 0000000000000e28 R08: ffff888011cb8000 R09: fffffbfff1f48139 R10: ffffffff8fa409c7 R11: fffffbfff1f48138 R12: ffff8880215e6220 R13: ffffffff8fa409c0 R14: ffffc9000290fd30 R15: 1ffff92000521fa2 FS: 00007f41c78f4800(0000) GS:ffff88802d000000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f95c803d088 CR3: 0000000025ed2000 CR4: 00000000000006f0 Call Trace: __mptcp_destroy_sock+0x4f5/0x8e0 mptcp_close+0x5e2/0x7f0 inet_release+0x12b/0x270 __sock_release+0xc8/0x270 sock_close+0x18/0x20 __fput+0x272/0x8e0 task_work_run+0xe0/0x1a0 exit_to_user_mode_prepare+0x1df/0x200 syscall_exit_to_user_mode+0x19/0x50 entry_SYSCALL_64_after_hwframe+0x44/0xa9 userspace programs provide arbitrarily high values of 'len' in sendmsg(): this is causing integer overflow of 'amount'. Cap forward allocation to 1 megabyte: higher values are not really useful. Suggested-by: Paolo Abeni Fixes: e93da92896bc ("mptcp: implement wmem reservation") Signed-off-by: Davide Caratti Link: https://lore.kernel.org/r/3334d00d8b2faecafdfab9aa593efcbf61442756.1608584474.git.dcaratti@redhat.com Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 09b19aa2f2051..6628d8d742030 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -877,6 +877,9 @@ static void __mptcp_wmem_reserve(struct sock *sk, int size) struct mptcp_sock *msk = mptcp_sk(sk); WARN_ON_ONCE(msk->wmem_reserved); + if (WARN_ON_ONCE(amount < 0)) + amount = 0; + if (amount <= sk->sk_forward_alloc) goto reserve; @@ -1587,7 +1590,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL)) return -EOPNOTSUPP; - mptcp_lock_sock(sk, __mptcp_wmem_reserve(sk, len)); + mptcp_lock_sock(sk, __mptcp_wmem_reserve(sk, min_t(size_t, 1 << 20, len))); timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); -- GitLab From fb1e6e562b37b39adfe251919c9abfdb3e01f921 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Sun, 27 Dec 2020 14:18:17 -0500 Subject: [PATCH 1250/1894] bnxt_en: Fix AER recovery. A recent change skips sending firmware messages to the firmware when pci_channel_offline() is true during fatal AER error. To make this complete, we need to move the re-initialization sequence to bnxt_io_resume(), otherwise the firmware messages to re-initialize will all be skipped. In any case, it is more correct to re-initialize in bnxt_io_resume(). Also, fix the reverse x-mas tree format when defining variables in bnxt_io_slot_reset(). Fixes: b340dc680ed4 ("bnxt_en: Avoid sending firmware messages when AER error is detected.") Reviewed-by: Edwin Peer Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 31 ++++++++++------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 4edd6f8e017e4..b8351e24395de 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -12887,10 +12887,10 @@ static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev, */ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev) { + pci_ers_result_t result = PCI_ERS_RESULT_DISCONNECT; struct net_device *netdev = pci_get_drvdata(pdev); struct bnxt *bp = netdev_priv(netdev); int err = 0, off; - pci_ers_result_t result = PCI_ERS_RESULT_DISCONNECT; netdev_info(bp->dev, "PCI Slot Reset\n"); @@ -12919,22 +12919,8 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev) pci_save_state(pdev); err = bnxt_hwrm_func_reset(bp); - if (!err) { - err = bnxt_hwrm_func_qcaps(bp); - if (!err && netif_running(netdev)) - err = bnxt_open(netdev); - } - bnxt_ulp_start(bp, err); - if (!err) { - bnxt_reenable_sriov(bp); + if (!err) result = PCI_ERS_RESULT_RECOVERED; - } - } - - if (result != PCI_ERS_RESULT_RECOVERED) { - if (netif_running(netdev)) - dev_close(netdev); - pci_disable_device(pdev); } rtnl_unlock(); @@ -12952,10 +12938,21 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev) static void bnxt_io_resume(struct pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); + struct bnxt *bp = netdev_priv(netdev); + int err; + netdev_info(bp->dev, "PCI Slot Resume\n"); rtnl_lock(); - netif_device_attach(netdev); + err = bnxt_hwrm_func_qcaps(bp); + if (!err && netif_running(netdev)) + err = bnxt_open(netdev); + + bnxt_ulp_start(bp, err); + if (!err) { + bnxt_reenable_sriov(bp); + netif_device_attach(netdev); + } rtnl_unlock(); } -- GitLab From a029a2fef5d11bb85587433c3783615442abac96 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sun, 27 Dec 2020 14:18:18 -0500 Subject: [PATCH 1251/1894] bnxt_en: Check TQM rings for maximum supported value. TQM rings are hardware resources that require host context memory managed by the driver. The driver supports up to 9 TQM rings and the number of rings to use is requested by firmware during run-time. Cap this number to the maximum supported to prevent accessing beyond the array. Future firmware may request more than 9 TQM rings. Define macros to remove the magic number 9 from the C code. Fixes: ac3158cb0108 ("bnxt_en: Allocate TQM ring context memory according to fw specification.") Reviewed-by: Pavan Chebbi Reviewed-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 7 +++++-- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 7 ++++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index b8351e24395de..d10e4f85dd11a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -6790,8 +6790,10 @@ static int bnxt_hwrm_func_backing_store_qcaps(struct bnxt *bp) ctx->tqm_fp_rings_count = resp->tqm_fp_rings_count; if (!ctx->tqm_fp_rings_count) ctx->tqm_fp_rings_count = bp->max_q; + else if (ctx->tqm_fp_rings_count > BNXT_MAX_TQM_FP_RINGS) + ctx->tqm_fp_rings_count = BNXT_MAX_TQM_FP_RINGS; - tqm_rings = ctx->tqm_fp_rings_count + 1; + tqm_rings = ctx->tqm_fp_rings_count + BNXT_MAX_TQM_SP_RINGS; ctx_pg = kcalloc(tqm_rings, sizeof(*ctx_pg), GFP_KERNEL); if (!ctx_pg) { kfree(ctx); @@ -6925,7 +6927,8 @@ static int bnxt_hwrm_func_backing_store_cfg(struct bnxt *bp, u32 enables) pg_attr = &req.tqm_sp_pg_size_tqm_sp_lvl, pg_dir = &req.tqm_sp_page_dir, ena = FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_SP; - i < 9; i++, num_entries++, pg_attr++, pg_dir++, ena <<= 1) { + i < BNXT_MAX_TQM_RINGS; + i++, num_entries++, pg_attr++, pg_dir++, ena <<= 1) { if (!(enables & ena)) continue; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 950ea26ae0d26..51996c85547ee 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1436,6 +1436,11 @@ struct bnxt_ctx_pg_info { struct bnxt_ctx_pg_info **ctx_pg_tbl; }; +#define BNXT_MAX_TQM_SP_RINGS 1 +#define BNXT_MAX_TQM_FP_RINGS 8 +#define BNXT_MAX_TQM_RINGS \ + (BNXT_MAX_TQM_SP_RINGS + BNXT_MAX_TQM_FP_RINGS) + struct bnxt_ctx_mem_info { u32 qp_max_entries; u16 qp_min_qp1_entries; @@ -1474,7 +1479,7 @@ struct bnxt_ctx_mem_info { struct bnxt_ctx_pg_info stat_mem; struct bnxt_ctx_pg_info mrav_mem; struct bnxt_ctx_pg_info tim_mem; - struct bnxt_ctx_pg_info *tqm_mem[9]; + struct bnxt_ctx_pg_info *tqm_mem[BNXT_MAX_TQM_RINGS]; }; struct bnxt_fw_health { -- GitLab From 1169318bd565d2911b949f6123e109baa35881b6 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Sat, 26 Dec 2020 15:37:36 -0600 Subject: [PATCH 1252/1894] net: ipa: don't return a value from gsi_channel_command() Callers of gsi_channel_command() no longer care whether the command times out, and don't use what gsi_channel_command() returns. Redefine that function to have void return type. Reported-by: kernel test robot Fixes: 6ffddf3b3d182 ("net: ipa: use state to determine channel command success") Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- drivers/net/ipa/gsi.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index 579cc3e516775..e51a770578990 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -454,7 +454,7 @@ static enum gsi_channel_state gsi_channel_state(struct gsi_channel *channel) } /* Issue a channel command and wait for it to complete */ -static int +static void gsi_channel_command(struct gsi_channel *channel, enum gsi_ch_cmd_opcode opcode) { struct completion *completion = &channel->completion; @@ -489,12 +489,10 @@ gsi_channel_command(struct gsi_channel *channel, enum gsi_ch_cmd_opcode opcode) iowrite32(0, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET); if (success) - return 0; + return; dev_err(dev, "GSI command %u for channel %u timed out, state %u\n", opcode, channel_id, gsi_channel_state(channel)); - - return -ETIMEDOUT; } /* Allocate GSI channel in NOT_ALLOCATED state */ @@ -503,7 +501,6 @@ static int gsi_channel_alloc_command(struct gsi *gsi, u32 channel_id) struct gsi_channel *channel = &gsi->channel[channel_id]; struct device *dev = gsi->dev; enum gsi_channel_state state; - int ret; /* Get initial channel state */ state = gsi_channel_state(channel); @@ -513,7 +510,7 @@ static int gsi_channel_alloc_command(struct gsi *gsi, u32 channel_id) return -EINVAL; } - ret = gsi_channel_command(channel, GSI_CH_ALLOCATE); + gsi_channel_command(channel, GSI_CH_ALLOCATE); /* If successful the channel state will have changed */ state = gsi_channel_state(channel); @@ -531,7 +528,6 @@ static int gsi_channel_start_command(struct gsi_channel *channel) { struct device *dev = channel->gsi->dev; enum gsi_channel_state state; - int ret; state = gsi_channel_state(channel); if (state != GSI_CHANNEL_STATE_ALLOCATED && @@ -541,7 +537,7 @@ static int gsi_channel_start_command(struct gsi_channel *channel) return -EINVAL; } - ret = gsi_channel_command(channel, GSI_CH_START); + gsi_channel_command(channel, GSI_CH_START); /* If successful the channel state will have changed */ state = gsi_channel_state(channel); @@ -559,7 +555,6 @@ static int gsi_channel_stop_command(struct gsi_channel *channel) { struct device *dev = channel->gsi->dev; enum gsi_channel_state state; - int ret; state = gsi_channel_state(channel); @@ -576,7 +571,7 @@ static int gsi_channel_stop_command(struct gsi_channel *channel) return -EINVAL; } - ret = gsi_channel_command(channel, GSI_CH_STOP); + gsi_channel_command(channel, GSI_CH_STOP); /* If successful the channel state will have changed */ state = gsi_channel_state(channel); @@ -598,7 +593,6 @@ static void gsi_channel_reset_command(struct gsi_channel *channel) { struct device *dev = channel->gsi->dev; enum gsi_channel_state state; - int ret; msleep(1); /* A short delay is required before a RESET command */ @@ -612,7 +606,7 @@ static void gsi_channel_reset_command(struct gsi_channel *channel) return; } - ret = gsi_channel_command(channel, GSI_CH_RESET); + gsi_channel_command(channel, GSI_CH_RESET); /* If successful the channel state will have changed */ state = gsi_channel_state(channel); @@ -627,7 +621,6 @@ static void gsi_channel_de_alloc_command(struct gsi *gsi, u32 channel_id) struct gsi_channel *channel = &gsi->channel[channel_id]; struct device *dev = gsi->dev; enum gsi_channel_state state; - int ret; state = gsi_channel_state(channel); if (state != GSI_CHANNEL_STATE_ALLOCATED) { @@ -636,7 +629,7 @@ static void gsi_channel_de_alloc_command(struct gsi *gsi, u32 channel_id) return; } - ret = gsi_channel_command(channel, GSI_CH_DE_ALLOC); + gsi_channel_command(channel, GSI_CH_DE_ALLOC); /* If successful the channel state will have changed */ state = gsi_channel_state(channel); -- GitLab From 1ddf776b498c922935d0ec3283b9817dd33aedf7 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Sat, 26 Dec 2020 15:37:37 -0600 Subject: [PATCH 1253/1894] net: ipa: don't return a value from evt_ring_command() Callers of evt_ring_command() no longer care whether the command times out, and don't use what evt_ring_command() returns. Redefine that function to have void return type. Reported-by: kernel test robot Fixes: 428b448ee764a ("net: ipa: use state to determine event ring command success") Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- drivers/net/ipa/gsi.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index e51a770578990..14d9a791924bf 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -326,8 +326,8 @@ gsi_evt_ring_state(struct gsi *gsi, u32 evt_ring_id) } /* Issue an event ring command and wait for it to complete */ -static int evt_ring_command(struct gsi *gsi, u32 evt_ring_id, - enum gsi_evt_cmd_opcode opcode) +static void evt_ring_command(struct gsi *gsi, u32 evt_ring_id, + enum gsi_evt_cmd_opcode opcode) { struct gsi_evt_ring *evt_ring = &gsi->evt_ring[evt_ring_id]; struct completion *completion = &evt_ring->completion; @@ -361,19 +361,16 @@ static int evt_ring_command(struct gsi *gsi, u32 evt_ring_id, iowrite32(0, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_MSK_OFFSET); if (success) - return 0; + return; dev_err(dev, "GSI command %u for event ring %u timed out, state %u\n", opcode, evt_ring_id, evt_ring->state); - - return -ETIMEDOUT; } /* Allocate an event ring in NOT_ALLOCATED state */ static int gsi_evt_ring_alloc_command(struct gsi *gsi, u32 evt_ring_id) { struct gsi_evt_ring *evt_ring = &gsi->evt_ring[evt_ring_id]; - int ret; /* Get initial event ring state */ evt_ring->state = gsi_evt_ring_state(gsi, evt_ring_id); @@ -383,7 +380,7 @@ static int gsi_evt_ring_alloc_command(struct gsi *gsi, u32 evt_ring_id) return -EINVAL; } - ret = evt_ring_command(gsi, evt_ring_id, GSI_EVT_ALLOCATE); + evt_ring_command(gsi, evt_ring_id, GSI_EVT_ALLOCATE); /* If successful the event ring state will have changed */ if (evt_ring->state == GSI_EVT_RING_STATE_ALLOCATED) @@ -400,7 +397,6 @@ static void gsi_evt_ring_reset_command(struct gsi *gsi, u32 evt_ring_id) { struct gsi_evt_ring *evt_ring = &gsi->evt_ring[evt_ring_id]; enum gsi_evt_ring_state state = evt_ring->state; - int ret; if (state != GSI_EVT_RING_STATE_ALLOCATED && state != GSI_EVT_RING_STATE_ERROR) { @@ -409,7 +405,7 @@ static void gsi_evt_ring_reset_command(struct gsi *gsi, u32 evt_ring_id) return; } - ret = evt_ring_command(gsi, evt_ring_id, GSI_EVT_RESET); + evt_ring_command(gsi, evt_ring_id, GSI_EVT_RESET); /* If successful the event ring state will have changed */ if (evt_ring->state == GSI_EVT_RING_STATE_ALLOCATED) @@ -423,7 +419,6 @@ static void gsi_evt_ring_reset_command(struct gsi *gsi, u32 evt_ring_id) static void gsi_evt_ring_de_alloc_command(struct gsi *gsi, u32 evt_ring_id) { struct gsi_evt_ring *evt_ring = &gsi->evt_ring[evt_ring_id]; - int ret; if (evt_ring->state != GSI_EVT_RING_STATE_ALLOCATED) { dev_err(gsi->dev, "event ring %u state %u before dealloc\n", @@ -431,7 +426,7 @@ static void gsi_evt_ring_de_alloc_command(struct gsi *gsi, u32 evt_ring_id) return; } - ret = evt_ring_command(gsi, evt_ring_id, GSI_EVT_DE_ALLOC); + evt_ring_command(gsi, evt_ring_id, GSI_EVT_DE_ALLOC); /* If successful the event ring state will have changed */ if (evt_ring->state == GSI_EVT_RING_STATE_NOT_ALLOCATED) -- GitLab From 4d4f9c1a17a3480f8fe523673f7232b254d724b7 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Wed, 16 Dec 2020 23:39:56 +0000 Subject: [PATCH 1254/1894] MIPS: boot: Fix unaligned access with CONFIG_MIPS_RAW_APPENDED_DTB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The compressed payload is not necesarily 4-byte aligned, at least when compiling with Clang. In that case, the 4-byte value appended to the compressed payload that corresponds to the uncompressed kernel image size must be read using get_unaligned_le32(). This fixes Clang-built kernels not booting on MIPS (tested on a Ingenic JZ4770 board). Fixes: b8f54f2cde78 ("MIPS: ZBOOT: copy appended dtb to the end of the kernel") Cc: # v4.7 Signed-off-by: Paul Cercueil Reviewed-by: Nick Desaulniers Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Thomas Bogendoerfer --- arch/mips/boot/compressed/decompress.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/mips/boot/compressed/decompress.c b/arch/mips/boot/compressed/decompress.c index c61c641674e6b..e3946b06e840a 100644 --- a/arch/mips/boot/compressed/decompress.c +++ b/arch/mips/boot/compressed/decompress.c @@ -13,6 +13,7 @@ #include #include +#include /* * These two variables specify the free mem region @@ -117,7 +118,7 @@ void decompress_kernel(unsigned long boot_heap_start) dtb_size = fdt_totalsize((void *)&__appended_dtb); /* last four bytes is always image size in little endian */ - image_size = le32_to_cpup((void *)&__image_end - 4); + image_size = get_unaligned_le32((void *)&__image_end - 4); /* copy dtb to where the booted kernel will expect it */ memcpy((void *)VMLINUX_LOAD_ADDRESS_ULL + image_size, -- GitLab From 698222457465ce343443be81c5512edda86e5914 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 24 Dec 2020 19:44:38 +0000 Subject: [PATCH 1255/1894] MIPS: Fix malformed NT_FILE and NT_SIGINFO in 32bit coredumps Patches that introduced NT_FILE and NT_SIGINFO notes back in 2012 had taken care of native (fs/binfmt_elf.c) and compat (fs/compat_binfmt_elf.c) coredumps; unfortunately, compat on mips (which does not go through the usual compat_binfmt_elf.c) had not been noticed. As the result, both N32 and O32 coredumps on 64bit mips kernels have those sections malformed enough to confuse the living hell out of all gdb and readelf versions (up to and including the tip of binutils-gdb.git). Longer term solution is to make both O32 and N32 compat use the regular compat_binfmt_elf.c, but that's too much for backports. The minimal solution is to do in arch/mips/kernel/binfmt_elf[on]32.c the same thing those patches have done in fs/compat_binfmt_elf.c Cc: stable@kernel.org # v3.7+ Signed-off-by: Al Viro Signed-off-by: Thomas Bogendoerfer --- arch/mips/kernel/binfmt_elfn32.c | 7 +++++++ arch/mips/kernel/binfmt_elfo32.c | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/arch/mips/kernel/binfmt_elfn32.c b/arch/mips/kernel/binfmt_elfn32.c index 6ee3f7218c675..c4441416e96b6 100644 --- a/arch/mips/kernel/binfmt_elfn32.c +++ b/arch/mips/kernel/binfmt_elfn32.c @@ -103,4 +103,11 @@ jiffies_to_old_timeval32(unsigned long jiffies, struct old_timeval32 *value) #undef ns_to_kernel_old_timeval #define ns_to_kernel_old_timeval ns_to_old_timeval32 +/* + * Some data types as stored in coredump. + */ +#define user_long_t compat_long_t +#define user_siginfo_t compat_siginfo_t +#define copy_siginfo_to_external copy_siginfo_to_external32 + #include "../../../fs/binfmt_elf.c" diff --git a/arch/mips/kernel/binfmt_elfo32.c b/arch/mips/kernel/binfmt_elfo32.c index 6dd103d3cebba..7b2a23f48c1ac 100644 --- a/arch/mips/kernel/binfmt_elfo32.c +++ b/arch/mips/kernel/binfmt_elfo32.c @@ -106,4 +106,11 @@ jiffies_to_old_timeval32(unsigned long jiffies, struct old_timeval32 *value) #undef ns_to_kernel_old_timeval #define ns_to_kernel_old_timeval ns_to_old_timeval32 +/* + * Some data types as stored in coredump. + */ +#define user_long_t compat_long_t +#define user_siginfo_t compat_siginfo_t +#define copy_siginfo_to_external copy_siginfo_to_external32 + #include "../../../fs/binfmt_elf.c" -- GitLab From 4f374d2c43a9e5e773f1dee56db63bd6b8a36276 Mon Sep 17 00:00:00 2001 From: Stefan Chulski Date: Wed, 23 Dec 2020 20:35:21 +0200 Subject: [PATCH 1256/1894] net: mvpp2: fix pkt coalescing int-threshold configuration The packet coalescing interrupt threshold has separated registers for different aggregated/cpu (sw-thread). The required value should be loaded for every thread but not only for 1 current cpu. Fixes: 213f428f5056 ("net: mvpp2: add support for TX interrupts and RX queue distribution modes") Signed-off-by: Stefan Chulski Link: https://lore.kernel.org/r/1608748521-11033-1-git-send-email-stefanc@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index f20b313270270..4b1808acef581 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -2370,17 +2370,18 @@ static void mvpp2_rx_pkts_coal_set(struct mvpp2_port *port, static void mvpp2_tx_pkts_coal_set(struct mvpp2_port *port, struct mvpp2_tx_queue *txq) { - unsigned int thread = mvpp2_cpu_to_thread(port->priv, get_cpu()); + unsigned int thread; u32 val; if (txq->done_pkts_coal > MVPP2_TXQ_THRESH_MASK) txq->done_pkts_coal = MVPP2_TXQ_THRESH_MASK; val = (txq->done_pkts_coal << MVPP2_TXQ_THRESH_OFFSET); - mvpp2_thread_write(port->priv, thread, MVPP2_TXQ_NUM_REG, txq->id); - mvpp2_thread_write(port->priv, thread, MVPP2_TXQ_THRESH_REG, val); - - put_cpu(); + /* PKT-coalescing registers are per-queue + per-thread */ + for (thread = 0; thread < MVPP2_MAX_THREADS; thread++) { + mvpp2_thread_write(port->priv, thread, MVPP2_TXQ_NUM_REG, txq->id); + mvpp2_thread_write(port->priv, thread, MVPP2_TXQ_THRESH_REG, val); + } } static u32 mvpp2_usec_to_cycles(u32 usec, unsigned long clk_hz) -- GitLab From 21fdca22eb7df2a1e194b8adb812ce370748b733 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Thu, 24 Dec 2020 20:01:09 +0100 Subject: [PATCH 1257/1894] ipv4: Ignore ECN bits for fib lookups in fib_compute_spec_dst() RT_TOS() only clears one of the ECN bits. Therefore, when fib_compute_spec_dst() resorts to a fib lookup, it can return different results depending on the value of the second ECN bit. For example, ECT(0) and ECT(1) packets could be treated differently. $ ip netns add ns0 $ ip netns add ns1 $ ip link add name veth01 netns ns0 type veth peer name veth10 netns ns1 $ ip -netns ns0 link set dev lo up $ ip -netns ns1 link set dev lo up $ ip -netns ns0 link set dev veth01 up $ ip -netns ns1 link set dev veth10 up $ ip -netns ns0 address add 192.0.2.10/24 dev veth01 $ ip -netns ns1 address add 192.0.2.11/24 dev veth10 $ ip -netns ns1 address add 192.0.2.21/32 dev lo $ ip -netns ns1 route add 192.0.2.10/32 tos 4 dev veth10 src 192.0.2.21 $ ip netns exec ns1 sysctl -wq net.ipv4.icmp_echo_ignore_broadcasts=0 With TOS 4 and ECT(1), ns1 replies using source address 192.0.2.21 (ping uses -Q to set all TOS and ECN bits): $ ip netns exec ns0 ping -c 1 -b -Q 5 192.0.2.255 [...] 64 bytes from 192.0.2.21: icmp_seq=1 ttl=64 time=0.544 ms But with TOS 4 and ECT(0), ns1 replies using source address 192.0.2.11 because the "tos 4" route isn't matched: $ ip netns exec ns0 ping -c 1 -b -Q 6 192.0.2.255 [...] 64 bytes from 192.0.2.11: icmp_seq=1 ttl=64 time=0.597 ms After this patch the ECN bits don't affect the result anymore: $ ip netns exec ns0 ping -c 1 -b -Q 6 192.0.2.255 [...] 64 bytes from 192.0.2.21: icmp_seq=1 ttl=64 time=0.591 ms Fixes: 35ebf65e851c ("ipv4: Create and use fib_compute_spec_dst() helper.") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/ipv4/fib_frontend.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index cdf6ec5aa45de..84bb707bd88d8 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -292,7 +292,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) .flowi4_iif = LOOPBACK_IFINDEX, .flowi4_oif = l3mdev_master_ifindex_rcu(dev), .daddr = ip_hdr(skb)->saddr, - .flowi4_tos = RT_TOS(ip_hdr(skb)->tos), + .flowi4_tos = ip_hdr(skb)->tos & IPTOS_RT_MASK, .flowi4_scope = scope, .flowi4_mark = vmark ? skb->mark : 0, }; -- GitLab From a533b70a657c03137dd49cbcfee70aac086ab2b1 Mon Sep 17 00:00:00 2001 From: weichenchen Date: Fri, 25 Dec 2020 13:44:45 +0800 Subject: [PATCH 1258/1894] net: neighbor: fix a crash caused by mod zero pneigh_enqueue() tries to obtain a random delay by mod NEIGH_VAR(p, PROXY_DELAY). However, NEIGH_VAR(p, PROXY_DELAY) migth be zero at that point because someone could write zero to /proc/sys/net/ipv4/neigh/[device]/proxy_delay after the callers check it. This patch uses prandom_u32_max() to get a random delay instead which avoids potential division by zero. Signed-off-by: weichenchen Signed-off-by: David S. Miller --- net/core/neighbour.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 9500d28a43b0e..277ed854aef1c 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1569,10 +1569,8 @@ static void neigh_proxy_process(struct timer_list *t) void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, struct sk_buff *skb) { - unsigned long now = jiffies; - - unsigned long sched_next = now + (prandom_u32() % - NEIGH_VAR(p, PROXY_DELAY)); + unsigned long sched_next = jiffies + + prandom_u32_max(NEIGH_VAR(p, PROXY_DELAY)); if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) { kfree_skb(skb); -- GitLab From bd1248f1ddbc48b0c30565fce897a3b6423313b8 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 24 Dec 2020 22:23:44 -0800 Subject: [PATCH 1259/1894] net: sched: prevent invalid Scell_log shift count Check Scell_log shift size in red_check_params() and modify all callers of red_check_params() to pass Scell_log. This prevents a shift out-of-bounds as detected by UBSAN: UBSAN: shift-out-of-bounds in ./include/net/red.h:252:22 shift exponent 72 is too large for 32-bit type 'int' Fixes: 8afa10cbe281 ("net_sched: red: Avoid illegal values") Signed-off-by: Randy Dunlap Reported-by: syzbot+97c5bd9cc81eca63d36e@syzkaller.appspotmail.com Cc: Nogah Frankel Cc: Jamal Hadi Salim Cc: Cong Wang Cc: Jiri Pirko Cc: netdev@vger.kernel.org Cc: "David S. Miller" Cc: Jakub Kicinski Signed-off-by: David S. Miller --- include/net/red.h | 4 +++- net/sched/sch_choke.c | 2 +- net/sched/sch_gred.c | 2 +- net/sched/sch_red.c | 2 +- net/sched/sch_sfq.c | 2 +- 5 files changed, 7 insertions(+), 5 deletions(-) diff --git a/include/net/red.h b/include/net/red.h index fc455445f4b22..932f0d79d60cb 100644 --- a/include/net/red.h +++ b/include/net/red.h @@ -168,12 +168,14 @@ static inline void red_set_vars(struct red_vars *v) v->qcount = -1; } -static inline bool red_check_params(u32 qth_min, u32 qth_max, u8 Wlog) +static inline bool red_check_params(u32 qth_min, u32 qth_max, u8 Wlog, u8 Scell_log) { if (fls(qth_min) + Wlog > 32) return false; if (fls(qth_max) + Wlog > 32) return false; + if (Scell_log >= 32) + return false; if (qth_max < qth_min) return false; return true; diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index bd618b00d3193..50f680f03a547 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -362,7 +362,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt, ctl = nla_data(tb[TCA_CHOKE_PARMS]); - if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog)) + if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Scell_log)) return -EINVAL; if (ctl->limit > CHOKE_MAX_QUEUE) diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 8599c6f31b057..e0bc77533acc3 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -480,7 +480,7 @@ static inline int gred_change_vq(struct Qdisc *sch, int dp, struct gred_sched *table = qdisc_priv(sch); struct gred_sched_data *q = table->tab[dp]; - if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog)) { + if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Scell_log)) { NL_SET_ERR_MSG_MOD(extack, "invalid RED parameters"); return -EINVAL; } diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index e89fab6ccb34f..b4ae34d7aa965 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -250,7 +250,7 @@ static int __red_change(struct Qdisc *sch, struct nlattr **tb, max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0; ctl = nla_data(tb[TCA_RED_PARMS]); - if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog)) + if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Scell_log)) return -EINVAL; err = red_get_flags(ctl->flags, TC_RED_HISTORIC_FLAGS, diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index bca2be57d9fc1..b25e51440623b 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -647,7 +647,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) } if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max, - ctl_v1->Wlog)) + ctl_v1->Wlog, ctl_v1->Scell_log)) return -EINVAL; if (ctl_v1 && ctl_v1->qth_min) { p = kmalloc(sizeof(*p), GFP_KERNEL); -- GitLab From 5ede3ada3da7f050519112b81badc058190b9f9f Mon Sep 17 00:00:00 2001 From: Yunjian Wang Date: Sat, 26 Dec 2020 16:10:05 +0800 Subject: [PATCH 1260/1894] net: hns: fix return value check in __lb_other_process() The function skb_copy() could return NULL, the return value need to be checked. Fixes: b5996f11ea54 ("net: add Hisilicon Network Subsystem basic ethernet support") Signed-off-by: Yunjian Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_ethtool.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index 7165da0ee9aa5..a6e3f07caf99c 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -415,6 +415,10 @@ static void __lb_other_process(struct hns_nic_ring_data *ring_data, /* for mutl buffer*/ new_skb = skb_copy(skb, GFP_ATOMIC); dev_kfree_skb_any(skb); + if (!new_skb) { + netdev_err(ndev, "skb alloc failed\n"); + return; + } skb = new_skb; check_ok = 0; -- GitLab From 085c7c4e1c0e50d90b7d90f61a12e12b317a91e2 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 26 Dec 2020 15:44:53 -0800 Subject: [PATCH 1261/1894] erspan: fix version 1 check in gre_parse_header() Both version 0 and version 1 use ETH_P_ERSPAN, but version 0 does not have an erspan header. So the check in gre_parse_header() is wrong, we have to distinguish version 1 from version 0. We can just check the gre header length like is_erspan_type1(). Fixes: cb73ee40b1b3 ("net: ip_gre: use erspan key field for tunnel lookup") Reported-by: syzbot+f583ce3d4ddf9836b27a@syzkaller.appspotmail.com Cc: William Tu Cc: Lorenzo Bianconi Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv4/gre_demux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index 66fdbfe5447cd..5d1e6fe9d8387 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -128,7 +128,7 @@ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, * to 0 and sets the configured key in the * inner erspan header field */ - if (greh->protocol == htons(ETH_P_ERSPAN) || + if ((greh->protocol == htons(ETH_P_ERSPAN) && hdr_len != 4) || greh->protocol == htons(ETH_P_ERSPAN2)) { struct erspan_base_hdr *ershdr; -- GitLab From 9b22fece786ed641909988da4810bfa8e5d2e592 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9o=20Le=20Bouter?= Date: Sun, 27 Dec 2020 17:11:36 +0100 Subject: [PATCH 1262/1894] atlantic: remove architecture depends MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This was tested on a RaptorCS Talos II with IBM POWER9 DD2.2 CPUs and an ASUS XG-C100F PCI-e card without any issue. Speeds of ~8Gbps could be attained with not-very-scientific (wget HTTP) both-ways measurements on a local network. No warning or error reported in kernel logs. The drivers seems to be portable enough for it not to be gated like such. Signed-off-by: Léo Le Bouter Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/aquantia/Kconfig b/drivers/net/ethernet/aquantia/Kconfig index efb33c078a3c6..cec2018c84a9b 100644 --- a/drivers/net/ethernet/aquantia/Kconfig +++ b/drivers/net/ethernet/aquantia/Kconfig @@ -19,7 +19,6 @@ if NET_VENDOR_AQUANTIA config AQTION tristate "aQuantia AQtion(tm) Support" depends on PCI - depends on X86_64 || ARM64 || COMPILE_TEST depends on MACSEC || MACSEC=n help This enables the support for the aQuantia AQtion(tm) Ethernet card. -- GitLab From 1fef73597fa545c35fddc953979013882fbd4e55 Mon Sep 17 00:00:00 2001 From: Xie He Date: Sun, 27 Dec 2020 18:53:39 -0800 Subject: [PATCH 1263/1894] net: hdlc_ppp: Fix issues when mod_timer is called while timer is running ppp_cp_event is called directly or indirectly by ppp_rx with "ppp->lock" held. It may call mod_timer to add a new timer. However, at the same time ppp_timer may be already running and waiting for "ppp->lock". In this case, there's no need for ppp_timer to continue running and it can just exit. If we let ppp_timer continue running, it may call add_timer. This causes kernel panic because add_timer can't be called with a timer pending. This patch fixes this problem. Fixes: e022c2f07ae5 ("WAN: new synchronous PPP implementation for generic HDLC.") Cc: Krzysztof Halasa Signed-off-by: Xie He Signed-off-by: David S. Miller --- drivers/net/wan/hdlc_ppp.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/wan/hdlc_ppp.c b/drivers/net/wan/hdlc_ppp.c index 64f8556513369..261b53fc8e04c 100644 --- a/drivers/net/wan/hdlc_ppp.c +++ b/drivers/net/wan/hdlc_ppp.c @@ -569,6 +569,13 @@ static void ppp_timer(struct timer_list *t) unsigned long flags; spin_lock_irqsave(&ppp->lock, flags); + /* mod_timer could be called after we entered this function but + * before we got the lock. + */ + if (timer_pending(&proto->timer)) { + spin_unlock_irqrestore(&ppp->lock, flags); + return; + } switch (proto->state) { case STOPPING: case REQ_SENT: -- GitLab From 26b614fa441048a9f8e4a814c3b01756816ce7a7 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 16 Dec 2020 17:48:33 +0200 Subject: [PATCH 1264/1894] dmaengine: ti: k3-udma: Fix pktdma rchan TPL level setup Instead of initializing the rchan_tpl the initial commit re-initialized the tchan_tpl. Fixes: d2abc982333c0 ("dmaengine: ti: k3-udma: Initial support for K3 PKTDMA") Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20201216154833.20821-1-peter.ujfalusi@ti.com Signed-off-by: Vinod Koul --- drivers/dma/ti/k3-udma.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c index 87157cbae1b8e..298460438bb4d 100644 --- a/drivers/dma/ti/k3-udma.c +++ b/drivers/dma/ti/k3-udma.c @@ -4698,9 +4698,9 @@ static int pktdma_setup_resources(struct udma_dev *ud) ud->tchan_tpl.levels = 1; } - ud->tchan_tpl.levels = ud->tchan_tpl.levels; - ud->tchan_tpl.start_idx[0] = ud->tchan_tpl.start_idx[0]; - ud->tchan_tpl.start_idx[1] = ud->tchan_tpl.start_idx[1]; + ud->rchan_tpl.levels = ud->tchan_tpl.levels; + ud->rchan_tpl.start_idx[0] = ud->tchan_tpl.start_idx[0]; + ud->rchan_tpl.start_idx[1] = ud->tchan_tpl.start_idx[1]; ud->tchan_map = devm_kmalloc_array(dev, BITS_TO_LONGS(ud->tchan_cnt), sizeof(unsigned long), GFP_KERNEL); -- GitLab From ff58f7dd0c1352a01de3a40327895bd51e03de3a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 16 Dec 2020 11:29:46 +0300 Subject: [PATCH 1265/1894] dmaengine: idxd: off by one in cleanup code The clean up is off by one so this will start at "i" and it should start with "i - 1" and then it doesn't unregister the zeroeth elements in the array. Fixes: c52ca478233c ("dmaengine: idxd: add configuration component of driver") Signed-off-by: Dan Carpenter Acked-by: Dave Jiang Link: https://lore.kernel.org/r/X9nFeojulsNqUSnG@mwanda Signed-off-by: Vinod Koul --- drivers/dma/idxd/sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 266423a2cabc7..4dbb03c545e48 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -434,7 +434,7 @@ int idxd_register_driver(void) return 0; drv_fail: - for (; i > 0; i--) + while (--i >= 0) driver_unregister(&idxd_drvs[i]->drv); return rc; } @@ -1840,7 +1840,7 @@ int idxd_register_bus_type(void) return 0; bus_err: - for (; i > 0; i--) + while (--i >= 0) bus_unregister(idxd_bus_types[i]); return rc; } -- GitLab From 8fb28795fb64e1151c0e713686d8b026a5a2aece Mon Sep 17 00:00:00 2001 From: Xiaoming Ni Date: Fri, 18 Dec 2020 18:41:37 +0800 Subject: [PATCH 1266/1894] dmaengine: qcom: gpi: Fixes a format mismatch drivers/dma/qcom/gpi.c:1419:3: warning: format '%lu' expects argument of type 'long unsigned int', but argument 8 has type 'size_t {aka unsigned int}' [-Wformat=] drivers/dma/qcom/gpi.c:1427:31: warning: format '%lu' expects argument of type 'long unsigned int', but argument 3 has type 'size_t {aka unsigned int}' [-Wformat=] drivers/dma/qcom/gpi.c:1447:3: warning: format '%llx' expects argument of type 'long long unsigned int', but argument 4 has type 'dma_addr_t {aka unsigned int}' [-Wformat=] drivers/dma/qcom/gpi.c:1447:3: warning: format '%llx' expects argument of type 'long long unsigned int', but argument 5 has type 'phys_addr_t {aka unsigned int}' [-Wformat=] Signed-off-by: Xiaoming Ni Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20201218104137.59200-1-nixiaoming@huawei.com Signed-off-by: Vinod Koul --- drivers/dma/qcom/gpi.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c index d2334f535de2a..556c070a514ca 100644 --- a/drivers/dma/qcom/gpi.c +++ b/drivers/dma/qcom/gpi.c @@ -1416,7 +1416,7 @@ static int gpi_alloc_ring(struct gpi_ring *ring, u32 elements, len = 1 << bit; ring->alloc_size = (len + (len - 1)); dev_dbg(gpii->gpi_dev->dev, - "#el:%u el_size:%u len:%u actual_len:%llu alloc_size:%lu\n", + "#el:%u el_size:%u len:%u actual_len:%llu alloc_size:%zu\n", elements, el_size, (elements * el_size), len, ring->alloc_size); @@ -1424,7 +1424,7 @@ static int gpi_alloc_ring(struct gpi_ring *ring, u32 elements, ring->alloc_size, &ring->dma_handle, GFP_KERNEL); if (!ring->pre_aligned) { - dev_err(gpii->gpi_dev->dev, "could not alloc size:%lu mem for ring\n", + dev_err(gpii->gpi_dev->dev, "could not alloc size:%zu mem for ring\n", ring->alloc_size); return -ENOMEM; } @@ -1444,8 +1444,8 @@ static int gpi_alloc_ring(struct gpi_ring *ring, u32 elements, smp_wmb(); dev_dbg(gpii->gpi_dev->dev, - "phy_pre:0x%0llx phy_alig:0x%0llx len:%u el_size:%u elements:%u\n", - ring->dma_handle, ring->phys_addr, ring->len, + "phy_pre:%pad phy_alig:%pa len:%u el_size:%u elements:%u\n", + &ring->dma_handle, &ring->phys_addr, ring->len, ring->el_size, ring->elements); return 0; -- GitLab From 33cbd54dc515cc04b5a603603414222b4bb1448d Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 19 Dec 2020 13:47:18 +0100 Subject: [PATCH 1267/1894] dmaengine: mediatek: mtk-hsdma: Fix a resource leak in the error handling path of the probe function 'mtk_hsdma_hw_deinit()' should be called in the error handling path of the probe function to undo a previous 'mtk_hsdma_hw_init()' call, as already done in the remove function. Fixes: 548c4597e984 ("dmaengine: mediatek: Add MediaTek High-Speed DMA controller for MT7622 and MT7623 SoC") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/20201219124718.182664-1-christophe.jaillet@wanadoo.fr Signed-off-by: Vinod Koul --- drivers/dma/mediatek/mtk-hsdma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma/mediatek/mtk-hsdma.c b/drivers/dma/mediatek/mtk-hsdma.c index f133ae8dece16..6ad8afbb95f2b 100644 --- a/drivers/dma/mediatek/mtk-hsdma.c +++ b/drivers/dma/mediatek/mtk-hsdma.c @@ -1007,6 +1007,7 @@ static int mtk_hsdma_probe(struct platform_device *pdev) return 0; err_free: + mtk_hsdma_hw_deinit(hsdma); of_dma_controller_free(pdev->dev.of_node); err_unregister: dma_async_device_unregister(dd); -- GitLab From d645148cc82ca7fbacaa601414a552184e9c6dd3 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 19 Dec 2020 14:28:00 +0100 Subject: [PATCH 1268/1894] dmaengine: milbeaut-xdmac: Fix a resource leak in the error handling path of the probe function 'disable_xdmac()' should be called in the error handling path of the probe function to undo a previous 'enable_xdmac()' call, as already done in the remove function. Fixes: a6e9be055d47 ("dmaengine: milbeaut-xdmac: Add XDMAC driver for Milbeaut platforms") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/20201219132800.183254-1-christophe.jaillet@wanadoo.fr Signed-off-by: Vinod Koul --- drivers/dma/milbeaut-xdmac.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/dma/milbeaut-xdmac.c b/drivers/dma/milbeaut-xdmac.c index 584c931e807af..d29d01e730aa0 100644 --- a/drivers/dma/milbeaut-xdmac.c +++ b/drivers/dma/milbeaut-xdmac.c @@ -350,7 +350,7 @@ static int milbeaut_xdmac_probe(struct platform_device *pdev) ret = dma_async_device_register(ddev); if (ret) - return ret; + goto disable_xdmac; ret = of_dma_controller_register(dev->of_node, of_dma_simple_xlate, mdev); @@ -363,6 +363,8 @@ static int milbeaut_xdmac_probe(struct platform_device *pdev) unregister_dmac: dma_async_device_unregister(ddev); +disable_xdmac: + disable_xdmac(mdev); return ret; } -- GitLab From 595a334148449bd1d27cf5d6fcb3b0d718cb1b9f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 14 Dec 2020 14:56:52 +0300 Subject: [PATCH 1269/1894] dmaengine: dw-edma: Fix use after free in dw_edma_alloc_chunk() If the dw_edma_alloc_burst() function fails then we free "chunk" but it's still on the "desc->chunk->list" list so it will lead to a use after free. Also the "->chunks_alloc" count is incremented when it shouldn't be. In current kernels small allocations are guaranteed to succeed and dw_edma_alloc_burst() can't fail so this will not actually affect runtime. Fixes: e63d79d1ffcd ("dmaengine: Add Synopsys eDMA IP core driver") Signed-off-by: Dan Carpenter Acked-by: Gustavo Pimentel Link: https://lore.kernel.org/r/X9dTBFrUPEvvW7qc@mwanda Signed-off-by: Vinod Koul --- drivers/dma/dw-edma/dw-edma-core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/dw-edma/dw-edma-core.c b/drivers/dma/dw-edma/dw-edma-core.c index b971505b87152..08d71dafa0015 100644 --- a/drivers/dma/dw-edma/dw-edma-core.c +++ b/drivers/dma/dw-edma/dw-edma-core.c @@ -86,12 +86,12 @@ static struct dw_edma_chunk *dw_edma_alloc_chunk(struct dw_edma_desc *desc) if (desc->chunk) { /* Create and add new element into the linked list */ - desc->chunks_alloc++; - list_add_tail(&chunk->list, &desc->chunk->list); if (!dw_edma_alloc_burst(chunk)) { kfree(chunk); return NULL; } + desc->chunks_alloc++; + list_add_tail(&chunk->list, &desc->chunk->list); } else { /* List head */ chunk->burst = NULL; -- GitLab From ba42f61b36121730d7f51cc261dfd744ee19f50b Mon Sep 17 00:00:00 2001 From: Zheng Yongjun Date: Wed, 16 Dec 2020 21:06:49 +0800 Subject: [PATCH 1270/1894] qcom: bam_dma: Delete useless kfree code The parameter of kfree function is NULL, so kfree code is useless, delete it. Therefore, goto expression is no longer needed, so simplify it. Signed-off-by: Zheng Yongjun Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20201216130649.13979-1-zhengyongjun3@huawei.com Signed-off-by: Vinod Koul --- drivers/dma/qcom/bam_dma.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/dma/qcom/bam_dma.c b/drivers/dma/qcom/bam_dma.c index d5773d474d8f5..88579857ca1d6 100644 --- a/drivers/dma/qcom/bam_dma.c +++ b/drivers/dma/qcom/bam_dma.c @@ -630,7 +630,7 @@ static struct dma_async_tx_descriptor *bam_prep_slave_sg(struct dma_chan *chan, GFP_NOWAIT); if (!async_desc) - goto err_out; + return NULL; if (flags & DMA_PREP_FENCE) async_desc->flags |= DESC_FLAG_NWD; @@ -670,10 +670,6 @@ static struct dma_async_tx_descriptor *bam_prep_slave_sg(struct dma_chan *chan, } return vchan_tx_prep(&bchan->vc, &async_desc->vd, flags); - -err_out: - kfree(async_desc); - return NULL; } /** -- GitLab From 28d8e07fc9478f8f14dd5dd4b2c382982fa12461 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 15 Dec 2020 15:13:47 +0200 Subject: [PATCH 1271/1894] MAINTAINERS: Add entry for Texas Instruments DMA drivers My employment with TI is coming to an end, it is my intention to look after the DMA drivers I have worked with over the years. Signed-off-by: Peter Ujfalusi Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20201215131348.11282-2-peter.ujfalusi@ti.com Signed-off-by: Vinod Koul --- MAINTAINERS | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 546aa66428c9f..217866b668b9b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17553,6 +17553,19 @@ S: Supported F: Documentation/devicetree/bindings/iio/dac/ti,dac7612.txt F: drivers/iio/dac/ti-dac7612.c +TEXAS INSTRUMENTS DMA DRIVERS +M: Peter Ujfalusi +L: dmaengine@vger.kernel.org +S: Maintained +F: Documentation/devicetree/bindings/dma/ti-dma-crossbar.txt +F: Documentation/devicetree/bindings/dma/ti-edma.txt +F: Documentation/devicetree/bindings/dma/ti/ +F: drivers/dma/ti/ +X: drivers/dma/ti/cppi41.c +F: include/linux/dma/k3-udma-glue.h +F: include/linux/dma/ti-cppi5.h +F: include/linux/dma/k3-psil.h + TEXAS INSTRUMENTS' SYSTEM CONTROL INTERFACE (TISCI) PROTOCOL DRIVER M: Nishanth Menon M: Tero Kristo -- GitLab From cc465fa269bc0dc63a1ab7384110e4079fb40421 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 15 Dec 2020 15:13:48 +0200 Subject: [PATCH 1272/1894] dt-bindings: dma: ti: Update maintainer and author information My employment with TI is coming to an end, add the copyright and author comments as they due and change the maintainer mail address. Signed-off-by: Peter Ujfalusi Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20201215131348.11282-3-peter.ujfalusi@ti.com Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml | 4 +++- Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml | 4 +++- Documentation/devicetree/bindings/dma/ti/k3-udma.yaml | 4 +++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml b/Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml index b15f68c499cb2..df29d59d13a8d 100644 --- a/Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml +++ b/Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml @@ -1,4 +1,6 @@ # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +# Copyright (C) 2020 Texas Instruments Incorporated +# Author: Peter Ujfalusi %YAML 1.2 --- $id: http://devicetree.org/schemas/dma/ti/k3-bcdma.yaml# @@ -7,7 +9,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Texas Instruments K3 DMSS BCDMA Device Tree Bindings maintainers: - - Peter Ujfalusi + - Peter Ujfalusi description: | The Block Copy DMA (BCDMA) is intended to perform similar functions as the TR diff --git a/Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml b/Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml index b13ab60cd740f..ea19d12a9337e 100644 --- a/Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml +++ b/Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml @@ -1,4 +1,6 @@ # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +# Copyright (C) 2020 Texas Instruments Incorporated +# Author: Peter Ujfalusi %YAML 1.2 --- $id: http://devicetree.org/schemas/dma/ti/k3-pktdma.yaml# @@ -7,7 +9,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Texas Instruments K3 DMSS PKTDMA Device Tree Bindings maintainers: - - Peter Ujfalusi + - Peter Ujfalusi description: | The Packet DMA (PKTDMA) is intended to perform similar functions as the packet diff --git a/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml b/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml index 9a87fd9041eba..6a09bbf83d462 100644 --- a/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml +++ b/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml @@ -1,4 +1,6 @@ # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +# Copyright (C) 2019 Texas Instruments Incorporated +# Author: Peter Ujfalusi %YAML 1.2 --- $id: http://devicetree.org/schemas/dma/ti/k3-udma.yaml# @@ -7,7 +9,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Texas Instruments K3 NAVSS Unified DMA Device Tree Bindings maintainers: - - Peter Ujfalusi + - Peter Ujfalusi description: | The UDMA-P is intended to perform similar (but significantly upgraded) -- GitLab From 3deba4d8f07be264b21e81d604c6b569a41a33b5 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 29 Dec 2020 09:34:28 +0100 Subject: [PATCH 1273/1894] ALSA: usb-audio: Add quirk for BOSS AD-10 BOSS AD-10 requires the very same quirk like other BOSS devices to enable the special implicit feedback mode. Reported-and-tested-by: Martin Passing Link: https://lore.kernel.org/r/20201229083428.20467-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/implicit.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/implicit.c b/sound/usb/implicit.c index eb3a4c433c3e9..70b9777b2e639 100644 --- a/sound/usb/implicit.c +++ b/sound/usb/implicit.c @@ -78,6 +78,7 @@ static const struct snd_usb_implicit_fb_match playback_implicit_fb_quirks[] = { IMPLICIT_FB_SKIP_DEV(0x0582, 0x01d6), /* BOSS GT-1 */ IMPLICIT_FB_SKIP_DEV(0x0582, 0x01d8), /* BOSS Katana */ IMPLICIT_FB_SKIP_DEV(0x0582, 0x01e5), /* BOSS GT-001 */ + IMPLICIT_FB_SKIP_DEV(0x0582, 0x0203), /* BOSS AD-10 */ {} /* terminator */ }; @@ -89,6 +90,7 @@ static const struct snd_usb_implicit_fb_match capture_implicit_fb_quirks[] = { IMPLICIT_FB_FIXED_DEV(0x0582, 0x01d6, 0x0d, 0x01), /* BOSS GT-1 */ IMPLICIT_FB_FIXED_DEV(0x0582, 0x01d8, 0x0d, 0x01), /* BOSS Katana */ IMPLICIT_FB_FIXED_DEV(0x0582, 0x01e5, 0x0d, 0x01), /* BOSS GT-001 */ + IMPLICIT_FB_FIXED_DEV(0x0582, 0x0203, 0x0d, 0x01), /* BOSS AD-10 */ {} /* terminator */ }; -- GitLab From cffa4b2122f5f3e53cf3d529bbc74651f95856d5 Mon Sep 17 00:00:00 2001 From: Xiaolei Wang Date: Tue, 29 Dec 2020 18:50:46 +0800 Subject: [PATCH 1274/1894] regmap: debugfs: Fix a memory leak when calling regmap_attach_dev After initializing the regmap through syscon_regmap_lookup_by_compatible, then regmap_attach_dev to the device, because the debugfs_name has been allocated, there is no need to redistribute it again unreferenced object 0xd8399b80 (size 64): comm "swapper/0", pid 1, jiffies 4294937641 (age 278.590s) hex dump (first 32 bytes): 64 75 6d 6d 79 2d 69 6f 6d 75 78 63 2d 67 70 72 dummy-iomuxc-gpr 40 32 30 65 34 30 30 30 00 7f 52 5b d8 7e 42 69 @20e4000..R[.~Bi backtrace: [] kasprintf+0x2c/0x54 [<6ad3bbc2>] regmap_debugfs_init+0xdc/0x2fc [] __regmap_init+0xc38/0xd88 [<1f7e0609>] of_syscon_register+0x168/0x294 [<735e8766>] device_node_get_regmap+0x6c/0x98 [] imx6ul_init_machine+0x20/0x88 [<0456565b>] customize_machine+0x1c/0x30 [] do_one_initcall+0x80/0x3ac [<7e584867>] kernel_init_freeable+0x170/0x1f0 [<80074741>] kernel_init+0x8/0x120 [<285d6f28>] ret_from_fork+0x14/0x20 [<00000000>] 0x0 Fixes: 9b947a13e7f6 ("regmap: use debugfs even when no device") Signed-off-by: Xiaolei Wang Link: https://lore.kernel.org/r/20201229105046.41984-1-xiaolei.wang@windriver.com Signed-off-by: Mark Brown --- drivers/base/regmap/regmap-debugfs.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/base/regmap/regmap-debugfs.c b/drivers/base/regmap/regmap-debugfs.c index 8dfac7f3ed7aa..bf03cd343be23 100644 --- a/drivers/base/regmap/regmap-debugfs.c +++ b/drivers/base/regmap/regmap-debugfs.c @@ -582,18 +582,25 @@ void regmap_debugfs_init(struct regmap *map) devname = dev_name(map->dev); if (name) { - map->debugfs_name = kasprintf(GFP_KERNEL, "%s-%s", + if (!map->debugfs_name) { + map->debugfs_name = kasprintf(GFP_KERNEL, "%s-%s", devname, name); + if (!map->debugfs_name) + return; + } name = map->debugfs_name; } else { name = devname; } if (!strcmp(name, "dummy")) { - kfree(map->debugfs_name); + if (!map->debugfs_name) + kfree(map->debugfs_name); map->debugfs_name = kasprintf(GFP_KERNEL, "dummy%d", dummy_index); + if (!map->debugfs_name) + return; name = map->debugfs_name; dummy_index++; } -- GitLab From ede090f5a438e97d0586f64067bbb956e30a2a31 Mon Sep 17 00:00:00 2001 From: Xu Yilun Date: Tue, 29 Dec 2020 13:27:41 +0800 Subject: [PATCH 1275/1894] spi: altera: fix return value for altera_spi_txrx() This patch fixes the return value for altera_spi_txrx. It should return 1 for interrupt transfer mode, and return 0 for polling transfer mode. The altera_spi_txrx() implements the spi_controller.transfer_one callback. According to the spi-summary.rst, the transfer_one should return 0 when transfer is finished, return 1 when transfer is still in progress. Signed-off-by: Xu Yilun Link: https://lore.kernel.org/r/1609219662-27057-2-git-send-email-yilun.xu@intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-altera.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/spi/spi-altera.c b/drivers/spi/spi-altera.c index 809bfff3690ab..cbc4c28c1541c 100644 --- a/drivers/spi/spi-altera.c +++ b/drivers/spi/spi-altera.c @@ -189,24 +189,26 @@ static int altera_spi_txrx(struct spi_master *master, /* send the first byte */ altera_spi_tx_word(hw); - } else { - while (hw->count < hw->len) { - altera_spi_tx_word(hw); - for (;;) { - altr_spi_readl(hw, ALTERA_SPI_STATUS, &val); - if (val & ALTERA_SPI_STATUS_RRDY_MSK) - break; + return 1; + } + + while (hw->count < hw->len) { + altera_spi_tx_word(hw); - cpu_relax(); - } + for (;;) { + altr_spi_readl(hw, ALTERA_SPI_STATUS, &val); + if (val & ALTERA_SPI_STATUS_RRDY_MSK) + break; - altera_spi_rx_word(hw); + cpu_relax(); } - spi_finalize_current_transfer(master); + + altera_spi_rx_word(hw); } + spi_finalize_current_transfer(master); - return t->len; + return 0; } static irqreturn_t altera_spi_irq(int irq, void *dev) -- GitLab From da4282c17d695b9311608aa63b3c633e649aadea Mon Sep 17 00:00:00 2001 From: Jiang Wang Date: Thu, 24 Dec 2020 01:12:42 +0000 Subject: [PATCH 1276/1894] selftests/bpf: Fix a compile error for BPF_F_BPRM_SECUREEXEC When CONFIG_BPF_LSM is not configured, running bpf selftesting will show BPF_F_BPRM_SECUREEXEC undefined error for bprm_opts.c. The problem is that bprm_opts.c includes vmliunx.h. The vmlinux.h is generated by "bpftool btf dump file ./vmlinux format c". On the other hand, BPF_F_BPRM_SECUREEXEC is defined in include/uapi/linux/bpf.h and used only in bpf_lsm.c. When CONFIG_BPF_LSM is not set, bpf_lsm will not be compiled, so vmlinux.h will not include definition of BPF_F_BPRM_SECUREEXEC. Ideally, we want to compile bpf selftest regardless of the configuration setting, so change the include file from vmlinux.h to bpf.h. Signed-off-by: Jiang Wang Signed-off-by: Daniel Borkmann Acked-by: Song Liu Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20201224011242.585967-1-jiang.wang@bytedance.com --- tools/testing/selftests/bpf/progs/bprm_opts.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/progs/bprm_opts.c b/tools/testing/selftests/bpf/progs/bprm_opts.c index 5bfef2887e706..418d9c6d49525 100644 --- a/tools/testing/selftests/bpf/progs/bprm_opts.c +++ b/tools/testing/selftests/bpf/progs/bprm_opts.c @@ -4,7 +4,7 @@ * Copyright 2020 Google LLC. */ -#include "vmlinux.h" +#include #include #include #include -- GitLab From a694ffed876575d1df1a47067444047182de4354 Mon Sep 17 00:00:00 2001 From: Iskren Chernev Date: Mon, 28 Dec 2020 23:31:30 +0200 Subject: [PATCH 1277/1894] drm/msm: Fix null dereference in _msm_gem_new The crash was caused by locking an uninitialized lock during init of drm_gem_object. The lock changed in the breaking commit, but the init was not moved accordingly. 8<--- cut here --- Unable to handle kernel NULL pointer dereference at virtual address 00000000 pgd = (ptrval) [00000000] *pgd=00000000 Internal error: Oops: 5 [#1] PREEMPT SMP ARM Modules linked in: msm(+) qcom_spmi_vadc qcom_vadc_common dm_mod usb_f_rndis rmi_i2c rmi_core qnoc_msm8974 icc_smd_rpm pm8941_pwrkey CPU: 2 PID: 1020 Comm: udevd Not tainted 5.10.0-postmarketos-qcom-msm8974 #8 Hardware name: Generic DT based system PC is at ww_mutex_lock+0x20/0xb0 LR is at _msm_gem_new+0x13c/0x298 [msm] pc : [] lr : [] psr: 20000013 sp : c36e7ad0 ip : c3b3d800 fp : 00000000 r10: 00000001 r9 : c3b22800 r8 : 00000000 r7 : c3b23000 r6 : c3b3d600 r5 : c3b3d600 r4 : 00000000 r3 : c34b4780 r2 : c3b3d6f4 r1 : 00000000 r0 : 00000000 Flags: nzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none Control: 10c5787d Table: 03ae406a DAC: 00000051 Process udevd (pid: 1020, stack limit = 0x(ptrval)) Stack: (0xc36e7ad0 to 0xc36e8000) [...] [] (ww_mutex_lock) from [] (_msm_gem_new+0x13c/0x298 [msm]) [] (_msm_gem_new [msm]) from [] (_msm_gem_kernel_new+0x20/0x190 [msm]) [] (_msm_gem_kernel_new [msm]) from [] (msm_gem_kernel_new+0x24/0x2c [msm]) [] (msm_gem_kernel_new [msm]) from [] (msm_gpu_init+0x308/0x548 [msm]) [] (msm_gpu_init [msm]) from [] (adreno_gpu_init+0x13c/0x240 [msm]) [] (adreno_gpu_init [msm]) from [] (a3xx_gpu_init+0x78/0x1dc [msm]) [] (a3xx_gpu_init [msm]) from [] (adreno_bind+0x1cc/0x274 [msm]) [] (adreno_bind [msm]) from [] (component_bind_all+0x11c/0x278) [] (component_bind_all) from [] (msm_drm_bind+0x18c/0x5b4 [msm]) [] (msm_drm_bind [msm]) from [] (try_to_bring_up_master+0x200/0x2c8) [] (try_to_bring_up_master) from [] (component_master_add_with_match+0xc8/0xfc) [] (component_master_add_with_match) from [] (msm_pdev_probe+0x288/0x2c4 [msm]) [] (msm_pdev_probe [msm]) from [] (platform_drv_probe+0x48/0x98) [] (platform_drv_probe) from [] (really_probe+0x108/0x528) [] (really_probe) from [] (driver_probe_device+0x78/0x1d4) [] (driver_probe_device) from [] (device_driver_attach+0xa8/0xb0) [] (device_driver_attach) from [] (__driver_attach+0xb4/0x154) [] (__driver_attach) from [] (bus_for_each_dev+0x78/0xb8) [] (bus_for_each_dev) from [] (bus_add_driver+0x10c/0x208) [] (bus_add_driver) from [] (driver_register+0x88/0x118) [] (driver_register) from [] (do_one_initcall+0x50/0x2b0) [] (do_one_initcall) from [] (do_init_module+0x60/0x288) [] (do_init_module) from [] (sys_finit_module+0xd4/0x120) [] (sys_finit_module) from [] (ret_fast_syscall+0x0/0x54) Exception stack(0xc36e7fa8 to 0xc36e7ff0) 7fa0: 00020000 00000000 00000007 b6edd5b0 00000000 b6f2ff20 7fc0: 00020000 00000000 0000017b 0000017b b6eef980 bedc3a54 00473c99 00000000 7fe0: b6edd5b0 bedc3918 b6ed8a5f b6f6a8b0 Code: e3c3303f e593300c e1a04000 f590f000 (e1940f9f) ---[ end trace 277e2a3da40bbb76 ]--- Fixes: 6c0e3ea250476 ("drm/msm/gem: Switch over to obj->resv for locking") Signed-off-by: Iskren Chernev Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gem.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index a21be5b910ff6..d9a5a1895f3dc 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -1101,6 +1101,8 @@ static struct drm_gem_object *_msm_gem_new(struct drm_device *dev, struct msm_gem_vma *vma; struct page **pages; + drm_gem_private_object_init(dev, obj, size); + msm_gem_lock(obj); vma = add_vma(obj, NULL); @@ -1112,7 +1114,6 @@ static struct drm_gem_object *_msm_gem_new(struct drm_device *dev, to_msm_bo(obj)->vram_node = &vma->node; - drm_gem_private_object_init(dev, obj, size); pages = get_pages(obj); if (IS_ERR(pages)) { -- GitLab From 07fcad0d726d5da7c43f1c8e8fdb66c93a140ca5 Mon Sep 17 00:00:00 2001 From: Iskren Chernev Date: Mon, 28 Dec 2020 23:31:31 +0200 Subject: [PATCH 1278/1894] drm/msm: Ensure get_pages is called when locked get_pages is only called in a locked context. Add a WARN_ON to make sure it stays that way. Signed-off-by: Iskren Chernev Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gem.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index d9a5a1895f3dc..114c0711a302d 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -96,6 +96,8 @@ static struct page **get_pages(struct drm_gem_object *obj) { struct msm_gem_object *msm_obj = to_msm_bo(obj); + WARN_ON(!msm_gem_is_locked(obj)); + if (!msm_obj->pages) { struct drm_device *dev = obj->dev; struct page **p; @@ -1114,8 +1116,9 @@ static struct drm_gem_object *_msm_gem_new(struct drm_device *dev, to_msm_bo(obj)->vram_node = &vma->node; - + msm_gem_lock(obj); pages = get_pages(obj); + msm_gem_unlock(obj); if (IS_ERR(pages)) { ret = PTR_ERR(pages); goto fail; -- GitLab From 77788775c7132a8d93c6930ab1bd84fc743c7cb7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 29 Dec 2020 10:50:46 -0700 Subject: [PATCH 1279/1894] io_uring: don't assume mm is constant across submits If we COW the identity, we assume that ->mm never changes. But this isn't true of multiple processes end up sharing the ring. Hence treat id->mm like like any other process compontent when it comes to the identity mapping. This is pretty trivial, just moving the existing grab into io_grab_identity(), and including a check for the match. Cc: stable@vger.kernel.org # 5.10 Fixes: 1e6fa5216a0e ("io_uring: COW io_identity on mismatch") Reported-by: Christian Brauner : Tested-by: Christian Brauner : Signed-off-by: Jens Axboe --- fs/io_uring.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 7e35283fc0b10..eb4620ff638ed 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1501,6 +1501,13 @@ static bool io_grab_identity(struct io_kiocb *req) spin_unlock_irq(&ctx->inflight_lock); req->work.flags |= IO_WQ_WORK_FILES; } + if (!(req->work.flags & IO_WQ_WORK_MM) && + (def->work_flags & IO_WQ_WORK_MM)) { + if (id->mm != current->mm) + return false; + mmgrab(id->mm); + req->work.flags |= IO_WQ_WORK_MM; + } return true; } @@ -1525,13 +1532,6 @@ static void io_prep_async_work(struct io_kiocb *req) req->work.flags |= IO_WQ_WORK_UNBOUND; } - /* ->mm can never change on us */ - if (!(req->work.flags & IO_WQ_WORK_MM) && - (def->work_flags & IO_WQ_WORK_MM)) { - mmgrab(id->mm); - req->work.flags |= IO_WQ_WORK_MM; - } - /* if we fail grabbing identity, we must COW, regrab, and retry */ if (io_grab_identity(req)) return; -- GitLab From b000700d6db50c933ce8b661154e26cf4ad06dba Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Sat, 26 Dec 2020 15:27:14 +0800 Subject: [PATCH 1280/1894] habanalabs: Fix memleak in hl_device_reset When kzalloc() fails, we should execute hl_mmu_fini() to release the MMU module. It's the same when hl_ctx_init() fails. Signed-off-by: Dinghao Liu Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 0749c92cbcf63..1456eabf96010 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -1092,6 +1092,7 @@ kill_processes: GFP_KERNEL); if (!hdev->kernel_ctx) { rc = -ENOMEM; + hl_mmu_fini(hdev); goto out_err; } @@ -1103,6 +1104,7 @@ kill_processes: "failed to init kernel ctx in hard reset\n"); kfree(hdev->kernel_ctx); hdev->kernel_ctx = NULL; + hl_mmu_fini(hdev); goto out_err; } } -- GitLab From 7cf22a1c88c05ea3807f95b1edfebb729016ae52 Mon Sep 17 00:00:00 2001 From: Harish Date: Tue, 29 Dec 2020 15:14:22 -0800 Subject: [PATCH 1281/1894] selftests/vm: fix building protection keys test Commit d8cbe8bfa7d ("tools/testing/selftests/vm: fix build error") tried to include a ARCH check for powerpc, however ARCH is not defined in the Makefile before including lib.mk. This makes test building to skip on both x86 and powerpc. Fix the arch check by replacing it using machine type as it is already defined and used in the test. Link: https://lkml.kernel.org/r/20201215100402.257376-1-harish@linux.ibm.com Fixes: d8cbe8bfa7d ("tools/testing/selftests/vm: fix build error") Signed-off-by: Harish Reviewed-by: Sandipan Das Cc: Shuah Khan Cc: Sandipan Das Cc: John Hubbard Cc: Dave Hansen Cc: Kirill A. Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/Makefile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 9a25307f61154..d42115e4284d7 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -4,7 +4,7 @@ include local_config.mk uname_M := $(shell uname -m 2>/dev/null || echo not) -MACHINE ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/') +MACHINE ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/' -e 's/ppc64.*/ppc64/') # Without this, failed build products remain, with up-to-date timestamps, # thus tricking Make (and you!) into believing that All Is Well, in subsequent @@ -43,7 +43,7 @@ TEST_GEN_FILES += thuge-gen TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += userfaultfd -ifeq ($(ARCH),x86_64) +ifeq ($(MACHINE),x86_64) CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_32bit_program.c -m32) CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_64bit_program.c) CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_program.c -no-pie) @@ -65,13 +65,13 @@ TEST_GEN_FILES += $(BINARIES_64) endif else -ifneq (,$(findstring $(ARCH),powerpc)) +ifneq (,$(findstring $(MACHINE),ppc64)) TEST_GEN_FILES += protection_keys endif endif -ifneq (,$(filter $(MACHINE),arm64 ia64 mips64 parisc64 ppc64 ppc64le riscv64 s390x sh64 sparc64 x86_64)) +ifneq (,$(filter $(MACHINE),arm64 ia64 mips64 parisc64 ppc64 riscv64 s390x sh64 sparc64 x86_64)) TEST_GEN_FILES += va_128TBswitch TEST_GEN_FILES += virtual_address_range TEST_GEN_FILES += write_to_hugetlbfs @@ -84,7 +84,7 @@ TEST_FILES := test_vmalloc.sh KSFT_KHDR_INSTALL := 1 include ../lib.mk -ifeq ($(ARCH),x86_64) +ifeq ($(MACHINE),x86_64) BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32)) BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64)) -- GitLab From e7dd91c456a8cdbcd7066997d15e36d14276a949 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Tue, 29 Dec 2020 15:14:25 -0800 Subject: [PATCH 1282/1894] mm/hugetlb: fix deadlock in hugetlb_cow error path syzbot reported the deadlock here [1]. The issue is in hugetlb cow error handling when there are not enough huge pages for the faulting task which took the original reservation. It is possible that other (child) tasks could have consumed pages associated with the reservation. In this case, we want the task which took the original reservation to succeed. So, we unmap any associated pages in children so that they can be used by the faulting task that owns the reservation. The unmapping code needs to hold i_mmap_rwsem in write mode. However, due to commit c0d0381ade79 ("hugetlbfs: use i_mmap_rwsem for more pmd sharing synchronization") we are already holding i_mmap_rwsem in read mode when hugetlb_cow is called. Technically, i_mmap_rwsem does not need to be held in read mode for COW mappings as they can not share pmd's. Modifying the fault code to not take i_mmap_rwsem in read mode for COW (and other non-sharable) mappings is too involved for a stable fix. Instead, we simply drop the hugetlb_fault_mutex and i_mmap_rwsem before unmapping. This is OK as it is technically not needed. They are reacquired after unmapping as expected by calling code. Since this is done in an uncommon error path, the overhead of dropping and reacquiring mutexes is acceptable. While making changes, remove redundant BUG_ON after unmap_ref_private. [1] https://lkml.kernel.org/r/000000000000b73ccc05b5cf8558@google.com Link: https://lkml.kernel.org/r/4c5781b8-3b00-761e-c0c7-c5edebb6ec1a@oracle.com Fixes: c0d0381ade79 ("hugetlbfs: use i_mmap_rwsem for more pmd sharing synchronization") Signed-off-by: Mike Kravetz Reported-by: syzbot+5eee4145df3c15e96625@syzkaller.appspotmail.com Cc: Naoya Horiguchi Cc: Michal Hocko Cc: Hugh Dickins Cc: "Aneesh Kumar K . V" Cc: Davidlohr Bueso Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index cbf32d2824fd4..a2602969873dc 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4105,10 +4105,30 @@ retry_avoidcopy: * may get SIGKILLed if it later faults. */ if (outside_reserve) { + struct address_space *mapping = vma->vm_file->f_mapping; + pgoff_t idx; + u32 hash; + put_page(old_page); BUG_ON(huge_pte_none(pte)); + /* + * Drop hugetlb_fault_mutex and i_mmap_rwsem before + * unmapping. unmapping needs to hold i_mmap_rwsem + * in write mode. Dropping i_mmap_rwsem in read mode + * here is OK as COW mappings do not interact with + * PMD sharing. + * + * Reacquire both after unmap operation. + */ + idx = vma_hugecache_offset(h, vma, haddr); + hash = hugetlb_fault_mutex_hash(mapping, idx); + mutex_unlock(&hugetlb_fault_mutex_table[hash]); + i_mmap_unlock_read(mapping); + unmap_ref_private(mm, vma, old_page, haddr); - BUG_ON(huge_pte_none(pte)); + + i_mmap_lock_read(mapping); + mutex_lock(&hugetlb_fault_mutex_table[hash]); spin_lock(ptl); ptep = huge_pte_offset(mm, haddr, huge_page_size(h)); if (likely(ptep && -- GitLab From 3a176b94609a18f5f8bac7ddbf8923bd737262db Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 29 Dec 2020 15:14:28 -0800 Subject: [PATCH 1283/1894] Revert "kbuild: avoid static_assert for genksyms" This reverts commit 14dc3983b5dff513a90bd5a8cc90acaf7867c3d0. Macro Elver had sent a fix proper fix earlier, and also pointed out corner cases: "I guess what you propose is simpler, but might still have corner cases where we still get warnings. In particular, if some file (for whatever reason) does not include build_bug.h and uses a raw _Static_assert(), then we still get warnings. E.g. I see 1 user of raw _Static_assert() (drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h )." I believe the raw use of _Static_assert() should be allowed, so this should be fixed in genksyms. Even after commit 14dc3983b5df ("kbuild: avoid static_assert for genksyms"), I confirmed the following test code emits the warning. ---------------->8---------------- #include _Static_assert((1 ?: 0), ""); void foo(void) { } EXPORT_SYMBOL(foo); ---------------->8---------------- WARNING: modpost: EXPORT symbol "foo" [vmlinux] version generation failed, symbol will not be versioned. Now that commit 869b91992bce ("genksyms: Ignore module scoped _Static_assert()") fixed this issue properly, the workaround should be reverted. Link: https://lkml.org/lkml/2020/12/10/845 Link: https://lkml.kernel.org/r/20201219183911.181442-1-masahiroy@kernel.org Signed-off-by: Masahiro Yamada Cc: Marco Elver Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/build_bug.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/include/linux/build_bug.h b/include/linux/build_bug.h index 7bb66e15b481b..e3a0be2c90ad9 100644 --- a/include/linux/build_bug.h +++ b/include/linux/build_bug.h @@ -77,9 +77,4 @@ #define static_assert(expr, ...) __static_assert(expr, ##__VA_ARGS__, #expr) #define __static_assert(expr, msg, ...) _Static_assert(expr, msg) -#ifdef __GENKSYMS__ -/* genksyms gets confused by _Static_assert */ -#define _Static_assert(expr, ...) -#endif - #endif /* _LINUX_BUILD_BUG_H */ -- GitLab From 5dbdb2d87c294401a22e6a6002f08ebc9fbea38b Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 29 Dec 2020 15:14:31 -0800 Subject: [PATCH 1284/1894] checkpatch: prefer strscpy to strlcpy Prefer strscpy over the deprecated strlcpy function. Link: https://lkml.kernel.org/r/19fe91084890e2c16fe56f960de6c570a93fa99b.camel@perches.com Signed-off-by: Joe Perches Requested-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 00085308ed9da..92e888ed939f9 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -6646,6 +6646,12 @@ sub process { # } # } +# strlcpy uses that should likely be strscpy + if ($line =~ /\bstrlcpy\s*\(/) { + WARN("STRLCPY", + "Prefer strscpy over strlcpy - see: https://lore.kernel.org/r/CAHk-=wgfRnXz0W3D37d01q3JFkr_i_uTL=V6A6G1oUZcprmknw\@mail.gmail.com/\n" . $herecurr); + } + # typecasts on min/max could be min_t/max_t if ($perl_version_ok && defined $stat && -- GitLab From 6d87d0ece58bc0022ca5247721a8eb06ef66b673 Mon Sep 17 00:00:00 2001 From: Souptick Joarder Date: Tue, 29 Dec 2020 15:14:34 -0800 Subject: [PATCH 1285/1894] mm: add prototype for __add_to_page_cache_locked() Otherwise it causes a gcc warning: mm/filemap.c:830:14: warning: no previous prototype for `__add_to_page_cache_locked' [-Wmissing-prototypes] A previous attempt to make this function static led to compilation errors when CONFIG_DEBUG_INFO_BTF is enabled because __add_to_page_cache_locked() is referred to by BPF code. Adding a prototype will silence the warning. Link: https://lkml.kernel.org/r/1608693702-4665-1-git-send-email-jrdr.linux@gmail.com Signed-off-by: Souptick Joarder Cc: Alex Shi Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index 5299b90a6c403..c1e9081844429 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -216,6 +216,13 @@ int overcommit_kbytes_handler(struct ctl_table *, int, void *, size_t *, loff_t *); int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *, loff_t *); +/* + * Any attempt to mark this function as static leads to build failure + * when CONFIG_DEBUG_INFO_BTF is enabled because __add_to_page_cache_locked() + * is referred to by BPF code. This must be visible for error injection. + */ +int __add_to_page_cache_locked(struct page *page, struct address_space *mapping, + pgoff_t index, gfp_t gfp, void **shadowp); #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) -- GitLab From dc2da7b45ffe954a0090f5d0310ed7b0b37d2bd2 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Tue, 29 Dec 2020 15:14:37 -0800 Subject: [PATCH 1286/1894] mm: memmap defer init doesn't work as expected VMware observed a performance regression during memmap init on their platform, and bisected to commit 73a6e474cb376 ("mm: memmap_init: iterate over memblock regions rather that check each PFN") causing it. Before the commit: [0.033176] Normal zone: 1445888 pages used for memmap [0.033176] Normal zone: 89391104 pages, LIFO batch:63 [0.035851] ACPI: PM-Timer IO Port: 0x448 With commit [0.026874] Normal zone: 1445888 pages used for memmap [0.026875] Normal zone: 89391104 pages, LIFO batch:63 [2.028450] ACPI: PM-Timer IO Port: 0x448 The root cause is the current memmap defer init doesn't work as expected. Before, memmap_init_zone() was used to do memmap init of one whole zone, to initialize all low zones of one numa node, but defer memmap init of the last zone in that numa node. However, since commit 73a6e474cb376, function memmap_init() is adapted to iterater over memblock regions inside one zone, then call memmap_init_zone() to do memmap init for each region. E.g, on VMware's system, the memory layout is as below, there are two memory regions in node 2. The current code will mistakenly initialize the whole 1st region [mem 0xab00000000-0xfcffffffff], then do memmap defer to iniatialize only one memmory section on the 2nd region [mem 0x10000000000-0x1033fffffff]. In fact, we only expect to see that there's only one memory section's memmap initialized. That's why more time is costed at the time. [ 0.008842] ACPI: SRAT: Node 0 PXM 0 [mem 0x00000000-0x0009ffff] [ 0.008842] ACPI: SRAT: Node 0 PXM 0 [mem 0x00100000-0xbfffffff] [ 0.008843] ACPI: SRAT: Node 0 PXM 0 [mem 0x100000000-0x55ffffffff] [ 0.008844] ACPI: SRAT: Node 1 PXM 1 [mem 0x5600000000-0xaaffffffff] [ 0.008844] ACPI: SRAT: Node 2 PXM 2 [mem 0xab00000000-0xfcffffffff] [ 0.008845] ACPI: SRAT: Node 2 PXM 2 [mem 0x10000000000-0x1033fffffff] Now, let's add a parameter 'zone_end_pfn' to memmap_init_zone() to pass down the real zone end pfn so that defer_init() can use it to judge whether defer need be taken in zone wide. Link: https://lkml.kernel.org/r/20201223080811.16211-1-bhe@redhat.com Link: https://lkml.kernel.org/r/20201223080811.16211-2-bhe@redhat.com Fixes: commit 73a6e474cb376 ("mm: memmap_init: iterate over memblock regions rather that check each PFN") Signed-off-by: Baoquan He Reported-by: Rahul Gopakumar Reviewed-by: Mike Rapoport Cc: David Hildenbrand Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/mm/init.c | 4 ++-- include/linux/mm.h | 5 +++-- mm/memory_hotplug.c | 2 +- mm/page_alloc.c | 8 +++++--- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 9b5acf8fb092c..e76386a3479ea 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -536,7 +536,7 @@ virtual_memmap_init(u64 start, u64 end, void *arg) if (map_start < map_end) memmap_init_zone((unsigned long)(map_end - map_start), - args->nid, args->zone, page_to_pfn(map_start), + args->nid, args->zone, page_to_pfn(map_start), page_to_pfn(map_end), MEMINIT_EARLY, NULL, MIGRATE_MOVABLE); return 0; } @@ -546,7 +546,7 @@ memmap_init (unsigned long size, int nid, unsigned long zone, unsigned long start_pfn) { if (!vmem_map) { - memmap_init_zone(size, nid, zone, start_pfn, + memmap_init_zone(size, nid, zone, start_pfn, start_pfn + size, MEMINIT_EARLY, NULL, MIGRATE_MOVABLE); } else { struct page *start; diff --git a/include/linux/mm.h b/include/linux/mm.h index c1e9081844429..ecdf8a8cd6aeb 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2439,8 +2439,9 @@ extern int __meminit early_pfn_to_nid(unsigned long pfn); #endif extern void set_dma_reserve(unsigned long new_dma_reserve); -extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long, - enum meminit_context, struct vmem_altmap *, int migratetype); +extern void memmap_init_zone(unsigned long, int, unsigned long, + unsigned long, unsigned long, enum meminit_context, + struct vmem_altmap *, int migratetype); extern void setup_per_zone_wmarks(void); extern int __meminit init_per_zone_wmark_min(void); extern void mem_init(void); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index af41fb9908200..f9d57b9be8c71 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -713,7 +713,7 @@ void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, * expects the zone spans the pfn range. All the pages in the range * are reserved so nobody should be touching them so we should be safe */ - memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn, + memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn, 0, MEMINIT_HOTPLUG, altmap, migratetype); set_zone_contiguous(zone); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 7a2c89b211150..bdbec4c981738 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -423,6 +423,8 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn) if (end_pfn < pgdat_end_pfn(NODE_DATA(nid))) return false; + if (NODE_DATA(nid)->first_deferred_pfn != ULONG_MAX) + return true; /* * We start only with one section of pages, more pages are added as * needed until the rest of deferred pages are initialized. @@ -6116,7 +6118,7 @@ overlap_memmap_init(unsigned long zone, unsigned long *pfn) * zone stats (e.g., nr_isolate_pageblock) are touched. */ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, - unsigned long start_pfn, + unsigned long start_pfn, unsigned long zone_end_pfn, enum meminit_context context, struct vmem_altmap *altmap, int migratetype) { @@ -6152,7 +6154,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, if (context == MEMINIT_EARLY) { if (overlap_memmap_init(zone, &pfn)) continue; - if (defer_init(nid, pfn, end_pfn)) + if (defer_init(nid, pfn, zone_end_pfn)) break; } @@ -6266,7 +6268,7 @@ void __meminit __weak memmap_init(unsigned long size, int nid, if (end_pfn > start_pfn) { size = end_pfn - start_pfn; - memmap_init_zone(size, nid, zone, start_pfn, + memmap_init_zone(size, nid, zone, start_pfn, range_end_pfn, MEMINIT_EARLY, NULL, MIGRATE_MOVABLE); } } -- GitLab From e05986ee7a5814bec0e0075d813daca3d46e4a9e Mon Sep 17 00:00:00 2001 From: Kalesh Singh Date: Tue, 29 Dec 2020 15:14:40 -0800 Subject: [PATCH 1287/1894] mm/mremap.c: fix extent calculation When `next < old_addr`, `next - old_addr` arithmetic underflows causing `extent` to be incorrect. Make `extent` the smaller of `next - old_addr` or `old_end - old_addr`. Link: https://lkml.kernel.org/r/20201219170433.2418867-1-kaleshsingh@google.com Fixes: c49dd34018026 ("mm: speedup mremap on 1GB or larger regions") Signed-off-by: Kalesh Singh Reported-by: Guenter Roeck Tested-by: Guenter Roeck Cc: Suren Baghdasaryan Cc: Minchan Kim Cc: Lokesh Gidra Cc: Helge Deller Cc: Kalesh Singh Cc: "Kirill A. Shutemov" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mremap.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/mremap.c b/mm/mremap.c index c5590afe71650..f554320281ccd 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -358,7 +358,9 @@ static unsigned long get_extent(enum pgt_entry entry, unsigned long old_addr, next = (old_addr + size) & mask; /* even if next overflowed, extent below will be ok */ - extent = (next > old_end) ? old_end - old_addr : next - old_addr; + extent = next - old_addr; + if (extent > old_end - old_addr) + extent = old_end - old_addr; next = (new_addr + size) & mask; if (extent > next - new_addr) extent = next - new_addr; -- GitLab From 111fe7186b29d172729db5e294875b9fc7a0ec1d Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 29 Dec 2020 15:14:43 -0800 Subject: [PATCH 1288/1894] mm: generalise COW SMC TLB flushing race comment I'm not sure if I'm completely missing something here, but AFAIKS the reference to the mysterious "COW SMC race" confuses the issue. The original changelog and mailing list thread didn't help me either. This SMC race is where the problem was detected, but isn't the general problem bigger and more obvious: that the new PTE could be picked up at any time by any TLB while entries for the old PTE exist in other TLBs before the TLB flush takes effect? The case where the iTLB and dTLB of a CPU are pointing at different pages is an interesting one but follows from the general problem. The other (minor) thing with the comment I think it makes it a bit clearer to say what the old code was doing (i.e., it avoids the race as opposed to what?). References: 4ce072f1faf29 ("mm: fix a race condition under SMC + COW") Link: https://lkml.kernel.org/r/20201215121119.351650-1-npiggin@gmail.com Reviewed-by: Matthew Wilcox (Oracle) Cc: Suresh Siddha Cc: "David S. Miller" Cc: Hugh Dickins Cc: Peter Zijlstra Cc: Suresh Siddha Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 7d608765932b9..feff48e1465a6 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2892,11 +2892,13 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) entry = mk_pte(new_page, vma->vm_page_prot); entry = pte_sw_mkyoung(entry); entry = maybe_mkwrite(pte_mkdirty(entry), vma); + /* * Clear the pte entry and flush it first, before updating the - * pte with the new entry. This will avoid a race condition - * seen in the presence of one thread doing SMC and another - * thread doing COW. + * pte with the new entry, to keep TLBs on different CPUs in + * sync. This code used to set the new PTE then flush TLBs, but + * that left a window where the new PTE could be loaded into + * some TLBs while the old PTE remains in others. */ ptep_clear_flush_notify(vma, vmf->address, vmf->pte); page_add_new_anon_rmap(new_page, vma, vmf->address, false); -- GitLab From 13384f6125ad7ebdcc8914fe1e03ded48ce76581 Mon Sep 17 00:00:00 2001 From: Walter Wu Date: Tue, 29 Dec 2020 15:14:46 -0800 Subject: [PATCH 1289/1894] kasan: fix null pointer dereference in kasan_record_aux_stack Syzbot reported the following [1]: BUG: kernel NULL pointer dereference, address: 0000000000000008 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 2d993067 P4D 2d993067 PUD 19a3c067 PMD 0 Oops: 0000 [#1] PREEMPT SMP KASAN CPU: 1 PID: 3852 Comm: kworker/1:2 Not tainted 5.10.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: events free_ipc RIP: 0010:kasan_record_aux_stack+0x77/0xb0 Add null checking slab object from kasan_get_alloc_meta() in order to avoid null pointer dereference. [1] https://syzkaller.appspot.com/x/log.txt?x=10a82a50d00000 Link: https://lkml.kernel.org/r/20201228080018.23041-1-walter-zh.wu@mediatek.com Signed-off-by: Walter Wu Suggested-by: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Andrey Konovalov Cc: Alexander Potapenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/generic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c index 1dd5a0f993726..5106b84b07d43 100644 --- a/mm/kasan/generic.c +++ b/mm/kasan/generic.c @@ -337,6 +337,8 @@ void kasan_record_aux_stack(void *addr) cache = page->slab_cache; object = nearest_obj(cache, page, addr); alloc_meta = kasan_get_alloc_meta(cache, object); + if (!alloc_meta) + return; alloc_meta->aux_stack[1] = alloc_meta->aux_stack[0]; alloc_meta->aux_stack[0] = kasan_save_stack(GFP_NOWAIT); -- GitLab From 87dbc209ea04645fd2351981f09eff5d23f8e2e9 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 29 Dec 2020 15:14:49 -0800 Subject: [PATCH 1290/1894] local64.h: make mandatory Make mandatory in include/asm-generic/Kbuild and remove all arch/*/include/asm/local64.h arch-specific files since they only #include . This fixes build errors on arch/c6x/ and arch/nios2/ for block/blk-iocost.c. Build-tested on 21 of 25 arch-es. (tools problems on the others) Yes, we could even rename to and change all #includes to use instead. Link: https://lkml.kernel.org/r/20201227024446.17018-1-rdunlap@infradead.org Signed-off-by: Randy Dunlap Suggested-by: Christoph Hellwig Reviewed-by: Masahiro Yamada Cc: Jens Axboe Cc: Ley Foon Tan Cc: Mark Salter Cc: Aurelien Jacquiot Cc: Peter Zijlstra Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/local64.h | 1 - arch/arc/include/asm/Kbuild | 1 - arch/arm/include/asm/Kbuild | 1 - arch/arm64/include/asm/Kbuild | 1 - arch/csky/include/asm/Kbuild | 1 - arch/h8300/include/asm/Kbuild | 1 - arch/hexagon/include/asm/Kbuild | 1 - arch/ia64/include/asm/local64.h | 1 - arch/m68k/include/asm/Kbuild | 1 - arch/microblaze/include/asm/Kbuild | 1 - arch/mips/include/asm/Kbuild | 1 - arch/nds32/include/asm/Kbuild | 1 - arch/openrisc/include/asm/Kbuild | 1 - arch/parisc/include/asm/Kbuild | 1 - arch/powerpc/include/asm/Kbuild | 1 - arch/riscv/include/asm/Kbuild | 1 - arch/s390/include/asm/Kbuild | 1 - arch/sh/include/asm/Kbuild | 1 - arch/sparc/include/asm/Kbuild | 1 - arch/x86/include/asm/local64.h | 1 - arch/xtensa/include/asm/Kbuild | 1 - include/asm-generic/Kbuild | 1 + 22 files changed, 1 insertion(+), 21 deletions(-) delete mode 100644 arch/alpha/include/asm/local64.h delete mode 100644 arch/ia64/include/asm/local64.h delete mode 100644 arch/x86/include/asm/local64.h diff --git a/arch/alpha/include/asm/local64.h b/arch/alpha/include/asm/local64.h deleted file mode 100644 index 36c93b5cc239b..0000000000000 --- a/arch/alpha/include/asm/local64.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild index 81f4edec0c2a9..3c1afa524b9c2 100644 --- a/arch/arc/include/asm/Kbuild +++ b/arch/arc/include/asm/Kbuild @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 generic-y += extable.h generic-y += kvm_para.h -generic-y += local64.h generic-y += mcs_spinlock.h generic-y += parport.h generic-y += user.h diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index 4a0848aef2075..03657ff8fbe3d 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -2,7 +2,6 @@ generic-y += early_ioremap.h generic-y += extable.h generic-y += flat.h -generic-y += local64.h generic-y += parport.h generated-y += mach-types.h diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index ff9cbb6312128..07ac208edc894 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -1,6 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 generic-y += early_ioremap.h -generic-y += local64.h generic-y += mcs_spinlock.h generic-y += qrwlock.h generic-y += qspinlock.h diff --git a/arch/csky/include/asm/Kbuild b/arch/csky/include/asm/Kbuild index 93372255984dc..cc24bb8e539fd 100644 --- a/arch/csky/include/asm/Kbuild +++ b/arch/csky/include/asm/Kbuild @@ -2,7 +2,6 @@ generic-y += asm-offsets.h generic-y += gpio.h generic-y += kvm_para.h -generic-y += local64.h generic-y += qrwlock.h generic-y += user.h generic-y += vmlinux.lds.h diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild index ddf04f32b5467..60ee7f0d60a8f 100644 --- a/arch/h8300/include/asm/Kbuild +++ b/arch/h8300/include/asm/Kbuild @@ -2,7 +2,6 @@ generic-y += asm-offsets.h generic-y += extable.h generic-y += kvm_para.h -generic-y += local64.h generic-y += mcs_spinlock.h generic-y += parport.h generic-y += spinlock.h diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild index 373964bb177e4..3ece3c93fe086 100644 --- a/arch/hexagon/include/asm/Kbuild +++ b/arch/hexagon/include/asm/Kbuild @@ -2,5 +2,4 @@ generic-y += extable.h generic-y += iomap.h generic-y += kvm_para.h -generic-y += local64.h generic-y += mcs_spinlock.h diff --git a/arch/ia64/include/asm/local64.h b/arch/ia64/include/asm/local64.h deleted file mode 100644 index 36c93b5cc239b..0000000000000 --- a/arch/ia64/include/asm/local64.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild index 1bff55aa2d54e..0dbf9c5c6faeb 100644 --- a/arch/m68k/include/asm/Kbuild +++ b/arch/m68k/include/asm/Kbuild @@ -2,6 +2,5 @@ generated-y += syscall_table.h generic-y += extable.h generic-y += kvm_para.h -generic-y += local64.h generic-y += mcs_spinlock.h generic-y += spinlock.h diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild index 63bce836b9f10..29b0e557aa7c5 100644 --- a/arch/microblaze/include/asm/Kbuild +++ b/arch/microblaze/include/asm/Kbuild @@ -2,7 +2,6 @@ generated-y += syscall_table.h generic-y += extable.h generic-y += kvm_para.h -generic-y += local64.h generic-y += mcs_spinlock.h generic-y += parport.h generic-y += syscalls.h diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild index 198b3bafdac97..95b4fa7bd0d1f 100644 --- a/arch/mips/include/asm/Kbuild +++ b/arch/mips/include/asm/Kbuild @@ -6,7 +6,6 @@ generated-y += syscall_table_64_n64.h generated-y += syscall_table_64_o32.h generic-y += export.h generic-y += kvm_para.h -generic-y += local64.h generic-y += mcs_spinlock.h generic-y += parport.h generic-y += qrwlock.h diff --git a/arch/nds32/include/asm/Kbuild b/arch/nds32/include/asm/Kbuild index ff1e94299317d..82a4453c9c2d5 100644 --- a/arch/nds32/include/asm/Kbuild +++ b/arch/nds32/include/asm/Kbuild @@ -4,6 +4,5 @@ generic-y += cmpxchg.h generic-y += export.h generic-y += gpio.h generic-y += kvm_para.h -generic-y += local64.h generic-y += parport.h generic-y += user.h diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild index 442f3d3bcd904..ca5987e110538 100644 --- a/arch/openrisc/include/asm/Kbuild +++ b/arch/openrisc/include/asm/Kbuild @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 generic-y += extable.h generic-y += kvm_para.h -generic-y += local64.h generic-y += mcs_spinlock.h generic-y += qspinlock_types.h generic-y += qspinlock.h diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild index f16c4db801162..4406475a23040 100644 --- a/arch/parisc/include/asm/Kbuild +++ b/arch/parisc/include/asm/Kbuild @@ -3,6 +3,5 @@ generated-y += syscall_table_32.h generated-y += syscall_table_64.h generated-y += syscall_table_c32.h generic-y += kvm_para.h -generic-y += local64.h generic-y += mcs_spinlock.h generic-y += user.h diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index 90cd5c53af666..e1f9b4ea1c537 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -5,7 +5,6 @@ generated-y += syscall_table_c32.h generated-y += syscall_table_spu.h generic-y += export.h generic-y += kvm_types.h -generic-y += local64.h generic-y += mcs_spinlock.h generic-y += qrwlock.h generic-y += vtime.h diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild index 59dd7be550054..445ccc97305a5 100644 --- a/arch/riscv/include/asm/Kbuild +++ b/arch/riscv/include/asm/Kbuild @@ -3,6 +3,5 @@ generic-y += early_ioremap.h generic-y += extable.h generic-y += flat.h generic-y += kvm_para.h -generic-y += local64.h generic-y += user.h generic-y += vmlinux.lds.h diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index 319efa0e6d024..1a18d7b82f86d 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -7,5 +7,4 @@ generated-y += unistd_nr.h generic-y += asm-offsets.h generic-y += export.h generic-y += kvm_types.h -generic-y += local64.h generic-y += mcs_spinlock.h diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild index 7435182ef8465..fc44d9c88b419 100644 --- a/arch/sh/include/asm/Kbuild +++ b/arch/sh/include/asm/Kbuild @@ -1,6 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 generated-y += syscall_table.h generic-y += kvm_para.h -generic-y += local64.h generic-y += mcs_spinlock.h generic-y += parport.h diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index 5269a704801fa..3688fdae50e45 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -6,5 +6,4 @@ generated-y += syscall_table_64.h generated-y += syscall_table_c32.h generic-y += export.h generic-y += kvm_para.h -generic-y += local64.h generic-y += mcs_spinlock.h diff --git a/arch/x86/include/asm/local64.h b/arch/x86/include/asm/local64.h deleted file mode 100644 index 36c93b5cc239b..0000000000000 --- a/arch/x86/include/asm/local64.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index 9718e95935647..854c5e07e8670 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -2,7 +2,6 @@ generated-y += syscall_table.h generic-y += extable.h generic-y += kvm_para.h -generic-y += local64.h generic-y += mcs_spinlock.h generic-y += param.h generic-y += qrwlock.h diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild index 4365b9aa3e3f6..267f6dfb8960b 100644 --- a/include/asm-generic/Kbuild +++ b/include/asm-generic/Kbuild @@ -34,6 +34,7 @@ mandatory-y += kmap_size.h mandatory-y += kprobes.h mandatory-y += linkage.h mandatory-y += local.h +mandatory-y += local64.h mandatory-y += mm-arch-hooks.h mandatory-y += mmiowb.h mandatory-y += mmu.h -- GitLab From 8b0fac44bd1ff17016502b3c3533f5abb8456c65 Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Tue, 29 Dec 2020 15:14:52 -0800 Subject: [PATCH 1291/1894] sizes.h: add SZ_8G/SZ_16G/SZ_32G macros Add these macros, since we can use them in drivers. Link: https://lkml.kernel.org/r/20201229072819.11183-1-sjhuang@iluvatar.ai Signed-off-by: Huang Shijie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sizes.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/linux/sizes.h b/include/linux/sizes.h index 9874f6f675374..1ac79bcee2bb7 100644 --- a/include/linux/sizes.h +++ b/include/linux/sizes.h @@ -44,6 +44,9 @@ #define SZ_2G 0x80000000 #define SZ_4G _AC(0x100000000, ULL) +#define SZ_8G _AC(0x200000000, ULL) +#define SZ_16G _AC(0x400000000, ULL) +#define SZ_32G _AC(0x800000000, ULL) #define SZ_64T _AC(0x400000000000, ULL) #endif /* __LINUX_SIZES_H__ */ -- GitLab From aa8c7db494d0a83ecae583aa193f1134ef25d506 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 29 Dec 2020 15:14:55 -0800 Subject: [PATCH 1292/1894] kdev_t: always inline major/minor helper functions Silly GCC doesn't always inline these trivial functions. Fixes the following warning: arch/x86/kernel/sys_ia32.o: warning: objtool: cp_stat64()+0xd8: call to new_encode_dev() with UACCESS enabled Link: https://lkml.kernel.org/r/984353b44a4484d86ba9f73884b7306232e25e30.1608737428.git.jpoimboe@redhat.com Signed-off-by: Josh Poimboeuf Reported-by: Randy Dunlap Acked-by: Randy Dunlap [build-tested] Cc: Peter Zijlstra Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kdev_t.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/include/linux/kdev_t.h b/include/linux/kdev_t.h index 85b5151911cfd..4856706fbfeb4 100644 --- a/include/linux/kdev_t.h +++ b/include/linux/kdev_t.h @@ -21,61 +21,61 @@ }) /* acceptable for old filesystems */ -static inline bool old_valid_dev(dev_t dev) +static __always_inline bool old_valid_dev(dev_t dev) { return MAJOR(dev) < 256 && MINOR(dev) < 256; } -static inline u16 old_encode_dev(dev_t dev) +static __always_inline u16 old_encode_dev(dev_t dev) { return (MAJOR(dev) << 8) | MINOR(dev); } -static inline dev_t old_decode_dev(u16 val) +static __always_inline dev_t old_decode_dev(u16 val) { return MKDEV((val >> 8) & 255, val & 255); } -static inline u32 new_encode_dev(dev_t dev) +static __always_inline u32 new_encode_dev(dev_t dev) { unsigned major = MAJOR(dev); unsigned minor = MINOR(dev); return (minor & 0xff) | (major << 8) | ((minor & ~0xff) << 12); } -static inline dev_t new_decode_dev(u32 dev) +static __always_inline dev_t new_decode_dev(u32 dev) { unsigned major = (dev & 0xfff00) >> 8; unsigned minor = (dev & 0xff) | ((dev >> 12) & 0xfff00); return MKDEV(major, minor); } -static inline u64 huge_encode_dev(dev_t dev) +static __always_inline u64 huge_encode_dev(dev_t dev) { return new_encode_dev(dev); } -static inline dev_t huge_decode_dev(u64 dev) +static __always_inline dev_t huge_decode_dev(u64 dev) { return new_decode_dev(dev); } -static inline int sysv_valid_dev(dev_t dev) +static __always_inline int sysv_valid_dev(dev_t dev) { return MAJOR(dev) < (1<<14) && MINOR(dev) < (1<<18); } -static inline u32 sysv_encode_dev(dev_t dev) +static __always_inline u32 sysv_encode_dev(dev_t dev) { return MINOR(dev) | (MAJOR(dev) << 18); } -static inline unsigned sysv_major(u32 dev) +static __always_inline unsigned sysv_major(u32 dev) { return (dev >> 18) & 0x3fff; } -static inline unsigned sysv_minor(u32 dev) +static __always_inline unsigned sysv_minor(u32 dev) { return dev & 0x3ffff; } -- GitLab From 36845663843fc59c5d794e3dc0641472e3e572da Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Tue, 29 Dec 2020 15:14:58 -0800 Subject: [PATCH 1293/1894] lib/genalloc: fix the overflow when size is too big Some graphic card has very big memory on chip, such as 32G bytes. In the following case, it will cause overflow: pool = gen_pool_create(PAGE_SHIFT, NUMA_NO_NODE); ret = gen_pool_add(pool, 0x1000000, SZ_32G, NUMA_NO_NODE); va = gen_pool_alloc(pool, SZ_4G); The overflow occurs in gen_pool_alloc_algo_owner(): .... size = nbits << order; .... The @nbits is "int" type, so it will overflow. Then the gen_pool_avail() will return the wrong value. This patch converts some "int" to "unsigned long", and changes the compare code in while. Link: https://lkml.kernel.org/r/20201229060657.3389-1-sjhuang@iluvatar.ai Signed-off-by: Huang Shijie Reported-by: Shi Jiasheng Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/genalloc.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/lib/genalloc.c b/lib/genalloc.c index 7f1244b5294a8..dab97bb69df63 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -81,14 +81,14 @@ static int clear_bits_ll(unsigned long *addr, unsigned long mask_to_clear) * users set the same bit, one user will return remain bits, otherwise * return 0. */ -static int bitmap_set_ll(unsigned long *map, int start, int nr) +static int bitmap_set_ll(unsigned long *map, unsigned long start, unsigned long nr) { unsigned long *p = map + BIT_WORD(start); - const int size = start + nr; + const unsigned long size = start + nr; int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG); unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start); - while (nr - bits_to_set >= 0) { + while (nr >= bits_to_set) { if (set_bits_ll(p, mask_to_set)) return nr; nr -= bits_to_set; @@ -116,14 +116,15 @@ static int bitmap_set_ll(unsigned long *map, int start, int nr) * users clear the same bit, one user will return remain bits, * otherwise return 0. */ -static int bitmap_clear_ll(unsigned long *map, int start, int nr) +static unsigned long +bitmap_clear_ll(unsigned long *map, unsigned long start, unsigned long nr) { unsigned long *p = map + BIT_WORD(start); - const int size = start + nr; + const unsigned long size = start + nr; int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG); unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start); - while (nr - bits_to_clear >= 0) { + while (nr >= bits_to_clear) { if (clear_bits_ll(p, mask_to_clear)) return nr; nr -= bits_to_clear; @@ -183,8 +184,8 @@ int gen_pool_add_owner(struct gen_pool *pool, unsigned long virt, phys_addr_t ph size_t size, int nid, void *owner) { struct gen_pool_chunk *chunk; - int nbits = size >> pool->min_alloc_order; - int nbytes = sizeof(struct gen_pool_chunk) + + unsigned long nbits = size >> pool->min_alloc_order; + unsigned long nbytes = sizeof(struct gen_pool_chunk) + BITS_TO_LONGS(nbits) * sizeof(long); chunk = vzalloc_node(nbytes, nid); @@ -242,7 +243,7 @@ void gen_pool_destroy(struct gen_pool *pool) struct list_head *_chunk, *_next_chunk; struct gen_pool_chunk *chunk; int order = pool->min_alloc_order; - int bit, end_bit; + unsigned long bit, end_bit; list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); @@ -278,7 +279,7 @@ unsigned long gen_pool_alloc_algo_owner(struct gen_pool *pool, size_t size, struct gen_pool_chunk *chunk; unsigned long addr = 0; int order = pool->min_alloc_order; - int nbits, start_bit, end_bit, remain; + unsigned long nbits, start_bit, end_bit, remain; #ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG BUG_ON(in_nmi()); @@ -487,7 +488,7 @@ void gen_pool_free_owner(struct gen_pool *pool, unsigned long addr, size_t size, { struct gen_pool_chunk *chunk; int order = pool->min_alloc_order; - int start_bit, nbits, remain; + unsigned long start_bit, nbits, remain; #ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG BUG_ON(in_nmi()); @@ -755,7 +756,7 @@ unsigned long gen_pool_best_fit(unsigned long *map, unsigned long size, index = bitmap_find_next_zero_area(map, size, start, nr, 0); while (index < size) { - int next_bit = find_next_bit(map, size, index + nr); + unsigned long next_bit = find_next_bit(map, size, index + nr); if ((next_bit - index) < len) { len = next_bit - index; start_bit = index; -- GitLab From f0bb29e8c4076444d32df00c8d32e169ceecf283 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 29 Dec 2020 15:15:01 -0800 Subject: [PATCH 1294/1894] lib/zlib: fix inflating zlib streams on s390 Decompressing zlib streams on s390 fails with "incorrect data check" error. Userspace zlib checks inflate_state.flags in order to byteswap checksums only for zlib streams, and s390 hardware inflate code, which was ported from there, tries to match this behavior. At the same time, kernel zlib does not use inflate_state.flags, so it contains essentially random values. For many use cases either zlib stream is zeroed out or checksum is not used, so this problem is masked, but at least SquashFS is still affected. Fix by always passing a checksum to and from the hardware as is, which matches zlib_inflate()'s expectations. Link: https://lkml.kernel.org/r/20201215155551.894884-1-iii@linux.ibm.com Fixes: 126196100063 ("lib/zlib: add s390 hardware support for kernel zlib_inflate") Signed-off-by: Ilya Leoshkevich Tested-by: Christian Borntraeger Acked-by: Mikhail Zaslonko Acked-by: Christian Borntraeger Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Mikhail Zaslonko Cc: [5.6+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/zlib_dfltcc/dfltcc_inflate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/zlib_dfltcc/dfltcc_inflate.c b/lib/zlib_dfltcc/dfltcc_inflate.c index db107016d29b3..fb60b5a6a1cb6 100644 --- a/lib/zlib_dfltcc/dfltcc_inflate.c +++ b/lib/zlib_dfltcc/dfltcc_inflate.c @@ -125,7 +125,7 @@ dfltcc_inflate_action dfltcc_inflate( param->ho = (state->write - state->whave) & ((1 << HB_BITS) - 1); if (param->hl) param->nt = 0; /* Honor history for the first block */ - param->cv = state->flags ? REVERSE(state->check) : state->check; + param->cv = state->check; /* Inflate */ do { @@ -138,7 +138,7 @@ dfltcc_inflate_action dfltcc_inflate( state->bits = param->sbb; state->whave = param->hl; state->write = (param->ho + param->hl) & ((1 << HB_BITS) - 1); - state->check = state->flags ? REVERSE(param->cv) : param->cv; + state->check = param->cv; if (cc == DFLTCC_CC_OP2_CORRUPT && param->oesc != 0) { /* Report an error if stream is corrupted */ state->mode = BAD; -- GitLab From 605cc30dea249edf1b659e7d0146a2cf13cbbf71 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 29 Dec 2020 15:15:04 -0800 Subject: [PATCH 1295/1894] zlib: move EXPORT_SYMBOL() and MODULE_LICENSE() out of dfltcc_syms.c In commit 11fb479ff5d9 ("zlib: export S390 symbols for zlib modules"), I added EXPORT_SYMBOL()s to dfltcc_inflate.c but then Mikhail said that these should probably be in dfltcc_syms.c with the other EXPORT_SYMBOL()s. However, that is contrary to the current kernel style, which places EXPORT_SYMBOL() immediately after the function that it applies to, so move all EXPORT_SYMBOL()s to their respective function locations and drop the dfltcc_syms.c file. Also move MODULE_LICENSE() from the deleted file to dfltcc.c. [rdunlap@infradead.org: remove dfltcc_syms.o from Makefile] Link: https://lkml.kernel.org/r/20201227171837.15492-1-rdunlap@infradead.org Link: https://lkml.kernel.org/r/20201219052530.28461-1-rdunlap@infradead.org Fixes: 11fb479ff5d9 ("zlib: export S390 symbols for zlib modules") Signed-off-by: Randy Dunlap Cc: Acked-by: Ilya Leoshkevich Acked-by: Christian Borntraeger Cc: Zaslonko Mikhail Cc: Heiko Carstens Cc: Vasily Gorbik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/zlib_dfltcc/Makefile | 2 +- lib/zlib_dfltcc/dfltcc.c | 6 +++++- lib/zlib_dfltcc/dfltcc_deflate.c | 3 +++ lib/zlib_dfltcc/dfltcc_syms.c | 17 ----------------- 4 files changed, 9 insertions(+), 19 deletions(-) delete mode 100644 lib/zlib_dfltcc/dfltcc_syms.c diff --git a/lib/zlib_dfltcc/Makefile b/lib/zlib_dfltcc/Makefile index 8e4d5afbbb109..66e1c96387c40 100644 --- a/lib/zlib_dfltcc/Makefile +++ b/lib/zlib_dfltcc/Makefile @@ -8,4 +8,4 @@ obj-$(CONFIG_ZLIB_DFLTCC) += zlib_dfltcc.o -zlib_dfltcc-objs := dfltcc.o dfltcc_deflate.o dfltcc_inflate.o dfltcc_syms.o +zlib_dfltcc-objs := dfltcc.o dfltcc_deflate.o dfltcc_inflate.o diff --git a/lib/zlib_dfltcc/dfltcc.c b/lib/zlib_dfltcc/dfltcc.c index c30de430b30ca..782f76e9d4dab 100644 --- a/lib/zlib_dfltcc/dfltcc.c +++ b/lib/zlib_dfltcc/dfltcc.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: Zlib /* dfltcc.c - SystemZ DEFLATE CONVERSION CALL support. */ -#include +#include +#include #include "dfltcc_util.h" #include "dfltcc.h" @@ -53,3 +54,6 @@ void dfltcc_reset( dfltcc_state->dht_threshold = DFLTCC_DHT_MIN_SAMPLE_SIZE; dfltcc_state->param.ribm = DFLTCC_RIBM; } +EXPORT_SYMBOL(dfltcc_reset); + +MODULE_LICENSE("GPL"); diff --git a/lib/zlib_dfltcc/dfltcc_deflate.c b/lib/zlib_dfltcc/dfltcc_deflate.c index 00c185101c6d1..6c946e8532eec 100644 --- a/lib/zlib_dfltcc/dfltcc_deflate.c +++ b/lib/zlib_dfltcc/dfltcc_deflate.c @@ -4,6 +4,7 @@ #include "dfltcc_util.h" #include "dfltcc.h" #include +#include #include /* @@ -34,6 +35,7 @@ int dfltcc_can_deflate( return 1; } +EXPORT_SYMBOL(dfltcc_can_deflate); static void dfltcc_gdht( z_streamp strm @@ -277,3 +279,4 @@ again: goto again; /* deflate() must use all input or all output */ return 1; } +EXPORT_SYMBOL(dfltcc_deflate); diff --git a/lib/zlib_dfltcc/dfltcc_syms.c b/lib/zlib_dfltcc/dfltcc_syms.c deleted file mode 100644 index 6f23481804c1d..0000000000000 --- a/lib/zlib_dfltcc/dfltcc_syms.c +++ /dev/null @@ -1,17 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * linux/lib/zlib_dfltcc/dfltcc_syms.c - * - * Exported symbols for the s390 zlib dfltcc support. - * - */ - -#include -#include -#include -#include "dfltcc.h" - -EXPORT_SYMBOL(dfltcc_can_deflate); -EXPORT_SYMBOL(dfltcc_deflate); -EXPORT_SYMBOL(dfltcc_reset); -MODULE_LICENSE("GPL"); -- GitLab From 1f3147b49d75b47b6be54a1e6dfa87a4921e1e51 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 29 Dec 2020 15:15:07 -0800 Subject: [PATCH 1296/1894] mm: slub: call account_slab_page() after slab page initialization It's convenient to have page->objects initialized before calling into account_slab_page(). In particular, this information can be used to pre-alloc the obj_cgroup vector. Let's call account_slab_page() a bit later, after the initialization of page->objects. This commit doesn't bring any functional change, but is required for further optimizations. [akpm@linux-foundation.org: undo changes needed by forthcoming mm-memcg-slab-pre-allocate-obj_cgroups-for-slab-caches-with-slab_account.patch] Link: https://lkml.kernel.org/r/20201110195753.530157-1-guro@fb.com Signed-off-by: Roman Gushchin Acked-by: Johannes Weiner Reviewed-by: Shakeel Butt Cc: Michal Hocko Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slub.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 0c8b43a5b3b03..dc5b42e700b85 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1619,9 +1619,6 @@ static inline struct page *alloc_slab_page(struct kmem_cache *s, else page = __alloc_pages_node(node, flags, order); - if (page) - account_slab_page(page, order, s); - return page; } @@ -1774,6 +1771,8 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) page->objects = oo_objects(oo); + account_slab_page(page, oo_order(oo), s); + page->slab_cache = s; __SetPageSlab(page); if (page_is_pfmemalloc(page)) -- GitLab From 875b2376fd663832bf45f7285c9d26cb8c52929a Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 28 Dec 2020 19:47:06 -0800 Subject: [PATCH 1297/1894] fs: block_dev.c: fix kernel-doc warnings from struct block_device changes Fix new kernel-doc warnings in fs/block_dev.c: ../fs/block_dev.c:1066: warning: Excess function parameter 'whole' description in 'bd_abort_claiming' ../fs/block_dev.c:1837: warning: Function parameter or member 'dev' not described in 'lookup_bdev' Fixes: 4e7b5671c6a8 ("block: remove i_bdev") Fixes: 37c3fc9abb25 ("block: simplify the block device claiming interface") Signed-off-by: Randy Dunlap Cc: Jens Axboe Cc: Christoph Hellwig Cc: linux-fsdevel@vger.kernel.org Cc: Alexander Viro Signed-off-by: Jens Axboe --- fs/block_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 9293045e128cd..3e5b02f6606c4 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1055,7 +1055,6 @@ static void bd_finish_claiming(struct block_device *bdev, void *holder) /** * bd_abort_claiming - abort claiming of a block device * @bdev: block device of interest - * @whole: whole block device * @holder: holder that has claimed @bdev * * Abort claiming of a block device when the exclusive open failed. This can be @@ -1828,6 +1827,7 @@ const struct file_operations def_blk_fops = { /** * lookup_bdev - lookup a struct block_device by name * @pathname: special file representing the block device + * @dev: return value of the block device's dev_t * * Get a reference to the blockdevice at @pathname in the current * namespace if possible and return it. Return ERR_PTR(error) -- GitLab From dc30432605bbbd486dfede3852ea4d42c40a84b4 Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Mon, 28 Dec 2020 11:27:18 -0800 Subject: [PATCH 1298/1894] block: add debugfs stanza for QUEUE_FLAG_NOWAIT This was missed in 021a24460dc2. Leads to the numeric value of QUEUE_FLAG_NOWAIT (i.e. 29) showing up in /sys/kernel/debug/block/*/state. Fixes: 021a24460dc28e7412aecfae89f60e1847e685c0 Cc: Konstantin Khlebnikov Cc: Mike Snitzer Cc: Christoph Hellwig Cc: Jens Axboe Signed-off-by: Andres Freund Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 3094542e12ae0..e21eed20a1551 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -129,6 +129,7 @@ static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(PCI_P2PDMA), QUEUE_FLAG_NAME(ZONE_RESETALL), QUEUE_FLAG_NAME(RQ_ALLOC_TIME), + QUEUE_FLAG_NAME(NOWAIT), }; #undef QUEUE_FLAG_NAME -- GitLab From 44362a3c353aeec5904c2ae6d1737f20fe7e9c79 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 23 Dec 2020 12:08:54 +0000 Subject: [PATCH 1299/1894] KVM: arm64: Fix hyp_cpu_pm_{init,exit} __init annotation The __init annotations on hyp_cpu_pm_{init,exit} are obviously incorrect, and the build system shouts at you if you enable DEBUG_SECTION_MISMATCH. Nothing really bad happens as we never execute that code outside of the init context, but we can't label the callers as __int either, as kvm_init isn't __init itself. Oh well. Signed-off-by: Marc Zyngier Reviewed-by: Nathan Chancellor Link: https://lore.kernel.org/r/20201223120854.255347-1-maz@kernel.org --- arch/arm64/kvm/arm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index ab782c480e9a5..04c44853b103b 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1568,12 +1568,12 @@ static struct notifier_block hyp_init_cpu_pm_nb = { .notifier_call = hyp_init_cpu_pm_notifier, }; -static void __init hyp_cpu_pm_init(void) +static void hyp_cpu_pm_init(void) { if (!is_protected_kvm_enabled()) cpu_pm_register_notifier(&hyp_init_cpu_pm_nb); } -static void __init hyp_cpu_pm_exit(void) +static void hyp_cpu_pm_exit(void) { if (!is_protected_kvm_enabled()) cpu_pm_unregister_notifier(&hyp_init_cpu_pm_nb); -- GitLab From 6820e812dafb4258bc14692f686eec5bde6fba86 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Wed, 16 Dec 2020 11:23:21 +0200 Subject: [PATCH 1300/1894] spi: Fix the clamping of spi->max_speed_hz If spi->controller->max_speed_hz is zero, a non-zero spi->max_speed_hz will be overwritten by zero. Make sure spi->controller->max_speed_hz is not zero when clamping spi->max_speed_hz. Put the spi->controller->max_speed_hz non-zero check higher in the if, so that we avoid a superfluous init to zero when both spi->max_speed_hz and spi->controller->max_speed_hz are zero. Fixes: 9326e4f1e5dd ("spi: Limit the spi device max speed to controller's max speed") Reported-by: Geert Uytterhoeven Suggested-by: Geert Uytterhoeven Signed-off-by: Tudor Ambarus Tested-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20201216092321.413262-1-tudor.ambarus@microchip.com Signed-off-by: Mark Brown --- drivers/spi/spi.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 51d7c004fbab5..f59bf5094adb9 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -3378,8 +3378,9 @@ int spi_setup(struct spi_device *spi) if (status) return status; - if (!spi->max_speed_hz || - spi->max_speed_hz > spi->controller->max_speed_hz) + if (spi->controller->max_speed_hz && + (!spi->max_speed_hz || + spi->max_speed_hz > spi->controller->max_speed_hz)) spi->max_speed_hz = spi->controller->max_speed_hz; mutex_lock(&spi->controller->io_mutex); -- GitLab From e042f151ec7474b88b8c1edaaddd1ff7415d7117 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 28 Dec 2020 19:54:28 -0800 Subject: [PATCH 1301/1894] hwmon: (sbtsi_temp) Fix Documenation kernel-doc warning Fix Documentation/hwmon/ kernel-doc warning in 5.11-rc1: lnx-511-rc1/Documentation/hwmon/sbtsi_temp.rst:4: WARNING: Title underline too short. Kernel driver sbtsi_temp ================== Fixes: 6ec3fcf556fe ("hwmon: (sbtsi) Add documentation") Signed-off-by: Randy Dunlap Cc: Kun Yi Cc: Guenter Roeck Cc: Jean Delvare Cc: linux-hwmon@vger.kernel.org Link: https://lore.kernel.org/r/20201229035428.31270-1-rdunlap@infradead.org Signed-off-by: Guenter Roeck --- Documentation/hwmon/sbtsi_temp.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/hwmon/sbtsi_temp.rst b/Documentation/hwmon/sbtsi_temp.rst index 922b3c8db666e..749f518389c38 100644 --- a/Documentation/hwmon/sbtsi_temp.rst +++ b/Documentation/hwmon/sbtsi_temp.rst @@ -1,7 +1,7 @@ .. SPDX-License-Identifier: GPL-2.0-or-later Kernel driver sbtsi_temp -================== +======================== Supported hardware: -- GitLab From 742eb4750ff35fd62784b04b675d672b8dee2524 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 18 Nov 2020 21:23:33 +0100 Subject: [PATCH 1302/1894] s390: update defconfigs Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/configs/debug_defconfig | 12 +++++++++--- arch/s390/configs/defconfig | 11 +++++++---- arch/s390/configs/zfcpdump_defconfig | 2 ++ 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 1be32fcf6f2ee..c4f6ff98a612c 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -61,7 +61,9 @@ CONFIG_OPROFILE=m CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y CONFIG_STATIC_KEYS_SELFTEST=y +CONFIG_SECCOMP_CACHE_DEBUG=y CONFIG_LOCK_EVENT_COUNTS=y +# CONFIG_GCC_PLUGINS is not set CONFIG_MODULES=y CONFIG_MODULE_FORCE_LOAD=y CONFIG_MODULE_UNLOAD=y @@ -410,12 +412,12 @@ CONFIG_SCSI_ENCLOSURE=m CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_LOGGING=y CONFIG_SCSI_SPI_ATTRS=m -CONFIG_SCSI_FC_ATTRS=y +CONFIG_SCSI_FC_ATTRS=m CONFIG_SCSI_SAS_LIBSAS=m CONFIG_SCSI_SRP_ATTRS=m CONFIG_ISCSI_TCP=m CONFIG_SCSI_DEBUG=m -CONFIG_ZFCP=y +CONFIG_ZFCP=m CONFIG_SCSI_VIRTIO=m CONFIG_SCSI_DH=y CONFIG_SCSI_DH_RDAC=m @@ -444,6 +446,7 @@ CONFIG_DM_MULTIPATH=m CONFIG_DM_MULTIPATH_QL=m CONFIG_DM_MULTIPATH_ST=m CONFIG_DM_MULTIPATH_HST=m +CONFIG_DM_MULTIPATH_IOA=m CONFIG_DM_DELAY=m CONFIG_DM_UEVENT=y CONFIG_DM_FLAKEY=m @@ -542,7 +545,6 @@ CONFIG_INPUT_EVDEV=y # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set CONFIG_LEGACY_PTY_COUNT=0 -CONFIG_NULL_TTY=m CONFIG_VIRTIO_CONSOLE=y CONFIG_HW_RANDOM_VIRTIO=m CONFIG_RAW_DRIVER=m @@ -574,6 +576,7 @@ CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_INPUT=y CONFIG_VHOST_NET=m CONFIG_VHOST_VSOCK=m +# CONFIG_SURFACE_PLATFORMS is not set CONFIG_S390_CCW_IOMMU=y CONFIG_S390_AP_IOMMU=y CONFIG_EXT4_FS=y @@ -655,6 +658,7 @@ CONFIG_CIFS_XATTR=y CONFIG_CIFS_POSIX=y # CONFIG_CIFS_DEBUG is not set CONFIG_CIFS_DFS_UPCALL=y +CONFIG_CIFS_SWN_UPCALL=y CONFIG_NLS_DEFAULT="utf8" CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_CODEPAGE_850=m @@ -826,6 +830,8 @@ CONFIG_FTRACE_SYSCALLS=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_BPF_KPROBE_OVERRIDE=y CONFIG_HIST_TRIGGERS=y +CONFIG_FTRACE_STARTUP_TEST=y +# CONFIG_EVENT_TRACE_STARTUP_TEST is not set CONFIG_DEBUG_USER_ASCE=y CONFIG_NOTIFIER_ERROR_INJECTION=m CONFIG_NETDEV_NOTIFIER_ERROR_INJECT=m diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index e2171a0088094..51135893cffe3 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -58,6 +58,7 @@ CONFIG_S390_UNWIND_SELFTEST=m CONFIG_OPROFILE=m CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y +# CONFIG_GCC_PLUGINS is not set CONFIG_MODULES=y CONFIG_MODULE_FORCE_LOAD=y CONFIG_MODULE_UNLOAD=y @@ -95,7 +96,6 @@ CONFIG_ZSMALLOC_STAT=y CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_IDLE_PAGE_TRACKING=y CONFIG_PERCPU_STATS=y -CONFIG_GUP_TEST=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_PACKET_DIAG=m @@ -403,12 +403,12 @@ CONFIG_SCSI_ENCLOSURE=m CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_LOGGING=y CONFIG_SCSI_SPI_ATTRS=m -CONFIG_SCSI_FC_ATTRS=y +CONFIG_SCSI_FC_ATTRS=m CONFIG_SCSI_SAS_LIBSAS=m CONFIG_SCSI_SRP_ATTRS=m CONFIG_ISCSI_TCP=m CONFIG_SCSI_DEBUG=m -CONFIG_ZFCP=y +CONFIG_ZFCP=m CONFIG_SCSI_VIRTIO=m CONFIG_SCSI_DH=y CONFIG_SCSI_DH_RDAC=m @@ -437,6 +437,7 @@ CONFIG_DM_MULTIPATH=m CONFIG_DM_MULTIPATH_QL=m CONFIG_DM_MULTIPATH_ST=m CONFIG_DM_MULTIPATH_HST=m +CONFIG_DM_MULTIPATH_IOA=m CONFIG_DM_DELAY=m CONFIG_DM_UEVENT=y CONFIG_DM_FLAKEY=m @@ -536,7 +537,6 @@ CONFIG_INPUT_EVDEV=y # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set CONFIG_LEGACY_PTY_COUNT=0 -CONFIG_NULL_TTY=m CONFIG_VIRTIO_CONSOLE=y CONFIG_HW_RANDOM_VIRTIO=m CONFIG_RAW_DRIVER=m @@ -568,6 +568,7 @@ CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_INPUT=y CONFIG_VHOST_NET=m CONFIG_VHOST_VSOCK=m +# CONFIG_SURFACE_PLATFORMS is not set CONFIG_S390_CCW_IOMMU=y CONFIG_S390_AP_IOMMU=y CONFIG_EXT4_FS=y @@ -645,6 +646,7 @@ CONFIG_CIFS_XATTR=y CONFIG_CIFS_POSIX=y # CONFIG_CIFS_DEBUG is not set CONFIG_CIFS_DFS_UPCALL=y +CONFIG_CIFS_SWN_UPCALL=y CONFIG_NLS_DEFAULT="utf8" CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_CODEPAGE_850=m @@ -778,6 +780,7 @@ CONFIG_FTRACE_SYSCALLS=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_BPF_KPROBE_OVERRIDE=y CONFIG_HIST_TRIGGERS=y +CONFIG_DEBUG_USER_ASCE=y CONFIG_LKDTM=m CONFIG_PERCPU_TEST=m CONFIG_ATOMIC64_SELFTEST=y diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index a302630341eff..1ef211dae77a0 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -22,6 +22,7 @@ CONFIG_CRASH_DUMP=y # CONFIG_VIRTUALIZATION is not set # CONFIG_S390_GUEST is not set # CONFIG_SECCOMP is not set +# CONFIG_GCC_PLUGINS is not set CONFIG_PARTITION_ADVANCED=y CONFIG_IBM_PARTITION=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set @@ -58,6 +59,7 @@ CONFIG_RAW_DRIVER=y # CONFIG_HID is not set # CONFIG_VIRTIO_MENU is not set # CONFIG_VHOST_MENU is not set +# CONFIG_SURFACE_PLATFORMS is not set # CONFIG_IOMMU_SUPPORT is not set # CONFIG_DNOTIFY is not set # CONFIG_INOTIFY_USER is not set -- GitLab From 129975e75b9a2ba528d7f58be2e338cd644f6ed8 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 28 Dec 2020 09:51:57 +0100 Subject: [PATCH 1303/1894] s390/Kconfig: sort config S390 select list once again ...and add comments at the top and bottom. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/Kconfig | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index e84bdd15150bf..c72874f09741c 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -54,17 +54,23 @@ config KASAN_SHADOW_OFFSET config S390 def_bool y + # + # Note: keep this list sorted alphabetically + # + imply IMA_SECURE_AND_OR_TRUSTED_BOOT select ARCH_BINFMT_ELF_STATE select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DEBUG_WX select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE + select ARCH_HAS_FORCE_DMA_UNENCRYPTED select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_GIGANTIC_PAGE select ARCH_HAS_KCOV select ARCH_HAS_MEM_ENCRYPT select ARCH_HAS_PTE_SPECIAL + select ARCH_HAS_SCALED_CPUTIME select ARCH_HAS_SET_MEMORY select ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_STRICT_MODULE_RWX @@ -111,8 +117,10 @@ config S390 select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_TABLE_SORT select CLONE_BACKWARDS2 + select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES select DMA_OPS if PCI select DYNAMIC_FTRACE if FUNCTION_TRACER + select GENERIC_ALLOCATOR select GENERIC_CPU_AUTOPROBE select GENERIC_CPU_VULNERABILITIES select GENERIC_FIND_FIRST_BIT @@ -126,22 +134,21 @@ config S390 select HAVE_ARCH_JUMP_LABEL_RELATIVE select HAVE_ARCH_KASAN select HAVE_ARCH_KASAN_VMALLOC - select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_SOFT_DIRTY select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_ARCH_VMAP_STACK select HAVE_ASM_MODVERSIONS - select HAVE_EBPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES select HAVE_CMPXCHG_DOUBLE select HAVE_CMPXCHG_LOCAL select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_REGS - select HAVE_FAST_GUP + select HAVE_EBPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES select HAVE_EFFICIENT_UNALIGNED_ACCESS + select HAVE_FAST_GUP select HAVE_FENTRY select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_ERROR_INJECTION @@ -163,16 +170,15 @@ config S390 select HAVE_KRETPROBES select HAVE_KVM select HAVE_LIVEPATCH - select HAVE_PERF_REGS - select HAVE_PERF_USER_STACK_DUMP select HAVE_MEMBLOCK_PHYS_MAP - select MMU_GATHER_NO_GATHER select HAVE_MOD_ARCH_SPECIFIC + select HAVE_NMI select HAVE_NOP_MCOUNT select HAVE_OPROFILE select HAVE_PCI select HAVE_PERF_EVENTS - select MMU_GATHER_RCU_TABLE_FREE + select HAVE_PERF_REGS + select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE select HAVE_RSEQ @@ -181,6 +187,8 @@ config S390 select HAVE_VIRT_CPU_ACCOUNTING_IDLE select IOMMU_HELPER if PCI select IOMMU_SUPPORT if PCI + select MMU_GATHER_NO_GATHER + select MMU_GATHER_RCU_TABLE_FREE select MODULES_USE_ELF_RELA select NEED_DMA_MAP_STATE if PCI select NEED_SG_DMA_LENGTH if PCI @@ -190,17 +198,12 @@ config S390 select PCI_MSI if PCI select PCI_MSI_ARCH_FALLBACKS if PCI_MSI select SPARSE_IRQ + select SWIOTLB select SYSCTL_EXCEPTION_TRACE select THREAD_INFO_IN_TASK select TTY select VIRT_CPU_ACCOUNTING - select ARCH_HAS_SCALED_CPUTIME - select HAVE_NMI - select ARCH_HAS_FORCE_DMA_UNENCRYPTED - select SWIOTLB - select GENERIC_ALLOCATOR - imply IMA_SECURE_AND_OR_TRUSTED_BOOT - + # Note: keep the above list sorted alphabetically config SCHED_OMIT_FRAME_POINTER def_bool y -- GitLab From 1eda52334e6d13eb1a85f713ce06dd39342b5020 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 15 Dec 2020 10:20:30 +0100 Subject: [PATCH 1304/1894] hwmon: (pwm-fan) Ensure that calculation doesn't discard big period values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With MAX_PWM being defined to 255 the code unsigned long period; ... period = ctx->pwm->args.period; state.duty_cycle = DIV_ROUND_UP(pwm * (period - 1), MAX_PWM); calculates a too small value for duty_cycle if the configured period is big (either by discarding the 64 bit value ctx->pwm->args.period or by overflowing the multiplication). As this results in a too slow fan and so maybe an overheating machine better be safe than sorry and error out in .probe. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20201215092031.152243-1-u.kleine-koenig@pengutronix.de Signed-off-by: Guenter Roeck --- drivers/hwmon/pwm-fan.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/pwm-fan.c b/drivers/hwmon/pwm-fan.c index 777439f48c147..111a91dc6b798 100644 --- a/drivers/hwmon/pwm-fan.c +++ b/drivers/hwmon/pwm-fan.c @@ -334,8 +334,18 @@ static int pwm_fan_probe(struct platform_device *pdev) ctx->pwm_value = MAX_PWM; - /* Set duty cycle to maximum allowed and enable PWM output */ pwm_init_state(ctx->pwm, &state); + /* + * __set_pwm assumes that MAX_PWM * (period - 1) fits into an unsigned + * long. Check this here to prevent the fan running at a too low + * frequency. + */ + if (state.period > ULONG_MAX / MAX_PWM + 1) { + dev_err(dev, "Configured period too big\n"); + return -EINVAL; + } + + /* Set duty cycle to maximum allowed and enable PWM output */ state.duty_cycle = ctx->pwm->args.period - 1; state.enabled = true; -- GitLab From c318840fb2a42ce25febc95c4c19357acf1ae5ca Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 30 Dec 2020 11:20:44 -0500 Subject: [PATCH 1305/1894] USB: Gadget: dummy-hcd: Fix shift-out-of-bounds bug The dummy-hcd driver was written under the assumption that all the parameters in URBs sent to its root hub would be valid. With URBs sent from userspace via usbfs, that assumption can be violated. In particular, the driver doesn't fully check the port-feature values stored in the wValue entry of Clear-Port-Feature and Set-Port-Feature requests. Values that are too large can cause the driver to perform an invalid left shift of more than 32 bits. Ironically, two of those left shifts are unnecessary, because they implement Set-Port-Feature requests that hubs are not required to support, according to section 11.24.2.13 of the USB-2.0 spec. This patch adds the appropriate checks for the port feature selector values and removes the unnecessary feature settings. It also rejects requests to set the TEST feature or to set or clear the INDICATOR and C_OVERCURRENT features, as none of these are relevant to dummy-hcd's root-hub emulation. CC: Reported-and-tested-by: syzbot+5925509f78293baa7331@syzkaller.appspotmail.com Signed-off-by: Alan Stern Link: https://lore.kernel.org/r/20201230162044.GA727759@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/dummy_hcd.c | 35 ++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c index ab5e978b5052c..1a953f44183aa 100644 --- a/drivers/usb/gadget/udc/dummy_hcd.c +++ b/drivers/usb/gadget/udc/dummy_hcd.c @@ -2118,9 +2118,21 @@ static int dummy_hub_control( dum_hcd->port_status &= ~USB_PORT_STAT_POWER; set_link_state(dum_hcd); break; - default: + case USB_PORT_FEAT_ENABLE: + case USB_PORT_FEAT_C_ENABLE: + case USB_PORT_FEAT_C_SUSPEND: + /* Not allowed for USB-3 */ + if (hcd->speed == HCD_USB3) + goto error; + fallthrough; + case USB_PORT_FEAT_C_CONNECTION: + case USB_PORT_FEAT_C_RESET: dum_hcd->port_status &= ~(1 << wValue); set_link_state(dum_hcd); + break; + default: + /* Disallow INDICATOR and C_OVER_CURRENT */ + goto error; } break; case GetHubDescriptor: @@ -2281,18 +2293,17 @@ static int dummy_hub_control( */ dum_hcd->re_timeout = jiffies + msecs_to_jiffies(50); fallthrough; + case USB_PORT_FEAT_C_CONNECTION: + case USB_PORT_FEAT_C_RESET: + case USB_PORT_FEAT_C_ENABLE: + case USB_PORT_FEAT_C_SUSPEND: + /* Not allowed for USB-3, and ignored for USB-2 */ + if (hcd->speed == HCD_USB3) + goto error; + break; default: - if (hcd->speed == HCD_USB3) { - if ((dum_hcd->port_status & - USB_SS_PORT_STAT_POWER) != 0) { - dum_hcd->port_status |= (1 << wValue); - } - } else - if ((dum_hcd->port_status & - USB_PORT_STAT_POWER) != 0) { - dum_hcd->port_status |= (1 << wValue); - } - set_link_state(dum_hcd); + /* Disallow TEST, INDICATOR, and C_OVER_CURRENT */ + goto error; } break; case GetPortErrorCount: -- GitLab From be1283454b61a1f3b089f1a74b73e20532262e32 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 29 Dec 2020 18:08:18 +0100 Subject: [PATCH 1306/1894] cpufreq: intel_pstate: Fix fast-switch fallback path When sugov_update_single_perf() falls back to the "frequency" path due to the missing scale-invariance, it will call cpufreq_driver_fast_switch() via sugov_fast_switch() and the driver's ->fast_switch() callback will be invoked, so it must not be NULL. However, after commit a365ab6b9dfb ("cpufreq: intel_pstate: Implement the ->adjust_perf() callback") intel_pstate sets ->fast_switch() to NULL when it is going to use intel_cpufreq_adjust_perf(), which is a mistake, because on x86 the scale-invariance may be turned off dynamically, so modify it to retain the original ->adjust_perf() callback pointer. Fixes: a365ab6b9dfb ("cpufreq: intel_pstate: Implement the ->adjust_perf() callback") Reported-by: Kenneth R. Crudup Tested-by: Kenneth R. Crudup Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 6e23376548ced..1a660466dd756 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -3086,7 +3086,6 @@ static int __init intel_pstate_init(void) intel_pstate.attr = hwp_cpufreq_attrs; intel_cpufreq.attr = hwp_cpufreq_attrs; intel_cpufreq.flags |= CPUFREQ_NEED_UPDATE_LIMITS; - intel_cpufreq.fast_switch = NULL; intel_cpufreq.adjust_perf = intel_cpufreq_adjust_perf; if (!default_driver) default_driver = &intel_pstate; -- GitLab From 9cf93f056f783f986c19f40d5304d1bcffa0fc0d Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Sun, 27 Dec 2020 12:11:16 +0200 Subject: [PATCH 1307/1894] intel_idle: add SnowRidge C-state table Add C-state table for the SnowRidge SoC which is found on Intel Jacobsville platforms. The following has been changed. 1. C1E latency changed from 10us to 15us. It was measured using the open source "wult" tool (the "nic" method, 15us is the 99.99th percentile). 2. C1E power break even changed from 20us to 25us, which may result in less C1E residency in some workloads. 3. C6 latency changed from 50us to 130us. Measured the same way as C1E. The C6 C-state is supported only by some SnowRidge revisions, so add a C-state table commentary about this. On SnowRidge, C6 support is enumerated via the usual mechanism: "mwait" leaf of the "cpuid" instruction. The 'intel_idle' driver does check this leaf, so even though C6 is present in the table, the driver will only use it if the CPU does support it. Signed-off-by: Artem Bityutskiy Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 41 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index d79335506ecd3..28f93b9aa51bf 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -963,6 +963,39 @@ static struct cpuidle_state dnv_cstates[] __initdata = { .enter = NULL } }; +/* + * Note, depending on HW and FW revision, SnowRidge SoC may or may not support + * C6, and this is indicated in the CPUID mwait leaf. + */ +static struct cpuidle_state snr_cstates[] __initdata = { + { + .name = "C1", + .desc = "MWAIT 0x00", + .flags = MWAIT2flg(0x00), + .exit_latency = 2, + .target_residency = 2, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C1E", + .desc = "MWAIT 0x01", + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, + .exit_latency = 15, + .target_residency = 25, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C6", + .desc = "MWAIT 0x20", + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 130, + .target_residency = 500, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .enter = NULL } +}; + static const struct idle_cpu idle_cpu_nehalem __initconst = { .state_table = nehalem_cstates, .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE, @@ -1084,6 +1117,12 @@ static const struct idle_cpu idle_cpu_dnv __initconst = { .use_acpi = true, }; +static const struct idle_cpu idle_cpu_snr __initconst = { + .state_table = snr_cstates, + .disable_promotion_to_c1e = true, + .use_acpi = true, +}; + static const struct x86_cpu_id intel_idle_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP, &idle_cpu_nhx), X86_MATCH_INTEL_FAM6_MODEL(NEHALEM, &idle_cpu_nehalem), @@ -1122,7 +1161,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &idle_cpu_bxt), X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &idle_cpu_bxt), X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &idle_cpu_dnv), - X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &idle_cpu_dnv), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &idle_cpu_snr), {} }; -- GitLab From 1642b4450d20e31439c80c28256c8eee08684698 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 30 Dec 2020 21:34:14 +0000 Subject: [PATCH 1308/1894] io_uring: add a helper for setting a ref node Setting a new reference node to a file data is not trivial, don't repeat it, add and use a helper. Cc: stable@vger.kernel.org # 5.6+ Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index eb4620ff638ed..6372aba8d0c2d 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7231,6 +7231,16 @@ static void io_file_ref_kill(struct percpu_ref *ref) complete(&data->done); } +static void io_sqe_files_set_node(struct fixed_file_data *file_data, + struct fixed_file_ref_node *ref_node) +{ + spin_lock_bh(&file_data->lock); + file_data->node = ref_node; + list_add_tail(&ref_node->node, &file_data->ref_list); + spin_unlock_bh(&file_data->lock); + percpu_ref_get(&file_data->refs); +} + static int io_sqe_files_unregister(struct io_ring_ctx *ctx) { struct fixed_file_data *data = ctx->file_data; @@ -7758,11 +7768,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, return PTR_ERR(ref_node); } - file_data->node = ref_node; - spin_lock_bh(&file_data->lock); - list_add_tail(&ref_node->node, &file_data->ref_list); - spin_unlock_bh(&file_data->lock); - percpu_ref_get(&file_data->refs); + io_sqe_files_set_node(file_data, ref_node); return ret; out_fput: for (i = 0; i < ctx->nr_user_files; i++) { @@ -7918,11 +7924,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, if (needs_switch) { percpu_ref_kill(&data->node->refs); - spin_lock_bh(&data->lock); - list_add_tail(&ref_node->node, &data->ref_list); - data->node = ref_node; - spin_unlock_bh(&data->lock); - percpu_ref_get(&ctx->file_data->refs); + io_sqe_files_set_node(data, ref_node); } else destroy_fixed_file_ref_node(ref_node); -- GitLab From 1ffc54220c444774b7f09e6d2121e732f8e19b94 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 30 Dec 2020 21:34:15 +0000 Subject: [PATCH 1309/1894] io_uring: fix io_sqe_files_unregister() hangs io_sqe_files_unregister() uninterruptibly waits for enqueued ref nodes, however requests keeping them may never complete, e.g. because of some userspace dependency. Make sure it's interruptible otherwise it would hang forever. Cc: stable@vger.kernel.org # 5.6+ Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 6372aba8d0c2d..ca46f314640b1 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -992,6 +992,10 @@ enum io_mem_account { ACCT_PINNED, }; +static void destroy_fixed_file_ref_node(struct fixed_file_ref_node *ref_node); +static struct fixed_file_ref_node *alloc_fixed_file_ref_node( + struct io_ring_ctx *ctx); + static void __io_complete_rw(struct io_kiocb *req, long res, long res2, struct io_comp_state *cs); static void io_cqring_fill_event(struct io_kiocb *req, long res); @@ -7244,11 +7248,15 @@ static void io_sqe_files_set_node(struct fixed_file_data *file_data, static int io_sqe_files_unregister(struct io_ring_ctx *ctx) { struct fixed_file_data *data = ctx->file_data; - struct fixed_file_ref_node *ref_node = NULL; + struct fixed_file_ref_node *backup_node, *ref_node = NULL; unsigned nr_tables, i; + int ret; if (!data) return -ENXIO; + backup_node = alloc_fixed_file_ref_node(ctx); + if (!backup_node) + return -ENOMEM; spin_lock_bh(&data->lock); ref_node = data->node; @@ -7260,7 +7268,18 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx) /* wait for all refs nodes to complete */ flush_delayed_work(&ctx->file_put_work); - wait_for_completion(&data->done); + do { + ret = wait_for_completion_interruptible(&data->done); + if (!ret) + break; + ret = io_run_task_work_sig(); + if (ret < 0) { + percpu_ref_resurrect(&data->refs); + reinit_completion(&data->done); + io_sqe_files_set_node(data, backup_node); + return ret; + } + } while (1); __io_sqe_files_unregister(ctx); nr_tables = DIV_ROUND_UP(ctx->nr_user_files, IORING_MAX_FILES_TABLE); @@ -7271,6 +7290,7 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx) kfree(data); ctx->file_data = NULL; ctx->nr_user_files = 0; + destroy_fixed_file_ref_node(backup_node); return 0; } -- GitLab From b1b6b5a30dce872f500dc43f067cba8e7f86fc7d Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 30 Dec 2020 21:34:16 +0000 Subject: [PATCH 1310/1894] kernel/io_uring: cancel io_uring before task works For cancelling io_uring requests it needs either to be able to run currently enqueued task_works or having it shut down by that moment. Otherwise io_uring_cancel_files() may be waiting for requests that won't ever complete. Go with the first way and do cancellations before setting PF_EXITING and so before putting the task_work infrastructure into a transition state where task_work_run() would better not be called. Cc: stable@vger.kernel.org # 5.5+ Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/file.c | 2 -- kernel/exit.c | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/file.c b/fs/file.c index c0b60961c6722..dab120b71e44d 100644 --- a/fs/file.c +++ b/fs/file.c @@ -21,7 +21,6 @@ #include #include #include -#include unsigned int sysctl_nr_open __read_mostly = 1024*1024; unsigned int sysctl_nr_open_min = BITS_PER_LONG; @@ -428,7 +427,6 @@ void exit_files(struct task_struct *tsk) struct files_struct * files = tsk->files; if (files) { - io_uring_files_cancel(files); task_lock(tsk); tsk->files = NULL; task_unlock(tsk); diff --git a/kernel/exit.c b/kernel/exit.c index 3594291a85428..04029e35e69af 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -63,6 +63,7 @@ #include #include #include +#include #include #include @@ -776,6 +777,7 @@ void __noreturn do_exit(long code) schedule(); } + io_uring_files_cancel(tsk->files); exit_signals(tsk); /* sets PF_EXITING */ /* sync mm's RSS info before statistics gathering */ -- GitLab From f93274ef0fe972c120c96b3207f8fce376231a60 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 4 Dec 2020 09:01:36 +0100 Subject: [PATCH 1311/1894] crypto: asym_tpm: correct zero out potential secrets The function derive_pub_key() should be calling memzero_explicit() instead of memset() in case the complier decides to optimize away the call to memset() because it "knows" no one is going to touch the memory anymore. Cc: stable Reported-by: Ilil Blum Shem-Tov Tested-by: Ilil Blum Shem-Tov Signed-off-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/X8ns4AfwjKudpyfe@kroah.com Signed-off-by: Greg Kroah-Hartman --- crypto/asymmetric_keys/asym_tpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/asymmetric_keys/asym_tpm.c b/crypto/asymmetric_keys/asym_tpm.c index 511932aa94a6f..0959613560b9e 100644 --- a/crypto/asymmetric_keys/asym_tpm.c +++ b/crypto/asymmetric_keys/asym_tpm.c @@ -354,7 +354,7 @@ static uint32_t derive_pub_key(const void *pub_key, uint32_t len, uint8_t *buf) memcpy(cur, e, sizeof(e)); cur += sizeof(e); /* Zero parameters to satisfy set_pub_key ABI. */ - memset(cur, 0, SETKEY_PARAMS_SIZE); + memzero_explicit(cur, SETKEY_PARAMS_SIZE); return cur - buf; } -- GitLab From 744a11abc56405c5a106e63da30a941b6d27f737 Mon Sep 17 00:00:00 2001 From: bo liu Date: Tue, 29 Dec 2020 11:52:26 +0800 Subject: [PATCH 1312/1894] ALSA: hda/conexant: add a new hda codec CX11970 The current kernel does not support the cx11970 codec chip. Add a codec configuration item to kernel. [ Minor coding style fix by tiwai ] Signed-off-by: bo liu Cc: Link: https://lore.kernel.org/r/20201229035226.62120-1-bo.liu@senarytech.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_conexant.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index be5000dd15853..d49cc4409d59c 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -1070,6 +1070,7 @@ static int patch_conexant_auto(struct hda_codec *codec) static const struct hda_device_id snd_hda_id_conexant[] = { HDA_CODEC_ENTRY(0x14f11f86, "CX8070", patch_conexant_auto), HDA_CODEC_ENTRY(0x14f12008, "CX8200", patch_conexant_auto), + HDA_CODEC_ENTRY(0x14f120d0, "CX11970", patch_conexant_auto), HDA_CODEC_ENTRY(0x14f15045, "CX20549 (Venice)", patch_conexant_auto), HDA_CODEC_ENTRY(0x14f15047, "CX20551 (Waikiki)", patch_conexant_auto), HDA_CODEC_ENTRY(0x14f15051, "CX20561 (Hermosa)", patch_conexant_auto), -- GitLab From 484229585a5e91eeb00ee10e05d5204e1ca6c481 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Jim=C3=A9nez?= Date: Tue, 29 Dec 2020 15:38:56 +0100 Subject: [PATCH 1313/1894] ALSA: hda/realtek: Add mute LED quirk for more HP laptops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit HP Pavilion 13-bb0000 (SSID 103c:87c8) needs the same quirk as other models with ALC287. Signed-off-by: Manuel Jiménez Cc: Link: https://lore.kernel.org/r/X+s/gKNydVrI6nLj@HP-Pavilion-13 Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index f49cf0f227c44..a7015b3a62c7b 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7969,6 +7969,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8760, "HP", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x877a, "HP", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x877d, "HP", ALC236_FIXUP_HP_MUTE_LED), + SND_PCI_QUIRK(0x103c, 0x87c8, "HP", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87f4, "HP", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87f5, "HP", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), -- GitLab From ce2e79b223867b9e586021b55dee7035517a236b Mon Sep 17 00:00:00 2001 From: PeiSen Hou Date: Thu, 31 Dec 2020 11:57:28 +0100 Subject: [PATCH 1314/1894] ALSA: hda/realtek: Add two "Intel Reference board" SSID in the ALC256. Add two "Intel Reference boad" SSID in the alc256. Enable "power saving mode" and Enable "headset jack mode". Signed-off-by: PeiSen Hou Cc: Link: https://lore.kernel.org/r/5978d2267f034c28973d117925ec9c63@realtek.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index a7015b3a62c7b..71a85bf990557 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8027,6 +8027,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x10cf, 0x1845, "Lifebook U904", ALC269_FIXUP_LIFEBOOK_EXTMIC), SND_PCI_QUIRK(0x10ec, 0x10f2, "Intel Reference board", ALC700_FIXUP_INTEL_REFERENCE), SND_PCI_QUIRK(0x10ec, 0x1230, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), + SND_PCI_QUIRK(0x10ec, 0x1252, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), + SND_PCI_QUIRK(0x10ec, 0x1254, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-SZ6", ALC269_FIXUP_HEADSET_MODE), SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x144d, 0xc169, "Samsung Notebook 9 Pen (NP930SBE-K01US)", ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), -- GitLab From a598098cc9737f612dbab52294433fc26c51cc9b Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 30 Dec 2020 20:56:35 +0800 Subject: [PATCH 1315/1894] ALSA: hda/realtek: Enable mute and micmute LED on HP EliteBook 850 G7 HP EliteBook 850 G7 uses the same GPIO pins as ALC285_FIXUP_HP_GPIO_LED to enable mute and micmute LED. So apply the quirk to enable the LEDs. Signed-off-by: Kai-Heng Feng Cc: Link: https://lore.kernel.org/r/20201230125636.45028-1-kai.heng.feng@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 71a85bf990557..3c1d2a3fb1a4f 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7964,6 +7964,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8497, "HP Envy x360", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x103c, 0x84e7, "HP Pavilion 15", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x103c, 0x869d, "HP", ALC236_FIXUP_HP_MUTE_LED), + SND_PCI_QUIRK(0x103c, 0x8724, "HP EliteBook 850 G7", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8729, "HP", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8736, "HP", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x8760, "HP", ALC285_FIXUP_HP_MUTE_LED), -- GitLab From 957cbca7317f7413e1bac555a6b567af06598b10 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 31 Dec 2020 15:05:46 +0000 Subject: [PATCH 1316/1894] KVM: arm64: Remove spurious semicolon in reg_to_encoding() Although not a problem right now, it flared up while working on some other aspects of the code-base. Remove the useless semicolon. Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index d46e7f706cb06..42ccc27fb684c 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -923,7 +923,7 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, #define reg_to_encoding(x) \ sys_reg((u32)(x)->Op0, (u32)(x)->Op1, \ - (u32)(x)->CRn, (u32)(x)->CRm, (u32)(x)->Op2); + (u32)(x)->CRn, (u32)(x)->CRm, (u32)(x)->Op2) /* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */ #define DBG_BCR_BVR_WCR_WVR_EL1(n) \ -- GitLab From 4f8af077a02eed4831885048a10e04daa4e61a72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Mon, 28 Dec 2020 14:46:07 +0000 Subject: [PATCH 1317/1894] docs: Fix reST markup when linking to sections MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During the process of converting the documentation to reST, some links were converted using the following wrong syntax (and sometimes using %20 instead of spaces): `Display text <#section-name-in-html>`__ This syntax isn't valid according to the docutils' spec [1], but more importantly, it is specific to HTML, since it uses '#' to link to an HTML anchor. The right syntax would instead use a docutils hyperlink reference as the embedded URI to point to the section [2], that is: `Display text
`__ This syntax works in both HTML and PDF. The LaTeX toolchain doesn't mind the HTML anchor syntax when generating the pdf documentation (make pdfdocs), that is, the build succeeds but the links don't work, but that syntax causes errors when trying to build using the not-yet-merged rst2pdf: ValueError: format not resolved, probably missing URL scheme or undefined destination target for 'Forcing%20Quiescent%20States' So, use the correct syntax in order to have it work in all different output formats. [1]: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#reference-names [2]: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#embedded-uris-and-aliases Fixes: ccc9971e2147 ("docs: rcu: convert some articles from html to ReST") Fixes: c8cce10a62aa ("docs: Fix the reference labels in Locking.rst") Fixes: e548cdeffcd8 ("docs-rst: convert kernel-locking to ReST") Fixes: 7ddedebb03b7 ("ALSA: doc: ReSTize writing-an-alsa-driver document") Signed-off-by: Nícolas F. R. A. Prado Reviewed-by: Takashi Iwai Reviewed-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/20201228144537.135353-1-nfraprado@protonmail.com Signed-off-by: Jonathan Corbet --- .../Tree-RCU-Memory-Ordering.rst | 8 ++++---- .../RCU/Design/Requirements/Requirements.rst | 20 +++++++++---------- Documentation/kernel-hacking/locking.rst | 8 ++++---- .../kernel-api/writing-an-alsa-driver.rst | 16 +++++++-------- 4 files changed, 26 insertions(+), 26 deletions(-) diff --git a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst index 83ae3b79a6439..a648b423ba0eb 100644 --- a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst +++ b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst @@ -473,7 +473,7 @@ read-side critical sections that follow the idle period (the oval near the bottom of the diagram above). Plumbing this into the full grace-period execution is described -`below <#Forcing%20Quiescent%20States>`__. +`below `__. CPU-Hotplug Interface ^^^^^^^^^^^^^^^^^^^^^ @@ -494,7 +494,7 @@ mask to detect CPUs having gone offline since the beginning of this grace period. Plumbing this into the full grace-period execution is described -`below <#Forcing%20Quiescent%20States>`__. +`below `__. Forcing Quiescent States ^^^^^^^^^^^^^^^^^^^^^^^^ @@ -532,7 +532,7 @@ from other CPUs. | RCU. But this diagram is complex enough as it is, so simplicity | | overrode accuracy. You can think of it as poetic license, or you can | | think of it as misdirection that is resolved in the | -| `stitched-together diagram <#Putting%20It%20All%20Together>`__. | +| `stitched-together diagram `__. | +-----------------------------------------------------------------------+ Grace-Period Cleanup @@ -596,7 +596,7 @@ maintain ordering. For example, if the callback function wakes up a task that runs on some other CPU, proper ordering must in place in both the callback function and the task being awakened. To see why this is important, consider the top half of the `grace-period -cleanup <#Grace-Period%20Cleanup>`__ diagram. The callback might be +cleanup`_ diagram. The callback might be running on a CPU corresponding to the leftmost leaf ``rcu_node`` structure, and awaken a task that is to run on a CPU corresponding to the rightmost leaf ``rcu_node`` structure, and the grace-period kernel diff --git a/Documentation/RCU/Design/Requirements/Requirements.rst b/Documentation/RCU/Design/Requirements/Requirements.rst index e8c84fcc05071..d4c9a016074b3 100644 --- a/Documentation/RCU/Design/Requirements/Requirements.rst +++ b/Documentation/RCU/Design/Requirements/Requirements.rst @@ -45,7 +45,7 @@ requirements: #. `Other RCU Flavors`_ #. `Possible Future Changes`_ -This is followed by a `summary <#Summary>`__, however, the answers to +This is followed by a summary_, however, the answers to each quick quiz immediately follows the quiz. Select the big white space with your mouse to see the answer. @@ -1096,7 +1096,7 @@ memory barriers. | case, voluntary context switch) within an RCU read-side critical | | section. However, sleeping locks may be used within userspace RCU | | read-side critical sections, and also within Linux-kernel sleepable | -| RCU `(SRCU) <#Sleepable%20RCU>`__ read-side critical sections. In | +| RCU `(SRCU) `__ read-side critical sections. In | | addition, the -rt patchset turns spinlocks into a sleeping locks so | | that the corresponding critical sections can be preempted, which also | | means that these sleeplockified spinlocks (but not other sleeping | @@ -1186,7 +1186,7 @@ non-preemptible (``CONFIG_PREEMPT=n``) kernels, and thus `tiny RCU `__ was born. Josh Triplett has since taken over the small-memory banner with his `Linux kernel tinification `__ -project, which resulted in `SRCU <#Sleepable%20RCU>`__ becoming optional +project, which resulted in `SRCU `__ becoming optional for those kernels not needing it. The remaining performance requirements are, for the most part, @@ -1457,8 +1457,8 @@ will vary as the value of ``HZ`` varies, and can also be changed using the relevant Kconfig options and kernel boot parameters. RCU currently does not do much sanity checking of these parameters, so please use caution when changing them. Note that these forward-progress measures -are provided only for RCU, not for `SRCU <#Sleepable%20RCU>`__ or `Tasks -RCU <#Tasks%20RCU>`__. +are provided only for RCU, not for `SRCU `__ or `Tasks +RCU`_. RCU takes the following steps in ``call_rcu()`` to encourage timely invocation of callbacks when any given non-\ ``rcu_nocbs`` CPU has @@ -1477,8 +1477,8 @@ encouragement was provided: Again, these are default values when running at ``HZ=1000``, and can be overridden. Again, these forward-progress measures are provided only for -RCU, not for `SRCU <#Sleepable%20RCU>`__ or `Tasks -RCU <#Tasks%20RCU>`__. Even for RCU, callback-invocation forward +RCU, not for `SRCU `__ or `Tasks +RCU`_. Even for RCU, callback-invocation forward progress for ``rcu_nocbs`` CPUs is much less well-developed, in part because workloads benefiting from ``rcu_nocbs`` CPUs tend to invoke ``call_rcu()`` relatively infrequently. If workloads emerge that need @@ -1920,7 +1920,7 @@ Hotplug CPU The Linux kernel supports CPU hotplug, which means that CPUs can come and go. It is of course illegal to use any RCU API member from an -offline CPU, with the exception of `SRCU <#Sleepable%20RCU>`__ read-side +offline CPU, with the exception of `SRCU `__ read-side critical sections. This requirement was present from day one in DYNIX/ptx, but on the other hand, the Linux kernel's CPU-hotplug implementation is “interesting.” @@ -2177,7 +2177,7 @@ handles these states differently: However, RCU must be reliably informed as to whether any given CPU is currently in the idle loop, and, for ``NO_HZ_FULL``, also whether that CPU is executing in usermode, as discussed -`earlier <#Energy%20Efficiency>`__. It also requires that the +`earlier `__. It also requires that the scheduling-clock interrupt be enabled when RCU needs it to be: #. If a CPU is either idle or executing in usermode, and RCU believes it @@ -2294,7 +2294,7 @@ Performance, Scalability, Response Time, and Reliability ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Expanding on the `earlier -discussion <#Performance%20and%20Scalability>`__, RCU is used heavily by +discussion `__, RCU is used heavily by hot code paths in performance-critical portions of the Linux kernel's networking, security, virtualization, and scheduling code paths. RCU must therefore use efficient implementations, especially in its diff --git a/Documentation/kernel-hacking/locking.rst b/Documentation/kernel-hacking/locking.rst index 6ed806e6061bb..c3448929a824a 100644 --- a/Documentation/kernel-hacking/locking.rst +++ b/Documentation/kernel-hacking/locking.rst @@ -118,11 +118,11 @@ spinlock, but you may block holding a mutex. If you can't lock a mutex, your task will suspend itself, and be woken up when the mutex is released. This means the CPU can do something else while you are waiting. There are many cases when you simply can't sleep (see -`What Functions Are Safe To Call From Interrupts? <#sleeping-things>`__), +`What Functions Are Safe To Call From Interrupts?`_), and so have to use a spinlock instead. Neither type of lock is recursive: see -`Deadlock: Simple and Advanced <#deadlock>`__. +`Deadlock: Simple and Advanced`_. Locks and Uniprocessor Kernels ------------------------------ @@ -179,7 +179,7 @@ perfect world). Note that you can also use spin_lock_irq() or spin_lock_irqsave() here, which stop hardware interrupts -as well: see `Hard IRQ Context <#hard-irq-context>`__. +as well: see `Hard IRQ Context`_. This works perfectly for UP as well: the spin lock vanishes, and this macro simply becomes local_bh_disable() @@ -230,7 +230,7 @@ The Same Softirq ~~~~~~~~~~~~~~~~ The same softirq can run on the other CPUs: you can use a per-CPU array -(see `Per-CPU Data <#per-cpu-data>`__) for better performance. If you're +(see `Per-CPU Data`_) for better performance. If you're going so far as to use a softirq, you probably care about scalable performance enough to justify the extra complexity. diff --git a/Documentation/sound/kernel-api/writing-an-alsa-driver.rst b/Documentation/sound/kernel-api/writing-an-alsa-driver.rst index 73bbd59afc33a..e6365836fa8bd 100644 --- a/Documentation/sound/kernel-api/writing-an-alsa-driver.rst +++ b/Documentation/sound/kernel-api/writing-an-alsa-driver.rst @@ -71,7 +71,7 @@ core/oss The codes for PCM and mixer OSS emulation modules are stored in this directory. The rawmidi OSS emulation is included in the ALSA rawmidi code since it's quite small. The sequencer code is stored in -``core/seq/oss`` directory (see `below <#core-seq-oss>`__). +``core/seq/oss`` directory (see `below `__). core/seq ~~~~~~~~ @@ -382,7 +382,7 @@ where ``enable[dev]`` is the module option. Each time the ``probe`` callback is called, check the availability of the device. If not available, simply increment the device index and returns. dev will be incremented also later (`step 7 -<#set-the-pci-driver-data-and-return-zero>`__). +<7) Set the PCI driver data and return zero._>`__). 2) Create a card instance ~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -450,10 +450,10 @@ field contains the information shown in ``/proc/asound/cards``. 5) Create other components, such as mixer, MIDI, etc. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Here you define the basic components such as `PCM <#PCM-Interface>`__, -mixer (e.g. `AC97 <#API-for-AC97-Codec>`__), MIDI (e.g. -`MPU-401 <#MIDI-MPU401-UART-Interface>`__), and other interfaces. -Also, if you want a `proc file <#Proc-Interface>`__, define it here, +Here you define the basic components such as `PCM `__, +mixer (e.g. `AC97 `__), MIDI (e.g. +`MPU-401 `__), and other interfaces. +Also, if you want a `proc file `__, define it here, too. 6) Register the card instance. @@ -941,7 +941,7 @@ The allocation of an interrupt source is done like this: chip->irq = pci->irq; where :c:func:`snd_mychip_interrupt()` is the interrupt handler -defined `later <#pcm-interface-interrupt-handler>`__. Note that +defined `later `__. Note that ``chip->irq`` should be defined only when :c:func:`request_irq()` succeeded. @@ -3104,7 +3104,7 @@ processing the output stream in the irq handler. If the MPU-401 interface shares its interrupt with the other logical devices on the card, set ``MPU401_INFO_IRQ_HOOK`` (see -`below <#MIDI-Interrupt-Handler>`__). +`below `__). Usually, the port address corresponds to the command port and port + 1 corresponds to the data port. If not, you may change the ``cport`` -- GitLab From 81e79063004f32aae5196f0c929192e69aca1694 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 26 Dec 2020 09:44:33 -0800 Subject: [PATCH 1318/1894] Documentation: admin: early_param()s are also listed in kernel-parameters Add info that "early_param()" kernel boot parameters are also listed in kernel-parameters.txt. Signed-off-by: Randy Dunlap Cc: Jonathan Corbet Cc: linux-doc@vger.kernel.org Link: https://lore.kernel.org/r/20201226174433.7885-1-rdunlap@infradead.org Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/kernel-parameters.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.rst b/Documentation/admin-guide/kernel-parameters.rst index 06fb1b4aa849c..682ab28b5c94b 100644 --- a/Documentation/admin-guide/kernel-parameters.rst +++ b/Documentation/admin-guide/kernel-parameters.rst @@ -3,8 +3,8 @@ The kernel's command-line parameters ==================================== -The following is a consolidated list of the kernel parameters as -implemented by the __setup(), core_param() and module_param() macros +The following is a consolidated list of the kernel parameters as implemented +by the __setup(), early_param(), core_param() and module_param() macros and sorted into English Dictionary order (defined as ignoring all punctuation and sorting digits before letters in a case insensitive manner), and with descriptions where known. -- GitLab From c7e74b3c7b1cf4c04164ff16e6c047232fd3bcef Mon Sep 17 00:00:00 2001 From: Liao Pingfang Date: Sun, 27 Dec 2020 15:15:19 +0800 Subject: [PATCH 1319/1894] docs/mm: concepts.rst: Correct the threshold to low watermark It should be "low watermark" where we wake up kswapd daemon. Signed-off-by: Liao Pingfang Link: https://lore.kernel.org/r/1609053319-3112-1-git-send-email-winndows@163.com Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/mm/concepts.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/admin-guide/mm/concepts.rst b/Documentation/admin-guide/mm/concepts.rst index fa0974fbeae7c..b966fcff993b2 100644 --- a/Documentation/admin-guide/mm/concepts.rst +++ b/Documentation/admin-guide/mm/concepts.rst @@ -184,7 +184,7 @@ pages either asynchronously or synchronously, depending on the state of the system. When the system is not loaded, most of the memory is free and allocation requests will be satisfied immediately from the free pages supply. As the load increases, the amount of the free pages goes -down and when it reaches a certain threshold (high watermark), an +down and when it reaches a certain threshold (low watermark), an allocation request will awaken the ``kswapd`` daemon. It will asynchronously scan memory pages and either just free them if the data they contain is available elsewhere, or evict to the backing storage -- GitLab From 0be1511f516e2b9766597336cedc6dc6d19e5af1 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 28 Dec 2020 15:12:12 -0800 Subject: [PATCH 1320/1894] Documentation: doc-guide: fixes to sphinx.rst Various fixes to sphinx.rst: - eliminate a double-space between 2 words - grammar/wording - punctuation - call rows in a table 'rows' instead of 'columns' (or does Sphinx call everything a column?) - It seems that "amdfonts" should be "amsfonts". I can't find any amdfonts. Signed-off-by: Randy Dunlap Reviewed-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/20201228231212.22448-1-rdunlap@infradead.org Signed-off-by: Jonathan Corbet --- Documentation/doc-guide/sphinx.rst | 32 +++++++++++++++--------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/Documentation/doc-guide/sphinx.rst b/Documentation/doc-guide/sphinx.rst index 2fb2ff297d69c..36ac2166ad675 100644 --- a/Documentation/doc-guide/sphinx.rst +++ b/Documentation/doc-guide/sphinx.rst @@ -48,12 +48,12 @@ or ``virtualenv``, depending on how your distribution packaged Python 3. those versions, you should run ``pip install 'docutils==0.12'``. #) It is recommended to use the RTD theme for html output. Depending - on the Sphinx version, it should be installed in separate, + on the Sphinx version, it should be installed separately, with ``pip install sphinx_rtd_theme``. - #) Some ReST pages contain math expressions. Due to the way Sphinx work, + #) Some ReST pages contain math expressions. Due to the way Sphinx works, those expressions are written using LaTeX notation. It needs texlive - installed with amdfonts and amsmath in order to evaluate them. + installed with amsfonts and amsmath in order to evaluate them. In summary, if you want to install Sphinx version 1.7.9, you should do:: @@ -128,7 +128,7 @@ Sphinx Build ============ The usual way to generate the documentation is to run ``make htmldocs`` or -``make pdfdocs``. There are also other formats available, see the documentation +``make pdfdocs``. There are also other formats available: see the documentation section of ``make help``. The generated documentation is placed in format-specific subdirectories under ``Documentation/output``. @@ -303,17 +303,17 @@ and *targets* (e.g. a ref to ``:ref:`last row ``` / :ref:`last row - head col 3 - head col 4 - * - column 1 + * - row 1 - field 1.1 - field 1.2 with autospan - * - column 2 + * - row 2 - field 2.1 - :rspan:`1` :cspan:`1` field 2.2 - 3.3 * .. _`last row`: - - column 3 + - row 3 Rendered as: @@ -325,17 +325,17 @@ Rendered as: - head col 3 - head col 4 - * - column 1 + * - row 1 - field 1.1 - field 1.2 with autospan - * - column 2 + * - row 2 - field 2.1 - :rspan:`1` :cspan:`1` field 2.2 - 3.3 * .. _`last row`: - - column 3 + - row 3 Cross-referencing ----------------- @@ -361,7 +361,7 @@ Figures & Images If you want to add an image, you should use the ``kernel-figure`` and ``kernel-image`` directives. E.g. to insert a figure with a scalable -image format use SVG (:ref:`svg_image_example`):: +image format, use SVG (:ref:`svg_image_example`):: .. kernel-figure:: svg_image.svg :alt: simple SVG image @@ -375,7 +375,7 @@ image format use SVG (:ref:`svg_image_example`):: SVG image example -The kernel figure (and image) directive support **DOT** formatted files, see +The kernel figure (and image) directive supports **DOT** formatted files, see * DOT: http://graphviz.org/pdf/dotguide.pdf * Graphviz: http://www.graphviz.org/content/dot-language @@ -394,7 +394,7 @@ A simple example (:ref:`hello_dot_file`):: DOT's hello world example -Embed *render* markups (or languages) like Graphviz's **DOT** is provided by the +Embedded *render* markups (or languages) like Graphviz's **DOT** are provided by the ``kernel-render`` directives.:: .. kernel-render:: DOT @@ -406,7 +406,7 @@ Embed *render* markups (or languages) like Graphviz's **DOT** is provided by the } How this will be rendered depends on the installed tools. If Graphviz is -installed, you will see an vector image. If not the raw markup is inserted as +installed, you will see a vector image. If not, the raw markup is inserted as *literal-block* (:ref:`hello_dot_render`). .. _hello_dot_render: @@ -421,8 +421,8 @@ installed, you will see an vector image. If not the raw markup is inserted as The *render* directive has all the options known from the *figure* directive, plus option ``caption``. If ``caption`` has a value, a *figure* node is -inserted. If not, a *image* node is inserted. A ``caption`` is also needed, if -you want to refer it (:ref:`hello_svg_render`). +inserted. If not, an *image* node is inserted. A ``caption`` is also needed, if +you want to refer to it (:ref:`hello_svg_render`). Embedded **SVG**:: -- GitLab From 798ed7800e20dfc3304de1b99df5ac71ad48966b Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Sun, 20 Dec 2020 07:09:27 +0100 Subject: [PATCH 1321/1894] atomic: remove further references to atomic_ops Commit f0400a77ebdc ("atomic: Delete obsolete documentation") removed ./Documentation/core-api/atomic_ops.rst, but missed to remove further references to that file. Hence, make htmldocs warns: Documentation/core-api/index.rst:53: WARNING: toctree contains reference to nonexisting document 'core-api/atomic_ops' Also, ./scripts/get_maintainer.pl --self-test=patterns warns: warning: no file matches F: Documentation/core-api/atomic_ops.rst Remove further references to ./Documentation/core-api/atomic_ops.rst. Fixes: f0400a77ebdc ("atomic: Delete obsolete documentation") Signed-off-by: Lukas Bulwahn Link: https://lore.kernel.org/r/20201220060927.21582-1-lukas.bulwahn@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/core-api/index.rst | 1 - MAINTAINERS | 1 - 2 files changed, 2 deletions(-) diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst index 69171b1799f21..f1c9d20bd42dd 100644 --- a/Documentation/core-api/index.rst +++ b/Documentation/core-api/index.rst @@ -53,7 +53,6 @@ How Linux keeps everything from happening at the same time. See .. toctree:: :maxdepth: 1 - atomic_ops refcount-vs-atomic irq/index local_ops diff --git a/MAINTAINERS b/MAINTAINERS index 546aa66428c9f..ddc8e90c12247 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10260,7 +10260,6 @@ S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git dev F: Documentation/atomic_bitops.txt F: Documentation/atomic_t.txt -F: Documentation/core-api/atomic_ops.rst F: Documentation/core-api/refcount-vs-atomic.rst F: Documentation/litmus-tests/ F: Documentation/memory-barriers.txt -- GitLab From 3d5c5fdcee0f9a94deb0472e594706018b00aa31 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 1 Jan 2021 09:38:52 +0100 Subject: [PATCH 1322/1894] ALSA: hda/hdmi: Fix incorrect mutex unlock in silent_stream_disable() The silent_stream_disable() function introduced by the commit b1a5039759cb ("ALSA: hda/hdmi: fix silent stream for first playback to DP") takes the per_pin->lock mutex, but it unlocks the wrong one, spec->pcm_lock, which causes a deadlock. This patch corrects it. Fixes: b1a5039759cb ("ALSA: hda/hdmi: fix silent stream for first playback to DP") Reported-by: Jan Alexander Steffens (heftig) Cc: Acked-by: Kai Vehmanen Link: https://lore.kernel.org/r/20210101083852.12094-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_hdmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 1e4a4b83fbf6f..74d246a0dc6de 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -1733,7 +1733,7 @@ static void silent_stream_disable(struct hda_codec *codec, per_pin->silent_stream = false; unlock_out: - mutex_unlock(&spec->pcm_lock); + mutex_unlock(&per_pin->lock); } /* update ELD and jack state via audio component */ -- GitLab From cedd1862be7e666be87ec824dabc6a2b05618f36 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 28 Dec 2020 11:40:22 -0800 Subject: [PATCH 1323/1894] depmod: handle the case of /sbin/depmod without /sbin in PATH Commit 436e980e2ed5 ("kbuild: don't hardcode depmod path") stopped hard-coding the path of depmod, but in the process caused trouble for distributions that had that /sbin location, but didn't have it in the PATH (generally because /sbin is limited to the super-user path). Work around it for now by just adding /sbin to the end of PATH in the depmod.sh script. Reported-and-tested-by: Sedat Dilek Signed-off-by: Linus Torvalds --- scripts/depmod.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/depmod.sh b/scripts/depmod.sh index e083bcae343f3..3643b4f896ede 100755 --- a/scripts/depmod.sh +++ b/scripts/depmod.sh @@ -15,6 +15,8 @@ if ! test -r System.map ; then exit 0 fi +# legacy behavior: "depmod" in /sbin, no /sbin in PATH +PATH="$PATH:/sbin" if [ -z $(command -v $DEPMOD) ]; then echo "Warning: 'make modules_install' requires $DEPMOD. Please install it." >&2 echo "This is probably in the kmod package." >&2 -- GitLab From fd16931a2f518a32753920ff20895e5cf04c8ff1 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 13 Dec 2020 15:39:29 +0100 Subject: [PATCH 1324/1894] crypto: arm/chacha-neon - add missing counter increment Commit 86cd97ec4b943af3 ("crypto: arm/chacha-neon - optimize for non-block size multiples") refactored the chacha block handling in the glue code in a way that may result in the counter increment to be omitted when calling chacha_block_xor_neon() to process a full block. This violates the skcipher API, which requires that the output IV is suitable for handling more input as long as the preceding input has been presented in round multiples of the block size. Also, the same code is exposed via the chacha library interface whose callers may actually rely on this increment to occur even for final blocks that are smaller than the chacha block size. So increment the counter after calling chacha_block_xor_neon(). Fixes: 86cd97ec4b943af3 ("crypto: arm/chacha-neon - optimize for non-block size multiples") Reported-by: Eric Biggers Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/chacha-glue.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/crypto/chacha-glue.c b/arch/arm/crypto/chacha-glue.c index 7b5cf8430c6dc..cdde8fd01f8f9 100644 --- a/arch/arm/crypto/chacha-glue.c +++ b/arch/arm/crypto/chacha-glue.c @@ -60,6 +60,7 @@ static void chacha_doneon(u32 *state, u8 *dst, const u8 *src, chacha_block_xor_neon(state, d, s, nrounds); if (d != dst) memcpy(dst, buf, bytes); + state[12]++; } } -- GitLab From 0aa171e9b267ce7c52d3a3df7bc9c1fc0203dec5 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 2 Jan 2021 14:59:09 +0100 Subject: [PATCH 1325/1894] crypto: ecdh - avoid buffer overflow in ecdh_set_secret() Pavel reports that commit 17858b140bf4 ("crypto: ecdh - avoid unaligned accesses in ecdh_set_secret()") fixes one problem but introduces another: the unconditional memcpy() introduced by that commit may overflow the target buffer if the source data is invalid, which could be the result of intentional tampering. So check params.key_size explicitly against the size of the target buffer before validating the key further. Fixes: 17858b140bf4 ("crypto: ecdh - avoid unaligned accesses in ecdh_set_secret()") Reported-by: Pavel Machek Cc: Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- crypto/ecdh.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crypto/ecdh.c b/crypto/ecdh.c index d56b8603dec95..96f80c8f8e304 100644 --- a/crypto/ecdh.c +++ b/crypto/ecdh.c @@ -39,7 +39,8 @@ static int ecdh_set_secret(struct crypto_kpp *tfm, const void *buf, struct ecdh params; unsigned int ndigits; - if (crypto_ecdh_decode_key(buf, len, ¶ms) < 0) + if (crypto_ecdh_decode_key(buf, len, ¶ms) < 0 || + params.key_size > sizeof(ctx->private_key)) return -EINVAL; ndigits = ecdh_supported_curve(params.curve_id); -- GitLab From 04901aab40ea3779f6fc6383ef74d8e130e817bf Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 30 Dec 2020 21:24:18 -0800 Subject: [PATCH 1326/1894] bpf: Fix a task_iter bug caused by a merge conflict resolution Latest bpf tree has a bug for bpf_iter selftest: $ ./test_progs -n 4/25 test_bpf_sk_storage_get:PASS:bpf_iter_bpf_sk_storage_helpers__open_and_load 0 nsec test_bpf_sk_storage_get:PASS:socket 0 nsec ... do_dummy_read:PASS:read 0 nsec test_bpf_sk_storage_get:FAIL:bpf_map_lookup_elem map value wasn't set correctly (expected 1792, got -1, err=0) #4/25 bpf_sk_storage_get:FAIL #4 bpf_iter:FAIL Summary: 0/0 PASSED, 0 SKIPPED, 2 FAILED When doing merge conflict resolution, Commit 4bfc4714849d missed to save curr_task to seq_file private data. The task pointer in seq_file private data is passed to bpf program. This caused NULL-pointer task passed to bpf program which will immediately return upon checking whether task pointer is NULL. This patch added back the assignment of curr_task to seq_file private data and fixed the issue. Fixes: 4bfc4714849d ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf") Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20201231052418.577024-1-yhs@fb.com --- kernel/bpf/task_iter.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index 3efe38191d1cf..175b7b42bfc46 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -159,6 +159,7 @@ again: } /* set info->task and info->tid */ + info->task = curr_task; if (curr_tid == info->tid) { curr_fd = info->fd; } else { -- GitLab From b0e1306302018d876472ed074c1bfaa8020bf9df Mon Sep 17 00:00:00 2001 From: Timon Reinold Date: Sat, 2 Jan 2021 22:08:35 +0100 Subject: [PATCH 1327/1894] ALSA: usb-audio: Add quirk for RC-505 BOSS RC-505 (shown by lsusb as "Roland Corp. RC-505") does require the same quirk as these other BOSS devices. Without this quirk it is neither possible to capture audio from nor to write audio to the RC-505. Both just result in an empty audio stream. With these changes both capture and playback seem to work quite fine. MIDI funtionality was not tested. Tested-by: Harry Reinold Signed-off-by: Timon Reinold Link: https://lore.kernel.org/r/20210102210835.21268-1-tirei@agon.one Signed-off-by: Takashi Iwai --- sound/usb/implicit.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/implicit.c b/sound/usb/implicit.c index 70b9777b2e639..931042a6a051e 100644 --- a/sound/usb/implicit.c +++ b/sound/usb/implicit.c @@ -74,6 +74,7 @@ static const struct snd_usb_implicit_fb_match playback_implicit_fb_quirks[] = { /* No quirk for playback but with capture quirk (see below) */ IMPLICIT_FB_SKIP_DEV(0x0582, 0x0130), /* BOSS BR-80 */ + IMPLICIT_FB_SKIP_DEV(0x0582, 0x0171), /* BOSS RC-505 */ IMPLICIT_FB_SKIP_DEV(0x0582, 0x0189), /* BOSS GT-100v2 */ IMPLICIT_FB_SKIP_DEV(0x0582, 0x01d6), /* BOSS GT-1 */ IMPLICIT_FB_SKIP_DEV(0x0582, 0x01d8), /* BOSS Katana */ @@ -86,6 +87,7 @@ static const struct snd_usb_implicit_fb_match playback_implicit_fb_quirks[] = { /* Implicit feedback quirk table for capture: only FIXED type */ static const struct snd_usb_implicit_fb_match capture_implicit_fb_quirks[] = { IMPLICIT_FB_FIXED_DEV(0x0582, 0x0130, 0x0d, 0x01), /* BOSS BR-80 */ + IMPLICIT_FB_FIXED_DEV(0x0582, 0x0171, 0x0d, 0x01), /* BOSS RC-505 */ IMPLICIT_FB_FIXED_DEV(0x0582, 0x0189, 0x0d, 0x01), /* BOSS GT-100v2 */ IMPLICIT_FB_FIXED_DEV(0x0582, 0x01d6, 0x0d, 0x01), /* BOSS GT-1 */ IMPLICIT_FB_FIXED_DEV(0x0582, 0x01d8, 0x0d, 0x01), /* BOSS Katana */ -- GitLab From 36a106a4c1c100d55ba3d32a21ef748cfcd4fa99 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Jan 2021 22:42:39 +0100 Subject: [PATCH 1328/1894] block: rsxx: select CONFIG_CRC32 Without crc32, the driver fails to link: arm-linux-gnueabi-ld: drivers/block/rsxx/config.o: in function `rsxx_load_config': config.c:(.text+0x124): undefined reference to `crc32_le' Fixes: 8722ff8cdbfa ("block: IBM RamSan 70/80 device driver") Signed-off-by: Arnd Bergmann Signed-off-by: Jens Axboe --- drivers/block/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 262326973ee01..583b671b1d2d2 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -445,6 +445,7 @@ config BLK_DEV_RBD config BLK_DEV_RSXX tristate "IBM Flash Adapter 900GB Full Height PCIe Device Driver" depends on PCI + select CRC32 help Device driver for IBM's high speed PCIe SSD storage device: Flash Adapter 900GB Full Height. -- GitLab From 19cd3403cb0d522dd5e10188eef85817de29e26e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Jan 2021 22:43:09 +0100 Subject: [PATCH 1329/1894] lightnvm: select CONFIG_CRC32 Without CRC32 support, this fails to link: arm-linux-gnueabi-ld: drivers/lightnvm/pblk-init.o: in function `pblk_init': pblk-init.c:(.text+0x2654): undefined reference to `crc32_le' arm-linux-gnueabi-ld: drivers/lightnvm/pblk-init.o: in function `pblk_exit': pblk-init.c:(.text+0x2a7c): undefined reference to `crc32_le' Fixes: a4bd217b4326 ("lightnvm: physical block device (pblk) target") Signed-off-by: Arnd Bergmann Signed-off-by: Jens Axboe --- drivers/lightnvm/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/lightnvm/Kconfig b/drivers/lightnvm/Kconfig index 8f39f9ba5c80e..4c2ce210c1237 100644 --- a/drivers/lightnvm/Kconfig +++ b/drivers/lightnvm/Kconfig @@ -19,6 +19,7 @@ if NVM config NVM_PBLK tristate "Physical Block Device Open-Channel SSD target" + select CRC32 help Allows an open-channel SSD to be exposed as a block device to the host. The target assumes the device exposes raw flash and must be -- GitLab From e71ba9452f0b5b2e8dc8aa5445198cd9214a6a62 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 3 Jan 2021 15:55:30 -0800 Subject: [PATCH 1330/1894] Linux 5.11-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3d328b7ab2003..8b2c3f88ee5ea 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 11 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Kleptomaniac Octopus # *DOCUMENTATION* -- GitLab From 4f8b848788f77c7f5c3bd98febce66b7aa14785f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Jan 2021 22:43:52 +0100 Subject: [PATCH 1331/1894] zonefs: select CONFIG_CRC32 When CRC32 is disabled, zonefs cannot be linked: ld: fs/zonefs/super.o: in function `zonefs_fill_super': Add a Kconfig 'select' statement for it. Fixes: 8dcc1a9d90c1 ("fs: New zonefs file system") Signed-off-by: Arnd Bergmann Signed-off-by: Damien Le Moal --- fs/zonefs/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/zonefs/Kconfig b/fs/zonefs/Kconfig index ef2697b78820d..827278f937fe7 100644 --- a/fs/zonefs/Kconfig +++ b/fs/zonefs/Kconfig @@ -3,6 +3,7 @@ config ZONEFS_FS depends on BLOCK depends on BLK_DEV_ZONED select FS_IOMAP + select CRC32 help zonefs is a simple file system which exposes zones of a zoned block device (e.g. host-managed or host-aware SMR disk drives) as files. -- GitLab From 5136bb8c8b5872676f397b27f93a30568baf3a25 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Sat, 19 Dec 2020 17:24:56 +0100 Subject: [PATCH 1332/1894] MAINTAINERS: adjust GCC PLUGINS after gcc-plugin.sh removal Commit 1e860048c53e ("gcc-plugins: simplify GCC plugin-dev capability test") removed ./scripts/gcc-plugin.sh, but missed to adjust MAINTAINERS. Hence, ./scripts/get_maintainers.pl --self-test=patterns warns: warning: no file matches F: scripts/gcc-plugin.sh Adjust entries in GGC PLUGINS section after this file removal. Signed-off-by: Lukas Bulwahn Signed-off-by: Masahiro Yamada --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 6eff4f720c721..181c32c18aaf1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7363,7 +7363,6 @@ L: linux-hardening@vger.kernel.org S: Maintained F: Documentation/kbuild/gcc-plugins.rst F: scripts/Makefile.gcc-plugins -F: scripts/gcc-plugin.sh F: scripts/gcc-plugins/ GCOV BASED KERNEL PROFILING -- GitLab From d39648eb67ac851c7918c794424c266a5d2635b9 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 19 Dec 2020 09:08:05 -0800 Subject: [PATCH 1333/1894] kconfig: config script: add a little user help Give the user a clue about the problem along with the 35 lines of usage/help text. Signed-off-by: Randy Dunlap Cc: Andi Kleen Cc: Masahiro Yamada Cc: linux-kbuild@vger.kernel.org Signed-off-by: Masahiro Yamada --- scripts/config | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/config b/scripts/config index 8c8d7c3d7accc..ff88e2faefd35 100755 --- a/scripts/config +++ b/scripts/config @@ -223,6 +223,7 @@ while [ "$1" != "" ] ; do ;; *) + echo "bad command: $CMD" >&2 usage ;; esac -- GitLab From c0f975af1745391749e4306aa8081b9a4d2cced8 Mon Sep 17 00:00:00 2001 From: John Millikin Date: Wed, 23 Dec 2020 14:04:23 +0900 Subject: [PATCH 1334/1894] kconfig: Support building mconf with vendor sysroot ncurses Changes the final fallback path in the ncurses locator for mconf to support host compilers with a non-default sysroot. This is similar to the hardcoded search for ncurses under '/usr/include', but can support compilers that keep their default header and library directories elsewhere. For nconfig, do nothing because the only vendor compiler I'm aware of with this layout (Apple Clang) ships an ncurses version that's too old for nconfig anyway. Signed-off-by: John Millikin Signed-off-by: Masahiro Yamada --- scripts/kconfig/mconf-cfg.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/kconfig/mconf-cfg.sh b/scripts/kconfig/mconf-cfg.sh index aa68ec95620d6..fcd4acd4e9cbc 100755 --- a/scripts/kconfig/mconf-cfg.sh +++ b/scripts/kconfig/mconf-cfg.sh @@ -33,7 +33,9 @@ if [ -f /usr/include/ncurses/ncurses.h ]; then exit 0 fi -if [ -f /usr/include/ncurses.h ]; then +# As a final fallback before giving up, check if $HOSTCC knows of a default +# ncurses installation (e.g. from a vendor-specific sysroot). +if echo '#include ' | "${HOSTCC}" -E - >/dev/null 2>&1; then echo cflags=\"-D_GNU_SOURCE\" echo libs=\"-lncurses\" exit 0 -- GitLab From 0c36d88cff4d72149f94809303c5180b6f716d39 Mon Sep 17 00:00:00 2001 From: John Millikin Date: Wed, 23 Dec 2020 15:23:25 +0900 Subject: [PATCH 1335/1894] lib/raid6: Let $(UNROLL) rules work with macOS userland Older versions of BSD awk are fussy about the order of '-v' and '-f' flags, and require a space after the flag name. This causes build failures on platforms with an old awk, such as macOS and NetBSD. Since GNU awk and modern versions of BSD awk (distributed with FreeBSD/OpenBSD) are fine with either form, the definition of 'cmd_unroll' can be trivially tweaked to let the lib/raid6 Makefile work with both old and new awk flag dialects. Signed-off-by: John Millikin Signed-off-by: Masahiro Yamada --- lib/raid6/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index b4c0df6d706dc..c770570bfe4f2 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile @@ -48,7 +48,7 @@ endif endif quiet_cmd_unroll = UNROLL $@ - cmd_unroll = $(AWK) -f$(srctree)/$(src)/unroll.awk -vN=$* < $< > $@ + cmd_unroll = $(AWK) -v N=$* -f $(srctree)/$(src)/unroll.awk < $< > $@ targets += int1.c int2.c int4.c int8.c int16.c int32.c $(obj)/int%.c: $(src)/int.uc $(src)/unroll.awk FORCE -- GitLab From 9bba03d4473df0b707224d4d2067b62d1e1e2a77 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 23 Dec 2020 15:35:42 +0900 Subject: [PATCH 1336/1894] kconfig: remove 'kvmconfig' and 'xenconfig' shorthands Linux 5.10 is out. Remove the 'kvmconfig' and 'xenconfig' shorthands as previously announced. Signed-off-by: Masahiro Yamada --- scripts/kconfig/Makefile | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile index e46df0a2d4f9d..2c40e68853dde 100644 --- a/scripts/kconfig/Makefile +++ b/scripts/kconfig/Makefile @@ -94,16 +94,6 @@ configfiles=$(wildcard $(srctree)/kernel/configs/$@ $(srctree)/arch/$(SRCARCH)/c $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh -m .config $(configfiles) $(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig -PHONY += kvmconfig -kvmconfig: kvm_guest.config - @echo >&2 "WARNING: 'make $@' will be removed after Linux 5.10" - @echo >&2 " Please use 'make $<' instead." - -PHONY += xenconfig -xenconfig: xen.config - @echo >&2 "WARNING: 'make $@' will be removed after Linux 5.10" - @echo >&2 " Please use 'make $<' instead." - PHONY += tinyconfig tinyconfig: $(Q)$(MAKE) -f $(srctree)/Makefile allnoconfig tiny.config -- GitLab From d6c1ddd938d84a1adef7e19e8efc10e1b4df5034 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Wed, 30 Dec 2020 16:25:34 +0100 Subject: [PATCH 1337/1894] USB: serial: option: add Quectel EM160R-GL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New modem using ff/ff/30 for QCDM, ff/00/00 for AT and NMEA, and ff/ff/ff for RMNET/QMI. T: Bus=02 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=5000 MxCh= 0 D: Ver= 3.20 Cls=ef(misc ) Sub=02 Prot=01 MxPS= 9 #Cfgs= 1 P: Vendor=2c7c ProdID=0620 Rev= 4.09 S: Manufacturer=Quectel S: Product=EM160R-GL S: SerialNumber=e31cedc1 C:* #Ifs= 5 Cfg#= 1 Atr=a0 MxPwr=896mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=(none) E: Ad=81(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=88(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=8e(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=0f(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms Cc: stable@vger.kernel.org Signed-off-by: Bjørn Mork [ johan: add model comment ] Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 2c21e34235bbb..ee726a1067066 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1117,6 +1117,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM12, 0xff, 0xff, 0xff), .driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) | NUMEP2 }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM12, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, 0x0620, 0xff, 0xff, 0x30) }, /* EM160R-GL */ + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, 0x0620, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0xff, 0x10), -- GitLab From 42e85f90171a4ba59a1e1cedbbc30ce3f68f2317 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 15 Dec 2020 11:30:26 +0100 Subject: [PATCH 1338/1894] arm64/smp: Remove unused irq variable in arch_show_interrupts() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit arch/arm64/kernel/smp.c: In function ‘arch_show_interrupts’: arch/arm64/kernel/smp.c:808:16: warning: unused variable ‘irq’ [-Wunused-variable] 808 | unsigned int irq = irq_desc_get_irq(ipi_desc[i]); | ^~~ The removal of the last user forgot to remove the variable. Fixes: 5089bc51f81f ("arm64/smp: Use irq_desc_kstat_cpu() in arch_show_interrupts()") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20201215103026.2872532-1-geert+renesas@glider.be Signed-off-by: Catalin Marinas --- arch/arm64/kernel/smp.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 6bc3a3698c3d1..376343d6f13ae 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -807,7 +807,6 @@ int arch_show_interrupts(struct seq_file *p, int prec) unsigned int cpu, i; for (i = 0; i < NR_IPI; i++) { - unsigned int irq = irq_desc_get_irq(ipi_desc[i]); seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i, prec >= 4 ? " " : ""); for_each_online_cpu(cpu) -- GitLab From b614231dec7864a338ce85032aa3d2d7ea2bc46d Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Wed, 2 Dec 2020 23:34:58 -0800 Subject: [PATCH 1339/1894] arm64: mte: remove an ISB on kernel exit This ISB is unnecessary because we will soon do an ERET. Signed-off-by: Peter Collingbourne Link: https://linux-review.googlesource.com/id/I69f1ee6bb09b1372dd744a0e01cedaf090c8d448 Link: https://lore.kernel.org/r/20201203073458.2675400-1-pcc@google.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/entry.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 2a93fa5f4e49d..a8c3e7aaca749 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -182,7 +182,6 @@ alternative_else_nop_endif mrs_s \tmp2, SYS_GCR_EL1 bfi \tmp2, \tmp, #0, #16 msr_s SYS_GCR_EL1, \tmp2 - isb #endif .endm @@ -194,6 +193,7 @@ alternative_else_nop_endif ldr_l \tmp, gcr_kernel_excl mte_set_gcr \tmp, \tmp2 + isb 1: #endif .endm -- GitLab From 095507dc1350b3a2b8b39fdc05edba0c10859eca Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Fri, 18 Dec 2020 17:33:07 +0100 Subject: [PATCH 1340/1894] arm64: mm: Fix ARCH_LOW_ADDRESS_LIMIT when !CONFIG_ZONE_DMA Systems configured with CONFIG_ZONE_DMA32, CONFIG_ZONE_NORMAL and !CONFIG_ZONE_DMA will fail to properly setup ARCH_LOW_ADDRESS_LIMIT. The limit will default to ~0ULL, effectively spanning the whole memory, which is too high for a configuration that expects low memory to be capped at 4GB. Fix ARCH_LOW_ADDRESS_LIMIT by falling back to arm64_dma32_phys_limit when arm64_dma_phys_limit isn't set. arm64_dma32_phys_limit will honour CONFIG_ZONE_DMA32, or span the entire memory when not enabled. Fixes: 1a8e1cef7603 ("arm64: use both ZONE_DMA and ZONE_DMA32") Signed-off-by: Nicolas Saenz Julienne Link: https://lore.kernel.org/r/20201218163307.10150-1-nsaenzjulienne@suse.de Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/processor.h | 3 ++- arch/arm64/mm/init.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index ca2cd75d32863..69ad25fbeae4a 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -94,7 +94,8 @@ #endif /* CONFIG_ARM64_FORCE_52BIT */ extern phys_addr_t arm64_dma_phys_limit; -#define ARCH_LOW_ADDRESS_LIMIT (arm64_dma_phys_limit - 1) +extern phys_addr_t arm64_dma32_phys_limit; +#define ARCH_LOW_ADDRESS_LIMIT ((arm64_dma_phys_limit ? : arm64_dma32_phys_limit) - 1) struct debug_info { #ifdef CONFIG_HAVE_HW_BREAKPOINT diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 75addb36354aa..7deddf56f7c3e 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -59,7 +59,7 @@ EXPORT_SYMBOL(memstart_addr); * bit addressable memory area. */ phys_addr_t arm64_dma_phys_limit __ro_after_init; -static phys_addr_t arm64_dma32_phys_limit __ro_after_init; +phys_addr_t arm64_dma32_phys_limit __ro_after_init; #ifdef CONFIG_KEXEC_CORE /* -- GitLab From 26982a89cad77c0efc1c0c79bee0e3d75e9281d4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 21 Dec 2020 22:37:58 +0000 Subject: [PATCH 1341/1894] afs: Work around strnlen() oops with CONFIG_FORTIFIED_SOURCE=y AFS has a structured layout in its directory contents (AFS dirs are downloaded as files and parsed locally by the client for lookup/readdir). The slots in the directory are defined by union afs_xdr_dirent. This, however, only directly allows a name of a length that will fit into that union. To support a longer name, the next 1-8 contiguous entries are annexed to the first one and the name flows across these. afs_dir_iterate_block() uses strnlen(), limited to the space to the end of the page, to find out how long the name is. This worked fine until 6a39e62abbaf. With that commit, the compiler determines the size of the array and asserts that the string fits inside that array. This is a problem for AFS because we *expect* it to overflow one or more arrays. A similar problem also occurs in afs_dir_scan_block() when a directory file is being locally edited to avoid the need to redownload it. There strlen() was being used safely because each page has the last byte set to 0 when the file is downloaded and validated (in afs_dir_check_page()). Fix this by changing the afs_xdr_dirent union name field to an indeterminate-length array and dropping the overflow field. (Note that whilst looking at this, I realised that the calculation of the number of slots a dirent used is non-standard and not quite right, but I'll address that in a separate patch.) The issue can be triggered by something like: touch /afs/example.com/thisisaveryveryverylongname and it generates a report that looks like: detected buffer overflow in strnlen ------------[ cut here ]------------ kernel BUG at lib/string.c:1149! ... RIP: 0010:fortify_panic+0xf/0x11 ... Call Trace: afs_dir_iterate_block+0x12b/0x35b afs_dir_iterate+0x14e/0x1ce afs_do_lookup+0x131/0x417 afs_lookup+0x24f/0x344 lookup_open.isra.0+0x1bb/0x27d open_last_lookups+0x166/0x237 path_openat+0xe0/0x159 do_filp_open+0x48/0xa4 ? kmem_cache_alloc+0xf5/0x16e ? __clear_close_on_exec+0x13/0x22 ? _raw_spin_unlock+0xa/0xb do_sys_openat2+0x72/0xde do_sys_open+0x3b/0x58 do_syscall_64+0x2d/0x3a entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 6a39e62abbaf ("lib: string.h: detect intra-object overflow in fortified string functions") Reported-by: Marc Dionne Signed-off-by: David Howells Tested-by: Marc Dionne cc: Daniel Axtens --- fs/afs/xdr_fs.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/afs/xdr_fs.h b/fs/afs/xdr_fs.h index 94f1f398eefad..c926430fd08ad 100644 --- a/fs/afs/xdr_fs.h +++ b/fs/afs/xdr_fs.h @@ -54,10 +54,15 @@ union afs_xdr_dirent { __be16 hash_next; __be32 vnode; __be32 unique; - u8 name[16]; - u8 overflow[4]; /* if any char of the name (inc - * NUL) reaches here, consume - * the next dirent too */ + u8 name[]; + /* When determining the number of dirent slots needed to + * represent a directory entry, name should be assumed to be 16 + * bytes, due to a now-standardised (mis)calculation, but it is + * in fact 20 bytes in size. + * + * For names longer than (16 or) 20 bytes, extra slots should + * be annexed to this one using the extended_name format. + */ } u; u8 extended_name[32]; } __packed; -- GitLab From 366911cd762db02c2dd32fad1be96b72a66f205d Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 23 Dec 2020 10:39:57 +0000 Subject: [PATCH 1342/1894] afs: Fix directory entry size calculation The number of dirent records used by an AFS directory entry should be calculated using the assumption that there is a 16-byte name field in the first block, rather than a 20-byte name field (which is actually the case). This miscalculation is historic and effectively standard, so we have to use it. The calculation we need to use is: 1 + (((strlen(name) + 1) + 15) >> 5) where we are adding one to the strlen() result to account for the NUL termination. Fix this by the following means: (1) Create an inline function to do the calculation for a given name length. (2) Use the function to calculate the number of records used for a dirent in afs_dir_iterate_block(). Use this to move the over-end check out of the loop since it only needs to be done once. Further use this to only go through the loop for the 2nd+ records composing an entry. The only test there now is for if the record is allocated - and we already checked the first block at the top of the outer loop. (3) Add a max name length check in afs_dir_iterate_block(). (4) Make afs_edit_dir_add() and afs_edit_dir_remove() use the function from (1) to calculate the number of blocks rather than doing it incorrectly themselves. Fixes: 63a4681ff39c ("afs: Locally edit directory data for mkdir/create/unlink/...") Fixes: ^1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: David Howells Tested-by: Marc Dionne --- fs/afs/dir.c | 49 ++++++++++++++++++++------------------ fs/afs/dir_edit.c | 6 ++--- fs/afs/xdr_fs.h | 14 ++++++++++- include/trace/events/afs.h | 2 ++ 4 files changed, 43 insertions(+), 28 deletions(-) diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 9068d5578a26f..7bd659ad959ec 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -350,7 +350,7 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode, unsigned blkoff) { union afs_xdr_dirent *dire; - unsigned offset, next, curr; + unsigned offset, next, curr, nr_slots; size_t nlen; int tmp; @@ -363,13 +363,12 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode, offset < AFS_DIR_SLOTS_PER_BLOCK; offset = next ) { - next = offset + 1; - /* skip entries marked unused in the bitmap */ if (!(block->hdr.bitmap[offset / 8] & (1 << (offset % 8)))) { _debug("ENT[%zu.%u]: unused", blkoff / sizeof(union afs_xdr_dir_block), offset); + next = offset + 1; if (offset >= curr) ctx->pos = blkoff + next * sizeof(union afs_xdr_dirent); @@ -381,35 +380,39 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode, nlen = strnlen(dire->u.name, sizeof(*block) - offset * sizeof(union afs_xdr_dirent)); + if (nlen > AFSNAMEMAX - 1) { + _debug("ENT[%zu]: name too long (len %u/%zu)", + blkoff / sizeof(union afs_xdr_dir_block), + offset, nlen); + return afs_bad(dvnode, afs_file_error_dir_name_too_long); + } _debug("ENT[%zu.%u]: %s %zu \"%s\"", blkoff / sizeof(union afs_xdr_dir_block), offset, (offset < curr ? "skip" : "fill"), nlen, dire->u.name); - /* work out where the next possible entry is */ - for (tmp = nlen; tmp > 15; tmp -= sizeof(union afs_xdr_dirent)) { - if (next >= AFS_DIR_SLOTS_PER_BLOCK) { - _debug("ENT[%zu.%u]:" - " %u travelled beyond end dir block" - " (len %u/%zu)", - blkoff / sizeof(union afs_xdr_dir_block), - offset, next, tmp, nlen); - return afs_bad(dvnode, afs_file_error_dir_over_end); - } - if (!(block->hdr.bitmap[next / 8] & - (1 << (next % 8)))) { - _debug("ENT[%zu.%u]:" - " %u unmarked extension (len %u/%zu)", + nr_slots = afs_dir_calc_slots(nlen); + next = offset + nr_slots; + if (next > AFS_DIR_SLOTS_PER_BLOCK) { + _debug("ENT[%zu.%u]:" + " %u extends beyond end dir block" + " (len %zu)", + blkoff / sizeof(union afs_xdr_dir_block), + offset, next, nlen); + return afs_bad(dvnode, afs_file_error_dir_over_end); + } + + /* Check that the name-extension dirents are all allocated */ + for (tmp = 1; tmp < nr_slots; tmp++) { + unsigned int ix = offset + tmp; + if (!(block->hdr.bitmap[ix / 8] & (1 << (ix % 8)))) { + _debug("ENT[%zu.u]:" + " %u unmarked extension (%u/%u)", blkoff / sizeof(union afs_xdr_dir_block), - offset, next, tmp, nlen); + offset, tmp, nr_slots); return afs_bad(dvnode, afs_file_error_dir_unmarked_ext); } - - _debug("ENT[%zu.%u]: ext %u/%zu", - blkoff / sizeof(union afs_xdr_dir_block), - next, tmp, nlen); - next++; } /* skip if starts before the current position */ diff --git a/fs/afs/dir_edit.c b/fs/afs/dir_edit.c index 2ffe09abae7fc..f4600c1353adf 100644 --- a/fs/afs/dir_edit.c +++ b/fs/afs/dir_edit.c @@ -215,8 +215,7 @@ void afs_edit_dir_add(struct afs_vnode *vnode, } /* Work out how many slots we're going to need. */ - need_slots = round_up(12 + name->len + 1 + 4, AFS_DIR_DIRENT_SIZE); - need_slots /= AFS_DIR_DIRENT_SIZE; + need_slots = afs_dir_calc_slots(name->len); meta_page = kmap(page0); meta = &meta_page->blocks[0]; @@ -393,8 +392,7 @@ void afs_edit_dir_remove(struct afs_vnode *vnode, } /* Work out how many slots we're going to discard. */ - need_slots = round_up(12 + name->len + 1 + 4, AFS_DIR_DIRENT_SIZE); - need_slots /= AFS_DIR_DIRENT_SIZE; + need_slots = afs_dir_calc_slots(name->len); meta_page = kmap(page0); meta = &meta_page->blocks[0]; diff --git a/fs/afs/xdr_fs.h b/fs/afs/xdr_fs.h index c926430fd08ad..8ca8681645077 100644 --- a/fs/afs/xdr_fs.h +++ b/fs/afs/xdr_fs.h @@ -58,7 +58,8 @@ union afs_xdr_dirent { /* When determining the number of dirent slots needed to * represent a directory entry, name should be assumed to be 16 * bytes, due to a now-standardised (mis)calculation, but it is - * in fact 20 bytes in size. + * in fact 20 bytes in size. afs_dir_calc_slots() should be + * used for this. * * For names longer than (16 or) 20 bytes, extra slots should * be annexed to this one using the extended_name format. @@ -101,4 +102,15 @@ struct afs_xdr_dir_page { union afs_xdr_dir_block blocks[AFS_DIR_BLOCKS_PER_PAGE]; }; +/* + * Calculate the number of dirent slots required for any given name length. + * The calculation is made assuming the part of the name in the first slot is + * 16 bytes, rather than 20, but this miscalculation is now standardised. + */ +static inline unsigned int afs_dir_calc_slots(size_t name_len) +{ + name_len++; /* NUL-terminated */ + return 1 + ((name_len + 15) / AFS_DIR_DIRENT_SIZE); +} + #endif /* XDR_FS_H */ diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index 4eef374d4413e..4a5cc8c64be34 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -231,6 +231,7 @@ enum afs_file_error { afs_file_error_dir_bad_magic, afs_file_error_dir_big, afs_file_error_dir_missing_page, + afs_file_error_dir_name_too_long, afs_file_error_dir_over_end, afs_file_error_dir_small, afs_file_error_dir_unmarked_ext, @@ -488,6 +489,7 @@ enum afs_cb_break_reason { EM(afs_file_error_dir_bad_magic, "DIR_BAD_MAGIC") \ EM(afs_file_error_dir_big, "DIR_BIG") \ EM(afs_file_error_dir_missing_page, "DIR_MISSING_PAGE") \ + EM(afs_file_error_dir_name_too_long, "DIR_NAME_TOO_LONG") \ EM(afs_file_error_dir_over_end, "DIR_ENT_OVER_END") \ EM(afs_file_error_dir_small, "DIR_SMALL") \ EM(afs_file_error_dir_unmarked_ext, "DIR_UNMARKED_EXT") \ -- GitLab From 0bd1bf86ab79555425b9f0b63005e181defe4da6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Jan 2021 14:57:29 +0100 Subject: [PATCH 1343/1894] dmaengine: qcom: fix gpi undefined behavior gcc points out an incorrect error handling loop: drivers/dma/qcom/gpi.c: In function 'gpi_ch_init': drivers/dma/qcom/gpi.c:1254:15: error: iteration 2 invokes undefined behavior [-Werror=aggressive-loop-optimizations] 1254 | struct gpii *gpii = gchan->gpii; | ^~~~ drivers/dma/qcom/gpi.c:1951:2: note: within this loop 1951 | for (i = i - 1; i >= 0; i++) { | ^~~ Change the loop to correctly walk backwards through the initialized fields rather than off into the woods. Fixes: 5d0c3533a19f ("dmaengine: qcom: Add GPI dma driver") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20210103135738.3741123-1-arnd@kernel.org Signed-off-by: Vinod Koul --- drivers/dma/qcom/gpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c index 556c070a514ca..1a0bf6b0567a5 100644 --- a/drivers/dma/qcom/gpi.c +++ b/drivers/dma/qcom/gpi.c @@ -1948,7 +1948,7 @@ static int gpi_ch_init(struct gchan *gchan) return ret; error_start_chan: - for (i = i - 1; i >= 0; i++) { + for (i = i - 1; i >= 0; i--) { gpi_stop_chan(&gpii->gchan[i]); gpi_send_cmd(gpii, gchan, GPI_CH_CMD_RESET); } -- GitLab From 99974aedbd73523969afb09f33c6e3047cd0ddae Mon Sep 17 00:00:00 2001 From: Shravya Kumbham Date: Wed, 23 Dec 2020 16:51:00 +0530 Subject: [PATCH 1344/1894] dmaengine: xilinx_dma: check dma_async_device_register return value dma_async_device_register() can return non-zero error code. Add condition to check the return value of dma_async_device_register function and handle the error path. Addresses-Coverity: Event check_return. Fixes: 9cd4360de609 ("dma: Add Xilinx AXI Video Direct Memory Access Engine driver support") Signed-off-by: Shravya Kumbham Signed-off-by: Radhey Shyam Pandey Link: https://lore.kernel.org/r/1608722462-29519-2-git-send-email-radhey.shyam.pandey@xilinx.com Signed-off-by: Vinod Koul --- drivers/dma/xilinx/xilinx_dma.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index 22faea653ea82..091f1f80dcff6 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -3112,7 +3112,11 @@ static int xilinx_dma_probe(struct platform_device *pdev) } /* Register the DMA engine with the core */ - dma_async_device_register(&xdev->common); + err = dma_async_device_register(&xdev->common); + if (err) { + dev_err(xdev->dev, "failed to register the dma device\n"); + goto error; + } err = of_dma_controller_register(node, of_dma_xilinx_xlate, xdev); -- GitLab From faeb0731be0a31e2246b21a85fa7dabbd750101d Mon Sep 17 00:00:00 2001 From: Shravya Kumbham Date: Wed, 23 Dec 2020 16:51:01 +0530 Subject: [PATCH 1345/1894] dmaengine: xilinx_dma: fix incompatible param warning in _child_probe() In xilinx_dma_child_probe function, the nr_channels variable is passed to of_property_read_u32() which expects an u32 return value pointer. Modify the nr_channels variable type from int to u32 to fix the incompatible parameter coverity warning. Addresses-Coverity: Event incompatible_param. Fixes: 1a9e7a03c761 ("dmaengine: vdma: Add support for mulit-channel dma mode") Signed-off-by: Shravya Kumbham Signed-off-by: Radhey Shyam Pandey Link: https://lore.kernel.org/r/1608722462-29519-3-git-send-email-radhey.shyam.pandey@xilinx.com Signed-off-by: Vinod Koul --- drivers/dma/xilinx/xilinx_dma.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index 091f1f80dcff6..3e374e8d22444 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -2900,7 +2900,8 @@ static int xilinx_dma_chan_probe(struct xilinx_dma_device *xdev, static int xilinx_dma_child_probe(struct xilinx_dma_device *xdev, struct device_node *node) { - int ret, i, nr_channels = 1; + int ret, i; + u32 nr_channels = 1; ret = of_property_read_u32(node, "dma-channels", &nr_channels); if (xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA && ret < 0) -- GitLab From 2d5efea64472469117dc1a9a39530069e95b21e9 Mon Sep 17 00:00:00 2001 From: Shravya Kumbham Date: Wed, 23 Dec 2020 16:51:02 +0530 Subject: [PATCH 1346/1894] dmaengine: xilinx_dma: fix mixed_enum_type coverity warning Typecast the fls(width -1) with (enum dmaengine_alignment) in xilinx_dma_chan_probe function to fix the coverity warning. Addresses-Coverity: Event mixed_enum_type. Fixes: 9cd4360de609 ("dma: Add Xilinx AXI Video Direct Memory Access Engine driver support") Signed-off-by: Shravya Kumbham Signed-off-by: Radhey Shyam Pandey Link: https://lore.kernel.org/r/1608722462-29519-4-git-send-email-radhey.shyam.pandey@xilinx.com Signed-off-by: Vinod Koul --- drivers/dma/xilinx/xilinx_dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index 3e374e8d22444..79777550a6ffc 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -2781,7 +2781,7 @@ static int xilinx_dma_chan_probe(struct xilinx_dma_device *xdev, has_dre = false; if (!has_dre) - xdev->common.copy_align = fls(width - 1); + xdev->common.copy_align = (enum dmaengine_alignment)fls(width - 1); if (of_device_is_compatible(node, "xlnx,axi-vdma-mm2s-channel") || of_device_is_compatible(node, "xlnx,axi-dma-mm2s-channel") || -- GitLab From 98bf2d3f4970179c702ef64db658e0553bc6ef3a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 22 Dec 2020 07:11:18 +0000 Subject: [PATCH 1347/1894] powerpc/32s: Fix RTAS machine check with VMAP stack When we have VMAP stack, exception prolog 1 sets r1, not r11. When it is not an RTAS machine check, don't trash r1 because it is needed by prolog 1. Fixes: da7bb43ab9da ("powerpc/32: Fix vmap stack - Properly set r1 before activating MMU") Fixes: d2e006036082 ("powerpc/32: Use SPRN_SPRG_SCRATCH2 in exception prologs") Cc: stable@vger.kernel.org # v5.10+ Signed-off-by: Christophe Leroy [mpe: Squash in fixup for RTAS machine check from Christophe] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/bc77d61d1c18940e456a2dee464f1e2eda65a3f0.1608621048.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_book3s_32.S | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 349bf3f0c3afa..858fbc8b19f32 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -260,10 +260,19 @@ __secondary_hold_acknowledge: MachineCheck: EXCEPTION_PROLOG_0 #ifdef CONFIG_PPC_CHRP +#ifdef CONFIG_VMAP_STACK + mtspr SPRN_SPRG_SCRATCH2,r1 + mfspr r1, SPRN_SPRG_THREAD + lwz r1, RTAS_SP(r1) + cmpwi cr1, r1, 0 + bne cr1, 7f + mfspr r1, SPRN_SPRG_SCRATCH2 +#else mfspr r11, SPRN_SPRG_THREAD lwz r11, RTAS_SP(r11) cmpwi cr1, r11, 0 bne cr1, 7f +#endif #endif /* CONFIG_PPC_CHRP */ EXCEPTION_PROLOG_1 for_rtas=1 7: EXCEPTION_PROLOG_2 -- GitLab From c3d6eb6e54373f297313b65c1f2319d36914d579 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 30 Dec 2020 20:44:07 +0800 Subject: [PATCH 1348/1894] HID: multitouch: Enable multi-input for Synaptics pointstick/touchpad device Pointstick and its left/right buttons on HP EliteBook 850 G7 need multi-input quirk to work correctly. Signed-off-by: Kai-Heng Feng Signed-off-by: Jiri Kosina --- drivers/hid/hid-multitouch.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index d670bcd57bdef..0743ef51d3b24 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -2054,6 +2054,10 @@ static const struct hid_device_id mt_devices[] = { HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_SYNAPTICS, 0xce08) }, + { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, + HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, + USB_VENDOR_ID_SYNAPTICS, 0xce09) }, + /* TopSeed panels */ { .driver_data = MT_CLS_TOPSEED, MT_USB_DEVICE(USB_VENDOR_ID_TOPSEED2, -- GitLab From 6170d077bf92c5b3dfbe1021688d3c0404f7c9e9 Mon Sep 17 00:00:00 2001 From: Xu Yilun Date: Mon, 4 Jan 2021 09:29:09 +0800 Subject: [PATCH 1349/1894] spi: fix the divide by 0 error when calculating xfer waiting time The xfer waiting time is the result of xfer->len / xfer->speed_hz. This patch makes the assumption of 100khz xfer speed if the xfer->speed_hz is not assigned and stays 0. This avoids the divide by 0 issue and ensures a reasonable tolerant waiting time. Signed-off-by: Xu Yilun Link: https://lore.kernel.org/r/1609723749-3557-1-git-send-email-yilun.xu@intel.com Signed-off-by: Mark Brown --- drivers/spi/spi.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index f59bf5094adb9..720ab34784c1d 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -1108,6 +1108,7 @@ static int spi_transfer_wait(struct spi_controller *ctlr, { struct spi_statistics *statm = &ctlr->statistics; struct spi_statistics *stats = &msg->spi->statistics; + u32 speed_hz = xfer->speed_hz; unsigned long long ms; if (spi_controller_is_slave(ctlr)) { @@ -1116,8 +1117,11 @@ static int spi_transfer_wait(struct spi_controller *ctlr, return -EINTR; } } else { + if (!speed_hz) + speed_hz = 100000; + ms = 8LL * 1000LL * xfer->len; - do_div(ms, xfer->speed_hz); + do_div(ms, speed_hz); ms += ms + 200; /* some tolerance */ if (ms > UINT_MAX) -- GitLab From 2bf3a72b08e7f6356a2db9e1571ca65f683510bb Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 31 Dec 2020 15:23:45 +0300 Subject: [PATCH 1350/1894] dt-bindings: regulator: qcom,rpmh-regulator: add pm8009 revision PMIC pm8009 has special revision (P=1) made for sm8250 platform. The major difference is the S2 regulator which supplies 0.95 V instead of 2.848V. Add special compatibility string for this chip revision. The datasheet calls the chip just pm8009-1, so use the same name. Signed-off-by: Dmitry Baryshkov Reviewed-by: Vinod Koul Link: https://lore.kernel.org/r/20201231122348.637917-2-dmitry.baryshkov@linaro.org Signed-off-by: Mark Brown --- .../devicetree/bindings/regulator/qcom,rpmh-regulator.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt b/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt index b8f0b7809c021..7d462b899473a 100644 --- a/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt +++ b/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt @@ -44,6 +44,7 @@ First Level Nodes - PMIC Definition: Must be one of below: "qcom,pm8005-rpmh-regulators" "qcom,pm8009-rpmh-regulators" + "qcom,pm8009-1-rpmh-regulators" "qcom,pm8150-rpmh-regulators" "qcom,pm8150l-rpmh-regulators" "qcom,pm8350-rpmh-regulators" -- GitLab From df6b92fa40050e59ea89784294bf6d04c0c47705 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 31 Dec 2020 15:23:46 +0300 Subject: [PATCH 1351/1894] regulator: qcom-rpmh-regulator: correct hfsmps515 definition According to the datasheet pm8009's HFS515 regulators have 16mV resolution rather than declared 1.6 mV. Correct the resolution. Signed-off-by: Dmitry Baryshkov Fixes: 06369bcc15a1 ("regulator: qcom-rpmh: Add support for SM8150") Reviewed-by: Vinod Koul Link: https://lore.kernel.org/r/20201231122348.637917-3-dmitry.baryshkov@linaro.org Signed-off-by: Mark Brown --- drivers/regulator/qcom-rpmh-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/qcom-rpmh-regulator.c b/drivers/regulator/qcom-rpmh-regulator.c index fe030ec4b7db4..c395a8dda6f7c 100644 --- a/drivers/regulator/qcom-rpmh-regulator.c +++ b/drivers/regulator/qcom-rpmh-regulator.c @@ -726,7 +726,7 @@ static const struct rpmh_vreg_hw_data pmic5_ftsmps510 = { static const struct rpmh_vreg_hw_data pmic5_hfsmps515 = { .regulator_type = VRM, .ops = &rpmh_regulator_vrm_ops, - .voltage_range = REGULATOR_LINEAR_RANGE(2800000, 0, 4, 1600), + .voltage_range = REGULATOR_LINEAR_RANGE(2800000, 0, 4, 16000), .n_voltages = 5, .pmic_mode_map = pmic_mode_map_pmic5_smps, .of_map_mode = rpmh_regulator_pmic4_smps_of_map_mode, -- GitLab From d957d1610c661e758426654de3b04bea6fb29f8b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 30 Dec 2020 15:56:23 +0100 Subject: [PATCH 1352/1894] regulator: qcom-rpmh: add QCOM_COMMAND_DB dependency A built-in regulator driver cannot link against a modular cmd_db driver: qcom-rpmh-regulator.c:(.text+0x174): undefined reference to `cmd_db_read_addr' There is already a dependency for RPMh, so add another one of this type for cmd_db. Fixes: 34c5aa2666db ("regulator: Kconfig: Fix REGULATOR_QCOM_RPMH dependencies to avoid build error") Fixes: 46fc033eba42 ("regulator: add QCOM RPMh regulator driver") Signed-off-by: Arnd Bergmann Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20201230145712.3133110-1-arnd@kernel.org Signed-off-by: Mark Brown --- drivers/regulator/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index 53fa84f4d1e19..5abdd29fb9f33 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -881,6 +881,7 @@ config REGULATOR_QCOM_RPM config REGULATOR_QCOM_RPMH tristate "Qualcomm Technologies, Inc. RPMh regulator driver" depends on QCOM_RPMH || (QCOM_RPMH=n && COMPILE_TEST) + depends on QCOM_COMMAND_DB || (QCOM_COMMAND_DB=n && COMPILE_TEST) help This driver supports control of PMIC regulators via the RPMh hardware block found on Qualcomm Technologies Inc. SoCs. RPMh regulator -- GitLab From 4b1a60a1bb8f03d82c3f6da424adc96667b59f2a Mon Sep 17 00:00:00 2001 From: Georgi Djakov Date: Mon, 4 Jan 2021 15:50:43 +0200 Subject: [PATCH 1353/1894] MAINTAINERS: Update Georgi's email address Use my kernel.org email as main address to make things a bit easier for me to handle. Link: https://lore.kernel.org/r/20210104135043.31262-1-georgi.djakov@linaro.org Signed-off-by: Georgi Djakov --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 546aa66428c9f..a15e306123ef8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9240,7 +9240,7 @@ F: tools/testing/selftests/sgx/* K: \bSGX_ INTERCONNECT API -M: Georgi Djakov +M: Georgi Djakov L: linux-pm@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/interconnect/ -- GitLab From de30491e8bfeeba1500bba293333eb51ece529d5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Jan 2021 14:53:55 +0100 Subject: [PATCH 1354/1894] HID: sfh: fix address space confusion The new driver uses a phys_addr_t to store a DMA address, which does not work when the two are different size: drivers/hid/amd-sfh-hid/amd_sfh_client.c:157:11: error: incompatible pointer types passing 'phys_addr_t *' (aka 'unsigned int *') to parameter of type 'dma_addr_t *' (aka 'unsigned long long *') [-Werror,-Wincompatible-pointer-types] &cl_data->sensor_phys_addr[i], ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/dma-mapping.h:393:15: note: passing argument to parameter 'dma_handle' here dma_addr_t *dma_handle, gfp_t gfp) ^ Change both the type and the variable name to dma_addr for consistency. Fixes: 4b2c53d93a4b ("SFH:Transport Driver to add support of AMD Sensor Fusion Hub (SFH)") Signed-off-by: Arnd Bergmann Signed-off-by: Jiri Kosina --- drivers/hid/amd-sfh-hid/amd_sfh_client.c | 8 ++++---- drivers/hid/amd-sfh-hid/amd_sfh_hid.h | 2 +- drivers/hid/amd-sfh-hid/amd_sfh_pcie.c | 2 +- drivers/hid/amd-sfh-hid/amd_sfh_pcie.h | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_client.c b/drivers/hid/amd-sfh-hid/amd_sfh_client.c index 3d1ccac5d99a3..2ab38b7153477 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_client.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_client.c @@ -154,7 +154,7 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata) for (i = 0; i < cl_data->num_hid_devices; i++) { cl_data->sensor_virt_addr[i] = dma_alloc_coherent(dev, sizeof(int) * 8, - &cl_data->sensor_phys_addr[i], + &cl_data->sensor_dma_addr[i], GFP_KERNEL); cl_data->sensor_sts[i] = 0; cl_data->sensor_requested_cnt[i] = 0; @@ -187,7 +187,7 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata) } info.period = msecs_to_jiffies(AMD_SFH_IDLE_LOOP); info.sensor_idx = cl_idx; - info.phys_address = cl_data->sensor_phys_addr[i]; + info.dma_address = cl_data->sensor_dma_addr[i]; cl_data->report_descr[i] = kzalloc(cl_data->report_descr_sz[i], GFP_KERNEL); if (!cl_data->report_descr[i]) { @@ -212,7 +212,7 @@ cleanup: if (cl_data->sensor_virt_addr[i]) { dma_free_coherent(&privdata->pdev->dev, 8 * sizeof(int), cl_data->sensor_virt_addr[i], - cl_data->sensor_phys_addr[i]); + cl_data->sensor_dma_addr[i]); } kfree(cl_data->feature_report[i]); kfree(cl_data->input_report[i]); @@ -238,7 +238,7 @@ int amd_sfh_hid_client_deinit(struct amd_mp2_dev *privdata) if (cl_data->sensor_virt_addr[i]) { dma_free_coherent(&privdata->pdev->dev, 8 * sizeof(int), cl_data->sensor_virt_addr[i], - cl_data->sensor_phys_addr[i]); + cl_data->sensor_dma_addr[i]); } } kfree(cl_data); diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_hid.h b/drivers/hid/amd-sfh-hid/amd_sfh_hid.h index 6be0783d885ce..d7eac1728e314 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_hid.h +++ b/drivers/hid/amd-sfh-hid/amd_sfh_hid.h @@ -27,7 +27,7 @@ struct amdtp_cl_data { int hid_descr_size[MAX_HID_DEVICES]; phys_addr_t phys_addr_base; u32 *sensor_virt_addr[MAX_HID_DEVICES]; - phys_addr_t sensor_phys_addr[MAX_HID_DEVICES]; + dma_addr_t sensor_dma_addr[MAX_HID_DEVICES]; u32 sensor_sts[MAX_HID_DEVICES]; u32 sensor_requested_cnt[MAX_HID_DEVICES]; u8 report_type[MAX_HID_DEVICES]; diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c index a51c7b76283bb..dbac166416627 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c @@ -41,7 +41,7 @@ void amd_start_sensor(struct amd_mp2_dev *privdata, struct amd_mp2_sensor_info i cmd_param.s.buf_layout = 1; cmd_param.s.buf_length = 16; - writeq(info.phys_address, privdata->mmio + AMD_C2P_MSG2); + writeq(info.dma_address, privdata->mmio + AMD_C2P_MSG2); writel(cmd_param.ul, privdata->mmio + AMD_C2P_MSG1); writel(cmd_base.ul, privdata->mmio + AMD_C2P_MSG0); } diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h index e8be94f935b78..8f8d19b2cfe5b 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h +++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h @@ -67,7 +67,7 @@ struct amd_mp2_dev { struct amd_mp2_sensor_info { u8 sensor_idx; u32 period; - phys_addr_t phys_address; + dma_addr_t dma_address; }; void amd_start_sensor(struct amd_mp2_dev *privdata, struct amd_mp2_sensor_info info); -- GitLab From 273435a1d4e5826f039625c23ba4fe9a09f24d75 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Jan 2021 22:41:44 +0100 Subject: [PATCH 1355/1894] HID: sony: select CONFIG_CRC32 Without crc32 support, this driver fails to link: arm-linux-gnueabi-ld: drivers/hid/hid-sony.o: in function `sony_raw_event': hid-sony.c:(.text+0x8f4): undefined reference to `crc32_le' arm-linux-gnueabi-ld: hid-sony.c:(.text+0x900): undefined reference to `crc32_le' arm-linux-gnueabi-ld: drivers/hid/hid-sony.o:hid-sony.c:(.text+0x4408): more undefined references to `crc32_le' follow Signed-off-by: Arnd Bergmann Signed-off-by: Jiri Kosina --- drivers/hid/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 7bdda1b5b2217..09fa75a2b289e 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -899,6 +899,7 @@ config HID_SONY depends on NEW_LEDS depends on LEDS_CLASS select POWER_SUPPLY + select CRC32 help Support for -- GitLab From 0e2d6795e8dbe91c2f5473564c6b25d11df3778b Mon Sep 17 00:00:00 2001 From: Daniel Palmer Date: Sun, 27 Dec 2020 12:17:16 +0900 Subject: [PATCH 1356/1894] USB: serial: option: add LongSung M5710 module support Add a device-id entry for the LongSung M5710 module. T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2df3 ProdID=9d03 Rev= 1.00 S: Manufacturer=Marvell S: Product=Mobile Composite Device Bus S: SerialNumber= C:* #Ifs= 5 Cfg#= 1 Atr=c0 MxPwr=500mA A: FirstIf#= 0 IfCount= 2 Cls=e0(wlcon) Sub=01 Prot=03 I:* If#= 0 Alt= 0 #EPs= 1 Cls=e0(wlcon) Sub=01 Prot=03 Driver=rndis_host E: Ad=87(I) Atr=03(Int.) MxPS= 64 Ivl=4096ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=rndis_host E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=0c(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=0b(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=89(I) Atr=03(Int.) MxPS= 64 Ivl=4096ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=0f(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=88(I) Atr=03(Int.) MxPS= 64 Ivl=4096ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=0a(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Daniel Palmer https://lore.kernel.org/r/20201227031716.1343300-1-daniel@0x0f.com [ johan: drop id defines, only bind to vendor class ] Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index ee726a1067066..3fe959104311b 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -2059,6 +2059,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0105, 0xff), /* Fibocom NL678 series */ .driver_info = RSVD(6) }, { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a0, 0xff) }, /* Fibocom NL668-AM/NL652-EU (laptop MBIM) */ + { USB_DEVICE_INTERFACE_CLASS(0x2df3, 0x9d03, 0xff) }, /* LongSung M5710 */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1404, 0xff) }, /* GosunCn GM500 RNDIS */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1405, 0xff) }, /* GosunCn GM500 MBIM */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1406, 0xff) }, /* GosunCn GM500 ECM/NCM */ -- GitLab From 54d0a3ab80f49f19ee916def62fe067596833403 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 4 Jan 2021 15:50:07 +0100 Subject: [PATCH 1357/1894] USB: serial: iuu_phoenix: fix DMA from stack Stack-allocated buffers cannot be used for DMA (on all architectures) so allocate the flush command buffer using kmalloc(). Fixes: 60a8fc017103 ("USB: add iuu_phoenix driver") Cc: stable # 2.6.25 Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold --- drivers/usb/serial/iuu_phoenix.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/usb/serial/iuu_phoenix.c b/drivers/usb/serial/iuu_phoenix.c index f1201d4de2970..e8f06b41a5039 100644 --- a/drivers/usb/serial/iuu_phoenix.c +++ b/drivers/usb/serial/iuu_phoenix.c @@ -532,23 +532,29 @@ static int iuu_uart_flush(struct usb_serial_port *port) struct device *dev = &port->dev; int i; int status; - u8 rxcmd = IUU_UART_RX; + u8 *rxcmd; struct iuu_private *priv = usb_get_serial_port_data(port); if (iuu_led(port, 0xF000, 0, 0, 0xFF) < 0) return -EIO; + rxcmd = kmalloc(1, GFP_KERNEL); + if (!rxcmd) + return -ENOMEM; + + rxcmd[0] = IUU_UART_RX; + for (i = 0; i < 2; i++) { - status = bulk_immediate(port, &rxcmd, 1); + status = bulk_immediate(port, rxcmd, 1); if (status != IUU_OPERATION_OK) { dev_dbg(dev, "%s - uart_flush_write error\n", __func__); - return status; + goto out_free; } status = read_immediate(port, &priv->len, 1); if (status != IUU_OPERATION_OK) { dev_dbg(dev, "%s - uart_flush_read error\n", __func__); - return status; + goto out_free; } if (priv->len > 0) { @@ -556,12 +562,16 @@ static int iuu_uart_flush(struct usb_serial_port *port) status = read_immediate(port, priv->buf, priv->len); if (status != IUU_OPERATION_OK) { dev_dbg(dev, "%s - uart_flush_read error\n", __func__); - return status; + goto out_free; } } } dev_dbg(dev, "%s - uart_flush_read OK!\n", __func__); iuu_led(port, 0, 0xF000, 0, 0xFF); + +out_free: + kfree(rxcmd); + return status; } -- GitLab From 4bfd6247fa9164c8e193a55ef9c0ea3ee22f82d8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 4 Jan 2021 16:30:46 +0100 Subject: [PATCH 1358/1894] ALSA: hda/via: Fix runtime PM for Clevo W35xSS Clevo W35xSS_370SS with VIA codec has had the runtime PM problem that looses the power state of some nodes after the runtime resume. This was worked around by disabling the default runtime PM via a denylist entry. Since 5.10.x made the runtime PM applied (casually) even though it's disabled in the denylist, this problem was revisited. The result was that disabling power_save_node feature suffices for the runtime PM problem. This patch implements the disablement of power_save_node feature in VIA codec for the device. It also drops the former denylist entry, too, as the runtime PM should work in the codec side properly now. Fixes: b529ef2464ad ("ALSA: hda: Add Clevo W35xSS_370SS to the power_save blacklist") Reported-by: Christian Labisch Cc: Link: https://lore.kernel.org/r/20210104153046.19993-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 2 -- sound/pci/hda/patch_via.c | 13 +++++++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 6852668f1bcb4..770ad25f1907c 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2220,8 +2220,6 @@ static const struct snd_pci_quirk power_save_denylist[] = { SND_PCI_QUIRK(0x1849, 0x7662, "Asrock H81M-HDS", 0), /* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */ SND_PCI_QUIRK(0x1043, 0x8733, "Asus Prime X370-Pro", 0), - /* https://bugzilla.redhat.com/show_bug.cgi?id=1581607 */ - SND_PCI_QUIRK(0x1558, 0x3501, "Clevo W35xSS_370SS", 0), /* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */ SND_PCI_QUIRK(0x1558, 0x6504, "Clevo W65_67SB", 0), /* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */ diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c index 7ef8f3105cdb7..0ab40a8a68fb5 100644 --- a/sound/pci/hda/patch_via.c +++ b/sound/pci/hda/patch_via.c @@ -1002,6 +1002,7 @@ static const struct hda_verb vt1802_init_verbs[] = { enum { VIA_FIXUP_INTMIC_BOOST, VIA_FIXUP_ASUS_G75, + VIA_FIXUP_POWER_SAVE, }; static void via_fixup_intmic_boost(struct hda_codec *codec, @@ -1011,6 +1012,13 @@ static void via_fixup_intmic_boost(struct hda_codec *codec, override_mic_boost(codec, 0x30, 0, 2, 40); } +static void via_fixup_power_save(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + if (action == HDA_FIXUP_ACT_PRE_PROBE) + codec->power_save_node = 0; +} + static const struct hda_fixup via_fixups[] = { [VIA_FIXUP_INTMIC_BOOST] = { .type = HDA_FIXUP_FUNC, @@ -1025,11 +1033,16 @@ static const struct hda_fixup via_fixups[] = { { } } }, + [VIA_FIXUP_POWER_SAVE] = { + .type = HDA_FIXUP_FUNC, + .v.func = via_fixup_power_save, + }, }; static const struct snd_pci_quirk vt2002p_fixups[] = { SND_PCI_QUIRK(0x1043, 0x1487, "Asus G75", VIA_FIXUP_ASUS_G75), SND_PCI_QUIRK(0x1043, 0x8532, "Asus X202E", VIA_FIXUP_INTMIC_BOOST), + SND_PCI_QUIRK(0x1558, 0x3501, "Clevo W35xSS_370SS", VIA_FIXUP_POWER_SAVE), {} }; -- GitLab From 020a1f453449294926ca548d8d5ca970926e8dfd Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 4 Jan 2021 15:53:02 +0100 Subject: [PATCH 1359/1894] USB: usblp: fix DMA to stack Stack-allocated buffers cannot be used for DMA (on all architectures). Replace the HP-channel macro with a helper function that allocates a dedicated transfer buffer so that it can continue to be used with arguments from the stack. Note that the buffer is cleared on allocation as usblp_ctrl_msg() returns success also on short transfers (the buffer is only used for debugging). Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20210104145302.2087-1-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/usblp.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/usb/class/usblp.c b/drivers/usb/class/usblp.c index 67cbd42421bee..134dc2005ce97 100644 --- a/drivers/usb/class/usblp.c +++ b/drivers/usb/class/usblp.c @@ -274,8 +274,25 @@ static int usblp_ctrl_msg(struct usblp *usblp, int request, int type, int dir, i #define usblp_reset(usblp)\ usblp_ctrl_msg(usblp, USBLP_REQ_RESET, USB_TYPE_CLASS, USB_DIR_OUT, USB_RECIP_OTHER, 0, NULL, 0) -#define usblp_hp_channel_change_request(usblp, channel, buffer) \ - usblp_ctrl_msg(usblp, USBLP_REQ_HP_CHANNEL_CHANGE_REQUEST, USB_TYPE_VENDOR, USB_DIR_IN, USB_RECIP_INTERFACE, channel, buffer, 1) +static int usblp_hp_channel_change_request(struct usblp *usblp, int channel, u8 *new_channel) +{ + u8 *buf; + int ret; + + buf = kzalloc(1, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + ret = usblp_ctrl_msg(usblp, USBLP_REQ_HP_CHANNEL_CHANGE_REQUEST, + USB_TYPE_VENDOR, USB_DIR_IN, USB_RECIP_INTERFACE, + channel, buf, 1); + if (ret == 0) + *new_channel = buf[0]; + + kfree(buf); + + return ret; +} /* * See the description for usblp_select_alts() below for the usage -- GitLab From 718bf42b119de652ebcc93655a1f33a9c0d04b3c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 28 Dec 2020 23:13:09 -0800 Subject: [PATCH 1360/1894] usb: usbip: vhci_hcd: protect shift size Fix shift out-of-bounds in vhci_hcd.c: UBSAN: shift-out-of-bounds in ../drivers/usb/usbip/vhci_hcd.c:399:41 shift exponent 768 is too large for 32-bit type 'int' Fixes: 03cd00d538a6 ("usbip: vhci-hcd: Set the vhci structure up to work") Signed-off-by: Randy Dunlap Reported-by: syzbot+297d20e437b79283bf6d@syzkaller.appspotmail.com Cc: Yuyang Du Cc: Shuah Khan Cc: Greg Kroah-Hartman Cc: linux-usb@vger.kernel.org Cc: stable Link: https://lore.kernel.org/r/20201229071309.18418-1-rdunlap@infradead.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/vhci_hcd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c index 66cde5e5f7964..3209b5ddd30c9 100644 --- a/drivers/usb/usbip/vhci_hcd.c +++ b/drivers/usb/usbip/vhci_hcd.c @@ -396,6 +396,8 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, default: usbip_dbg_vhci_rh(" ClearPortFeature: default %x\n", wValue); + if (wValue >= 32) + goto error; vhci_hcd->port_status[rhport] &= ~(1 << wValue); break; } -- GitLab From a1383b3537a7bea1c213baa7878ccc4ecf4413b5 Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Tue, 29 Dec 2020 15:00:37 -0800 Subject: [PATCH 1361/1894] usb: dwc3: gadget: Restart DWC3 gadget when enabling pullup usb_gadget_deactivate/usb_gadget_activate does not execute the UDC start operation, which may leave EP0 disabled and event IRQs disabled when re-activating the function. Move the enabling/disabling of USB EP0 and device event IRQs to be performed in the pullup routine. Fixes: ae7e86108b12 ("usb: dwc3: Stop active transfers before halting the controller") Tested-by: Michael Tretter Cc: stable Reported-by: Michael Tretter Signed-off-by: Wesley Cheng Link: https://lore.kernel.org/r/1609282837-21666-1-git-send-email-wcheng@codeaurora.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 78cb4db8a6e45..25f654b79e480 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2083,6 +2083,7 @@ static int dwc3_gadget_run_stop(struct dwc3 *dwc, int is_on, int suspend) static void dwc3_gadget_disable_irq(struct dwc3 *dwc); static void __dwc3_gadget_stop(struct dwc3 *dwc); +static int __dwc3_gadget_start(struct dwc3 *dwc); static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on) { @@ -2145,6 +2146,8 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on) dwc->ev_buf->lpos = (dwc->ev_buf->lpos + count) % dwc->ev_buf->length; } + } else { + __dwc3_gadget_start(dwc); } ret = dwc3_gadget_run_stop(dwc, is_on, false); @@ -2319,10 +2322,6 @@ static int dwc3_gadget_start(struct usb_gadget *g, } dwc->gadget_driver = driver; - - if (pm_runtime_active(dwc->dev)) - __dwc3_gadget_start(dwc); - spin_unlock_irqrestore(&dwc->lock, flags); return 0; @@ -2348,13 +2347,6 @@ static int dwc3_gadget_stop(struct usb_gadget *g) unsigned long flags; spin_lock_irqsave(&dwc->lock, flags); - - if (pm_runtime_suspended(dwc->dev)) - goto out; - - __dwc3_gadget_stop(dwc); - -out: dwc->gadget_driver = NULL; spin_unlock_irqrestore(&dwc->lock, flags); -- GitLab From 64e6bbfff52db4bf6785fab9cffab850b2de6870 Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Tue, 29 Dec 2020 18:53:35 +0800 Subject: [PATCH 1362/1894] usb: gadget: configfs: Fix use-after-free issue with udc_name There is a use-after-free issue, if access udc_name in function gadget_dev_desc_UDC_store after another context free udc_name in function unregister_gadget. Context 1: gadget_dev_desc_UDC_store()->unregister_gadget()-> free udc_name->set udc_name to NULL Context 2: gadget_dev_desc_UDC_show()-> access udc_name Call trace: dump_backtrace+0x0/0x340 show_stack+0x14/0x1c dump_stack+0xe4/0x134 print_address_description+0x78/0x478 __kasan_report+0x270/0x2ec kasan_report+0x10/0x18 __asan_report_load1_noabort+0x18/0x20 string+0xf4/0x138 vsnprintf+0x428/0x14d0 sprintf+0xe4/0x12c gadget_dev_desc_UDC_show+0x54/0x64 configfs_read_file+0x210/0x3a0 __vfs_read+0xf0/0x49c vfs_read+0x130/0x2b4 SyS_read+0x114/0x208 el0_svc_naked+0x34/0x38 Add mutex_lock to protect this kind of scenario. Signed-off-by: Eddie Hung Signed-off-by: Macpaul Lin Reviewed-by: Peter Chen Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/1609239215-21819-1-git-send-email-macpaul.lin@mediatek.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/configfs.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index 56051bb973498..d9743f4b56c34 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -221,9 +221,16 @@ static ssize_t gadget_dev_desc_bcdUSB_store(struct config_item *item, static ssize_t gadget_dev_desc_UDC_show(struct config_item *item, char *page) { - char *udc_name = to_gadget_info(item)->composite.gadget_driver.udc_name; + struct gadget_info *gi = to_gadget_info(item); + char *udc_name; + int ret; + + mutex_lock(&gi->lock); + udc_name = gi->composite.gadget_driver.udc_name; + ret = sprintf(page, "%s\n", udc_name ?: ""); + mutex_unlock(&gi->lock); - return sprintf(page, "%s\n", udc_name ?: ""); + return ret; } static int unregister_gadget(struct gadget_info *gi) -- GitLab From 7043e311a57625467b6fdb032dec8a6dea878208 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Wed, 30 Dec 2020 13:11:14 +0800 Subject: [PATCH 1363/1894] usb: gadget: core: change the comment for usb_gadget_connect The pullup does not need to be enabled at below situations: - For platforms which the hardware pullup starts after setting register even they do not see the VBUS. If the pullup is always enabled for these platforms, it will consume more power and break the USB IF compliance tests [1]. - For platforms which need to do BC 1.2 charger detection after seeing the VBUS. Pullup D+ will break the charger detection flow. - For platforms which the system suspend is allowed when the connection with host is there but the bus is not at suspend. For these platforms, it is better to disable pullup when entering the system suspend otherwise the host may confuse the device behavior after controller is in low power mode. Disable pullup is considered as a disconnection event from host side. [1] USB-IF Full and Low Speed Compliance Test Procedure F Back-voltage Testing Section 7.2.1 of the USB specification requires that no device shall supply (source) current on VBUS at its upstream facing port at any time. From VBUS on its upstream facing port, a device may only draw (sink) current. They may not provide power to the pull-up resistor on D+/D- unless VBUS is present (see Section 7.1.5). Signed-off-by: Peter Chen Link: https://lore.kernel.org/r/20201230051114.21370-1-peter.chen@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index 5b5cfeb6c14a6..6a62bbd01324f 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -659,8 +659,7 @@ EXPORT_SYMBOL_GPL(usb_gadget_vbus_disconnect); * * Enables the D+ (or potentially D-) pullup. The host will start * enumerating this gadget when the pullup is active and a VBUS session - * is active (the link is powered). This pullup is always enabled unless - * usb_gadget_disconnect() has been used to disable it. + * is active (the link is powered). * * Returns zero on success, else negative errno. */ -- GitLab From d7889c2020e08caab0d7e36e947f642d91015bd0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Jan 2021 22:42:17 +0100 Subject: [PATCH 1364/1894] usb: gadget: select CONFIG_CRC32 Without crc32 support, this driver fails to link: arm-linux-gnueabi-ld: drivers/usb/gadget/function/f_eem.o: in function `eem_unwrap': f_eem.c:(.text+0x11cc): undefined reference to `crc32_le' arm-linux-gnueabi-ld: drivers/usb/gadget/function/f_ncm.o:f_ncm.c:(.text+0x1e40): more undefined references to `crc32_le' follow Fixes: 6d3865f9d41f ("usb: gadget: NCM: Add transmit multi-frame.") Signed-off-by: Arnd Bergmann Cc: stable Link: https://lore.kernel.org/r/20210103214224.1996535-1-arnd@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig index 7e47e6223089c..2d152571a7de8 100644 --- a/drivers/usb/gadget/Kconfig +++ b/drivers/usb/gadget/Kconfig @@ -265,6 +265,7 @@ config USB_CONFIGFS_NCM depends on NET select USB_U_ETHER select USB_F_NCM + select CRC32 help NCM is an advanced protocol for Ethernet encapsulation, allows grouping of several ethernet frames into one USB transfer and @@ -314,6 +315,7 @@ config USB_CONFIGFS_EEM depends on NET select USB_U_ETHER select USB_F_EEM + select CRC32 help CDC EEM is a newer USB standard that is somewhat simpler than CDC ECM and therefore can be supported by more hardware. Technically ECM and -- GitLab From 6cd0fe91387917be48e91385a572a69dfac2f3f7 Mon Sep 17 00:00:00 2001 From: Chandana Kishori Chiluveru Date: Tue, 29 Dec 2020 14:44:43 -0800 Subject: [PATCH 1365/1894] usb: gadget: configfs: Preserve function ordering after bind failure When binding the ConfigFS gadget to a UDC, the functions in each configuration are added in list order. However, if usb_add_function() fails, the failed function is put back on its configuration's func_list and purge_configs_funcs() is called to further clean up. purge_configs_funcs() iterates over the configurations and functions in forward order, calling unbind() on each of the previously added functions. But after doing so, each function gets moved to the tail of the configuration's func_list. This results in reshuffling the original order of the functions within a configuration such that the failed function now appears first even though it may have originally appeared in the middle or even end of the list. At this point if the ConfigFS gadget is attempted to re-bind to the UDC, the functions will be added in a different order than intended, with the only recourse being to remove and relink the functions all over again. An example of this as follows: ln -s functions/mass_storage.0 configs/c.1 ln -s functions/ncm.0 configs/c.1 ln -s functions/ffs.adb configs/c.1 # oops, forgot to start adbd echo "" > UDC # fails start adbd echo "" > UDC # now succeeds, but... # bind order is # "ADB", mass_storage, ncm [30133.118289] configfs-gadget gadget: adding 'Mass Storage Function'/ffffff810af87200 to config 'c'/ffffff817d6a2520 [30133.119875] configfs-gadget gadget: adding 'cdc_network'/ffffff80f48d1a00 to config 'c'/ffffff817d6a2520 [30133.119974] using random self ethernet address [30133.120002] using random host ethernet address [30133.139604] usb0: HOST MAC 3e:27:46:ba:3e:26 [30133.140015] usb0: MAC 6e:28:7e:42:66:6a [30133.140062] configfs-gadget gadget: adding 'Function FS Gadget'/ffffff80f3868438 to config 'c'/ffffff817d6a2520 [30133.140081] configfs-gadget gadget: adding 'Function FS Gadget'/ffffff80f3868438 --> -19 [30133.140098] configfs-gadget gadget: unbind function 'Mass Storage Function'/ffffff810af87200 [30133.140119] configfs-gadget gadget: unbind function 'cdc_network'/ffffff80f48d1a00 [30133.173201] configfs-gadget a600000.dwc3: failed to start g1: -19 [30136.661933] init: starting service 'adbd'... [30136.700126] read descriptors [30136.700413] read strings [30138.574484] configfs-gadget gadget: adding 'Function FS Gadget'/ffffff80f3868438 to config 'c'/ffffff817d6a2520 [30138.575497] configfs-gadget gadget: adding 'Mass Storage Function'/ffffff810af87200 to config 'c'/ffffff817d6a2520 [30138.575554] configfs-gadget gadget: adding 'cdc_network'/ffffff80f48d1a00 to config 'c'/ffffff817d6a2520 [30138.575631] using random self ethernet address [30138.575660] using random host ethernet address [30138.595338] usb0: HOST MAC 2e:cf:43:cd:ca:c8 [30138.597160] usb0: MAC 6a:f0:9f:ee:82:a0 [30138.791490] configfs-gadget gadget: super-speed config #1: c Fix this by reversing the iteration order of the functions in purge_config_funcs() when unbinding them, and adding them back to the config's func_list at the head instead of the tail. This ensures that we unbind and unwind back to the original list order. Fixes: 88af8bbe4ef7 ("usb: gadget: the start of the configfs interface") Signed-off-by: Chandana Kishori Chiluveru Signed-off-by: Jack Pham Reviewed-by: Peter Chen Link: https://lore.kernel.org/r/20201229224443.31623-1-jackp@codeaurora.org Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/configfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index d9743f4b56c34..408a5332a975b 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -1255,9 +1255,9 @@ static void purge_configs_funcs(struct gadget_info *gi) cfg = container_of(c, struct config_usb_cfg, c); - list_for_each_entry_safe(f, tmp, &c->functions, list) { + list_for_each_entry_safe_reverse(f, tmp, &c->functions, list) { - list_move_tail(&f->list, &cfg->func_list); + list_move(&f->list, &cfg->func_list); if (f->unbind) { dev_dbg(&gi->cdev.gadget->dev, "unbind function '%s'/%p\n", -- GitLab From e1263f9277bad198c2acc8092a41aea1edbea0e4 Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Mon, 4 Jan 2021 15:20:45 +0100 Subject: [PATCH 1366/1894] dmaengine: stm32-mdma: fix STM32_MDMA_VERY_HIGH_PRIORITY value STM32_MDMA_VERY_HIGH_PRIORITY is b11 not 0x11, so fix it with 0x3. Signed-off-by: Amelie Delaunay Link: https://lore.kernel.org/r/20210104142045.25583-1-amelie.delaunay@foss.st.com Signed-off-by: Vinod Koul --- drivers/dma/stm32-mdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c index e4637ec786d39..36ba8b43e78de 100644 --- a/drivers/dma/stm32-mdma.c +++ b/drivers/dma/stm32-mdma.c @@ -199,7 +199,7 @@ #define STM32_MDMA_MAX_CHANNELS 63 #define STM32_MDMA_MAX_REQUESTS 256 #define STM32_MDMA_MAX_BURST 128 -#define STM32_MDMA_VERY_HIGH_PRIORITY 0x11 +#define STM32_MDMA_VERY_HIGH_PRIORITY 0x3 enum stm32_mdma_trigger_mode { STM32_MDMA_BUFFER, -- GitLab From 65a4e5299739abe0888cda0938d21f8ea3b5c606 Mon Sep 17 00:00:00 2001 From: David Gow Date: Mon, 21 Dec 2020 23:39:00 -0800 Subject: [PATCH 1367/1894] kunit: tool: Force the use of the 'tty' console for UML kunit_tool relies on the UML console outputting printk() output to the tty in order to get results. Since the default console driver could change, pass 'console=tty' to the kernel. This is triggered by a change[1] to use ttynull as a fallback console driver which -- by chance or by design -- seems to have changed the default console output on UML, breaking kunit_tool. While this may be fixed, we should be less fragile to such changes in the default. [1]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=757055ae8dedf5333af17b3b5b4b70ba9bc9da4e Signed-off-by: David Gow Fixes: 757055ae8ded ("init/console: Use ttynull as a fallback when there is no console") Reported-by: Andy Shevchenko Tested-by: Andy Shevchenko Acked-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit_kernel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index 57c1724b7e5da..698358c9c0d60 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -198,7 +198,7 @@ class LinuxSourceTree(object): return self.validate_config(build_dir) def run_kernel(self, args=[], build_dir='', timeout=None): - args.extend(['mem=1G']) + args.extend(['mem=1G', 'console=tty']) self._ops.linux_bin(args, timeout, build_dir) outfile = get_outfile_path(build_dir) subprocess.call(['stty', 'sane']) -- GitLab From 3b4cf848dad5dad4bf239ba664c809c8cf29f1ed Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Thu, 17 Dec 2020 17:31:40 +0100 Subject: [PATCH 1368/1894] selftests/vDSO: add additional binaries to .gitignore Add the test binaries introduced by commit 693f5ca08ca0 ("kselftest: Extend vDSO selftest"), commit 03f55c7952c9 ("kselftest: Extend vDSO selftest to clock_getres") and commit c7e5789b24d3 ("kselftest: Move test_vdso to the vDSO test suite") to .gitignore. Signed-off-by: Tobias Klauser Signed-off-by: Shuah Khan --- tools/testing/selftests/vDSO/.gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/vDSO/.gitignore b/tools/testing/selftests/vDSO/.gitignore index 5eb64d41e5419..a8dc51af5a9c0 100644 --- a/tools/testing/selftests/vDSO/.gitignore +++ b/tools/testing/selftests/vDSO/.gitignore @@ -1,5 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only vdso_test +vdso_test_abi +vdso_test_clock_getres +vdso_test_correctness vdso_test_gettimeofday vdso_test_getcpu vdso_standalone_test_x86 -- GitLab From df00d02989024d193a6efd1a85513a5658c6a10f Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Thu, 17 Dec 2020 17:32:35 +0100 Subject: [PATCH 1369/1894] selftests/vDSO: fix -Wformat warning in vdso_test_correctness MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following -Wformat warnings in vdso_test_correctness.c: vdso_test_correctness.c: In function ‘test_one_clock_gettime64’: vdso_test_correctness.c:352:21: warning: format ‘%ld’ expects argument of type ‘long int’, but argument 3 has type ‘long long int’ [-Wformat=] 352 | printf("\t%llu.%09ld %llu.%09ld %llu.%09ld\n", | ~~~~^ | | | long int | %09lld 353 | (unsigned long long)start.tv_sec, start.tv_nsec, | ~~~~~~~~~~~~~ | | | long long int vdso_test_correctness.c:352:32: warning: format ‘%ld’ expects argument of type ‘long int’, but argument 5 has type ‘long long int’ [-Wformat=] 352 | printf("\t%llu.%09ld %llu.%09ld %llu.%09ld\n", | ~~~~^ | | | long int | %09lld 353 | (unsigned long long)start.tv_sec, start.tv_nsec, 354 | (unsigned long long)vdso.tv_sec, vdso.tv_nsec, | ~~~~~~~~~~~~ | | | long long int vdso_test_correctness.c:352:43: warning: format ‘%ld’ expects argument of type ‘long int’, but argument 7 has type ‘long long int’ [-Wformat=] The tv_sec member of __kernel_timespec is long long, both in uapi/linux/time_types.h and locally in vdso_test_correctness.c. Signed-off-by: Tobias Klauser Signed-off-by: Shuah Khan --- tools/testing/selftests/vDSO/vdso_test_correctness.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/vDSO/vdso_test_correctness.c b/tools/testing/selftests/vDSO/vdso_test_correctness.c index 5029ef9b228c3..c4aea794725a7 100644 --- a/tools/testing/selftests/vDSO/vdso_test_correctness.c +++ b/tools/testing/selftests/vDSO/vdso_test_correctness.c @@ -349,7 +349,7 @@ static void test_one_clock_gettime64(int clock, const char *name) return; } - printf("\t%llu.%09ld %llu.%09ld %llu.%09ld\n", + printf("\t%llu.%09lld %llu.%09lld %llu.%09lld\n", (unsigned long long)start.tv_sec, start.tv_nsec, (unsigned long long)vdso.tv_sec, vdso.tv_nsec, (unsigned long long)end.tv_sec, end.tv_nsec); -- GitLab From 7a6eb7c34a78498742b5f82543b7a68c1c443329 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 10 Dec 2020 18:52:33 +0000 Subject: [PATCH 1370/1894] selftests: Skip BPF seftests by default The BPF selftests have build time dependencies on cutting edge versions of tools in the BPF ecosystem including LLVM which are more involved to satisfy than more typical requirements like installing a package from your distribution. This causes issues for users looking at kselftest in as a whole who find that a default build of kselftest fails and that resolving this is time consuming and adds administrative overhead. The fast pace of BPF development and the need for a full BPF stack to do substantial development or validation work on the code mean that people working directly on it don't see a reasonable way to keep supporting older environments without causing problems with the usability of the BPF tests in BPF development so these requirements are unlikely to be relaxed in the immediate future. There is already support for skipping targets so in order to reduce the barrier to entry for people interested in kselftest as a whole let's use that to skip the BPF tests by default when people work with the top level kselftest build system. Users can still build the BPF selftests as part of the wider kselftest build by specifying SKIP_TARGETS, including setting an empty SKIP_TARGETS to build everything. They can also continue to build the BPF selftests individually in cases where they are specifically focused on BPF. This isn't ideal since it means people will need to take special steps to build the BPF tests but the dependencies mean that realistically this is already the case to some extent and it makes it easier for people to pick up and work with the other selftests which is hopefully a net win. Signed-off-by: Mark Brown Signed-off-by: Shuah Khan --- tools/testing/selftests/Makefile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index afbab4aeef3c8..8a917cb4426a0 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -77,8 +77,10 @@ TARGETS += zram TARGETS_HOTPLUG = cpu-hotplug TARGETS_HOTPLUG += memory-hotplug -# User can optionally provide a TARGETS skiplist. -SKIP_TARGETS ?= +# User can optionally provide a TARGETS skiplist. By default we skip +# BPF since it has cutting edge build time dependencies which require +# more effort to install. +SKIP_TARGETS ?= bpf ifneq ($(SKIP_TARGETS),) TMP := $(filter-out $(SKIP_TARGETS), $(TARGETS)) override TARGETS := $(TMP) -- GitLab From 8cbebc4118b5933b3ae6351ceb433f75ac6b7c6b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 4 Jan 2021 16:50:16 +0000 Subject: [PATCH 1371/1894] KVM: arm64: Replace KVM_ARM_PMU with HW_PERF_EVENTS KVM_ARM_PMU only existed for the benefit of 32bit ARM hosts, and makes no sense now that we are 64bit only. Get rid of it. Signed-off-by: Marc Zyngier --- arch/arm64/kvm/Kconfig | 8 -------- arch/arm64/kvm/Makefile | 2 +- include/kvm/arm_pmu.h | 2 +- 3 files changed, 2 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 043756db8f6ec..3964acf5451ea 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -49,14 +49,6 @@ if KVM source "virt/kvm/Kconfig" -config KVM_ARM_PMU - bool "Virtual Performance Monitoring Unit (PMU) support" - depends on HW_PERF_EVENTS - default y - help - Adds support for a virtual Performance Monitoring Unit (PMU) in - virtual machines. - endif # KVM endif # VIRTUALIZATION diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 60fd181df6243..13b017284bf96 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -24,4 +24,4 @@ kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \ vgic/vgic-mmio-v3.o vgic/vgic-kvm-device.o \ vgic/vgic-its.o vgic/vgic-debug.o -kvm-$(CONFIG_KVM_ARM_PMU) += pmu-emul.o +kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index fc85f50fa0e96..8dcb3e1477bc9 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -13,7 +13,7 @@ #define ARMV8_PMU_CYCLE_IDX (ARMV8_PMU_MAX_COUNTERS - 1) #define ARMV8_PMU_MAX_COUNTER_PAIRS ((ARMV8_PMU_MAX_COUNTERS + 1) >> 1) -#ifdef CONFIG_KVM_ARM_PMU +#ifdef CONFIG_HW_PERF_EVENTS struct kvm_pmc { u8 idx; /* index into the pmu->pmc array */ -- GitLab From 0b884fe71f9ee6a5df35e677154256ea2099ebb8 Mon Sep 17 00:00:00 2001 From: Chunyan Zhang Date: Mon, 14 Dec 2020 12:58:50 +0800 Subject: [PATCH 1372/1894] i2c: sprd: use a specific timeout to avoid system hang up issue If the i2c device SCL bus being pulled up due to some exception before message transfer done, the system cannot receive the completing interrupt signal any more, it would not exit waiting loop until MAX_SCHEDULE_TIMEOUT jiffies eclipse, that would make the system seemed hang up. To avoid that happen, this patch adds a specific timeout for message transfer. Fixes: 8b9ec0719834 ("i2c: Add Spreadtrum I2C controller driver") Signed-off-by: Linhua Xu Signed-off-by: Chunyan Zhang [wsa: changed errno to ETIMEDOUT] Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-sprd.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-sprd.c b/drivers/i2c/busses/i2c-sprd.c index 19cda6742423d..2917fecf6c80d 100644 --- a/drivers/i2c/busses/i2c-sprd.c +++ b/drivers/i2c/busses/i2c-sprd.c @@ -72,6 +72,8 @@ /* timeout (ms) for pm runtime autosuspend */ #define SPRD_I2C_PM_TIMEOUT 1000 +/* timeout (ms) for transfer message */ +#define I2C_XFER_TIMEOUT 1000 /* SPRD i2c data structure */ struct sprd_i2c { @@ -244,6 +246,7 @@ static int sprd_i2c_handle_msg(struct i2c_adapter *i2c_adap, struct i2c_msg *msg, bool is_last_msg) { struct sprd_i2c *i2c_dev = i2c_adap->algo_data; + unsigned long time_left; i2c_dev->msg = msg; i2c_dev->buf = msg->buf; @@ -273,7 +276,10 @@ static int sprd_i2c_handle_msg(struct i2c_adapter *i2c_adap, sprd_i2c_opt_start(i2c_dev); - wait_for_completion(&i2c_dev->complete); + time_left = wait_for_completion_timeout(&i2c_dev->complete, + msecs_to_jiffies(I2C_XFER_TIMEOUT)); + if (!time_left) + return -ETIMEDOUT; return i2c_dev->err; } -- GitLab From 0b3ea2a06de1f52ea30865e227e109a5fd3b6214 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 21 Dec 2020 14:42:25 +0100 Subject: [PATCH 1373/1894] i2c: i801: Fix the i2c-mux gpiod_lookup_table not being properly terminated gpiod_add_lookup_table() expects the gpiod_lookup_table->table passed to it to be terminated with a zero-ed out entry. So we need to allocate one more entry then we will use. Fixes: d308dfbf62ef ("i2c: mux/i801: Switch to use descriptor passing") Signed-off-by: Hans de Goede Reviewed-by: Mika Westerberg Acked-by: Jean Delvare Reviewed-by: Linus Walleij Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-i801.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index ae90713443fa6..877fe3733a42b 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -1449,7 +1449,7 @@ static int i801_add_mux(struct i801_priv *priv) /* Register GPIO descriptor lookup table */ lookup = devm_kzalloc(dev, - struct_size(lookup, table, mux_config->n_gpios), + struct_size(lookup, table, mux_config->n_gpios + 1), GFP_KERNEL); if (!lookup) return -ENOMEM; -- GitLab From cc07d72bf350b77faeffee1c37bc52197171473f Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 24 Sep 2020 13:14:52 -0400 Subject: [PATCH 1374/1894] dm raid: fix discard limits for raid1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Block core warned that discard_granularity was 0 for dm-raid with personality of raid1. Reason is that raid_io_hints() was incorrectly special-casing raid1 rather than raid0. Fix raid_io_hints() by removing discard limits settings for raid1. Check for raid0 instead. Fixes: 61697a6abd24a ("dm: eliminate 'split_discard_bios' flag from DM target interface") Cc: stable@vger.kernel.org Reported-by: Zdenek Kabelac Reported-by: Mikulas Patocka Reported-by: Stephan Bärwolf Signed-off-by: Mike Snitzer --- drivers/md/dm-raid.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 23c38777e8f63..cab12b2251bac 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -3729,10 +3729,10 @@ static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits) blk_limits_io_opt(limits, chunk_size_bytes * mddev_data_stripes(rs)); /* - * RAID1 and RAID10 personalities require bio splitting, - * RAID0/4/5/6 don't and process large discard bios properly. + * RAID0 and RAID10 personalities require bio splitting, + * RAID1/4/5/6 don't and process large discard bios properly. */ - if (rs_is_raid1(rs) || rs_is_raid10(rs)) { + if (rs_is_raid0(rs) || rs_is_raid10(rs)) { limits->discard_granularity = chunk_size_bytes; limits->max_discard_sectors = rs->md.chunk_sectors; } -- GitLab From f7b347acb5f6c29d9229bb64893d8b6a2c7949fb Mon Sep 17 00:00:00 2001 From: Anthony Iliopoulos Date: Mon, 14 Dec 2020 18:18:11 +0100 Subject: [PATCH 1375/1894] dm integrity: select CRYPTO_SKCIPHER The integrity target relies on skcipher for encryption/decryption, but certain kernel configurations may not enable CRYPTO_SKCIPHER, leading to compilation errors due to unresolved symbols. Explicitly select CRYPTO_SKCIPHER for DM_INTEGRITY, since it is unconditionally dependent on it. Signed-off-by: Anthony Iliopoulos Signed-off-by: Mike Snitzer --- drivers/md/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index b7e2d96666142..a378a89670949 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -605,6 +605,7 @@ config DM_INTEGRITY select BLK_DEV_INTEGRITY select DM_BUFIO select CRYPTO + select CRYPTO_SKCIPHER select ASYNC_XOR help This device-mapper target emulates a block device that has -- GitLab From b690bd546b227c32b860dae985a18bed8aa946fe Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Jan 2021 22:40:51 +0100 Subject: [PATCH 1376/1894] dm zoned: select CONFIG_CRC32 Without crc32 support, this driver fails to link: arm-linux-gnueabi-ld: drivers/md/dm-zoned-metadata.o: in function `dmz_write_sb': dm-zoned-metadata.c:(.text+0xe98): undefined reference to `crc32_le' arm-linux-gnueabi-ld: drivers/md/dm-zoned-metadata.o: in function `dmz_check_sb': dm-zoned-metadata.c:(.text+0x7978): undefined reference to `crc32_le' Fixes: 3b1a94c88b79 ("dm zoned: drive-managed zoned block device target") Signed-off-by: Arnd Bergmann Reviewed-by: Damien Le Moal Signed-off-by: Mike Snitzer --- drivers/md/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index a378a89670949..9e44c09f64108 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -623,6 +623,7 @@ config DM_ZONED tristate "Drive-managed zoned block device target support" depends on BLK_DEV_DM depends on BLK_DEV_ZONED + select CRC32 help This device-mapper target takes a host-managed or host-aware zoned block device and exposes most of its capacity as a regular block -- GitLab From 8abec36d1274bbd5ae8f36f3658b9abb3db56c31 Mon Sep 17 00:00:00 2001 From: Ignat Korchagin Date: Mon, 4 Jan 2021 14:59:47 +0000 Subject: [PATCH 1377/1894] dm crypt: do not wait for backlogged crypto request completion in softirq Commit 39d42fa96ba1 ("dm crypt: add flags to optionally bypass kcryptd workqueues") made it possible for some code paths in dm-crypt to be executed in softirq context, when the underlying driver processes IO requests in interrupt/softirq context. When Crypto API backlogs a crypto request, dm-crypt uses wait_for_completion to avoid sending further requests to an already overloaded crypto driver. However, if the code is executing in softirq context, we might get the following stacktrace: [ 210.235213][ C0] BUG: scheduling while atomic: fio/2602/0x00000102 [ 210.236701][ C0] Modules linked in: [ 210.237566][ C0] CPU: 0 PID: 2602 Comm: fio Tainted: G W 5.10.0+ #50 [ 210.239292][ C0] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.0.0 02/06/2015 [ 210.241233][ C0] Call Trace: [ 210.241946][ C0] [ 210.242561][ C0] dump_stack+0x7d/0xa3 [ 210.243466][ C0] __schedule_bug.cold+0xb3/0xc2 [ 210.244539][ C0] __schedule+0x156f/0x20d0 [ 210.245518][ C0] ? io_schedule_timeout+0x140/0x140 [ 210.246660][ C0] schedule+0xd0/0x270 [ 210.247541][ C0] schedule_timeout+0x1fb/0x280 [ 210.248586][ C0] ? usleep_range+0x150/0x150 [ 210.249624][ C0] ? unpoison_range+0x3a/0x60 [ 210.250632][ C0] ? ____kasan_kmalloc.constprop.0+0x82/0xa0 [ 210.251949][ C0] ? unpoison_range+0x3a/0x60 [ 210.252958][ C0] ? __prepare_to_swait+0xa7/0x190 [ 210.254067][ C0] do_wait_for_common+0x2ab/0x370 [ 210.255158][ C0] ? usleep_range+0x150/0x150 [ 210.256192][ C0] ? bit_wait_io_timeout+0x160/0x160 [ 210.257358][ C0] ? blk_update_request+0x757/0x1150 [ 210.258582][ C0] ? _raw_spin_lock_irq+0x82/0xd0 [ 210.259674][ C0] ? _raw_read_unlock_irqrestore+0x30/0x30 [ 210.260917][ C0] wait_for_completion+0x4c/0x90 [ 210.261971][ C0] crypt_convert+0x19a6/0x4c00 [ 210.263033][ C0] ? _raw_spin_lock_irqsave+0x87/0xe0 [ 210.264193][ C0] ? kasan_set_track+0x1c/0x30 [ 210.265191][ C0] ? crypt_iv_tcw_ctr+0x4a0/0x4a0 [ 210.266283][ C0] ? kmem_cache_free+0x104/0x470 [ 210.267363][ C0] ? crypt_endio+0x91/0x180 [ 210.268327][ C0] kcryptd_crypt_read_convert+0x30e/0x420 [ 210.269565][ C0] blk_update_request+0x757/0x1150 [ 210.270563][ C0] blk_mq_end_request+0x4b/0x480 [ 210.271680][ C0] blk_done_softirq+0x21d/0x340 [ 210.272775][ C0] ? _raw_spin_lock+0x81/0xd0 [ 210.273847][ C0] ? blk_mq_stop_hw_queue+0x30/0x30 [ 210.275031][ C0] ? _raw_read_lock_irq+0x40/0x40 [ 210.276182][ C0] __do_softirq+0x190/0x611 [ 210.277203][ C0] ? handle_edge_irq+0x221/0xb60 [ 210.278340][ C0] asm_call_irq_on_stack+0x12/0x20 [ 210.279514][ C0] [ 210.280164][ C0] do_softirq_own_stack+0x37/0x40 [ 210.281281][ C0] irq_exit_rcu+0x110/0x1b0 [ 210.282286][ C0] common_interrupt+0x74/0x120 [ 210.283376][ C0] asm_common_interrupt+0x1e/0x40 [ 210.284496][ C0] RIP: 0010:_aesni_enc1+0x65/0xb0 Fix this by making crypt_convert function reentrant from the point of a single bio and make dm-crypt defer further bio processing to a workqueue, if Crypto API backlogs a request in interrupt context. Fixes: 39d42fa96ba1 ("dm crypt: add flags to optionally bypass kcryptd workqueues") Cc: stable@vger.kernel.org # v5.9+ Signed-off-by: Ignat Korchagin Acked-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 103 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 98 insertions(+), 5 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 53791138d78bf..527b1475b7a07 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1529,13 +1529,19 @@ static void crypt_free_req(struct crypt_config *cc, void *req, struct bio *base_ * Encrypt / decrypt data from one bio to another one (can be the same one) */ static blk_status_t crypt_convert(struct crypt_config *cc, - struct convert_context *ctx, bool atomic) + struct convert_context *ctx, bool atomic, bool reset_pending) { unsigned int tag_offset = 0; unsigned int sector_step = cc->sector_size >> SECTOR_SHIFT; int r; - atomic_set(&ctx->cc_pending, 1); + /* + * if reset_pending is set we are dealing with the bio for the first time, + * else we're continuing to work on the previous bio, so don't mess with + * the cc_pending counter + */ + if (reset_pending) + atomic_set(&ctx->cc_pending, 1); while (ctx->iter_in.bi_size && ctx->iter_out.bi_size) { @@ -1553,7 +1559,25 @@ static blk_status_t crypt_convert(struct crypt_config *cc, * but the driver request queue is full, let's wait. */ case -EBUSY: - wait_for_completion(&ctx->restart); + if (in_interrupt()) { + if (try_wait_for_completion(&ctx->restart)) { + /* + * we don't have to block to wait for completion, + * so proceed + */ + } else { + /* + * we can't wait for completion without blocking + * exit and continue processing in a workqueue + */ + ctx->r.req = NULL; + ctx->cc_sector += sector_step; + tag_offset++; + return BLK_STS_DEV_RESOURCE; + } + } else { + wait_for_completion(&ctx->restart); + } reinit_completion(&ctx->restart); fallthrough; /* @@ -1945,6 +1969,37 @@ static bool kcryptd_crypt_write_inline(struct crypt_config *cc, } } +static void kcryptd_crypt_write_continue(struct work_struct *work) +{ + struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work); + struct crypt_config *cc = io->cc; + struct convert_context *ctx = &io->ctx; + int crypt_finished; + sector_t sector = io->sector; + blk_status_t r; + + wait_for_completion(&ctx->restart); + reinit_completion(&ctx->restart); + + r = crypt_convert(cc, &io->ctx, true, false); + if (r) + io->error = r; + crypt_finished = atomic_dec_and_test(&ctx->cc_pending); + if (!crypt_finished && kcryptd_crypt_write_inline(cc, ctx)) { + /* Wait for completion signaled by kcryptd_async_done() */ + wait_for_completion(&ctx->restart); + crypt_finished = 1; + } + + /* Encryption was already finished, submit io now */ + if (crypt_finished) { + kcryptd_crypt_write_io_submit(io, 0); + io->sector = sector; + } + + crypt_dec_pending(io); +} + static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) { struct crypt_config *cc = io->cc; @@ -1973,7 +2028,17 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) crypt_inc_pending(io); r = crypt_convert(cc, ctx, - test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags)); + test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags), true); + /* + * Crypto API backlogged the request, because its queue was full + * and we're in softirq context, so continue from a workqueue + * (TODO: is it actually possible to be in softirq in the write path?) + */ + if (r == BLK_STS_DEV_RESOURCE) { + INIT_WORK(&io->work, kcryptd_crypt_write_continue); + queue_work(cc->crypt_queue, &io->work); + return; + } if (r) io->error = r; crypt_finished = atomic_dec_and_test(&ctx->cc_pending); @@ -1998,6 +2063,25 @@ static void kcryptd_crypt_read_done(struct dm_crypt_io *io) crypt_dec_pending(io); } +static void kcryptd_crypt_read_continue(struct work_struct *work) +{ + struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work); + struct crypt_config *cc = io->cc; + blk_status_t r; + + wait_for_completion(&io->ctx.restart); + reinit_completion(&io->ctx.restart); + + r = crypt_convert(cc, &io->ctx, true, false); + if (r) + io->error = r; + + if (atomic_dec_and_test(&io->ctx.cc_pending)) + kcryptd_crypt_read_done(io); + + crypt_dec_pending(io); +} + static void kcryptd_crypt_read_convert(struct dm_crypt_io *io) { struct crypt_config *cc = io->cc; @@ -2009,7 +2093,16 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io) io->sector); r = crypt_convert(cc, &io->ctx, - test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags)); + test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true); + /* + * Crypto API backlogged the request, because its queue was full + * and we're in softirq context, so continue from a workqueue + */ + if (r == BLK_STS_DEV_RESOURCE) { + INIT_WORK(&io->work, kcryptd_crypt_read_continue); + queue_work(cc->crypt_queue, &io->work); + return; + } if (r) io->error = r; -- GitLab From d68b29584c25dbacd01ed44a3e45abb35353f1de Mon Sep 17 00:00:00 2001 From: Ignat Korchagin Date: Mon, 4 Jan 2021 14:59:48 +0000 Subject: [PATCH 1378/1894] dm crypt: use GFP_ATOMIC when allocating crypto requests from softirq Commit 39d42fa96ba1 ("dm crypt: add flags to optionally bypass kcryptd workqueues") made it possible for some code paths in dm-crypt to be executed in softirq context, when the underlying driver processes IO requests in interrupt/softirq context. In this case sometimes when allocating a new crypto request we may get a stacktrace like below: [ 210.103008][ C0] BUG: sleeping function called from invalid context at mm/mempool.c:381 [ 210.104746][ C0] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 2602, name: fio [ 210.106599][ C0] CPU: 0 PID: 2602 Comm: fio Tainted: G W 5.10.0+ #50 [ 210.108331][ C0] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.0.0 02/06/2015 [ 210.110212][ C0] Call Trace: [ 210.110921][ C0] [ 210.111527][ C0] dump_stack+0x7d/0xa3 [ 210.112411][ C0] ___might_sleep.cold+0x122/0x151 [ 210.113527][ C0] mempool_alloc+0x16b/0x2f0 [ 210.114524][ C0] ? __queue_work+0x515/0xde0 [ 210.115553][ C0] ? mempool_resize+0x700/0x700 [ 210.116586][ C0] ? crypt_endio+0x91/0x180 [ 210.117479][ C0] ? blk_update_request+0x757/0x1150 [ 210.118513][ C0] ? blk_mq_end_request+0x4b/0x480 [ 210.119572][ C0] ? blk_done_softirq+0x21d/0x340 [ 210.120628][ C0] ? __do_softirq+0x190/0x611 [ 210.121626][ C0] crypt_convert+0x29f9/0x4c00 [ 210.122668][ C0] ? _raw_spin_lock_irqsave+0x87/0xe0 [ 210.123824][ C0] ? kasan_set_track+0x1c/0x30 [ 210.124858][ C0] ? crypt_iv_tcw_ctr+0x4a0/0x4a0 [ 210.125930][ C0] ? kmem_cache_free+0x104/0x470 [ 210.126973][ C0] ? crypt_endio+0x91/0x180 [ 210.127947][ C0] kcryptd_crypt_read_convert+0x30e/0x420 [ 210.129165][ C0] blk_update_request+0x757/0x1150 [ 210.130231][ C0] blk_mq_end_request+0x4b/0x480 [ 210.131294][ C0] blk_done_softirq+0x21d/0x340 [ 210.132332][ C0] ? _raw_spin_lock+0x81/0xd0 [ 210.133289][ C0] ? blk_mq_stop_hw_queue+0x30/0x30 [ 210.134399][ C0] ? _raw_read_lock_irq+0x40/0x40 [ 210.135458][ C0] __do_softirq+0x190/0x611 [ 210.136409][ C0] ? handle_edge_irq+0x221/0xb60 [ 210.137447][ C0] asm_call_irq_on_stack+0x12/0x20 [ 210.138507][ C0] [ 210.139118][ C0] do_softirq_own_stack+0x37/0x40 [ 210.140191][ C0] irq_exit_rcu+0x110/0x1b0 [ 210.141151][ C0] common_interrupt+0x74/0x120 [ 210.142171][ C0] asm_common_interrupt+0x1e/0x40 Fix this by allocating crypto requests with GFP_ATOMIC mask in interrupt context. Fixes: 39d42fa96ba1 ("dm crypt: add flags to optionally bypass kcryptd workqueues") Cc: stable@vger.kernel.org # v5.9+ Reported-by: Maciej S. Szmigiero Signed-off-by: Ignat Korchagin Acked-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 527b1475b7a07..d2d6d3000f5bd 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1454,13 +1454,16 @@ static int crypt_convert_block_skcipher(struct crypt_config *cc, static void kcryptd_async_done(struct crypto_async_request *async_req, int error); -static void crypt_alloc_req_skcipher(struct crypt_config *cc, +static int crypt_alloc_req_skcipher(struct crypt_config *cc, struct convert_context *ctx) { unsigned key_index = ctx->cc_sector & (cc->tfms_count - 1); - if (!ctx->r.req) - ctx->r.req = mempool_alloc(&cc->req_pool, GFP_NOIO); + if (!ctx->r.req) { + ctx->r.req = mempool_alloc(&cc->req_pool, in_interrupt() ? GFP_ATOMIC : GFP_NOIO); + if (!ctx->r.req) + return -ENOMEM; + } skcipher_request_set_tfm(ctx->r.req, cc->cipher_tfm.tfms[key_index]); @@ -1471,13 +1474,18 @@ static void crypt_alloc_req_skcipher(struct crypt_config *cc, skcipher_request_set_callback(ctx->r.req, CRYPTO_TFM_REQ_MAY_BACKLOG, kcryptd_async_done, dmreq_of_req(cc, ctx->r.req)); + + return 0; } -static void crypt_alloc_req_aead(struct crypt_config *cc, +static int crypt_alloc_req_aead(struct crypt_config *cc, struct convert_context *ctx) { - if (!ctx->r.req_aead) - ctx->r.req_aead = mempool_alloc(&cc->req_pool, GFP_NOIO); + if (!ctx->r.req) { + ctx->r.req = mempool_alloc(&cc->req_pool, in_interrupt() ? GFP_ATOMIC : GFP_NOIO); + if (!ctx->r.req) + return -ENOMEM; + } aead_request_set_tfm(ctx->r.req_aead, cc->cipher_tfm.tfms_aead[0]); @@ -1488,15 +1496,17 @@ static void crypt_alloc_req_aead(struct crypt_config *cc, aead_request_set_callback(ctx->r.req_aead, CRYPTO_TFM_REQ_MAY_BACKLOG, kcryptd_async_done, dmreq_of_req(cc, ctx->r.req_aead)); + + return 0; } -static void crypt_alloc_req(struct crypt_config *cc, +static int crypt_alloc_req(struct crypt_config *cc, struct convert_context *ctx) { if (crypt_integrity_aead(cc)) - crypt_alloc_req_aead(cc, ctx); + return crypt_alloc_req_aead(cc, ctx); else - crypt_alloc_req_skcipher(cc, ctx); + return crypt_alloc_req_skcipher(cc, ctx); } static void crypt_free_req_skcipher(struct crypt_config *cc, @@ -1545,7 +1555,12 @@ static blk_status_t crypt_convert(struct crypt_config *cc, while (ctx->iter_in.bi_size && ctx->iter_out.bi_size) { - crypt_alloc_req(cc, ctx); + r = crypt_alloc_req(cc, ctx); + if (r) { + complete(&ctx->restart); + return BLK_STS_DEV_RESOURCE; + } + atomic_inc(&ctx->cc_pending); if (crypt_integrity_aead(cc)) -- GitLab From a0a6df9afcaf439a6b4c88a3b522e3d05fdef46f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 4 Jan 2021 15:25:34 -0500 Subject: [PATCH 1379/1894] umount(2): move the flag validity checks first Unfortunately, there's userland code that used to rely upon these checks being done before anything else to check for UMOUNT_NOFOLLOW support. That broke in 41525f56e256 ("fs: refactor ksys_umount"). Separate those from the rest of checks and move them to ksys_umount(); unlike everything else in there, this can be sanely done there. Reported-by: Sargun Dhillon Fixes: 41525f56e256 ("fs: refactor ksys_umount") Signed-off-by: Al Viro --- fs/namespace.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index d2db7dfe232b3..9d33909d0f9e3 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1713,8 +1713,6 @@ static int can_umount(const struct path *path, int flags) { struct mount *mnt = real_mount(path->mnt); - if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW)) - return -EINVAL; if (!may_mount()) return -EPERM; if (path->dentry != path->mnt->mnt_root) @@ -1728,6 +1726,7 @@ static int can_umount(const struct path *path, int flags) return 0; } +// caller is responsible for flags being sane int path_umount(struct path *path, int flags) { struct mount *mnt = real_mount(path->mnt); @@ -1749,6 +1748,10 @@ static int ksys_umount(char __user *name, int flags) struct path path; int ret; + // basic validity checks done first + if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW)) + return -EINVAL; + if (!(flags & UMOUNT_NOFOLLOW)) lookup_flags |= LOOKUP_FOLLOW; ret = user_path_at(AT_FDCWD, name, lookup_flags, &path); -- GitLab From d9e44981739a96f1a468c13bbbd54ace378caf1c Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 28 Dec 2020 15:21:36 +0000 Subject: [PATCH 1380/1894] bareudp: set NETIF_F_LLTX flag Like other tunneling interfaces, the bareudp doesn't need TXLOCK. So, It is good to set the NETIF_F_LLTX flag to improve performance and to avoid lockdep's false-positive warning. Test commands: ip netns add A ip netns add B ip link add veth0 netns A type veth peer name veth1 netns B ip netns exec A ip link set veth0 up ip netns exec A ip a a 10.0.0.1/24 dev veth0 ip netns exec B ip link set veth1 up ip netns exec B ip a a 10.0.0.2/24 dev veth1 for i in {2..1} do let A=$i-1 ip netns exec A ip link add bareudp$i type bareudp \ dstport $i ethertype ip ip netns exec A ip link set bareudp$i up ip netns exec A ip a a 10.0.$i.1/24 dev bareudp$i ip netns exec A ip r a 10.0.$i.2 encap ip src 10.0.$A.1 \ dst 10.0.$A.2 via 10.0.$i.2 dev bareudp$i ip netns exec B ip link add bareudp$i type bareudp \ dstport $i ethertype ip ip netns exec B ip link set bareudp$i up ip netns exec B ip a a 10.0.$i.2/24 dev bareudp$i ip netns exec B ip r a 10.0.$i.1 encap ip src 10.0.$A.2 \ dst 10.0.$A.1 via 10.0.$i.1 dev bareudp$i done ip netns exec A ping 10.0.2.2 Splat looks like: [ 96.992803][ T822] ============================================ [ 96.993954][ T822] WARNING: possible recursive locking detected [ 96.995102][ T822] 5.10.0+ #819 Not tainted [ 96.995927][ T822] -------------------------------------------- [ 96.997091][ T822] ping/822 is trying to acquire lock: [ 96.998083][ T822] ffff88810f753898 (_xmit_NONE#2){+.-.}-{2:2}, at: __dev_queue_xmit+0x1f52/0x2960 [ 96.999813][ T822] [ 96.999813][ T822] but task is already holding lock: [ 97.001192][ T822] ffff88810c385498 (_xmit_NONE#2){+.-.}-{2:2}, at: __dev_queue_xmit+0x1f52/0x2960 [ 97.002908][ T822] [ 97.002908][ T822] other info that might help us debug this: [ 97.004401][ T822] Possible unsafe locking scenario: [ 97.004401][ T822] [ 97.005784][ T822] CPU0 [ 97.006407][ T822] ---- [ 97.007010][ T822] lock(_xmit_NONE#2); [ 97.007779][ T822] lock(_xmit_NONE#2); [ 97.008550][ T822] [ 97.008550][ T822] *** DEADLOCK *** [ 97.008550][ T822] [ 97.010057][ T822] May be due to missing lock nesting notation [ 97.010057][ T822] [ 97.011594][ T822] 7 locks held by ping/822: [ 97.012426][ T822] #0: ffff888109a144f0 (sk_lock-AF_INET){+.+.}-{0:0}, at: raw_sendmsg+0x12f7/0x2b00 [ 97.014191][ T822] #1: ffffffffbce2f5a0 (rcu_read_lock_bh){....}-{1:2}, at: ip_finish_output2+0x249/0x2020 [ 97.016045][ T822] #2: ffffffffbce2f5a0 (rcu_read_lock_bh){....}-{1:2}, at: __dev_queue_xmit+0x1fd/0x2960 [ 97.017897][ T822] #3: ffff88810c385498 (_xmit_NONE#2){+.-.}-{2:2}, at: __dev_queue_xmit+0x1f52/0x2960 [ 97.019684][ T822] #4: ffffffffbce2f600 (rcu_read_lock){....}-{1:2}, at: bareudp_xmit+0x31b/0x3690 [bareudp] [ 97.021573][ T822] #5: ffffffffbce2f5a0 (rcu_read_lock_bh){....}-{1:2}, at: ip_finish_output2+0x249/0x2020 [ 97.023424][ T822] #6: ffffffffbce2f5a0 (rcu_read_lock_bh){....}-{1:2}, at: __dev_queue_xmit+0x1fd/0x2960 [ 97.025259][ T822] [ 97.025259][ T822] stack backtrace: [ 97.026349][ T822] CPU: 3 PID: 822 Comm: ping Not tainted 5.10.0+ #819 [ 97.027609][ T822] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 [ 97.029407][ T822] Call Trace: [ 97.030015][ T822] dump_stack+0x99/0xcb [ 97.030783][ T822] __lock_acquire.cold.77+0x149/0x3a9 [ 97.031773][ T822] ? stack_trace_save+0x81/0xa0 [ 97.032661][ T822] ? register_lock_class+0x1910/0x1910 [ 97.033673][ T822] ? register_lock_class+0x1910/0x1910 [ 97.034679][ T822] ? rcu_read_lock_sched_held+0x91/0xc0 [ 97.035697][ T822] ? rcu_read_lock_bh_held+0xa0/0xa0 [ 97.036690][ T822] lock_acquire+0x1b2/0x730 [ 97.037515][ T822] ? __dev_queue_xmit+0x1f52/0x2960 [ 97.038466][ T822] ? check_flags+0x50/0x50 [ 97.039277][ T822] ? netif_skb_features+0x296/0x9c0 [ 97.040226][ T822] ? validate_xmit_skb+0x29/0xb10 [ 97.041151][ T822] _raw_spin_lock+0x30/0x70 [ 97.041977][ T822] ? __dev_queue_xmit+0x1f52/0x2960 [ 97.042927][ T822] __dev_queue_xmit+0x1f52/0x2960 [ 97.043852][ T822] ? netdev_core_pick_tx+0x290/0x290 [ 97.044824][ T822] ? mark_held_locks+0xb7/0x120 [ 97.045712][ T822] ? lockdep_hardirqs_on_prepare+0x12c/0x3e0 [ 97.046824][ T822] ? __local_bh_enable_ip+0xa5/0xf0 [ 97.047771][ T822] ? ___neigh_create+0x12a8/0x1eb0 [ 97.048710][ T822] ? trace_hardirqs_on+0x41/0x120 [ 97.049626][ T822] ? ___neigh_create+0x12a8/0x1eb0 [ 97.050556][ T822] ? __local_bh_enable_ip+0xa5/0xf0 [ 97.051509][ T822] ? ___neigh_create+0x12a8/0x1eb0 [ 97.052443][ T822] ? check_chain_key+0x244/0x5f0 [ 97.053352][ T822] ? rcu_read_lock_bh_held+0x56/0xa0 [ 97.054317][ T822] ? ip_finish_output2+0x6ea/0x2020 [ 97.055263][ T822] ? pneigh_lookup+0x410/0x410 [ 97.056135][ T822] ip_finish_output2+0x6ea/0x2020 [ ... ] Acked-by: Guillaume Nault Fixes: 571912c69f0e ("net: UDP tunnel encapsulation module for tunnelling different protocols like MPLS, IP, NSH etc.") Signed-off-by: Taehee Yoo Link: https://lore.kernel.org/r/20201228152136.24215-1-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/bareudp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index 85ebd2b7e4461..aea10196c222c 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -534,6 +534,7 @@ static void bareudp_setup(struct net_device *dev) SET_NETDEV_DEVTYPE(dev, &bareudp_type); dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM; dev->features |= NETIF_F_RXCSUM; + dev->features |= NETIF_F_LLTX; dev->features |= NETIF_F_GSO_SOFTWARE; dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM; dev->hw_features |= NETIF_F_GSO_SOFTWARE; -- GitLab From 10ad3e998fa0c25315f27cf3002ff8b02dc31c38 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 28 Dec 2020 15:21:46 +0000 Subject: [PATCH 1381/1894] bareudp: Fix use of incorrect min_headroom size In the bareudp6_xmit_skb(), it calculates min_headroom. At that point, it uses struct iphdr, but it's not correct. So panic could occur. The struct ipv6hdr should be used. Test commands: ip netns add A ip netns add B ip link add veth0 netns A type veth peer name veth1 netns B ip netns exec A ip link set veth0 up ip netns exec A ip a a 2001:db8:0::1/64 dev veth0 ip netns exec B ip link set veth1 up ip netns exec B ip a a 2001:db8:0::2/64 dev veth1 for i in {10..1} do let A=$i-1 ip netns exec A ip link add bareudp$i type bareudp dstport $i \ ethertype 0x86dd ip netns exec A ip link set bareudp$i up ip netns exec A ip -6 a a 2001:db8:$i::1/64 dev bareudp$i ip netns exec A ip -6 r a 2001:db8:$i::2 encap ip6 src \ 2001:db8:$A::1 dst 2001:db8:$A::2 via 2001:db8:$i::2 \ dev bareudp$i ip netns exec B ip link add bareudp$i type bareudp dstport $i \ ethertype 0x86dd ip netns exec B ip link set bareudp$i up ip netns exec B ip -6 a a 2001:db8:$i::2/64 dev bareudp$i ip netns exec B ip -6 r a 2001:db8:$i::1 encap ip6 src \ 2001:db8:$A::2 dst 2001:db8:$A::1 via 2001:db8:$i::1 \ dev bareudp$i done ip netns exec A ping 2001:db8:7::2 Splat looks like: [ 66.436679][ C2] skbuff: skb_under_panic: text:ffffffff928614c8 len:454 put:14 head:ffff88810abb4000 data:ffff88810abb3ffa tail:0x1c0 end:0x3ec0 dev:veth0 [ 66.441626][ C2] ------------[ cut here ]------------ [ 66.443458][ C2] kernel BUG at net/core/skbuff.c:109! [ 66.445313][ C2] invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI [ 66.447606][ C2] CPU: 2 PID: 913 Comm: ping Not tainted 5.10.0+ #819 [ 66.450251][ C2] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 [ 66.453713][ C2] RIP: 0010:skb_panic+0x15d/0x15f [ 66.455345][ C2] Code: 98 fe 4c 8b 4c 24 10 53 8b 4d 70 45 89 e0 48 c7 c7 60 8b 78 93 41 57 41 56 41 55 48 8b 54 24 20 48 8b 74 24 28 e8 b5 40 f9 ff <0f> 0b 48 8b 6c 24 20 89 34 24 e8 08 c9 98 fe 8b 34 24 48 c7 c1 80 [ 66.462314][ C2] RSP: 0018:ffff888119209648 EFLAGS: 00010286 [ 66.464281][ C2] RAX: 0000000000000089 RBX: ffff888003159000 RCX: 0000000000000000 [ 66.467216][ C2] RDX: 0000000000000089 RSI: 0000000000000008 RDI: ffffed10232412c0 [ 66.469768][ C2] RBP: ffff88810a53d440 R08: ffffed102328018d R09: ffffed102328018d [ 66.472297][ C2] R10: ffff888119400c67 R11: ffffed102328018c R12: 000000000000000e [ 66.474833][ C2] R13: ffff88810abb3ffa R14: 00000000000001c0 R15: 0000000000003ec0 [ 66.477361][ C2] FS: 00007f37c0c72f00(0000) GS:ffff888119200000(0000) knlGS:0000000000000000 [ 66.480214][ C2] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 66.482296][ C2] CR2: 000055a058808570 CR3: 000000011039e002 CR4: 00000000003706e0 [ 66.484811][ C2] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 66.487793][ C2] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 66.490424][ C2] Call Trace: [ 66.491469][ C2] [ 66.492374][ C2] ? eth_header+0x28/0x190 [ 66.494054][ C2] ? eth_header+0x28/0x190 [ 66.495401][ C2] skb_push.cold.99+0x22/0x22 [ 66.496700][ C2] eth_header+0x28/0x190 [ 66.497867][ C2] neigh_resolve_output+0x3de/0x720 [ 66.499615][ C2] ? __neigh_update+0x7e8/0x20a0 [ 66.501176][ C2] __neigh_update+0x8bd/0x20a0 [ 66.502749][ C2] ndisc_update+0x34/0xc0 [ 66.504010][ C2] ndisc_recv_na+0x8da/0xb80 [ 66.505041][ C2] ? pndisc_redo+0x20/0x20 [ 66.505888][ C2] ? rcu_read_lock_sched_held+0xc0/0xc0 [ 66.506965][ C2] ndisc_rcv+0x3a0/0x470 [ 66.507797][ C2] icmpv6_rcv+0xad9/0x1b00 [ 66.508645][ C2] ip6_protocol_deliver_rcu+0xcd6/0x1560 [ 66.509719][ C2] ip6_input_finish+0x5b/0xf0 [ 66.510615][ C2] ip6_input+0xcd/0x2d0 [ 66.511406][ C2] ? ip6_input_finish+0xf0/0xf0 [ 66.512327][ C2] ? rcu_read_lock_held+0x91/0xa0 [ 66.513279][ C2] ? ip6_protocol_deliver_rcu+0x1560/0x1560 [ 66.514414][ C2] ipv6_rcv+0xe8/0x300 [ ... ] Acked-by: Guillaume Nault Fixes: 571912c69f0e ("net: UDP tunnel encapsulation module for tunnelling different protocols like MPLS, IP, NSH etc.") Signed-off-by: Taehee Yoo Link: https://lore.kernel.org/r/20201228152146.24270-1-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/bareudp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index aea10196c222c..708171c0d6281 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -380,7 +380,7 @@ static int bareudp6_xmit_skb(struct sk_buff *skb, struct net_device *dev, goto free_dst; min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len + - BAREUDP_BASE_HLEN + info->options_len + sizeof(struct iphdr); + BAREUDP_BASE_HLEN + info->options_len + sizeof(struct ipv6hdr); err = skb_cow_head(skb, min_headroom); if (unlikely(err)) -- GitLab From 01e31bea7e622f1890c274f4aaaaf8bccd296aa5 Mon Sep 17 00:00:00 2001 From: Yunjian Wang Date: Tue, 29 Dec 2020 10:01:48 +0800 Subject: [PATCH 1382/1894] vhost_net: fix ubuf refcount incorrectly when sendmsg fails Currently the vhost_zerocopy_callback() maybe be called to decrease the refcount when sendmsg fails in tun. The error handling in vhost handle_tx_zerocopy() will try to decrease the same refcount again. This is wrong. To fix this issue, we only call vhost_net_ubuf_put() when vq->heads[nvq->desc].len == VHOST_DMA_IN_PROGRESS. Fixes: bab632d69ee4 ("vhost: vhost TX zero-copy support") Signed-off-by: Yunjian Wang Acked-by: Willem de Bruijn Acked-by: Michael S. Tsirkin Acked-by: Jason Wang Link: https://lore.kernel.org/r/1609207308-20544-1-git-send-email-wangyunjian@huawei.com Signed-off-by: Jakub Kicinski --- drivers/vhost/net.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 531a00d703cdf..c8784dfafdd73 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -863,6 +863,7 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock) size_t len, total_len = 0; int err; struct vhost_net_ubuf_ref *ubufs; + struct ubuf_info *ubuf; bool zcopy_used; int sent_pkts = 0; @@ -895,9 +896,7 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock) /* use msg_control to pass vhost zerocopy ubuf info to skb */ if (zcopy_used) { - struct ubuf_info *ubuf; ubuf = nvq->ubuf_info + nvq->upend_idx; - vq->heads[nvq->upend_idx].id = cpu_to_vhost32(vq, head); vq->heads[nvq->upend_idx].len = VHOST_DMA_IN_PROGRESS; ubuf->callback = vhost_zerocopy_callback; @@ -927,7 +926,8 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock) err = sock->ops->sendmsg(sock, &msg, len); if (unlikely(err < 0)) { if (zcopy_used) { - vhost_net_ubuf_put(ubufs); + if (vq->heads[ubuf->desc].len == VHOST_DMA_IN_PROGRESS) + vhost_net_ubuf_put(ubufs); nvq->upend_idx = ((unsigned)nvq->upend_idx - 1) % UIO_MAXIOV; } -- GitLab From 17e94567c57df3d9609e6bacaed9247c4f2629e2 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Tue, 29 Dec 2020 11:08:38 +0200 Subject: [PATCH 1383/1894] docs: networking: packet_mmap: fix formatting for C macros The citation of macro definitions should appear in a code block. Signed-off-by: Baruch Siach Link: https://lore.kernel.org/r/5cb47005e7a59b64299e038827e295822193384c.1609232919.git.baruch@tkos.co.il Signed-off-by: Jakub Kicinski --- Documentation/networking/packet_mmap.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/packet_mmap.rst b/Documentation/networking/packet_mmap.rst index 6c009ceb11832..f3646c80b019c 100644 --- a/Documentation/networking/packet_mmap.rst +++ b/Documentation/networking/packet_mmap.rst @@ -437,7 +437,7 @@ and the following flags apply: Capture process ^^^^^^^^^^^^^^^ - from include/linux/if_packet.h +From include/linux/if_packet.h:: #define TP_STATUS_COPY (1 << 1) #define TP_STATUS_LOSING (1 << 2) -- GitLab From e4da63cda51f17fa1e86a10e84d47d692932530d Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Tue, 29 Dec 2020 11:08:39 +0200 Subject: [PATCH 1384/1894] docs: networking: packet_mmap: fix old config reference Before commit 889b8f964f2f ("packet: Kill CONFIG_PACKET_MMAP.") there used to be a CONFIG_PACKET_MMAP config symbol that depended on CONFIG_PACKET. The text still implies that PACKET_MMAP can be disabled. Remove that from the text, as well as reference to old kernel versions. Also, drop reference to broken link to information for pre 2.6.5 kernels. Make a slight working improvement (s/In/On/) while at it. Signed-off-by: Baruch Siach Link: https://lore.kernel.org/r/80089f3783372c8fd7833f28ce774a171b2ef252.1609232919.git.baruch@tkos.co.il Signed-off-by: Jakub Kicinski --- Documentation/networking/packet_mmap.rst | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/Documentation/networking/packet_mmap.rst b/Documentation/networking/packet_mmap.rst index f3646c80b019c..500ef60b1b821 100644 --- a/Documentation/networking/packet_mmap.rst +++ b/Documentation/networking/packet_mmap.rst @@ -8,7 +8,7 @@ Abstract ======== This file documents the mmap() facility available with the PACKET -socket interface on 2.4/2.6/3.x kernels. This type of sockets is used for +socket interface. This type of sockets is used for i) capture network traffic with utilities like tcpdump, ii) transmit network traffic, or any other that needs raw @@ -25,12 +25,12 @@ Please send your comments to Why use PACKET_MMAP =================== -In Linux 2.4/2.6/3.x if PACKET_MMAP is not enabled, the capture process is very +Non PACKET_MMAP capture process (plain AF_PACKET) is very inefficient. It uses very limited buffers and requires one system call to capture each packet, it requires two if you want to get packet's timestamp (like libpcap always does). -In the other hand PACKET_MMAP is very efficient. PACKET_MMAP provides a size +On the other hand PACKET_MMAP is very efficient. PACKET_MMAP provides a size configurable circular buffer mapped in user space that can be used to either send or receive packets. This way reading packets just needs to wait for them, most of the time there is no need to issue a single system call. Concerning @@ -252,8 +252,7 @@ PACKET_MMAP setting constraints In kernel versions prior to 2.4.26 (for the 2.4 branch) and 2.6.5 (2.6 branch), the PACKET_MMAP buffer could hold only 32768 frames in a 32 bit architecture or -16384 in a 64 bit architecture. For information on these kernel versions -see http://pusa.uv.es/~ulisses/packet_mmap/packet_mmap.pre-2.4.26_2.6.5.txt +16384 in a 64 bit architecture. Block size limit ---------------- -- GitLab From 862aecbd9569e563b979c0e23a908b43cda4b0b9 Mon Sep 17 00:00:00 2001 From: YANG LI Date: Wed, 30 Dec 2020 15:23:14 +0800 Subject: [PATCH 1385/1894] ibmvnic: fix: NULL pointer dereference. The error is due to dereference a null pointer in function reset_one_sub_crq_queue(): if (!scrq) { netdev_dbg(adapter->netdev, "Invalid scrq reset. irq (%d) or msgs(%p).\n", scrq->irq, scrq->msgs); return -EINVAL; } If the expression is true, scrq must be a null pointer and cannot dereference. Fixes: 9281cf2d5840 ("ibmvnic: avoid memset null scrq msgs") Signed-off-by: YANG LI Reported-by: Abaci Acked-by: Lijun Pan Link: https://lore.kernel.org/r/1609312994-121032-1-git-send-email-abaci-bugfix@linux.alibaba.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmvnic.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 91ebcde30292c..9778c83150f1c 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2981,9 +2981,7 @@ static int reset_one_sub_crq_queue(struct ibmvnic_adapter *adapter, int rc; if (!scrq) { - netdev_dbg(adapter->netdev, - "Invalid scrq reset. irq (%d) or msgs (%p).\n", - scrq->irq, scrq->msgs); + netdev_dbg(adapter->netdev, "Invalid scrq reset.\n"); return -EINVAL; } -- GitLab From 1d0d561ad1d7606bb745c1ed9478e7206860e56e Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Mon, 4 Jan 2021 10:38:02 +0000 Subject: [PATCH 1386/1894] net: macb: Correct usage of MACB_CAPS_CLK_HW_CHG flag A new flag MACB_CAPS_CLK_HW_CHG was added and all callers of macb_set_tx_clk were gated on the presence of this flag. - if (!clk) + if (!bp->tx_clk || !(bp->caps & MACB_CAPS_CLK_HW_CHG)) However the flag was not added to anything other than the new sama7g5_gem, turning that function call into a no op for all other systems. This breaks the networking on Zynq. The commit message adding this states: a new capability so that macb_set_tx_clock() to not be called for IPs having this capability This strongly implies that present of the flag was intended to skip the function not absence of the flag. Update the if statement to this effect, which repairs the existing users. Fixes: daafa1d33cc9 ("net: macb: add capability to not set the clock rate") Suggested-by: Andrew Lunn Signed-off-by: Charles Keepax Reviewed-by: Claudiu Beznea Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20210104103802.13091-1-ckeepax@opensource.cirrus.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index d5d910916c2e8..814a5b10141d1 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -467,7 +467,7 @@ static void macb_set_tx_clk(struct macb *bp, int speed) { long ferr, rate, rate_rounded; - if (!bp->tx_clk || !(bp->caps & MACB_CAPS_CLK_HW_CHG)) + if (!bp->tx_clk || (bp->caps & MACB_CAPS_CLK_HW_CHG)) return; switch (speed) { -- GitLab From 2ff2c7e274392871bfdee00ff2adbb8ebae5d240 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 30 Dec 2020 13:42:51 +0200 Subject: [PATCH 1387/1894] selftests: mlxsw: Set headroom size of correct port The test was setting the headroom size of the wrong port. This was not visible because of a firmware bug that canceled this bug. Set the headroom size of the correct port, so that the test will pass with both old and new firmware versions. Fixes: bfa804784e32 ("selftests: mlxsw: Add a PFC test") Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Link: https://lore.kernel.org/r/20201230114251.394009-1-idosch@idosch.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh index 4d900bc1f76c6..5c7700212f753 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh @@ -230,7 +230,7 @@ switch_create() __mlnx_qos -i $swp4 --pfc=0,1,0,0,0,0,0,0 >/dev/null # PG0 will get autoconfigured to Xoff, give PG1 arbitrarily 100K, which # is (-2*MTU) about 80K of delay provision. - __mlnx_qos -i $swp3 --buffer_size=0,$_100KB,0,0,0,0,0,0 >/dev/null + __mlnx_qos -i $swp4 --buffer_size=0,$_100KB,0,0,0,0,0,0 >/dev/null # bridges # ------- -- GitLab From cfd82dfc9799c53ef109343a23af006a0f6860a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Wed, 30 Dec 2020 16:24:51 +0100 Subject: [PATCH 1388/1894] net: usb: qmi_wwan: add Quectel EM160R-GL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New modem using ff/ff/30 for QCDM, ff/00/00 for AT and NMEA, and ff/ff/ff for RMNET/QMI. T: Bus=02 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=5000 MxCh= 0 D: Ver= 3.20 Cls=ef(misc ) Sub=02 Prot=01 MxPS= 9 #Cfgs= 1 P: Vendor=2c7c ProdID=0620 Rev= 4.09 S: Manufacturer=Quectel S: Product=EM160R-GL S: SerialNumber=e31cedc1 C:* #Ifs= 5 Cfg#= 1 Atr=a0 MxPwr=896mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=(none) E: Ad=81(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=88(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=8e(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=0f(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms Signed-off-by: Bjørn Mork Link: https://lore.kernel.org/r/20201230152451.245271-1-bjorn@mork.no Signed-off-by: Jakub Kicinski --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index d166c321ee9b6..af19513a9f75b 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1013,6 +1013,7 @@ static const struct usb_device_id products[] = { {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0125)}, /* Quectel EC25, EC20 R2.0 Mini PCIe */ {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0306)}, /* Quectel EP06/EG06/EM06 */ {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0512)}, /* Quectel EG12/EM12 */ + {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0620)}, /* Quectel EM160R-GL */ {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0800)}, /* Quectel RM500Q-GL */ /* 3. Combined interface devices matching on interface number */ -- GitLab From e80bd76fbf563cc7ed8c9e9f3bbcdf59b0897f69 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 30 Dec 2020 19:33:34 +0100 Subject: [PATCH 1389/1894] r8169: work around power-saving bug on some chip versions A user reported failing network with RTL8168dp (a quite rare chip version). Realtek confirmed that few chip versions suffer from a PLL power-down hw bug. Fixes: 07df5bd874f0 ("r8169: power down chip in probe") Signed-off-by: Heiner Kallweit Link: https://lore.kernel.org/r/a1c39460-d533-7f9e-fa9d-2b8990b02426@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/r8169_main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 46d8510b2fe26..a569abe7f5ef2 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -2207,7 +2207,8 @@ static void rtl_pll_power_down(struct rtl8169_private *tp) } switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_25 ... RTL_GIGA_MAC_VER_33: + case RTL_GIGA_MAC_VER_25 ... RTL_GIGA_MAC_VER_26: + case RTL_GIGA_MAC_VER_32 ... RTL_GIGA_MAC_VER_33: case RTL_GIGA_MAC_VER_37: case RTL_GIGA_MAC_VER_39: case RTL_GIGA_MAC_VER_43: @@ -2233,7 +2234,8 @@ static void rtl_pll_power_down(struct rtl8169_private *tp) static void rtl_pll_power_up(struct rtl8169_private *tp) { switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_25 ... RTL_GIGA_MAC_VER_33: + case RTL_GIGA_MAC_VER_25 ... RTL_GIGA_MAC_VER_26: + case RTL_GIGA_MAC_VER_32 ... RTL_GIGA_MAC_VER_33: case RTL_GIGA_MAC_VER_37: case RTL_GIGA_MAC_VER_39: case RTL_GIGA_MAC_VER_43: -- GitLab From b40f97b91a3b167ab22c9e9f1ef00b1615ff01e9 Mon Sep 17 00:00:00 2001 From: Xie He Date: Thu, 31 Dec 2020 09:43:31 -0800 Subject: [PATCH 1390/1894] net: lapb: Decrease the refcount of "struct lapb_cb" in lapb_device_event In lapb_device_event, lapb_devtostruct is called to get a reference to an object of "struct lapb_cb". lapb_devtostruct increases the refcount of the object and returns a pointer to it. However, we didn't decrease the refcount after we finished using the pointer. This patch fixes this problem. Fixes: a4989fa91110 ("net/lapb: support netdev events") Cc: Martin Schiller Signed-off-by: Xie He Link: https://lore.kernel.org/r/20201231174331.64539-1-xie.he.0141@gmail.com Signed-off-by: Jakub Kicinski --- net/lapb/lapb_iface.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c index 213ea7abc9ab2..40961889e9c01 100644 --- a/net/lapb/lapb_iface.c +++ b/net/lapb/lapb_iface.c @@ -489,6 +489,7 @@ static int lapb_device_event(struct notifier_block *this, unsigned long event, break; } + lapb_put(lapb); return NOTIFY_DONE; } -- GitLab From c1a9ec7e5d577a9391660800c806c53287fca991 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sun, 3 Jan 2021 02:25:43 +0100 Subject: [PATCH 1391/1894] net: dsa: lantiq_gswip: Enable GSWIP_MII_CFG_EN also for internal PHYs Enable GSWIP_MII_CFG_EN also for internal PHYs to make traffic flow. Without this the PHY link is detected properly and ethtool statistics for TX are increasing but there's no RX traffic coming in. Fixes: 14fceff4771e51 ("net: dsa: Add Lantiq / Intel DSA driver for vrx200") Suggested-by: Hauke Mehrtens Signed-off-by: Martin Blumenstingl Acked-by: Hauke Mehrtens Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- drivers/net/dsa/lantiq_gswip.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c index 09701c17f3f63..5d378c8026f01 100644 --- a/drivers/net/dsa/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq_gswip.c @@ -1541,9 +1541,7 @@ static void gswip_phylink_mac_link_up(struct dsa_switch *ds, int port, { struct gswip_priv *priv = ds->priv; - /* Enable the xMII interface only for the external PHY */ - if (interface != PHY_INTERFACE_MODE_INTERNAL) - gswip_mii_mask_cfg(priv, 0, GSWIP_MII_CFG_EN, port); + gswip_mii_mask_cfg(priv, 0, GSWIP_MII_CFG_EN, port); } static void gswip_get_strings(struct dsa_switch *ds, int port, u32 stringset, -- GitLab From 709a3c9dff2a639966ae7d8ba6239d2b8aba036d Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sun, 3 Jan 2021 02:25:44 +0100 Subject: [PATCH 1392/1894] net: dsa: lantiq_gswip: Fix GSWIP_MII_CFG(p) register access There is one GSWIP_MII_CFG register for each switch-port except the CPU port. The register offset for the first port is 0x0, 0x02 for the second, 0x04 for the third and so on. Update the driver to not only restrict the GSWIP_MII_CFG registers to ports 0, 1 and 5. Handle ports 0..5 instead but skip the CPU port. This means we are not overwriting the configuration for the third port (port two since we start counting from zero) with the settings for the sixth port (with number five) anymore. The GSWIP_MII_PCDU(p) registers are not updated because there's really only three (one for each of the following ports: 0, 1, 5). Fixes: 14fceff4771e51 ("net: dsa: Add Lantiq / Intel DSA driver for vrx200") Signed-off-by: Martin Blumenstingl Acked-by: Hauke Mehrtens Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- drivers/net/dsa/lantiq_gswip.c | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c index 5d378c8026f01..4b36d89bec061 100644 --- a/drivers/net/dsa/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq_gswip.c @@ -92,9 +92,7 @@ GSWIP_MDIO_PHY_FDUP_MASK) /* GSWIP MII Registers */ -#define GSWIP_MII_CFG0 0x00 -#define GSWIP_MII_CFG1 0x02 -#define GSWIP_MII_CFG5 0x04 +#define GSWIP_MII_CFGp(p) (0x2 * (p)) #define GSWIP_MII_CFG_EN BIT(14) #define GSWIP_MII_CFG_LDCLKDIS BIT(12) #define GSWIP_MII_CFG_MODE_MIIP 0x0 @@ -392,17 +390,9 @@ static void gswip_mii_mask(struct gswip_priv *priv, u32 clear, u32 set, static void gswip_mii_mask_cfg(struct gswip_priv *priv, u32 clear, u32 set, int port) { - switch (port) { - case 0: - gswip_mii_mask(priv, clear, set, GSWIP_MII_CFG0); - break; - case 1: - gswip_mii_mask(priv, clear, set, GSWIP_MII_CFG1); - break; - case 5: - gswip_mii_mask(priv, clear, set, GSWIP_MII_CFG5); - break; - } + /* There's no MII_CFG register for the CPU port */ + if (!dsa_is_cpu_port(priv->ds, port)) + gswip_mii_mask(priv, clear, set, GSWIP_MII_CFGp(port)); } static void gswip_mii_mask_pcdu(struct gswip_priv *priv, u32 clear, u32 set, @@ -822,9 +812,8 @@ static int gswip_setup(struct dsa_switch *ds) gswip_mdio_mask(priv, 0xff, 0x09, GSWIP_MDIO_MDC_CFG1); /* Disable the xMII link */ - gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_EN, 0, 0); - gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_EN, 0, 1); - gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_EN, 0, 5); + for (i = 0; i < priv->hw_info->max_ports; i++) + gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_EN, 0, i); /* enable special tag insertion on cpu port */ gswip_switch_mask(priv, 0, GSWIP_FDMA_PCTRL_STEN, -- GitLab From 81b6d05ccad4f3d8a9dfb091fb46ad6978ee40e4 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 4 Jan 2021 20:36:35 +0000 Subject: [PATCH 1393/1894] io_uring: synchronise IOPOLL on task_submit fail io_req_task_submit() might be called for IOPOLL, do the fail path under uring_lock to comply with IOPOLL synchronisation based solely on it. Cc: stable@vger.kernel.org # 5.5+ Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index ca46f314640b1..5be33fd8b6bc6 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2126,15 +2126,16 @@ static void io_req_task_cancel(struct callback_head *cb) static void __io_req_task_submit(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; + bool fail; - if (!__io_sq_thread_acquire_mm(ctx) && - !__io_sq_thread_acquire_files(ctx)) { - mutex_lock(&ctx->uring_lock); + fail = __io_sq_thread_acquire_mm(ctx) || + __io_sq_thread_acquire_files(ctx); + mutex_lock(&ctx->uring_lock); + if (!fail) __io_queue_sqe(req, NULL); - mutex_unlock(&ctx->uring_lock); - } else { + else __io_req_task_cancel(req, -EFAULT); - } + mutex_unlock(&ctx->uring_lock); } static void io_req_task_submit(struct callback_head *cb) -- GitLab From 6c503150ae33ee19036255cfda0998463613352c Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 4 Jan 2021 20:36:36 +0000 Subject: [PATCH 1394/1894] io_uring: patch up IOPOLL overflow_flush sync IOPOLL skips completion locking but keeps it under uring_lock, thus io_cqring_overflow_flush() and so io_cqring_events() need additional locking with uring_lock in some cases for IOPOLL. Remove __io_cqring_overflow_flush() from io_cqring_events(), introduce a wrapper around flush doing needed synchronisation and call it by hand. Cc: stable@vger.kernel.org # 5.5+ Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 78 +++++++++++++++++++++++++++------------------------ 1 file changed, 41 insertions(+), 37 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 5be33fd8b6bc6..445035b24a507 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1713,9 +1713,9 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx) } /* Returns true if there are no backlogged entries after the flush */ -static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, - struct task_struct *tsk, - struct files_struct *files) +static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, + struct task_struct *tsk, + struct files_struct *files) { struct io_rings *rings = ctx->rings; struct io_kiocb *req, *tmp; @@ -1768,6 +1768,20 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, return all_flushed; } +static void io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, + struct task_struct *tsk, + struct files_struct *files) +{ + if (test_bit(0, &ctx->cq_check_overflow)) { + /* iopoll syncs against uring_lock, not completion_lock */ + if (ctx->flags & IORING_SETUP_IOPOLL) + mutex_lock(&ctx->uring_lock); + __io_cqring_overflow_flush(ctx, force, tsk, files); + if (ctx->flags & IORING_SETUP_IOPOLL) + mutex_unlock(&ctx->uring_lock); + } +} + static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags) { struct io_ring_ctx *ctx = req->ctx; @@ -2314,20 +2328,8 @@ static void io_double_put_req(struct io_kiocb *req) io_free_req(req); } -static unsigned io_cqring_events(struct io_ring_ctx *ctx, bool noflush) +static unsigned io_cqring_events(struct io_ring_ctx *ctx) { - if (test_bit(0, &ctx->cq_check_overflow)) { - /* - * noflush == true is from the waitqueue handler, just ensure - * we wake up the task, and the next invocation will flush the - * entries. We cannot safely to it from here. - */ - if (noflush) - return -1U; - - io_cqring_overflow_flush(ctx, false, NULL, NULL); - } - /* See comment at the top of this file */ smp_rmb(); return __io_cqring_events(ctx); @@ -2552,7 +2554,9 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min) * If we do, we can potentially be spinning for commands that * already triggered a CQE (eg in error). */ - if (io_cqring_events(ctx, false)) + if (test_bit(0, &ctx->cq_check_overflow)) + __io_cqring_overflow_flush(ctx, false, NULL, NULL); + if (io_cqring_events(ctx)) break; /* @@ -6827,7 +6831,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr) /* if we have a backlog and couldn't flush it all, return BUSY */ if (test_bit(0, &ctx->sq_check_overflow)) { - if (!io_cqring_overflow_flush(ctx, false, NULL, NULL)) + if (!__io_cqring_overflow_flush(ctx, false, NULL, NULL)) return -EBUSY; } @@ -7090,7 +7094,7 @@ struct io_wait_queue { unsigned nr_timeouts; }; -static inline bool io_should_wake(struct io_wait_queue *iowq, bool noflush) +static inline bool io_should_wake(struct io_wait_queue *iowq) { struct io_ring_ctx *ctx = iowq->ctx; @@ -7099,7 +7103,7 @@ static inline bool io_should_wake(struct io_wait_queue *iowq, bool noflush) * started waiting. For timeouts, we always want to return to userspace, * regardless of event count. */ - return io_cqring_events(ctx, noflush) >= iowq->to_wait || + return io_cqring_events(ctx) >= iowq->to_wait || atomic_read(&ctx->cq_timeouts) != iowq->nr_timeouts; } @@ -7109,11 +7113,13 @@ static int io_wake_function(struct wait_queue_entry *curr, unsigned int mode, struct io_wait_queue *iowq = container_of(curr, struct io_wait_queue, wq); - /* use noflush == true, as we can't safely rely on locking context */ - if (!io_should_wake(iowq, true)) - return -1; - - return autoremove_wake_function(curr, mode, wake_flags, key); + /* + * Cannot safely flush overflowed CQEs from here, ensure we wake up + * the task, and the next invocation will do it. + */ + if (io_should_wake(iowq) || test_bit(0, &iowq->ctx->cq_check_overflow)) + return autoremove_wake_function(curr, mode, wake_flags, key); + return -1; } static int io_run_task_work_sig(void) @@ -7150,7 +7156,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, int ret = 0; do { - if (io_cqring_events(ctx, false) >= min_events) + io_cqring_overflow_flush(ctx, false, NULL, NULL); + if (io_cqring_events(ctx) >= min_events) return 0; if (!io_run_task_work()) break; @@ -7178,6 +7185,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts); trace_io_uring_cqring_wait(ctx, min_events); do { + io_cqring_overflow_flush(ctx, false, NULL, NULL); prepare_to_wait_exclusive(&ctx->wait, &iowq.wq, TASK_INTERRUPTIBLE); /* make sure we run task_work before checking for signals */ @@ -7186,8 +7194,10 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, continue; else if (ret < 0) break; - if (io_should_wake(&iowq, false)) + if (io_should_wake(&iowq)) break; + if (test_bit(0, &ctx->cq_check_overflow)) + continue; if (uts) { timeout = schedule_timeout(timeout); if (timeout == 0) { @@ -8625,7 +8635,8 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait) smp_rmb(); if (!io_sqring_full(ctx)) mask |= EPOLLOUT | EPOLLWRNORM; - if (io_cqring_events(ctx, false)) + io_cqring_overflow_flush(ctx, false, NULL, NULL); + if (io_cqring_events(ctx)) mask |= EPOLLIN | EPOLLRDNORM; return mask; @@ -8683,7 +8694,7 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) /* if force is set, the ring is going away. always drop after that */ ctx->cq_overflow_flushed = 1; if (ctx->rings) - io_cqring_overflow_flush(ctx, true, NULL, NULL); + __io_cqring_overflow_flush(ctx, true, NULL, NULL); mutex_unlock(&ctx->uring_lock); io_kill_timeouts(ctx, NULL, NULL); @@ -8857,9 +8868,7 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx, } io_cancel_defer_files(ctx, task, files); - io_ring_submit_lock(ctx, (ctx->flags & IORING_SETUP_IOPOLL)); io_cqring_overflow_flush(ctx, true, task, files); - io_ring_submit_unlock(ctx, (ctx->flags & IORING_SETUP_IOPOLL)); if (!files) __io_uring_cancel_task_requests(ctx, task); @@ -9195,13 +9204,8 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, */ ret = 0; if (ctx->flags & IORING_SETUP_SQPOLL) { - if (!list_empty_careful(&ctx->cq_overflow_list)) { - bool needs_lock = ctx->flags & IORING_SETUP_IOPOLL; + io_cqring_overflow_flush(ctx, false, NULL, NULL); - io_ring_submit_lock(ctx, needs_lock); - io_cqring_overflow_flush(ctx, false, NULL, NULL); - io_ring_submit_unlock(ctx, needs_lock); - } if (flags & IORING_ENTER_SQ_WAKEUP) wake_up(&ctx->sq_data->wait); if (flags & IORING_ENTER_SQ_WAIT) -- GitLab From de7f1d9e99d8b99e4e494ad8fcd91f0c4c5c9357 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 4 Jan 2021 20:43:29 +0000 Subject: [PATCH 1395/1894] io_uring: drop file refs after task cancel io_uring fds marked O_CLOEXEC and we explicitly cancel all requests before going through exec, so we don't want to leave task's file references to not our anymore io_uring instances. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 445035b24a507..85de42c424335 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8958,6 +8958,15 @@ static void io_uring_attempt_task_drop(struct file *file) io_uring_del_task_file(file); } +static void io_uring_remove_task_files(struct io_uring_task *tctx) +{ + struct file *file; + unsigned long index; + + xa_for_each(&tctx->xa, index, file) + io_uring_del_task_file(file); +} + void __io_uring_files_cancel(struct files_struct *files) { struct io_uring_task *tctx = current->io_uring; @@ -8966,16 +8975,12 @@ void __io_uring_files_cancel(struct files_struct *files) /* make sure overflow events are dropped */ atomic_inc(&tctx->in_idle); - - xa_for_each(&tctx->xa, index, file) { - struct io_ring_ctx *ctx = file->private_data; - - io_uring_cancel_task_requests(ctx, files); - if (files) - io_uring_del_task_file(file); - } - + xa_for_each(&tctx->xa, index, file) + io_uring_cancel_task_requests(file->private_data, files); atomic_dec(&tctx->in_idle); + + if (files) + io_uring_remove_task_files(tctx); } static s64 tctx_inflight(struct io_uring_task *tctx) @@ -9038,6 +9043,8 @@ void __io_uring_task_cancel(void) } while (1); atomic_dec(&tctx->in_idle); + + io_uring_remove_task_files(tctx); } static int io_uring_flush(struct file *file, void *data) -- GitLab From 90df08538c07b7135703358a0c8c08d97889a704 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 4 Jan 2021 20:43:30 +0000 Subject: [PATCH 1396/1894] io_uring: cancel more aggressively in exit_work While io_ring_exit_work() is running new requests of all sorts may be issued, so it should do a bit more to cancel them, otherwise they may just get stuck. e.g. in io-wq, in poll lists, etc. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 85de42c424335..5bccb235271f7 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -992,6 +992,9 @@ enum io_mem_account { ACCT_PINNED, }; +static void __io_uring_cancel_task_requests(struct io_ring_ctx *ctx, + struct task_struct *task); + static void destroy_fixed_file_ref_node(struct fixed_file_ref_node *ref_node); static struct fixed_file_ref_node *alloc_fixed_file_ref_node( struct io_ring_ctx *ctx); @@ -8675,7 +8678,7 @@ static void io_ring_exit_work(struct work_struct *work) * as nobody else will be looking for them. */ do { - io_iopoll_try_reap_events(ctx); + __io_uring_cancel_task_requests(ctx, NULL); } while (!wait_for_completion_timeout(&ctx->ref_comp, HZ/20)); io_ring_ctx_free(ctx); } @@ -8830,9 +8833,11 @@ static void __io_uring_cancel_task_requests(struct io_ring_ctx *ctx, enum io_wq_cancel cret; bool ret = false; - cret = io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, &cancel, true); - if (cret != IO_WQ_CANCEL_NOTFOUND) - ret = true; + if (ctx->io_wq) { + cret = io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, + &cancel, true); + ret |= (cret != IO_WQ_CANCEL_NOTFOUND); + } /* SQPOLL thread does its own polling */ if (!(ctx->flags & IORING_SETUP_SQPOLL)) { -- GitLab From 75353bcd2184010f08a3ed2f0da019bd9d604e1e Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 24 Dec 2020 15:13:57 +0000 Subject: [PATCH 1397/1894] drm/i915: clear the shadow batch The shadow batch is an internal object, which doesn't have any page clearing, and since the batch_len can be smaller than the object, we should take care to clear it. Testcase: igt/gen9_exec_parse/shadow-peek Fixes: 4f7af1948abc ("drm/i915: Support ro ppgtt mapped cmdparser shadow buffers") Signed-off-by: Matthew Auld Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20201224151358.401345-1-matthew.auld@intel.com Cc: stable@vger.kernel.org (cherry picked from commit eeb52ee6c4a429ec301faf1dc48988744960786e) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_cmd_parser.c | 27 +++++++++----------------- 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 93265951fdbbd..b0899b665e852 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -1166,7 +1166,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, } } if (IS_ERR(src)) { - unsigned long x, n; + unsigned long x, n, remain; void *ptr; /* @@ -1177,14 +1177,15 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, * We don't care about copying too much here as we only * validate up to the end of the batch. */ + remain = length; if (!(dst_obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) - length = round_up(length, + remain = round_up(remain, boot_cpu_data.x86_clflush_size); ptr = dst; x = offset_in_page(offset); - for (n = offset >> PAGE_SHIFT; length; n++) { - int len = min(length, PAGE_SIZE - x); + for (n = offset >> PAGE_SHIFT; remain; n++) { + int len = min(remain, PAGE_SIZE - x); src = kmap_atomic(i915_gem_object_get_page(src_obj, n)); if (needs_clflush) @@ -1193,13 +1194,15 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, kunmap_atomic(src); ptr += len; - length -= len; + remain -= len; x = 0; } } i915_gem_object_unpin_pages(src_obj); + memset32(dst + length, 0, (dst_obj->base.size - length) / sizeof(u32)); + /* dst_obj is returned with vmap pinned */ return dst; } @@ -1392,11 +1395,6 @@ static unsigned long *alloc_whitelist(u32 batch_length) #define LENGTH_BIAS 2 -static bool shadow_needs_clflush(struct drm_i915_gem_object *obj) -{ - return !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE); -} - /** * intel_engine_cmd_parser() - parse a batch buffer for privilege violations * @engine: the engine on which the batch is to execute @@ -1538,16 +1536,9 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, ret = 0; /* allow execution */ } } - - if (shadow_needs_clflush(shadow->obj)) - drm_clflush_virt_range(batch_end, 8); } - if (shadow_needs_clflush(shadow->obj)) { - void *ptr = page_mask_bits(shadow->obj->mm.mapping); - - drm_clflush_virt_range(ptr, (void *)(cmd + 1) - ptr); - } + i915_gem_object_flush_map(shadow->obj); if (!IS_ERR_OR_NULL(jump_whitelist)) kfree(jump_whitelist); -- GitLab From 641382e9b44fba81a0778e1914ee35b8471121f9 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 24 Dec 2020 15:13:58 +0000 Subject: [PATCH 1398/1894] drm/i915: clear the gpu reloc batch The reloc batch is short lived but can exist in the user visible ppGTT, and since it's backed by an internal object, which lacks page clearing, we should take care to clear it upfront. Signed-off-by: Matthew Auld Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20201224151358.401345-2-matthew.auld@intel.com Cc: stable@vger.kernel.org (cherry picked from commit 26ebc511e799f621357982ccc37a7987a56a00f4) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index bcc80f428172b..bd3046e5a9348 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -1046,7 +1046,7 @@ static void reloc_gpu_flush(struct i915_execbuffer *eb, struct reloc_cache *cach GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32)); cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END; - __i915_gem_object_flush_map(obj, 0, sizeof(u32) * (cache->rq_size + 1)); + i915_gem_object_flush_map(obj); i915_gem_object_unpin_map(obj); intel_gt_chipset_flush(cache->rq->engine->gt); @@ -1296,6 +1296,8 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, goto err_pool; } + memset32(cmd, 0, pool->obj->base.size / sizeof(u32)); + batch = i915_vma_instance(pool->obj, vma->vm, NULL); if (IS_ERR(batch)) { err = PTR_ERR(batch); -- GitLab From 557862535c2cad6de6f6fb12312b7a6d09c06407 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 29 Dec 2020 12:08:28 +0000 Subject: [PATCH 1399/1894] drm/i915/gt: Define guc firmware blob for older Cometlakes When splitting the Coffeelake define to also identify Cometlakes, I missed the double fw_def for Coffeelake. That is only newer Cometlakes use the cml specific guc firmware, older Cometlakes should use kbl firmware. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2859 Fixes: 5f4ae2704d59 ("drm/i915: Identify Cometlake platform") Signed-off-by: Chris Wilson Cc: # v5.9+ Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20201229120828.29931-1-chris@chris-wilson.co.uk (cherry picked from commit 70960ab27542d8dc322f909f516391f331fbd3f1) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 180c23e2e25e4..602f1a0bc5871 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -53,6 +53,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, fw_def(ELKHARTLAKE, 0, guc_def(ehl, 49, 0, 1), huc_def(ehl, 9, 0, 0)) \ fw_def(ICELAKE, 0, guc_def(icl, 49, 0, 1), huc_def(icl, 9, 0, 0)) \ fw_def(COMETLAKE, 5, guc_def(cml, 49, 0, 1), huc_def(cml, 4, 0, 0)) \ + fw_def(COMETLAKE, 0, guc_def(kbl, 49, 0, 1), huc_def(kbl, 4, 0, 0)) \ fw_def(COFFEELAKE, 0, guc_def(kbl, 49, 0, 1), huc_def(kbl, 4, 0, 0)) \ fw_def(GEMINILAKE, 0, guc_def(glk, 49, 0, 1), huc_def(glk, 4, 0, 0)) \ fw_def(KABYLAKE, 0, guc_def(kbl, 49, 0, 1), huc_def(kbl, 4, 0, 0)) \ -- GitLab From 9397d66212cdf7a21c66523f1583e5d63a609e84 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 30 Dec 2020 20:23:09 +0000 Subject: [PATCH 1400/1894] drm/i915/dp: Track pm_qos per connector MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since multiple connectors may run intel_dp_aux_xfer conncurrently, a single global pm_qos does not suffice. (One connector may disable the dma-latency boost prematurely while the second is still depending on it.) Instead of a single global pm_qos, track the pm_qos request for each intel_dp. v2: Move the pm_qos setup/teardown to intel_dp_aux_init/fini Fixes: 9ee32fea5fe8 ("drm/i915: irq-drive the dp aux communication") Signed-off-by: Chris Wilson Cc: Ville Syrjälä Cc: Imre Deak Reviewed-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20201230202309.23982-1-chris@chris-wilson.co.uk (cherry picked from commit b3304591f14b437b6bccd8dbff06006c11837031) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display_types.h | 3 +++ drivers/gpu/drm/i915/display/intel_dp.c | 8 ++++++-- drivers/gpu/drm/i915/i915_drv.c | 5 ----- drivers/gpu/drm/i915/i915_drv.h | 3 --- 4 files changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index ce82d654d0f24..34d78c654df3b 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -1436,6 +1436,9 @@ struct intel_dp { bool ycbcr_444_to_420; } dfp; + /* To control wakeup latency, e.g. for irq-driven dp aux transfers. */ + struct pm_qos_request pm_qos; + /* Display stream compression testing */ bool force_dsc_en; diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 2165398d2c7cd..37f1a10fd0217 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -1489,7 +1489,7 @@ intel_dp_aux_xfer(struct intel_dp *intel_dp, * lowest possible wakeup latency and so prevent the cpu from going into * deep sleep states. */ - cpu_latency_qos_update_request(&i915->pm_qos, 0); + cpu_latency_qos_update_request(&intel_dp->pm_qos, 0); intel_dp_check_edp(intel_dp); @@ -1622,7 +1622,7 @@ done: ret = recv_bytes; out: - cpu_latency_qos_update_request(&i915->pm_qos, PM_QOS_DEFAULT_VALUE); + cpu_latency_qos_update_request(&intel_dp->pm_qos, PM_QOS_DEFAULT_VALUE); if (vdd) edp_panel_vdd_off(intel_dp, false); @@ -1898,6 +1898,9 @@ static i915_reg_t tgl_aux_data_reg(struct intel_dp *intel_dp, int index) static void intel_dp_aux_fini(struct intel_dp *intel_dp) { + if (cpu_latency_qos_request_active(&intel_dp->pm_qos)) + cpu_latency_qos_remove_request(&intel_dp->pm_qos); + kfree(intel_dp->aux.name); } @@ -1950,6 +1953,7 @@ intel_dp_aux_init(struct intel_dp *intel_dp) encoder->base.name); intel_dp->aux.transfer = intel_dp_aux_transfer; + cpu_latency_qos_add_request(&intel_dp->pm_qos, PM_QOS_DEFAULT_VALUE); } bool intel_dp_source_supports_hbr2(struct intel_dp *intel_dp) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 320856b665a17..88ad754962af1 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -578,8 +578,6 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) pci_set_master(pdev); - cpu_latency_qos_add_request(&dev_priv->pm_qos, PM_QOS_DEFAULT_VALUE); - intel_gt_init_workarounds(dev_priv); /* On the 945G/GM, the chipset reports the MSI capability on the @@ -626,7 +624,6 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) err_msi: if (pdev->msi_enabled) pci_disable_msi(pdev); - cpu_latency_qos_remove_request(&dev_priv->pm_qos); err_mem_regions: intel_memory_regions_driver_release(dev_priv); err_ggtt: @@ -648,8 +645,6 @@ static void i915_driver_hw_remove(struct drm_i915_private *dev_priv) if (pdev->msi_enabled) pci_disable_msi(pdev); - - cpu_latency_qos_remove_request(&dev_priv->pm_qos); } /** diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0a3ee4f9dc0a7..632c713227dc7 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -891,9 +891,6 @@ struct drm_i915_private { bool display_irqs_enabled; - /* To control wakeup latency, e.g. for irq-driven dp aux transfers. */ - struct pm_qos_request pm_qos; - /* Sideband mailbox protection */ struct mutex sb_lock; struct pm_qos_request sb_qos; -- GitLab From 05f6f7271a38c482c5021967433f7b698e102c45 Mon Sep 17 00:00:00 2001 From: Qii Wang Date: Thu, 24 Dec 2020 20:26:07 +0800 Subject: [PATCH 1401/1894] i2c: mediatek: Fix apdma and i2c hand-shake timeout With the apdma remove hand-shake signal, it requirs special operation timing to reset i2c manually, otherwise the interrupt will not be triggered, i2c transmission will be timeout. Fixes: 8426fe70cfa4("i2c: mediatek: Add apdma sync in i2c driver") Signed-off-by: Qii Wang Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-mt65xx.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/drivers/i2c/busses/i2c-mt65xx.c b/drivers/i2c/busses/i2c-mt65xx.c index 33de99b7bc20c..0818d3e507347 100644 --- a/drivers/i2c/busses/i2c-mt65xx.c +++ b/drivers/i2c/busses/i2c-mt65xx.c @@ -38,6 +38,7 @@ #define I2C_IO_CONFIG_OPEN_DRAIN 0x0003 #define I2C_IO_CONFIG_PUSH_PULL 0x0000 #define I2C_SOFT_RST 0x0001 +#define I2C_HANDSHAKE_RST 0x0020 #define I2C_FIFO_ADDR_CLR 0x0001 #define I2C_DELAY_LEN 0x0002 #define I2C_TIME_CLR_VALUE 0x0000 @@ -45,6 +46,7 @@ #define I2C_WRRD_TRANAC_VALUE 0x0002 #define I2C_RD_TRANAC_VALUE 0x0001 #define I2C_SCL_MIS_COMP_VALUE 0x0000 +#define I2C_CHN_CLR_FLAG 0x0000 #define I2C_DMA_CON_TX 0x0000 #define I2C_DMA_CON_RX 0x0001 @@ -54,7 +56,9 @@ #define I2C_DMA_START_EN 0x0001 #define I2C_DMA_INT_FLAG_NONE 0x0000 #define I2C_DMA_CLR_FLAG 0x0000 +#define I2C_DMA_WARM_RST 0x0001 #define I2C_DMA_HARD_RST 0x0002 +#define I2C_DMA_HANDSHAKE_RST 0x0004 #define MAX_SAMPLE_CNT_DIV 8 #define MAX_STEP_CNT_DIV 64 @@ -475,11 +479,24 @@ static void mtk_i2c_init_hw(struct mtk_i2c *i2c) { u16 control_reg; - writel(I2C_DMA_HARD_RST, i2c->pdmabase + OFFSET_RST); - udelay(50); - writel(I2C_DMA_CLR_FLAG, i2c->pdmabase + OFFSET_RST); - - mtk_i2c_writew(i2c, I2C_SOFT_RST, OFFSET_SOFTRESET); + if (i2c->dev_comp->dma_sync) { + writel(I2C_DMA_WARM_RST, i2c->pdmabase + OFFSET_RST); + udelay(10); + writel(I2C_DMA_CLR_FLAG, i2c->pdmabase + OFFSET_RST); + udelay(10); + writel(I2C_DMA_HANDSHAKE_RST | I2C_DMA_HARD_RST, + i2c->pdmabase + OFFSET_RST); + mtk_i2c_writew(i2c, I2C_HANDSHAKE_RST | I2C_SOFT_RST, + OFFSET_SOFTRESET); + udelay(10); + writel(I2C_DMA_CLR_FLAG, i2c->pdmabase + OFFSET_RST); + mtk_i2c_writew(i2c, I2C_CHN_CLR_FLAG, OFFSET_SOFTRESET); + } else { + writel(I2C_DMA_HARD_RST, i2c->pdmabase + OFFSET_RST); + udelay(50); + writel(I2C_DMA_CLR_FLAG, i2c->pdmabase + OFFSET_RST); + mtk_i2c_writew(i2c, I2C_SOFT_RST, OFFSET_SOFTRESET); + } /* Set ioconfig */ if (i2c->use_push_pull) -- GitLab From d1c5246e08eb64991001d97a3bd119c93edbc79a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 2 Dec 2020 22:28:12 -0800 Subject: [PATCH 1402/1894] x86/mm: Fix leak of pmd ptlock Commit 28ee90fe6048 ("x86/mm: implement free pmd/pte page interfaces") introduced a new location where a pmd was released, but neglected to run the pmd page destructor. In fact, this happened previously for a different pmd release path and was fixed by commit: c283610e44ec ("x86, mm: do not leak page->ptl for pmd page tables"). This issue was hidden until recently because the failure mode is silent, but commit: b2b29d6d0119 ("mm: account PMD tables like PTE tables") turns the failure mode into this signature: BUG: Bad page state in process lt-pmem-ns pfn:15943d page:000000007262ed7b refcount:0 mapcount:-1024 mapping:0000000000000000 index:0x0 pfn:0x15943d flags: 0xaffff800000000() raw: 00affff800000000 dead000000000100 0000000000000000 0000000000000000 raw: 0000000000000000 ffff913a029bcc08 00000000fffffbff 0000000000000000 page dumped because: nonzero mapcount [..] dump_stack+0x8b/0xb0 bad_page.cold+0x63/0x94 free_pcp_prepare+0x224/0x270 free_unref_page+0x18/0xd0 pud_free_pmd_page+0x146/0x160 ioremap_pud_range+0xe3/0x350 ioremap_page_range+0x108/0x160 __ioremap_caller.constprop.0+0x174/0x2b0 ? memremap+0x7a/0x110 memremap+0x7a/0x110 devm_memremap+0x53/0xa0 pmem_attach_disk+0x4ed/0x530 [nd_pmem] ? __devm_release_region+0x52/0x80 nvdimm_bus_probe+0x85/0x210 [libnvdimm] Given this is a repeat occurrence it seemed prudent to look for other places where this destructor might be missing and whether a better helper is needed. try_to_free_pmd_page() looks like a candidate, but testing with setting up and tearing down pmd mappings via the dax unit tests is thus far not triggering the failure. As for a better helper pmd_free() is close, but it is a messy fit due to requiring an @mm arg. Also, ___pmd_free_tlb() wants to call paravirt_tlb_remove_table() instead of free_page(), so open-coded pgtable_pmd_page_dtor() seems the best way forward for now. Debugged together with Matthew Wilcox . Fixes: 28ee90fe6048 ("x86/mm: implement free pmd/pte page interfaces") Signed-off-by: Dan Williams Signed-off-by: Borislav Petkov Tested-by: Yi Zhang Acked-by: Peter Zijlstra (Intel) Cc: Link: https://lkml.kernel.org/r/160697689204.605323.17629854984697045602.stgit@dwillia2-desk3.amr.corp.intel.com --- arch/x86/mm/pgtable.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index dfd82f51ba66b..f6a9e2e366425 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -829,6 +829,8 @@ int pud_free_pmd_page(pud_t *pud, unsigned long addr) } free_page((unsigned long)pmd_sv); + + pgtable_pmd_page_dtor(virt_to_page(pmd)); free_page((unsigned long)pmd); return 1; -- GitLab From 311bea3cb9ee20ef150ca76fc60a592bf6b159f5 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Thu, 17 Dec 2020 16:24:32 -0800 Subject: [PATCH 1403/1894] arm64: link with -z norelro for LLD or aarch64-elf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With GNU binutils 2.35+, linking with BFD produces warnings for vmlinux: aarch64-linux-gnu-ld: warning: -z norelro ignored BFD can produce this warning when the target emulation mode does not support RELRO program headers, and -z relro or -z norelro is passed. Alan Modra clarifies: The default linker emulation for an aarch64-linux ld.bfd is -maarch64linux, the default for an aarch64-elf linker is -maarch64elf. They are not equivalent. If you choose -maarch64elf you get an emulation that doesn't support -z relro. The ARCH=arm64 kernel prefers -maarch64elf, but may fall back to -maarch64linux based on the toolchain configuration. LLD will always create RELRO program header regardless of target emulation. To avoid the above warning when linking with BFD, pass -z norelro only when linking with LLD or with -maarch64linux. Fixes: 3b92fa7485eb ("arm64: link with -z norelro regardless of CONFIG_RELOCATABLE") Fixes: 3bbd3db86470 ("arm64: relocatable: fix inconsistencies in linker script and options") Cc: # 5.0.x- Reported-by: kernelci.org bot Reported-by: Quentin Perret Signed-off-by: Nick Desaulniers Reviewed-by: Nathan Chancellor Acked-by: Ard Biesheuvel Cc: Alan Modra Cc: Fāng-ruì Sòng Link: https://lore.kernel.org/r/20201218002432.788499-1-ndesaulniers@google.com Signed-off-by: Catalin Marinas --- arch/arm64/Makefile | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 6be9b37502503..90309208bb28d 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -10,7 +10,7 @@ # # Copyright (C) 1995-2001 by Russell King -LDFLAGS_vmlinux :=--no-undefined -X -z norelro +LDFLAGS_vmlinux :=--no-undefined -X ifeq ($(CONFIG_RELOCATABLE), y) # Pass --no-apply-dynamic-relocs to restore pre-binutils-2.27 behaviour @@ -115,16 +115,20 @@ KBUILD_CPPFLAGS += -mbig-endian CHECKFLAGS += -D__AARCH64EB__ # Prefer the baremetal ELF build target, but not all toolchains include # it so fall back to the standard linux version if needed. -KBUILD_LDFLAGS += -EB $(call ld-option, -maarch64elfb, -maarch64linuxb) +KBUILD_LDFLAGS += -EB $(call ld-option, -maarch64elfb, -maarch64linuxb -z norelro) UTS_MACHINE := aarch64_be else KBUILD_CPPFLAGS += -mlittle-endian CHECKFLAGS += -D__AARCH64EL__ # Same as above, prefer ELF but fall back to linux target if needed. -KBUILD_LDFLAGS += -EL $(call ld-option, -maarch64elf, -maarch64linux) +KBUILD_LDFLAGS += -EL $(call ld-option, -maarch64elf, -maarch64linux -z norelro) UTS_MACHINE := aarch64 endif +ifeq ($(CONFIG_LD_IS_LLD), y) +KBUILD_LDFLAGS += -z norelro +endif + CHECKFLAGS += -D__aarch64__ ifeq ($(CONFIG_DYNAMIC_FTRACE_WITH_REGS),y) -- GitLab From 96ebc9c871d8a28fb22aa758dd9188a4732df482 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Mon, 4 Jan 2021 20:07:15 -0800 Subject: [PATCH 1404/1894] usb: uas: Add PNY USB Portable SSD to unusual_uas Here's another variant PNY Pro Elite USB 3.1 Gen 2 portable SSD that hangs and doesn't respond to ATA_1x pass-through commands. If it doesn't support these commands, it should respond properly to the host. Add it to the unusual uas list to be able to move forward with other operations. Cc: stable@vger.kernel.org Reviewed-by: Hans de Goede Acked-by: Oliver Neukum Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/2edc7af892d0913bf06f5b35e49ec463f03d5ed8.1609819418.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_uas.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h index 870e9cf3d5dc4..f9677a5ec31b2 100644 --- a/drivers/usb/storage/unusual_uas.h +++ b/drivers/usb/storage/unusual_uas.h @@ -90,6 +90,13 @@ UNUSUAL_DEV(0x152d, 0x0578, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_BROKEN_FUA), +/* Reported-by: Thinh Nguyen */ +UNUSUAL_DEV(0x154b, 0xf00b, 0x0000, 0x9999, + "PNY", + "Pro Elite SSD", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_NO_ATA_1X), + /* Reported-by: Thinh Nguyen */ UNUSUAL_DEV(0x154b, 0xf00d, 0x0000, 0x9999, "PNY", -- GitLab From 45ba7b195a369f35cb39094fdb32efe5908b34ad Mon Sep 17 00:00:00 2001 From: Shannon Zhao Date: Mon, 4 Jan 2021 19:38:44 +0800 Subject: [PATCH 1405/1894] arm64: cpufeature: remove non-exist CONFIG_KVM_ARM_HOST Commit d82755b2e781 ("KVM: arm64: Kill off CONFIG_KVM_ARM_HOST") deletes CONFIG_KVM_ARM_HOST option, it should use CONFIG_KVM instead. Just remove CONFIG_KVM_ARM_HOST here. Fixes: d82755b2e781 ("KVM: arm64: Kill off CONFIG_KVM_ARM_HOST") Signed-off-by: Shannon Zhao Acked-by: Catalin Marinas Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/1609760324-92271-1-git-send-email-shannon.zhao@linux.alibaba.com --- arch/arm64/kernel/cpufeature.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index d96f4554282df..bc3549663957c 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2558,7 +2558,7 @@ static void verify_hyp_capabilities(void) int parange, ipa_max; unsigned int safe_vmid_bits, vmid_bits; - if (!IS_ENABLED(CONFIG_KVM) || !IS_ENABLED(CONFIG_KVM_ARM_HOST)) + if (!IS_ENABLED(CONFIG_KVM)) return; safe_mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); -- GitLab From c9c48bb701ba78df7d4652146b12bcf3ad716507 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Tue, 22 Dec 2020 02:47:56 +0100 Subject: [PATCH 1406/1894] speakup: Add github repository URL and bug tracker We have set up a repository for users to try newer releases more easily, and keep records of known bugs. Signed-off-by: Samuel Thibault Link: https://lore.kernel.org/r/20201222014756.ov5vi6fywylbp5n6@function Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 546aa66428c9f..62934e7706460 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16710,6 +16710,8 @@ M: Samuel Thibault L: speakup@linux-speakup.org S: Odd Fixes W: http://www.linux-speakup.org/ +W: https://github.com/linux-speakup/speakup +B: https://github.com/linux-speakup/speakup/issues F: drivers/accessibility/speakup/ SPEAR CLOCK FRAMEWORK SUPPORT -- GitLab From f6bcb4c7f366905b66ce8ffca7190118244bb642 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 5 Jan 2021 14:42:29 +0300 Subject: [PATCH 1407/1894] regmap: debugfs: Fix a reversed if statement in regmap_debugfs_init() This code will leak "map->debugfs_name" because the if statement is reversed so it only frees NULL pointers instead of non-NULL. In fact the if statement is not required and should just be removed because kfree() accepts NULL pointers. Fixes: cffa4b2122f5 ("regmap: debugfs: Fix a memory leak when calling regmap_attach_dev") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/X/RQpfAwRdLg0GqQ@mwanda Signed-off-by: Mark Brown --- drivers/base/regmap/regmap-debugfs.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/base/regmap/regmap-debugfs.c b/drivers/base/regmap/regmap-debugfs.c index bf03cd343be23..ff2ee87987c7e 100644 --- a/drivers/base/regmap/regmap-debugfs.c +++ b/drivers/base/regmap/regmap-debugfs.c @@ -594,9 +594,7 @@ void regmap_debugfs_init(struct regmap *map) } if (!strcmp(name, "dummy")) { - if (!map->debugfs_name) - kfree(map->debugfs_name); - + kfree(map->debugfs_name); map->debugfs_name = kasprintf(GFP_KERNEL, "dummy%d", dummy_index); if (!map->debugfs_name) -- GitLab From dfe94d4086e40e92b1926bddcefa629b791e9b28 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Mon, 21 Dec 2020 22:55:41 -0800 Subject: [PATCH 1408/1894] x86/hyperv: Fix kexec panic/hang issues Currently the kexec kernel can panic or hang due to 2 causes: 1) hv_cpu_die() is not called upon kexec, so the hypervisor corrupts the old VP Assist Pages when the kexec kernel runs. The same issue is fixed for hibernation in commit 421f090c819d ("x86/hyperv: Suspend/resume the VP assist page for hibernation"). Now fix it for kexec. 2) hyperv_cleanup() is called too early. In the kexec path, the other CPUs are stopped in hv_machine_shutdown() -> native_machine_shutdown(), so between hv_kexec_handler() and native_machine_shutdown(), the other CPUs can still try to access the hypercall page and cause panic. The workaround "hv_hypercall_pg = NULL;" in hyperv_cleanup() is unreliabe. Move hyperv_cleanup() to a better place. Signed-off-by: Dexuan Cui Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20201222065541.24312-1-decui@microsoft.com Signed-off-by: Wei Liu --- arch/x86/hyperv/hv_init.c | 4 ++++ arch/x86/include/asm/mshyperv.h | 2 ++ arch/x86/kernel/cpu/mshyperv.c | 18 ++++++++++++++++++ drivers/hv/vmbus_drv.c | 2 -- 4 files changed, 24 insertions(+), 2 deletions(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index e04d90af4c27c..4638a52d8eaea 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -26,6 +27,8 @@ #include #include +int hyperv_init_cpuhp; + void *hv_hypercall_pg; EXPORT_SYMBOL_GPL(hv_hypercall_pg); @@ -401,6 +404,7 @@ void __init hyperv_init(void) register_syscore_ops(&hv_syscore_ops); + hyperv_init_cpuhp = cpuhp; return; remove_cpuhp_state: diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index ffc289992d1b0..30f76b9668579 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -74,6 +74,8 @@ static inline void hv_disable_stimer0_percpu_irq(int irq) {} #if IS_ENABLED(CONFIG_HYPERV) +extern int hyperv_init_cpuhp; + extern void *hv_hypercall_pg; extern void __percpu **hyperv_pcpu_input_arg; diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index f628e3dc150f3..43b54bef5448f 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -135,14 +135,32 @@ static void hv_machine_shutdown(void) { if (kexec_in_progress && hv_kexec_handler) hv_kexec_handler(); + + /* + * Call hv_cpu_die() on all the CPUs, otherwise later the hypervisor + * corrupts the old VP Assist Pages and can crash the kexec kernel. + */ + if (kexec_in_progress && hyperv_init_cpuhp > 0) + cpuhp_remove_state(hyperv_init_cpuhp); + + /* The function calls stop_other_cpus(). */ native_machine_shutdown(); + + /* Disable the hypercall page when there is only 1 active CPU. */ + if (kexec_in_progress) + hyperv_cleanup(); } static void hv_machine_crash_shutdown(struct pt_regs *regs) { if (hv_crash_handler) hv_crash_handler(regs); + + /* The function calls crash_smp_send_stop(). */ native_machine_crash_shutdown(regs); + + /* Disable the hypercall page when there is only 1 active CPU. */ + hyperv_cleanup(); } #endif /* CONFIG_KEXEC_CORE */ #endif /* CONFIG_HYPERV */ diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 502f8cd95f6d4..d491fdcee61f0 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -2550,7 +2550,6 @@ static void hv_kexec_handler(void) /* Make sure conn_state is set as hv_synic_cleanup checks for it */ mb(); cpuhp_remove_state(hyperv_cpuhp_online); - hyperv_cleanup(); }; static void hv_crash_handler(struct pt_regs *regs) @@ -2566,7 +2565,6 @@ static void hv_crash_handler(struct pt_regs *regs) cpu = smp_processor_id(); hv_stimer_cleanup(cpu); hv_synic_disable_regs(cpu); - hyperv_cleanup(); }; static int hv_synic_suspend(void) -- GitLab From 3fb6819f411b5a89afb5726afafacf0c4b62844f Mon Sep 17 00:00:00 2001 From: Tian Tao Date: Mon, 28 Dec 2020 15:05:08 +0800 Subject: [PATCH 1409/1894] arm64: traps: remove duplicate include statement asm/exception.h is included more than once. Remove the one that isn't necessary. Signed-off-by: Tian Tao Link: https://lore.kernel.org/r/1609139108-10819-1-git-send-email-tiantao6@hisilicon.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/traps.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 08156be755691..6895ce777e7f2 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -42,7 +42,6 @@ #include #include #include -#include #include #include -- GitLab From e2bba5f92354488c331b7821d873db7c388e31aa Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Wed, 30 Dec 2020 14:19:54 -0800 Subject: [PATCH 1410/1894] arm64: vdso: disable .eh_frame_hdr via /DISCARD/ instead of --no-eh-frame-hdr Currently with ld.lld we emit an empty .eh_frame_hdr section (and a corresponding program header) into the vDSO. With ld.bfd the section is not emitted but the program header is, with p_vaddr set to 0. This can lead to unwinders attempting to interpret the data at whichever location the program header happens to point to as an unwind info header. This happens to be mostly harmless as long as the byte at that location (interpreted as a version number) has a value other than 1, causing both libgcc and LLVM libunwind to ignore the section (in libunwind's case, after printing an error message to stderr), but it could lead to worse problems if the byte happened to be 1 or the program header points to non-readable memory (e.g. if the empty section was placed at a page boundary). Instead of disabling .eh_frame_hdr via --no-eh-frame-hdr (which also has the downside of being unsupported by older versions of GNU binutils), disable it by discarding the section, and stop emitting the program header that points to it. I understand that we intend to emit valid unwind info for the vDSO at some point. Once that happens this patch can be reverted. Signed-off-by: Peter Collingbourne Acked-by: Ard Biesheuvel Acked-by: Vincenzo Frascino Link: https://linux-review.googlesource.com/id/If745fd9cadcb31b4010acbf5693727fe111b0863 Link: https://lore.kernel.org/r/20201230221954.2007257-1-pcc@google.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/vdso/Makefile | 3 +-- arch/arm64/kernel/vdso/vdso.lds.S | 5 +---- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index a8f8e409e2bfb..cd9c3fa25902f 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -24,8 +24,7 @@ btildflags-$(CONFIG_ARM64_BTI_KERNEL) += -z force-bti # routines, as x86 does (see 6f121e548f83 ("x86, vdso: Reimplement vdso.so # preparation in build-time C")). ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \ - -Bsymbolic $(call ld-option, --no-eh-frame-hdr) --build-id=sha1 -n \ - $(btildflags-y) -T + -Bsymbolic --build-id=sha1 -n $(btildflags-y) -T ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18 ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S index d808ad31e01f7..61dbb4c838ef7 100644 --- a/arch/arm64/kernel/vdso/vdso.lds.S +++ b/arch/arm64/kernel/vdso/vdso.lds.S @@ -40,9 +40,6 @@ SECTIONS PROVIDE (_etext = .); PROVIDE (etext = .); - .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr - .eh_frame : { KEEP (*(.eh_frame)) } :text - .dynamic : { *(.dynamic) } :text :dynamic .rodata : { *(.rodata*) } :text @@ -54,6 +51,7 @@ SECTIONS *(.note.GNU-stack) *(.data .data.* .gnu.linkonce.d.* .sdata*) *(.bss .sbss .dynbss .dynsbss) + *(.eh_frame .eh_frame_hdr) } } @@ -66,7 +64,6 @@ PHDRS text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */ dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ note PT_NOTE FLAGS(4); /* PF_R */ - eh_frame_hdr PT_GNU_EH_FRAME; } /* -- GitLab From f34d93f30d6a72f6b15ba24b6994b746df0c30de Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 4 Jan 2021 12:15:41 +0000 Subject: [PATCH 1411/1894] arm64: kasan: Set TCR_EL1.TBID1 when KASAN_HW_TAGS is enabled Commit 49b3cf035edc ("kasan: arm64: set TCR_EL1.TBID1 when enabled") set the TBID1 bit for the KASAN_SW_TAGS configuration, freeing up 8 bits to be used by PAC. With in-kernel MTE now in mainline, also set this bit for the KASAN_HW_TAGS configuration. Signed-off-by: Catalin Marinas Cc: Peter Collingbourne Acked-by: Vincenzo Frascino Acked-by: Andrey Konovalov --- arch/arm64/mm/proc.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 37a54b57178a7..1f7ee8c8b7b81 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -46,7 +46,7 @@ #endif #ifdef CONFIG_KASAN_HW_TAGS -#define TCR_KASAN_HW_FLAGS SYS_TCR_EL1_TCMA1 | TCR_TBI1 +#define TCR_KASAN_HW_FLAGS SYS_TCR_EL1_TCMA1 | TCR_TBI1 | TCR_TBID1 #else #define TCR_KASAN_HW_FLAGS 0 #endif -- GitLab From a8f7e08a81708920a928664a865208fdf451c49f Mon Sep 17 00:00:00 2001 From: Peter Gonda Date: Tue, 5 Jan 2021 08:33:11 -0800 Subject: [PATCH 1412/1894] x86/sev-es: Fix SEV-ES OUT/IN immediate opcode vc handling The IN and OUT instructions with port address as an immediate operand only use an 8-bit immediate (imm8). The current VC handler uses the entire 32-bit immediate value but these instructions only set the first bytes. Cast the operand to an u8 for that. [ bp: Massage commit message. ] Fixes: 25189d08e5168 ("x86/sev-es: Add support for handling IOIO exceptions") Signed-off-by: Peter Gonda Signed-off-by: Borislav Petkov Acked-by: David Rientjes Link: https://lkml.kernel.org/r/20210105163311.221490-1-pgonda@google.com --- arch/x86/kernel/sev-es-shared.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/sev-es-shared.c b/arch/x86/kernel/sev-es-shared.c index 7d04b356d44d3..cdc04d0912423 100644 --- a/arch/x86/kernel/sev-es-shared.c +++ b/arch/x86/kernel/sev-es-shared.c @@ -305,14 +305,14 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo) case 0xe4: case 0xe5: *exitinfo |= IOIO_TYPE_IN; - *exitinfo |= (u64)insn->immediate.value << 16; + *exitinfo |= (u8)insn->immediate.value << 16; break; /* OUT immediate opcodes */ case 0xe6: case 0xe7: *exitinfo |= IOIO_TYPE_OUT; - *exitinfo |= (u64)insn->immediate.value << 16; + *exitinfo |= (u8)insn->immediate.value << 16; break; /* IN register opcodes */ -- GitLab From d16baa3f1453c14d680c5fee01cd122a22d0e0ce Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 5 Jan 2021 12:37:23 -0500 Subject: [PATCH 1413/1894] blk-iocost: fix NULL iocg deref from racing against initialization When initializing iocost for a queue, its rqos should be registered before the blkcg policy is activated to allow policy data initiailization to lookup the associated ioc. This unfortunately means that the rqos methods can be called on bios before iocgs are attached to all existing blkgs. While the race is theoretically possible on ioc_rqos_throttle(), it mostly happened in ioc_rqos_merge() due to the difference in how they lookup ioc. The former determines it from the passed in @rqos and then bails before dereferencing iocg if the looked up ioc is disabled, which most likely is the case if initialization is still in progress. The latter looked up ioc by dereferencing the possibly NULL iocg making it a lot more prone to actually triggering the bug. * Make ioc_rqos_merge() use the same method as ioc_rqos_throttle() to look up ioc for consistency. * Make ioc_rqos_throttle() and ioc_rqos_merge() test for NULL iocg before dereferencing it. * Explain the danger of NULL iocgs in blk_iocost_init(). Signed-off-by: Tejun Heo Reported-by: Jonathan Lemon Cc: stable@vger.kernel.org # v5.4+ Signed-off-by: Jens Axboe --- block/blk-iocost.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/block/blk-iocost.c b/block/blk-iocost.c index ac6078a349394..98d656bdb42b7 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -2551,8 +2551,8 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio) bool use_debt, ioc_locked; unsigned long flags; - /* bypass IOs if disabled or for root cgroup */ - if (!ioc->enabled || !iocg->level) + /* bypass IOs if disabled, still initializing, or for root cgroup */ + if (!ioc->enabled || !iocg || !iocg->level) return; /* calculate the absolute vtime cost */ @@ -2679,14 +2679,14 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq, struct bio *bio) { struct ioc_gq *iocg = blkg_to_iocg(bio->bi_blkg); - struct ioc *ioc = iocg->ioc; + struct ioc *ioc = rqos_to_ioc(rqos); sector_t bio_end = bio_end_sector(bio); struct ioc_now now; u64 vtime, abs_cost, cost; unsigned long flags; - /* bypass if disabled or for root cgroup */ - if (!ioc->enabled || !iocg->level) + /* bypass if disabled, still initializing, or for root cgroup */ + if (!ioc->enabled || !iocg || !iocg->level) return; abs_cost = calc_vtime_cost(bio, iocg, true); @@ -2863,6 +2863,12 @@ static int blk_iocost_init(struct request_queue *q) ioc_refresh_params(ioc, true); spin_unlock_irq(&ioc->lock); + /* + * rqos must be added before activation to allow iocg_pd_init() to + * lookup the ioc from q. This means that the rqos methods may get + * called before policy activation completion, can't assume that the + * target bio has an iocg associated and need to test for NULL iocg. + */ rq_qos_add(q, rqos); ret = blkcg_activate_policy(q, &blkcg_policy_iocost); if (ret) { -- GitLab From 6d4d273588378c65915acaf7b2ee74e9dd9c130a Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 10 Dec 2020 10:44:33 +0100 Subject: [PATCH 1414/1894] bfq: Fix computation of shallow depth BFQ computes number of tags it allows to be allocated for each request type based on tag bitmap. However it uses 1 << bitmap.shift as number of available tags which is wrong. 'shift' is just an internal bitmap value containing logarithm of how many bits bitmap uses in each bitmap word. Thus number of tags allowed for some request types can be far to low. Use proper bitmap.depth which has the number of tags instead. Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 9e81d1052091f..9e4eb0fc1c16e 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -6332,13 +6332,13 @@ static unsigned int bfq_update_depths(struct bfq_data *bfqd, * limit 'something'. */ /* no more than 50% of tags for async I/O */ - bfqd->word_depths[0][0] = max((1U << bt->sb.shift) >> 1, 1U); + bfqd->word_depths[0][0] = max(bt->sb.depth >> 1, 1U); /* * no more than 75% of tags for sync writes (25% extra tags * w.r.t. async I/O, to prevent async I/O from starving sync * writes) */ - bfqd->word_depths[0][1] = max(((1U << bt->sb.shift) * 3) >> 2, 1U); + bfqd->word_depths[0][1] = max((bt->sb.depth * 3) >> 2, 1U); /* * In-word depths in case some bfq_queue is being weight- @@ -6348,9 +6348,9 @@ static unsigned int bfq_update_depths(struct bfq_data *bfqd, * shortage. */ /* no more than ~18% of tags for async I/O */ - bfqd->word_depths[1][0] = max(((1U << bt->sb.shift) * 3) >> 4, 1U); + bfqd->word_depths[1][0] = max((bt->sb.depth * 3) >> 4, 1U); /* no more than ~37% of tags for sync writes (~20% extra tags) */ - bfqd->word_depths[1][1] = max(((1U << bt->sb.shift) * 6) >> 4, 1U); + bfqd->word_depths[1][1] = max((bt->sb.depth * 6) >> 4, 1U); for (i = 0; i < 2; i++) for (j = 0; j < 2; j++) -- GitLab From 170b3bbda08852277b97f4f0516df0785c939764 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Tue, 5 Jan 2021 21:53:40 +0800 Subject: [PATCH 1415/1894] =?UTF-8?q?io=5Furing:=20Delete=20useless=20vari?= =?UTF-8?q?able=20=E2=80=98id=E2=80=99=20in=20io=5Fprep=5Fasync=5Fwork?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix follow warning: fs/io_uring.c:1523:22: warning: variable ‘id’ set but not used [-Wunused-but-set-variable] struct io_identity *id; ^~ Reported-by: Hulk Robot Signed-off-by: Ye Bin Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 5bccb235271f7..dc92ca5090a39 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1523,10 +1523,8 @@ static void io_prep_async_work(struct io_kiocb *req) { const struct io_op_def *def = &io_op_defs[req->opcode]; struct io_ring_ctx *ctx = req->ctx; - struct io_identity *id; io_req_init_async(req); - id = req->work.identity; if (req->flags & REQ_F_FORCE_ASYNC) req->work.flags |= IO_WQ_WORK_CONCURRENT; -- GitLab From aebf5db917055b38f4945ed6d621d9f07a44ff30 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 21 Dec 2020 12:33:35 +0800 Subject: [PATCH 1416/1894] block: fix use-after-free in disk_part_iter_next Make sure that bdgrab() is done on the 'block_device' instance before referring to it for avoiding use-after-free. Cc: Reported-by: syzbot+825f0f9657d4e528046e@syzkaller.appspotmail.com Signed-off-by: Ming Lei Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/genhd.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/block/genhd.c b/block/genhd.c index 73faec438e49a..419548e92d82f 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -246,15 +246,18 @@ struct block_device *disk_part_iter_next(struct disk_part_iter *piter) part = rcu_dereference(ptbl->part[piter->idx]); if (!part) continue; + piter->part = bdgrab(part); + if (!piter->part) + continue; if (!bdev_nr_sectors(part) && !(piter->flags & DISK_PITER_INCL_EMPTY) && !(piter->flags & DISK_PITER_INCL_EMPTY_PART0 && - piter->idx == 0)) + piter->idx == 0)) { + bdput(piter->part); + piter->part = NULL; continue; + } - piter->part = bdgrab(part); - if (!piter->part) - continue; piter->idx += inc; break; } -- GitLab From 6775ae901ffd130d0be9c32837f88d1f9d560189 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 22 Dec 2020 17:42:32 +0100 Subject: [PATCH 1417/1894] iommu/iova: fix 'domain' typos Replace misspelled 'doamin' with 'domain' in several comments. Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20201222164232.88795-1-sgarzare@redhat.com Signed-off-by: Will Deacon --- drivers/iommu/iova.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 4bb3293ae4d73..d20b8b333d30d 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -358,7 +358,7 @@ static void private_free_iova(struct iova_domain *iovad, struct iova *iova) * @iovad: - iova domain in question. * @pfn: - page frame number * This function finds and returns an iova belonging to the - * given doamin which matches the given pfn. + * given domain which matches the given pfn. */ struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn) { @@ -601,7 +601,7 @@ void queue_iova(struct iova_domain *iovad, EXPORT_SYMBOL_GPL(queue_iova); /** - * put_iova_domain - destroys the iova doamin + * put_iova_domain - destroys the iova domain * @iovad: - iova domain in question. * All the iova's in that domain are destroyed. */ @@ -712,9 +712,9 @@ EXPORT_SYMBOL_GPL(reserve_iova); /** * copy_reserved_iova - copies the reserved between domains - * @from: - source doamin from where to copy + * @from: - source domain from where to copy * @to: - destination domin where to copy - * This function copies reserved iova's from one doamin to + * This function copies reserved iova's from one domain to * other. */ void -- GitLab From ff2b46d7cff80d27d82f7f3252711f4ca1666129 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Tue, 5 Jan 2021 13:18:37 +0800 Subject: [PATCH 1418/1894] iommu/intel: Fix memleak in intel_irq_remapping_alloc When irq_domain_get_irq_data() or irqd_cfg() fails at i == 0, data allocated by kzalloc() has not been freed before returning, which leads to memleak. Fixes: b106ee63abcc ("irq_remapping/vt-d: Enhance Intel IR driver to support hierarchical irqdomains") Signed-off-by: Dinghao Liu Acked-by: Lu Baolu Link: https://lore.kernel.org/r/20210105051837.32118-1-dinghao.liu@zju.edu.cn Signed-off-by: Will Deacon --- drivers/iommu/intel/irq_remapping.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index aeffda92b10b7..685200a5cff0f 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -1353,6 +1353,8 @@ static int intel_irq_remapping_alloc(struct irq_domain *domain, irq_data = irq_domain_get_irq_data(domain, virq + i); irq_cfg = irqd_cfg(irq_data); if (!irq_data || !irq_cfg) { + if (!i) + kfree(data); ret = -EINVAL; goto out_free_data; } -- GitLab From 12bc4570c14e24e6244d66466aeda994f805634b Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 5 Jan 2021 01:32:51 +0000 Subject: [PATCH 1419/1894] iommu/amd: Set iommu->int_enabled consistently when interrupts are set up When I made the INTCAPXT support stop gratuitously pretending to be MSI, I missed the fact that iommu_setup_msi() also sets the ->int_enabled flag. I missed this in the iommu_setup_intcapxt() code path, which means that a resume from suspend will try to allocate the IRQ domains again, accidentally re-enabling interrupts as it does, resulting in much sadness. Lift out the bit which sets iommu->int_enabled into the iommu_init_irq() function which is also where it gets checked. Link: https://lore.kernel.org/r/20210104132250.GE32151@zn.tnic/ Fixes: d1adcfbb520c ("iommu/amd: Fix IOMMU interrupt generation in X2APIC mode") Reported-by: Borislav Petkov Signed-off-by: David Woodhouse Tested-by: Borislav Petkov Link: https://lore.kernel.org/r/50cd5f55be8ead0937ac315cd2f5b89364f6a9a5.camel@infradead.org Signed-off-by: Will Deacon --- drivers/iommu/amd/init.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index f54cd79b43e40..6a1f7048dacc6 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -1973,8 +1973,6 @@ static int iommu_setup_msi(struct amd_iommu *iommu) return r; } - iommu->int_enabled = true; - return 0; } @@ -2169,6 +2167,7 @@ static int iommu_init_irq(struct amd_iommu *iommu) if (ret) return ret; + iommu->int_enabled = true; enable_faults: iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); -- GitLab From b34f10c2dc5961021850c3c15f46a84b56a0c0e8 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 5 Jan 2021 01:36:13 +0000 Subject: [PATCH 1420/1894] iommu/amd: Stop irq_remapping_select() matching when remapping is disabled The AMD IOMMU initialisation registers the IRQ remapping domain for each IOMMU before doing the final sanity check that every I/OAPIC is covered. This means that the AMD irq_remapping_select() function gets invoked even when IRQ remapping has been disabled, eventually leading to a NULL pointer dereference in alloc_irq_table(). Unfortunately, the IVRS isn't fully parsed early enough that the sanity check can be done in time to registering the IRQ domain altogether. Doing that would be nice, but is a larger and more error-prone task. The simple fix is just for irq_remapping_select() to refuse to report a match when IRQ remapping has disabled. Link: https://lore.kernel.org/lkml/ed4be9b4-24ac-7128-c522-7ef359e8185d@gmx.at Fixes: a1a785b57242 ("iommu/amd: Implement select() method on remapping irqdomain") Reported-by: Johnathan Smithinovic Signed-off-by: David Woodhouse Link: https://lore.kernel.org/r/04bbe8bca87f81a3cfa93ec4299e53f47e00e5b3.camel@infradead.org Signed-off-by: Will Deacon --- drivers/iommu/amd/iommu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 7e2c445a1faec..f0adbc48fd179 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -3854,6 +3854,9 @@ static int irq_remapping_select(struct irq_domain *d, struct irq_fwspec *fwspec, struct amd_iommu *iommu; int devid = -1; + if (!amd_iommu_irq_remap) + return 0; + if (x86_fwspec_is_ioapic(fwspec)) devid = get_ioapic_devid(fwspec->param[0]); else if (x86_fwspec_is_hpet(fwspec)) -- GitLab From c2407cf7d22d0c0d94cf20342b3b8f06f1d904e7 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 5 Jan 2021 11:33:00 -0800 Subject: [PATCH 1421/1894] mm: make wait_on_page_writeback() wait for multiple pending writebacks Ever since commit 2a9127fcf229 ("mm: rewrite wait_on_page_bit_common() logic") we've had some very occasional reports of BUG_ON(PageWriteback) in write_cache_pages(), which we thought we already fixed in commit 073861ed77b6 ("mm: fix VM_BUG_ON(PageTail) and BUG_ON(PageWriteback)"). But syzbot just reported another one, even with that commit in place. And it turns out that there's a simpler way to trigger the BUG_ON() than the one Hugh found with page re-use. It all boils down to the fact that the page writeback is ostensibly serialized by the page lock, but that isn't actually really true. Yes, the people _setting_ writeback all do so under the page lock, but the actual clearing of the bit - and waking up any waiters - happens without any page lock. This gives us this fairly simple race condition: CPU1 = end previous writeback CPU2 = start new writeback under page lock CPU3 = write_cache_pages() CPU1 CPU2 CPU3 ---- ---- ---- end_page_writeback() test_clear_page_writeback(page) ... delayed... lock_page(); set_page_writeback() unlock_page() lock_page() wait_on_page_writeback(); wake_up_page(page, PG_writeback); .. wakes up CPU3 .. BUG_ON(PageWriteback(page)); where the BUG_ON() happens because we woke up the PG_writeback bit becasue of the _previous_ writeback, but a new one had already been started because the clearing of the bit wasn't actually atomic wrt the actual wakeup or serialized by the page lock. The reason this didn't use to happen was that the old logic in waiting on a page bit would just loop if it ever saw the bit set again. The nice proper fix would probably be to get rid of the whole "wait for writeback to clear, and then set it" logic in the writeback path, and replace it with an atomic "wait-to-set" (ie the same as we have for page locking: we set the page lock bit with a single "lock_page()", not with "wait for lock bit to clear and then set it"). However, out current model for writeback is that the waiting for the writeback bit is done by the generic VFS code (ie write_cache_pages()), but the actual setting of the writeback bit is done much later by the filesystem ".writepages()" function. IOW, to make the writeback bit have that same kind of "wait-to-set" behavior as we have for page locking, we'd have to change our roughly ~50 different writeback functions. Painful. Instead, just make "wait_on_page_writeback()" loop on the very unlikely situation that the PG_writeback bit is still set, basically re-instating the old behavior. This is very non-optimal in case of contention, but since we only ever set the bit under the page lock, that situation is controlled. Reported-by: syzbot+2fc0712f8f8b8b8fa0ef@syzkaller.appspotmail.com Fixes: 2a9127fcf229 ("mm: rewrite wait_on_page_bit_common() logic") Acked-by: Hugh Dickins Cc: Andrew Morton Cc: Matthew Wilcox Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- mm/page-writeback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 586042472ac90..eb34d204d4ee7 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2826,7 +2826,7 @@ EXPORT_SYMBOL(__test_set_page_writeback); */ void wait_on_page_writeback(struct page *page) { - if (PageWriteback(page)) { + while (PageWriteback(page)) { trace_wait_on_page_writeback(page, page_mapping(page)); wait_on_page_bit(page, PG_writeback); } -- GitLab From 8a48c0a3360bf2bf4f40c980d0ec216e770e58ee Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 4 Jan 2021 19:44:53 -0800 Subject: [PATCH 1422/1894] arch/arc: add copy_user_page() to to fix build error on ARC fs/dax.c uses copy_user_page() but ARC does not provide that interface, resulting in a build error. Provide copy_user_page() in . ../fs/dax.c: In function 'copy_cow_page_dax': ../fs/dax.c:702:2: error: implicit declaration of function 'copy_user_page'; did you mean 'copy_to_user_page'? [-Werror=implicit-function-declaration] Reported-by: kernel test robot Signed-off-by: Randy Dunlap Cc: Vineet Gupta Cc: linux-snps-arc@lists.infradead.org Cc: Dan Williams #Acked-by: Vineet Gupta # v1 Cc: Andrew Morton Cc: Matthew Wilcox Cc: Jan Kara Cc: linux-fsdevel@vger.kernel.org Cc: linux-nvdimm@lists.01.org #Reviewed-by: Ira Weiny # v2 Signed-off-by: Vineet Gupta --- arch/arc/include/asm/page.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h index 23e41e890eda7..ad9b7fe4dba36 100644 --- a/arch/arc/include/asm/page.h +++ b/arch/arc/include/asm/page.h @@ -10,6 +10,7 @@ #ifndef __ASSEMBLY__ #define clear_page(paddr) memset((paddr), 0, PAGE_SIZE) +#define copy_user_page(to, from, vaddr, pg) copy_page(to, from) #define copy_page(to, from) memcpy((to), (from), PAGE_SIZE) struct vm_area_struct; -- GitLab From f4d9359de8ac0fb64a5ecc9c34833705eb53327b Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 5 Nov 2020 13:22:10 -0800 Subject: [PATCH 1423/1894] include/soc: remove headers for EZChip NPS NPS platform has been removed from ARC port and there are no in-tree user of it now . So RIP ! Signed-off-by: Vineet Gupta --- include/soc/nps/common.h | 172 --------------------------------------- include/soc/nps/mtm.h | 59 -------------- 2 files changed, 231 deletions(-) delete mode 100644 include/soc/nps/common.h delete mode 100644 include/soc/nps/mtm.h diff --git a/include/soc/nps/common.h b/include/soc/nps/common.h deleted file mode 100644 index 8c18dc6d3fde5..0000000000000 --- a/include/soc/nps/common.h +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Copyright (c) 2016, Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef SOC_NPS_COMMON_H -#define SOC_NPS_COMMON_H - -#ifdef CONFIG_SMP -#define NPS_IPI_IRQ 5 -#endif - -#define NPS_HOST_REG_BASE 0xF6000000 - -#define NPS_MSU_BLKID 0x018 - -#define CTOP_INST_RSPI_GIC_0_R12 0x3C56117E -#define CTOP_INST_MOV2B_FLIP_R3_B1_B2_INST 0x5B60 -#define CTOP_INST_MOV2B_FLIP_R3_B1_B2_LIMM 0x00010422 - -#ifndef AUX_IENABLE -#define AUX_IENABLE 0x40c -#endif - -#define CTOP_AUX_IACK (0xFFFFF800 + 0x088) - -#ifndef __ASSEMBLY__ - -/* In order to increase compilation test coverage */ -#ifdef CONFIG_ARC -static inline void nps_ack_gic(void) -{ - __asm__ __volatile__ ( - " .word %0\n" - : - : "i"(CTOP_INST_RSPI_GIC_0_R12) - : "memory"); -} -#else -static inline void nps_ack_gic(void) { } -#define write_aux_reg(r, v) -#define read_aux_reg(r) 0 -#endif - -/* CPU global ID */ -struct global_id { - union { - struct { -#ifdef CONFIG_EZNPS_MTM_EXT - u32 __reserved:20, cluster:4, core:4, thread:4; -#else - u32 __reserved:24, cluster:4, core:4; -#endif - }; - u32 value; - }; -}; - -/* - * Convert logical to physical CPU IDs - * - * The conversion swap bits 1 and 2 of cluster id (out of 4 bits) - * Now quad of logical clusters id's are adjacent physically, - * and not like the id's physically came with each cluster. - * Below table is 4x4 mesh of core clusters as it layout on chip. - * Cluster ids are in format: logical (physical) - * - * ----------------- ------------------ - * 3 | 5 (3) 7 (7) | | 13 (11) 15 (15)| - * - * 2 | 4 (2) 6 (6) | | 12 (10) 14 (14)| - * ----------------- ------------------ - * 1 | 1 (1) 3 (5) | | 9 (9) 11 (13)| - * - * 0 | 0 (0) 2 (4) | | 8 (8) 10 (12)| - * ----------------- ------------------ - * 0 1 2 3 - */ -static inline int nps_cluster_logic_to_phys(int cluster) -{ -#ifdef __arc__ - __asm__ __volatile__( - " mov r3,%0\n" - " .short %1\n" - " .word %2\n" - " mov %0,r3\n" - : "+r"(cluster) - : "i"(CTOP_INST_MOV2B_FLIP_R3_B1_B2_INST), - "i"(CTOP_INST_MOV2B_FLIP_R3_B1_B2_LIMM) - : "r3"); -#endif - - return cluster; -} - -#define NPS_CPU_TO_CLUSTER_NUM(cpu) \ - ({ struct global_id gid; gid.value = cpu; \ - nps_cluster_logic_to_phys(gid.cluster); }) - -struct nps_host_reg_address { - union { - struct { - u32 base:8, cl_x:4, cl_y:4, - blkid:6, reg:8, __reserved:2; - }; - u32 value; - }; -}; - -struct nps_host_reg_address_non_cl { - union { - struct { - u32 base:7, blkid:11, reg:12, __reserved:2; - }; - u32 value; - }; -}; - -static inline void *nps_host_reg_non_cl(u32 blkid, u32 reg) -{ - struct nps_host_reg_address_non_cl reg_address; - - reg_address.value = NPS_HOST_REG_BASE; - reg_address.blkid = blkid; - reg_address.reg = reg; - - return (void *)reg_address.value; -} - -static inline void *nps_host_reg(u32 cpu, u32 blkid, u32 reg) -{ - struct nps_host_reg_address reg_address; - u32 cl = NPS_CPU_TO_CLUSTER_NUM(cpu); - - reg_address.value = NPS_HOST_REG_BASE; - reg_address.cl_x = (cl >> 2) & 0x3; - reg_address.cl_y = cl & 0x3; - reg_address.blkid = blkid; - reg_address.reg = reg; - - return (void *)reg_address.value; -} -#endif /* __ASSEMBLY__ */ - -#endif /* SOC_NPS_COMMON_H */ diff --git a/include/soc/nps/mtm.h b/include/soc/nps/mtm.h deleted file mode 100644 index d2f5e7e3703ef..0000000000000 --- a/include/soc/nps/mtm.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2016, Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef SOC_NPS_MTM_H -#define SOC_NPS_MTM_H - -#define CTOP_INST_HWSCHD_OFF_R3 0x3B6F00BF -#define CTOP_INST_HWSCHD_RESTORE_R3 0x3E6F70C3 - -static inline void hw_schd_save(unsigned int *flags) -{ - __asm__ __volatile__( - " .word %1\n" - " st r3,[%0]\n" - : - : "r"(flags), "i"(CTOP_INST_HWSCHD_OFF_R3) - : "r3", "memory"); -} - -static inline void hw_schd_restore(unsigned int flags) -{ - __asm__ __volatile__( - " mov r3, %0\n" - " .word %1\n" - : - : "r"(flags), "i"(CTOP_INST_HWSCHD_RESTORE_R3) - : "r3"); -} - -#endif /* SOC_NPS_MTM_H */ -- GitLab From 2860d45a589818dd8ffd90cdc4bcf77f36a5a6be Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Jan 2021 22:36:17 +0100 Subject: [PATCH 1424/1894] qed: select CONFIG_CRC32 Without this, the driver fails to link: lpc_eth.c:(.text+0x1934): undefined reference to `crc32_le' arm-linux-gnueabi-ld: drivers/net/ethernet/qlogic/qed/qed_debug.o: in function `qed_grc_dump': qed_debug.c:(.text+0x4068): undefined reference to `crc32_le' arm-linux-gnueabi-ld: drivers/net/ethernet/qlogic/qed/qed_debug.o: in function `qed_idle_chk_dump': qed_debug.c:(.text+0x51fc): undefined reference to `crc32_le' arm-linux-gnueabi-ld: drivers/net/ethernet/qlogic/qed/qed_debug.o: in function `qed_mcp_trace_dump': qed_debug.c:(.text+0x6000): undefined reference to `crc32_le' arm-linux-gnueabi-ld: drivers/net/ethernet/qlogic/qed/qed_debug.o: in function `qed_dbg_reg_fifo_dump': qed_debug.c:(.text+0x66cc): undefined reference to `crc32_le' arm-linux-gnueabi-ld: drivers/net/ethernet/qlogic/qed/qed_debug.o:qed_debug.c:(.text+0x6aa4): more undefined references to `crc32_le' follow Fixes: 7a4b21b7d1f0 ("qed: Add nvram selftest") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/qlogic/Kconfig b/drivers/net/ethernet/qlogic/Kconfig index 4366c7a8de951..6b5ddb07ee833 100644 --- a/drivers/net/ethernet/qlogic/Kconfig +++ b/drivers/net/ethernet/qlogic/Kconfig @@ -78,6 +78,7 @@ config QED depends on PCI select ZLIB_INFLATE select CRC8 + select CRC32 select NET_DEVLINK help This enables the support for Marvell FastLinQ adapters family. -- GitLab From f9d6f94132f01d2a552dcbab54fa56496638186d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Jan 2021 22:36:18 +0100 Subject: [PATCH 1425/1894] phy: dp83640: select CONFIG_CRC32 Without crc32, this driver fails to link: arm-linux-gnueabi-ld: drivers/net/phy/dp83640.o: in function `match': dp83640.c:(.text+0x476c): undefined reference to `crc32_le' Fixes: 539e44d26855 ("dp83640: Include hash in timestamp/packet matching") Signed-off-by: Arnd Bergmann Reviewed-by: Andrew Lunn Acked-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/ptp/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig index 476d7c7fe70a7..d2bf05ccbbe20 100644 --- a/drivers/ptp/Kconfig +++ b/drivers/ptp/Kconfig @@ -64,6 +64,7 @@ config DP83640_PHY depends on NETWORK_PHY_TIMESTAMPING depends on PHYLIB depends on PTP_1588_CLOCK + select CRC32 help Supports the DP83640 PHYTER with IEEE 1588 features. -- GitLab From 1d48595c786b1b9dc6be301e8d7f6fc74e9882aa Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Jan 2021 22:36:19 +0100 Subject: [PATCH 1426/1894] can: kvaser_pciefd: select CONFIG_CRC32 Without crc32, this driver fails to link: arm-linux-gnueabi-ld: drivers/net/can/kvaser_pciefd.o: in function `kvaser_pciefd_probe': kvaser_pciefd.c:(.text+0x2b0): undefined reference to `crc32_be' Fixes: 26ad340e582d ("can: kvaser_pciefd: Add driver for Kvaser PCIEcan devices") Signed-off-by: Arnd Bergmann Acked-by: Marc Kleine-Budde Signed-off-by: David S. Miller --- drivers/net/can/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig index 424970939fd4c..1c28eade6becc 100644 --- a/drivers/net/can/Kconfig +++ b/drivers/net/can/Kconfig @@ -123,6 +123,7 @@ config CAN_JANZ_ICAN3 config CAN_KVASER_PCIEFD depends on PCI tristate "Kvaser PCIe FD cards" + select CRC32 help This is a driver for the Kvaser PCI Express CAN FD family. -- GitLab From e186620d7bf11b274b985b839c38266d7918cc05 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Jan 2021 22:36:20 +0100 Subject: [PATCH 1427/1894] wil6210: select CONFIG_CRC32 Without crc32, the driver fails to link: arm-linux-gnueabi-ld: drivers/net/wireless/ath/wil6210/fw.o: in function `wil_fw_verify': fw.c:(.text+0x74c): undefined reference to `crc32_le' arm-linux-gnueabi-ld: drivers/net/wireless/ath/wil6210/fw.o:fw.c:(.text+0x758): more undefined references to `crc32_le' follow Fixes: 151a9706503f ("wil6210: firmware download") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/wireless/ath/wil6210/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/ath/wil6210/Kconfig b/drivers/net/wireless/ath/wil6210/Kconfig index 6a95b199bf626..f074e9c31aa22 100644 --- a/drivers/net/wireless/ath/wil6210/Kconfig +++ b/drivers/net/wireless/ath/wil6210/Kconfig @@ -2,6 +2,7 @@ config WIL6210 tristate "Wilocity 60g WiFi card wil6210 support" select WANT_DEV_COREDUMP + select CRC32 depends on CFG80211 depends on PCI default n -- GitLab From 152a8a6c017bfdeda7f6d052fbc6e151891bd9b6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Jan 2021 22:36:21 +0100 Subject: [PATCH 1428/1894] cfg80211: select CONFIG_CRC32 Without crc32 support, this fails to link: arm-linux-gnueabi-ld: net/wireless/scan.o: in function `cfg80211_scan_6ghz': scan.c:(.text+0x928): undefined reference to `crc32_le' Fixes: c8cb5b854b40 ("nl80211/cfg80211: support 6 GHz scanning") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- net/wireless/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index 27026f587fa61..f620acd2a0f5e 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -21,6 +21,7 @@ config CFG80211 tristate "cfg80211 - wireless configuration API" depends on RFKILL || !RFKILL select FW_LOADER + select CRC32 # may need to update this when certificates are changed and are # using a different algorithm, though right now they shouldn't # (this is here rather than below to allow it to be a module) -- GitLab From 51049bd903a81307f751babe15a1df8d197884e8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Jan 2021 22:36:22 +0100 Subject: [PATCH 1429/1894] misdn: dsp: select CONFIG_BITREVERSE Without this, we run into a link error arm-linux-gnueabi-ld: drivers/isdn/mISDN/dsp_audio.o: in function `dsp_audio_generate_law_tables': (.text+0x30c): undefined reference to `byte_rev_table' arm-linux-gnueabi-ld: drivers/isdn/mISDN/dsp_audio.o:(.text+0x5e4): more undefined references to `byte_rev_table' follow Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/isdn/mISDN/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/isdn/mISDN/Kconfig b/drivers/isdn/mISDN/Kconfig index 26cf0ac9c4ad0..c9a53c2224728 100644 --- a/drivers/isdn/mISDN/Kconfig +++ b/drivers/isdn/mISDN/Kconfig @@ -13,6 +13,7 @@ if MISDN != n config MISDN_DSP tristate "Digital Audio Processing of transparent data" depends on MISDN + select BITREVERSE help Enable support for digital audio processing capability. -- GitLab From 69931e11288520c250152180ecf9b6ac5e6e40ed Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Jan 2021 22:36:23 +0100 Subject: [PATCH 1430/1894] wan: ds26522: select CONFIG_BITREVERSE Without this, the driver runs into a link failure arm-linux-gnueabi-ld: drivers/net/wan/slic_ds26522.o: in function `slic_ds26522_probe': slic_ds26522.c:(.text+0x100c): undefined reference to `byte_rev_table' arm-linux-gnueabi-ld: slic_ds26522.c:(.text+0x1cdc): undefined reference to `byte_rev_table' arm-linux-gnueabi-ld: drivers/net/wan/slic_ds26522.o: in function `slic_write': slic_ds26522.c:(.text+0x1e4c): undefined reference to `byte_rev_table' Fixes: c37d4a0085c5 ("Maxim/driver: Add driver for maxim ds26522") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/wan/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wan/Kconfig b/drivers/net/wan/Kconfig index 4029fde71a9e5..83c9481995dd2 100644 --- a/drivers/net/wan/Kconfig +++ b/drivers/net/wan/Kconfig @@ -282,6 +282,7 @@ config SLIC_DS26522 tristate "Slic Maxim ds26522 card support" depends on SPI depends on FSL_SOC || ARCH_MXC || ARCH_LAYERSCAPE || COMPILE_TEST + select BITREVERSE help This module initializes and configures the slic maxim card in T1 or E1 mode. -- GitLab From 0f7ba7bc46fa0b574ccacf5672991b321e028492 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 3 Jan 2021 11:26:26 +1100 Subject: [PATCH 1431/1894] net/sonic: Fix some resource leaks in error handling paths A call to dma_alloc_coherent() is wrapped by sonic_alloc_descriptors(). This is correctly freed in the remove function, but not in the error handling path of the probe function. Fix this by adding the missing dma_free_coherent() call. While at it, rename a label in order to be slightly more informative. Cc: Christophe JAILLET Cc: Thomas Bogendoerfer Cc: Chris Zankel References: commit 10e3cc180e64 ("net/sonic: Fix a resource leak in an error handling path in 'jazz_sonic_probe()'") Fixes: 74f2a5f0ef64 ("xtensa: Add support for the Sonic Ethernet device for the XT2000 board.") Fixes: efcce839360f ("[PATCH] macsonic/jazzsonic network drivers update") Signed-off-by: Christophe JAILLET Signed-off-by: Finn Thain Signed-off-by: David S. Miller --- drivers/net/ethernet/natsemi/macsonic.c | 12 ++++++++++-- drivers/net/ethernet/natsemi/xtsonic.c | 7 +++++-- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/natsemi/macsonic.c b/drivers/net/ethernet/natsemi/macsonic.c index 776b7d264dc34..2289e1fe37419 100644 --- a/drivers/net/ethernet/natsemi/macsonic.c +++ b/drivers/net/ethernet/natsemi/macsonic.c @@ -506,10 +506,14 @@ static int mac_sonic_platform_probe(struct platform_device *pdev) err = register_netdev(dev); if (err) - goto out; + goto undo_probe; return 0; +undo_probe: + dma_free_coherent(lp->device, + SIZEOF_SONIC_DESC * SONIC_BUS_SCALE(lp->dma_bitmode), + lp->descriptors, lp->descriptors_laddr); out: free_netdev(dev); @@ -584,12 +588,16 @@ static int mac_sonic_nubus_probe(struct nubus_board *board) err = register_netdev(ndev); if (err) - goto out; + goto undo_probe; nubus_set_drvdata(board, ndev); return 0; +undo_probe: + dma_free_coherent(lp->device, + SIZEOF_SONIC_DESC * SONIC_BUS_SCALE(lp->dma_bitmode), + lp->descriptors, lp->descriptors_laddr); out: free_netdev(ndev); return err; diff --git a/drivers/net/ethernet/natsemi/xtsonic.c b/drivers/net/ethernet/natsemi/xtsonic.c index afa166ff7aef5..28d9e98db81a8 100644 --- a/drivers/net/ethernet/natsemi/xtsonic.c +++ b/drivers/net/ethernet/natsemi/xtsonic.c @@ -229,11 +229,14 @@ int xtsonic_probe(struct platform_device *pdev) sonic_msg_init(dev); if ((err = register_netdev(dev))) - goto out1; + goto undo_probe1; return 0; -out1: +undo_probe1: + dma_free_coherent(lp->device, + SIZEOF_SONIC_DESC * SONIC_BUS_SCALE(lp->dma_bitmode), + lp->descriptors, lp->descriptors_laddr); release_region(dev->base_addr, SONIC_MEM_SIZE); out: free_netdev(dev); -- GitLab From cf0720697143f3eaa0779cca5a6602d8557d1c6f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 30 Dec 2020 19:37:53 -0800 Subject: [PATCH 1432/1894] net: suggest L2 discards be counted towards rx_dropped From the existing definitions it's unclear which stat to use to report filtering based on L2 dst addr in old broadcast-medium Ethernet. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 874cc12a34d9d..82708c6db4322 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -75,8 +75,9 @@ struct rtnl_link_stats { * * @rx_dropped: Number of packets received but not processed, * e.g. due to lack of resources or unsupported protocol. - * For hardware interfaces this counter should not include packets - * dropped by the device which are counted separately in + * For hardware interfaces this counter may include packets discarded + * due to L2 address filtering but should not include packets dropped + * by the device due to buffer exhaustion which are counted separately in * @rx_missed_errors (since procfs folds those two counters together). * * @tx_dropped: Number of packets dropped on their way to transmission, -- GitLab From 55b7ab1178cbf41f979ff83236d3321ad35ed2ad Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 30 Dec 2020 19:40:27 -0800 Subject: [PATCH 1433/1894] net: vlan: avoid leaks on register_vlan_dev() failures VLAN checks for NETREG_UNINITIALIZED to distinguish between registration failure and unregistration in progress. Since commit cb626bf566eb ("net-sysfs: Fix reference count leak") registration failure may, however, result in NETREG_UNREGISTERED as well as NETREG_UNINITIALIZED. This fix is similer to cebb69754f37 ("rtnetlink: Fix memory(net_device) leak when ->newlink fails") Fixes: cb626bf566eb ("net-sysfs: Fix reference count leak") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/8021q/vlan.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index f292e0267bb9e..15bbfaf943fd1 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -284,7 +284,8 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id) return 0; out_free_newdev: - if (new_dev->reg_state == NETREG_UNINITIALIZED) + if (new_dev->reg_state == NETREG_UNINITIALIZED || + new_dev->reg_state == NETREG_UNREGISTERED) free_netdev(new_dev); return err; } -- GitLab From 7eeecc4b1f480c7ba1932cb9a7693f8c452640f2 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Sun, 3 Jan 2021 05:17:41 -0600 Subject: [PATCH 1434/1894] net: stmmac: dwmac-sun8i: Fix probe error handling stmmac_pltfr_remove does three things in one function, making it inapproprate for unwinding the steps in the probe function. Currently, a failure before the call to stmmac_dvr_probe would leak OF node references due to missing a call to stmmac_remove_config_dt. And an error in stmmac_dvr_probe would cause the driver to attempt to remove a netdevice that was never added. Fix these by reordering the init and splitting out the error handling steps. Fixes: 9f93ac8d4085 ("net-next: stmmac: Add dwmac-sun8i") Fixes: 40a1dcee2d18 ("net: ethernet: dwmac-sun8i: Use the correct function in exit path") Signed-off-by: Samuel Holland Reviewed-by: Chen-Yu Tsai Signed-off-by: David S. Miller --- .../net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 25 +++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index 58e0511badba8..b20f261fce5b5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -1134,10 +1134,6 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) if (ret) return ret; - plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); - if (IS_ERR(plat_dat)) - return PTR_ERR(plat_dat); - gmac = devm_kzalloc(dev, sizeof(*gmac), GFP_KERNEL); if (!gmac) return -ENOMEM; @@ -1201,11 +1197,15 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) ret = of_get_phy_mode(dev->of_node, &interface); if (ret) return -EINVAL; - plat_dat->interface = interface; + + plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + if (IS_ERR(plat_dat)) + return PTR_ERR(plat_dat); /* platform data specifying hardware features and callbacks. * hardware features were copied from Allwinner drivers. */ + plat_dat->interface = interface; plat_dat->rx_coe = STMMAC_RX_COE_TYPE2; plat_dat->tx_coe = 1; plat_dat->has_sun8i = true; @@ -1216,7 +1216,7 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) ret = sun8i_dwmac_init(pdev, plat_dat->bsp_priv); if (ret) - return ret; + goto dwmac_deconfig; ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); if (ret) @@ -1230,7 +1230,7 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) if (gmac->variant->soc_has_internal_phy) { ret = get_ephy_nodes(priv); if (ret) - goto dwmac_exit; + goto dwmac_remove; ret = sun8i_dwmac_register_mdio_mux(priv); if (ret) { dev_err(&pdev->dev, "Failed to register mux\n"); @@ -1239,15 +1239,20 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) } else { ret = sun8i_dwmac_reset(priv); if (ret) - goto dwmac_exit; + goto dwmac_remove; } return ret; dwmac_mux: sun8i_dwmac_unset_syscon(gmac); +dwmac_remove: + stmmac_dvr_remove(&pdev->dev); dwmac_exit: - stmmac_pltfr_remove(pdev); -return ret; + sun8i_dwmac_exit(pdev, gmac); +dwmac_deconfig: + stmmac_remove_config_dt(pdev, plat_dat); + + return ret; } static const struct of_device_id sun8i_dwmac_match[] = { -- GitLab From 529254216773acd5039c07aa18cf06fd1f9fccdd Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Sun, 3 Jan 2021 05:17:42 -0600 Subject: [PATCH 1435/1894] net: stmmac: dwmac-sun8i: Balance internal PHY resource references While stmmac_pltfr_remove calls sun8i_dwmac_exit, the sun8i_dwmac_init and sun8i_dwmac_exit functions are also called by the stmmac_platform suspend/resume callbacks. They may be called many times during the device's lifetime and should not release resources used by the driver. Furthermore, there was no error handling in case registering the MDIO mux failed during probe, and the EPHY clock was never released at all. Fix all of these issues by moving the deinitialization code to a driver removal callback. Also ensure the EPHY is powered down before removal. Fixes: 634db83b8265 ("net: stmmac: dwmac-sun8i: Handle integrated/external MDIOs") Signed-off-by: Samuel Holland Reviewed-by: Chen-Yu Tsai Signed-off-by: David S. Miller --- .../net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 27 ++++++++++++++----- 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index b20f261fce5b5..a05dee5d4584b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -1004,17 +1004,12 @@ static void sun8i_dwmac_exit(struct platform_device *pdev, void *priv) struct sunxi_priv_data *gmac = priv; if (gmac->variant->soc_has_internal_phy) { - /* sun8i_dwmac_exit could be called with mdiomux uninit */ - if (gmac->mux_handle) - mdio_mux_uninit(gmac->mux_handle); if (gmac->internal_phy_powered) sun8i_dwmac_unpower_internal_phy(gmac); } sun8i_dwmac_unset_syscon(gmac); - reset_control_put(gmac->rst_ephy); - clk_disable_unprepare(gmac->tx_clk); if (gmac->regulator) @@ -1244,6 +1239,8 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) return ret; dwmac_mux: + reset_control_put(gmac->rst_ephy); + clk_put(gmac->ephy_clk); sun8i_dwmac_unset_syscon(gmac); dwmac_remove: stmmac_dvr_remove(&pdev->dev); @@ -1255,6 +1252,24 @@ dwmac_deconfig: return ret; } +static int sun8i_dwmac_remove(struct platform_device *pdev) +{ + struct net_device *ndev = platform_get_drvdata(pdev); + struct stmmac_priv *priv = netdev_priv(ndev); + struct sunxi_priv_data *gmac = priv->plat->bsp_priv; + + if (gmac->variant->soc_has_internal_phy) { + mdio_mux_uninit(gmac->mux_handle); + sun8i_dwmac_unpower_internal_phy(gmac); + reset_control_put(gmac->rst_ephy); + clk_put(gmac->ephy_clk); + } + + stmmac_pltfr_remove(pdev); + + return 0; +} + static const struct of_device_id sun8i_dwmac_match[] = { { .compatible = "allwinner,sun8i-h3-emac", .data = &emac_variant_h3 }, @@ -1274,7 +1289,7 @@ MODULE_DEVICE_TABLE(of, sun8i_dwmac_match); static struct platform_driver sun8i_dwmac_driver = { .probe = sun8i_dwmac_probe, - .remove = stmmac_pltfr_remove, + .remove = sun8i_dwmac_remove, .driver = { .name = "dwmac-sun8i", .pm = &stmmac_pltfr_pm_ops, -- GitLab From b8239638853e3e37b287e4bd4d57b41f14c78550 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Sun, 3 Jan 2021 05:17:43 -0600 Subject: [PATCH 1436/1894] net: stmmac: dwmac-sun8i: Balance internal PHY power sun8i_dwmac_exit calls sun8i_dwmac_unpower_internal_phy, but sun8i_dwmac_init did not call sun8i_dwmac_power_internal_phy. This caused PHY power to remain off after a suspend/resume cycle. Fix this by recording if PHY power should be restored, and if so, restoring it. Fixes: 634db83b8265 ("net: stmmac: dwmac-sun8i: Handle integrated/external MDIOs") Signed-off-by: Samuel Holland Signed-off-by: David S. Miller --- .../net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 31 ++++++++++++++----- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index a05dee5d4584b..e2c25c1c702a7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -64,6 +64,7 @@ struct emac_variant { * @variant: reference to the current board variant * @regmap: regmap for using the syscon * @internal_phy_powered: Does the internal PHY is enabled + * @use_internal_phy: Is the internal PHY selected for use * @mux_handle: Internal pointer used by mdio-mux lib */ struct sunxi_priv_data { @@ -74,6 +75,7 @@ struct sunxi_priv_data { const struct emac_variant *variant; struct regmap_field *regmap_field; bool internal_phy_powered; + bool use_internal_phy; void *mux_handle; }; @@ -539,8 +541,11 @@ static const struct stmmac_dma_ops sun8i_dwmac_dma_ops = { .dma_interrupt = sun8i_dwmac_dma_interrupt, }; +static int sun8i_dwmac_power_internal_phy(struct stmmac_priv *priv); + static int sun8i_dwmac_init(struct platform_device *pdev, void *priv) { + struct net_device *ndev = platform_get_drvdata(pdev); struct sunxi_priv_data *gmac = priv; int ret; @@ -554,13 +559,25 @@ static int sun8i_dwmac_init(struct platform_device *pdev, void *priv) ret = clk_prepare_enable(gmac->tx_clk); if (ret) { - if (gmac->regulator) - regulator_disable(gmac->regulator); dev_err(&pdev->dev, "Could not enable AHB clock\n"); - return ret; + goto err_disable_regulator; + } + + if (gmac->use_internal_phy) { + ret = sun8i_dwmac_power_internal_phy(netdev_priv(ndev)); + if (ret) + goto err_disable_clk; } return 0; + +err_disable_clk: + clk_disable_unprepare(gmac->tx_clk); +err_disable_regulator: + if (gmac->regulator) + regulator_disable(gmac->regulator); + + return ret; } static void sun8i_dwmac_core_init(struct mac_device_info *hw, @@ -831,7 +848,6 @@ static int mdio_mux_syscon_switch_fn(int current_child, int desired_child, struct sunxi_priv_data *gmac = priv->plat->bsp_priv; u32 reg, val; int ret = 0; - bool need_power_ephy = false; if (current_child ^ desired_child) { regmap_field_read(gmac->regmap_field, ®); @@ -839,13 +855,12 @@ static int mdio_mux_syscon_switch_fn(int current_child, int desired_child, case DWMAC_SUN8I_MDIO_MUX_INTERNAL_ID: dev_info(priv->device, "Switch mux to internal PHY"); val = (reg & ~H3_EPHY_MUX_MASK) | H3_EPHY_SELECT; - - need_power_ephy = true; + gmac->use_internal_phy = true; break; case DWMAC_SUN8I_MDIO_MUX_EXTERNAL_ID: dev_info(priv->device, "Switch mux to external PHY"); val = (reg & ~H3_EPHY_MUX_MASK) | H3_EPHY_SHUTDOWN; - need_power_ephy = false; + gmac->use_internal_phy = false; break; default: dev_err(priv->device, "Invalid child ID %x\n", @@ -853,7 +868,7 @@ static int mdio_mux_syscon_switch_fn(int current_child, int desired_child, return -EINVAL; } regmap_field_write(gmac->regmap_field, val); - if (need_power_ephy) { + if (gmac->use_internal_phy) { ret = sun8i_dwmac_power_internal_phy(priv); if (ret) return ret; -- GitLab From 9b1e39cf5dd81f33186cdb950fcf75a121f1a9a7 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Sun, 3 Jan 2021 05:17:44 -0600 Subject: [PATCH 1437/1894] net: stmmac: dwmac-sun8i: Balance syscon (de)initialization Previously, sun8i_dwmac_set_syscon was called from a chain of functions in several different files: sun8i_dwmac_probe stmmac_dvr_probe stmmac_hw_init stmmac_hwif_init sun8i_dwmac_setup sun8i_dwmac_set_syscon which made the lifetime of the syscon values hard to reason about. Part of the problem is that there is no similar platform driver callback from stmmac_dvr_remove. As a result, the driver unset the syscon value in sun8i_dwmac_exit, but this leaves it uninitialized after a suspend/ resume cycle. It was also unset a second time (outside sun8i_dwmac_exit) in the probe error path. Move the init to the earliest available place in sun8i_dwmac_probe (after stmmac_probe_config_dt, which initializes plat_dat), and the deinit to the corresponding position in the cleanup order. Since priv is not filled in until stmmac_dvr_probe, this requires changing the sun8i_dwmac_set_syscon parameters to priv's two relevant members. Fixes: 9f93ac8d4085 ("net-next: stmmac: Add dwmac-sun8i") Fixes: 634db83b8265 ("net: stmmac: dwmac-sun8i: Handle integrated/external MDIOs") Signed-off-by: Samuel Holland Signed-off-by: David S. Miller --- .../net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 50 +++++++++---------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index e2c25c1c702a7..a5e0eff4a3874 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -898,22 +898,23 @@ static int sun8i_dwmac_register_mdio_mux(struct stmmac_priv *priv) return ret; } -static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv) +static int sun8i_dwmac_set_syscon(struct device *dev, + struct plat_stmmacenet_data *plat) { - struct sunxi_priv_data *gmac = priv->plat->bsp_priv; - struct device_node *node = priv->device->of_node; + struct sunxi_priv_data *gmac = plat->bsp_priv; + struct device_node *node = dev->of_node; int ret; u32 reg, val; ret = regmap_field_read(gmac->regmap_field, &val); if (ret) { - dev_err(priv->device, "Fail to read from regmap field.\n"); + dev_err(dev, "Fail to read from regmap field.\n"); return ret; } reg = gmac->variant->default_syscon_value; if (reg != val) - dev_warn(priv->device, + dev_warn(dev, "Current syscon value is not the default %x (expect %x)\n", val, reg); @@ -926,9 +927,9 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv) /* Force EPHY xtal frequency to 24MHz. */ reg |= H3_EPHY_CLK_SEL; - ret = of_mdio_parse_addr(priv->device, priv->plat->phy_node); + ret = of_mdio_parse_addr(dev, plat->phy_node); if (ret < 0) { - dev_err(priv->device, "Could not parse MDIO addr\n"); + dev_err(dev, "Could not parse MDIO addr\n"); return ret; } /* of_mdio_parse_addr returns a valid (0 ~ 31) PHY @@ -944,17 +945,17 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv) if (!of_property_read_u32(node, "allwinner,tx-delay-ps", &val)) { if (val % 100) { - dev_err(priv->device, "tx-delay must be a multiple of 100\n"); + dev_err(dev, "tx-delay must be a multiple of 100\n"); return -EINVAL; } val /= 100; - dev_dbg(priv->device, "set tx-delay to %x\n", val); + dev_dbg(dev, "set tx-delay to %x\n", val); if (val <= gmac->variant->tx_delay_max) { reg &= ~(gmac->variant->tx_delay_max << SYSCON_ETXDC_SHIFT); reg |= (val << SYSCON_ETXDC_SHIFT); } else { - dev_err(priv->device, "Invalid TX clock delay: %d\n", + dev_err(dev, "Invalid TX clock delay: %d\n", val); return -EINVAL; } @@ -962,17 +963,17 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv) if (!of_property_read_u32(node, "allwinner,rx-delay-ps", &val)) { if (val % 100) { - dev_err(priv->device, "rx-delay must be a multiple of 100\n"); + dev_err(dev, "rx-delay must be a multiple of 100\n"); return -EINVAL; } val /= 100; - dev_dbg(priv->device, "set rx-delay to %x\n", val); + dev_dbg(dev, "set rx-delay to %x\n", val); if (val <= gmac->variant->rx_delay_max) { reg &= ~(gmac->variant->rx_delay_max << SYSCON_ERXDC_SHIFT); reg |= (val << SYSCON_ERXDC_SHIFT); } else { - dev_err(priv->device, "Invalid RX clock delay: %d\n", + dev_err(dev, "Invalid RX clock delay: %d\n", val); return -EINVAL; } @@ -983,7 +984,7 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv) if (gmac->variant->support_rmii) reg &= ~SYSCON_RMII_EN; - switch (priv->plat->interface) { + switch (plat->interface) { case PHY_INTERFACE_MODE_MII: /* default */ break; @@ -997,8 +998,8 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv) reg |= SYSCON_RMII_EN | SYSCON_ETCS_EXT_GMII; break; default: - dev_err(priv->device, "Unsupported interface mode: %s", - phy_modes(priv->plat->interface)); + dev_err(dev, "Unsupported interface mode: %s", + phy_modes(plat->interface)); return -EINVAL; } @@ -1023,8 +1024,6 @@ static void sun8i_dwmac_exit(struct platform_device *pdev, void *priv) sun8i_dwmac_unpower_internal_phy(gmac); } - sun8i_dwmac_unset_syscon(gmac); - clk_disable_unprepare(gmac->tx_clk); if (gmac->regulator) @@ -1059,16 +1058,11 @@ static struct mac_device_info *sun8i_dwmac_setup(void *ppriv) { struct mac_device_info *mac; struct stmmac_priv *priv = ppriv; - int ret; mac = devm_kzalloc(priv->device, sizeof(*mac), GFP_KERNEL); if (!mac) return NULL; - ret = sun8i_dwmac_set_syscon(priv); - if (ret) - return NULL; - mac->pcsr = priv->ioaddr; mac->mac = &sun8i_dwmac_ops; mac->dma = &sun8i_dwmac_dma_ops; @@ -1224,10 +1218,14 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) plat_dat->exit = sun8i_dwmac_exit; plat_dat->setup = sun8i_dwmac_setup; - ret = sun8i_dwmac_init(pdev, plat_dat->bsp_priv); + ret = sun8i_dwmac_set_syscon(&pdev->dev, plat_dat); if (ret) goto dwmac_deconfig; + ret = sun8i_dwmac_init(pdev, plat_dat->bsp_priv); + if (ret) + goto dwmac_syscon; + ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); if (ret) goto dwmac_exit; @@ -1256,11 +1254,12 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) dwmac_mux: reset_control_put(gmac->rst_ephy); clk_put(gmac->ephy_clk); - sun8i_dwmac_unset_syscon(gmac); dwmac_remove: stmmac_dvr_remove(&pdev->dev); dwmac_exit: sun8i_dwmac_exit(pdev, gmac); +dwmac_syscon: + sun8i_dwmac_unset_syscon(gmac); dwmac_deconfig: stmmac_remove_config_dt(pdev, plat_dat); @@ -1281,6 +1280,7 @@ static int sun8i_dwmac_remove(struct platform_device *pdev) } stmmac_pltfr_remove(pdev); + sun8i_dwmac_unset_syscon(gmac); return 0; } -- GitLab From 9f9d41f03bb07069e6e83ff4720cfea74a63898d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 4 Jan 2021 17:22:24 -0800 Subject: [PATCH 1438/1894] docs: net: fix documentation on .ndo_get_stats Fix calling context. Signed-off-by: Jakub Kicinski Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller --- Documentation/networking/netdevices.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/netdevices.rst b/Documentation/networking/netdevices.rst index 5a85fcc80c765..e65665c5ab501 100644 --- a/Documentation/networking/netdevices.rst +++ b/Documentation/networking/netdevices.rst @@ -64,8 +64,8 @@ ndo_do_ioctl: Context: process ndo_get_stats: - Synchronization: dev_base_lock rwlock. - Context: nominally process, but don't sleep inside an rwlock + Synchronization: rtnl_lock() semaphore, dev_base_lock rwlock, or RCU. + Context: atomic (can't sleep under rwlock or RCU) ndo_start_xmit: Synchronization: __netif_tx_lock spinlock. -- GitLab From f04bbcbf1e38d192e94bbfa126731a52332c40b1 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Tue, 5 Jan 2021 11:37:26 +0800 Subject: [PATCH 1439/1894] net: hns3: fix a phy loopback fail issue When phy driver does not implement the set_loopback interface, phy loopback test will return -EOPNOTSUPP, and the loopback test will fail. So when phy driver does not implement the set_loopback interface, don't do phy loopback test. Fixes: c9765a89d142 ("net: hns3: add phy selftest function") Signed-off-by: Yonglong Liu Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index e6f37f91c489d..135bd0a0dcaf3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -752,7 +752,8 @@ static int hclge_get_sset_count(struct hnae3_handle *handle, int stringset) handle->flags |= HNAE3_SUPPORT_SERDES_SERIAL_LOOPBACK; handle->flags |= HNAE3_SUPPORT_SERDES_PARALLEL_LOOPBACK; - if (hdev->hw.mac.phydev) { + if (hdev->hw.mac.phydev && hdev->hw.mac.phydev->drv && + hdev->hw.mac.phydev->drv->set_loopback) { count += 1; handle->flags |= HNAE3_SUPPORT_PHY_LOOPBACK; } -- GitLab From 65e61e3c2a619c4d4b873885b2d5394025ed117b Mon Sep 17 00:00:00 2001 From: Yufeng Mo Date: Tue, 5 Jan 2021 11:37:27 +0800 Subject: [PATCH 1440/1894] net: hns3: fix the number of queues actually used by ARQ HCLGE_MBX_MAX_ARQ_MSG_NUM is used to apply memory for the number of queues used by ARQ(Asynchronous Receive Queue), so the head and tail pointers should also use this macro. Fixes: 07a0556a3a73 ("net: hns3: Changes to support ARQ(Asynchronous Receive Queue)") Signed-off-by: Yufeng Mo Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h index fb5e8842983c2..33defa4c180ae 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h +++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h @@ -169,7 +169,7 @@ struct hclgevf_mbx_arq_ring { #define hclge_mbx_ring_ptr_move_crq(crq) \ (crq->next_to_use = (crq->next_to_use + 1) % crq->desc_num) #define hclge_mbx_tail_ptr_move_arq(arq) \ - (arq.tail = (arq.tail + 1) % HCLGE_MBX_MAX_ARQ_MSG_SIZE) + (arq.tail = (arq.tail + 1) % HCLGE_MBX_MAX_ARQ_MSG_NUM) #define hclge_mbx_head_ptr_move_arq(arq) \ - (arq.head = (arq.head + 1) % HCLGE_MBX_MAX_ARQ_MSG_SIZE) + (arq.head = (arq.head + 1) % HCLGE_MBX_MAX_ARQ_MSG_NUM) #endif -- GitLab From ab6e32d2913a594bc8f822ce4a75c400190b2ecc Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Tue, 5 Jan 2021 11:37:28 +0800 Subject: [PATCH 1441/1894] net: hns3: fix incorrect handling of sctp6 rss tuple For DEVICE_VERSION_V2, the hardware only supports src-ip, dst-ip and verification-tag for rss tuple set of sctp6 packet. For DEVICE_VERSION_V3, the hardware supports src-port and dst-port as well. Currently, when user queries the sctp6 rss tuples info, some unsupported information will be showed on V2. So add a check for hardware version when initializing and queries sctp6 rss tuple to fix this issue. Fixes: 46a3df9f9718 ("net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support") Signed-off-by: Jian Shen Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 6 ++++-- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 2 ++ .../net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 9 ++++++--- .../net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h | 2 ++ 4 files changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 135bd0a0dcaf3..c242883fea5db 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -4538,8 +4538,8 @@ static int hclge_set_rss_tuple(struct hnae3_handle *handle, req->ipv4_sctp_en = tuple_sets; break; case SCTP_V6_FLOW: - if ((nfc->data & RXH_L4_B_0_1) || - (nfc->data & RXH_L4_B_2_3)) + if (hdev->ae_dev->dev_version <= HNAE3_DEVICE_VERSION_V2 && + (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3))) return -EINVAL; req->ipv6_sctp_en = tuple_sets; @@ -4731,6 +4731,8 @@ static void hclge_rss_init_cfg(struct hclge_dev *hdev) vport[i].rss_tuple_sets.ipv6_udp_en = HCLGE_RSS_INPUT_TUPLE_OTHER; vport[i].rss_tuple_sets.ipv6_sctp_en = + hdev->ae_dev->dev_version <= HNAE3_DEVICE_VERSION_V2 ? + HCLGE_RSS_INPUT_TUPLE_SCTP_NO_PORT : HCLGE_RSS_INPUT_TUPLE_SCTP; vport[i].rss_tuple_sets.ipv6_fragment_en = HCLGE_RSS_INPUT_TUPLE_OTHER; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 50a294dfaff50..ca46bc9110d7d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -107,6 +107,8 @@ #define HCLGE_D_IP_BIT BIT(2) #define HCLGE_S_IP_BIT BIT(3) #define HCLGE_V_TAG_BIT BIT(4) +#define HCLGE_RSS_INPUT_TUPLE_SCTP_NO_PORT \ + (HCLGE_D_IP_BIT | HCLGE_S_IP_BIT | HCLGE_V_TAG_BIT) #define HCLGE_RSS_TC_SIZE_0 1 #define HCLGE_RSS_TC_SIZE_1 2 diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 145757cb70f9f..674b3a22e91fe 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -917,8 +917,8 @@ static int hclgevf_set_rss_tuple(struct hnae3_handle *handle, req->ipv4_sctp_en = tuple_sets; break; case SCTP_V6_FLOW: - if ((nfc->data & RXH_L4_B_0_1) || - (nfc->data & RXH_L4_B_2_3)) + if (hdev->ae_dev->dev_version <= HNAE3_DEVICE_VERSION_V2 && + (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3))) return -EINVAL; req->ipv6_sctp_en = tuple_sets; @@ -2502,7 +2502,10 @@ static void hclgevf_rss_init_cfg(struct hclgevf_dev *hdev) tuple_sets->ipv4_fragment_en = HCLGEVF_RSS_INPUT_TUPLE_OTHER; tuple_sets->ipv6_tcp_en = HCLGEVF_RSS_INPUT_TUPLE_OTHER; tuple_sets->ipv6_udp_en = HCLGEVF_RSS_INPUT_TUPLE_OTHER; - tuple_sets->ipv6_sctp_en = HCLGEVF_RSS_INPUT_TUPLE_SCTP; + tuple_sets->ipv6_sctp_en = + hdev->ae_dev->dev_version <= HNAE3_DEVICE_VERSION_V2 ? + HCLGEVF_RSS_INPUT_TUPLE_SCTP_NO_PORT : + HCLGEVF_RSS_INPUT_TUPLE_SCTP; tuple_sets->ipv6_fragment_en = HCLGEVF_RSS_INPUT_TUPLE_OTHER; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index 1b183bc35604b..f6d817a3edcb3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -122,6 +122,8 @@ #define HCLGEVF_D_IP_BIT BIT(2) #define HCLGEVF_S_IP_BIT BIT(3) #define HCLGEVF_V_TAG_BIT BIT(4) +#define HCLGEVF_RSS_INPUT_TUPLE_SCTP_NO_PORT \ + (HCLGEVF_D_IP_BIT | HCLGEVF_S_IP_BIT | HCLGEVF_V_TAG_BIT) #define HCLGEVF_STATS_TIMER_INTERVAL 36U -- GitLab From 7a68d725e4ea384977445e0bcaed3d7de83ab5b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20K=2E=20Sepp=C3=A4nen?= Date: Tue, 5 Jan 2021 06:52:49 +0200 Subject: [PATCH 1442/1894] net: cdc_ncm: correct overhead in delayed_ndp_size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Aligning to tx_ndp_modulus is not sufficient because the next align call can be cdc_ncm_align_tail, which can add up to ctx->tx_modulus + ctx->tx_remainder - 1 bytes. This used to lead to occasional crashes on a Huawei 909s-120 LTE module as follows: - the condition marked /* if there is a remaining skb [...] */ is true so the swaps happen - skb_out is set from ctx->tx_curr_skb - skb_out->len is exactly 0x3f52 - ctx->tx_curr_size is 0x4000 and delayed_ndp_size is 0xac (note that the sum of skb_out->len and delayed_ndp_size is 0x3ffe) - the for loop over n is executed once - the cdc_ncm_align_tail call marked /* align beginning of next frame */ increases skb_out->len to 0x3f56 (the sum is now 0x4002) - the condition marked /* check if we had enough room left [...] */ is false so we break out of the loop - the condition marked /* If requested, put NDP at end of frame. */ is true so the NDP is written into skb_out - now skb_out->len is 0x4002, so padding_count is minus two interpreted as an unsigned number, which is used as the length argument to memset, leading to a crash with various symptoms but usually including > Call Trace: > > cdc_ncm_fill_tx_frame+0x83a/0x970 [cdc_ncm] > cdc_mbim_tx_fixup+0x1d9/0x240 [cdc_mbim] > usbnet_start_xmit+0x5d/0x720 [usbnet] The cdc_ncm_align_tail call first aligns on a ctx->tx_modulus boundary (adding at most ctx->tx_modulus-1 bytes), then adds ctx->tx_remainder bytes. Alternatively, the next alignment call can occur in cdc_ncm_ndp16 or cdc_ncm_ndp32, in which case at most ctx->tx_ndp_modulus-1 bytes are added. A similar problem has occurred before, and the code is nontrivial to reason about, so add a guard before the crashing call. By that time it is too late to prevent any memory corruption (we'll have written past the end of the buffer already) but we can at least try to get a warning written into an on-disk log by avoiding the hard crash caused by padding past the buffer with a huge number of zeros. Signed-off-by: Jouni K. Seppänen Fixes: 4a0e3e989d66 ("cdc_ncm: Add support for moving NDP to end of NCM frame") Link: https://bugzilla.kernel.org/show_bug.cgi?id=209407 Reported-by: kernel test robot Reviewed-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ncm.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 3b816a4731f29..5a78848db93fd 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -1199,7 +1199,10 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) * accordingly. Otherwise, we should check here. */ if (ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END) - delayed_ndp_size = ALIGN(ctx->max_ndp_size, ctx->tx_ndp_modulus); + delayed_ndp_size = ctx->max_ndp_size + + max_t(u32, + ctx->tx_ndp_modulus, + ctx->tx_modulus + ctx->tx_remainder) - 1; else delayed_ndp_size = 0; @@ -1410,7 +1413,8 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) if (!(dev->driver_info->flags & FLAG_SEND_ZLP) && skb_out->len > ctx->min_tx_pkt) { padding_count = ctx->tx_curr_size - skb_out->len; - skb_put_zero(skb_out, padding_count); + if (!WARN_ON(padding_count > ctx->tx_curr_size)) + skb_put_zero(skb_out, padding_count); } else if (skb_out->len < ctx->tx_curr_size && (skb_out->len % dev->maxpacket) == 0) { skb_put_u8(skb_out, 0); /* force short packet */ -- GitLab From 4beb17e553b49c3dd74505c9f361e756aaae653e Mon Sep 17 00:00:00 2001 From: Qinglang Miao Date: Tue, 5 Jan 2021 13:57:54 +0800 Subject: [PATCH 1443/1894] net: qrtr: fix null-ptr-deref in qrtr_ns_remove A null-ptr-deref bug is reported by Hulk Robot like this: -------------- KASAN: null-ptr-deref in range [0x0000000000000128-0x000000000000012f] Call Trace: qrtr_ns_remove+0x22/0x40 [ns] qrtr_proto_fini+0xa/0x31 [qrtr] __x64_sys_delete_module+0x337/0x4e0 do_syscall_64+0x34/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x468ded -------------- When qrtr_ns_init fails in qrtr_proto_init, qrtr_ns_remove which would be called later on would raise a null-ptr-deref because qrtr_ns.workqueue has been destroyed. Fix it by making qrtr_ns_init have a return value and adding a check in qrtr_proto_init. Reported-by: Hulk Robot Signed-off-by: Qinglang Miao Signed-off-by: David S. Miller --- net/qrtr/ns.c | 7 ++++--- net/qrtr/qrtr.c | 16 +++++++++++----- net/qrtr/qrtr.h | 2 +- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c index 56aaf8cb6527e..8d00dfe8139e8 100644 --- a/net/qrtr/ns.c +++ b/net/qrtr/ns.c @@ -755,7 +755,7 @@ static void qrtr_ns_data_ready(struct sock *sk) queue_work(qrtr_ns.workqueue, &qrtr_ns.work); } -void qrtr_ns_init(void) +int qrtr_ns_init(void) { struct sockaddr_qrtr sq; int ret; @@ -766,7 +766,7 @@ void qrtr_ns_init(void) ret = sock_create_kern(&init_net, AF_QIPCRTR, SOCK_DGRAM, PF_QIPCRTR, &qrtr_ns.sock); if (ret < 0) - return; + return ret; ret = kernel_getsockname(qrtr_ns.sock, (struct sockaddr *)&sq); if (ret < 0) { @@ -797,12 +797,13 @@ void qrtr_ns_init(void) if (ret < 0) goto err_wq; - return; + return 0; err_wq: destroy_workqueue(qrtr_ns.workqueue); err_sock: sock_release(qrtr_ns.sock); + return ret; } EXPORT_SYMBOL_GPL(qrtr_ns_init); diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index f4ab3ca6d73b3..b34358282f379 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -1287,13 +1287,19 @@ static int __init qrtr_proto_init(void) return rc; rc = sock_register(&qrtr_family); - if (rc) { - proto_unregister(&qrtr_proto); - return rc; - } + if (rc) + goto err_proto; - qrtr_ns_init(); + rc = qrtr_ns_init(); + if (rc) + goto err_sock; + return 0; + +err_sock: + sock_unregister(qrtr_family.family); +err_proto: + proto_unregister(&qrtr_proto); return rc; } postcore_initcall(qrtr_proto_init); diff --git a/net/qrtr/qrtr.h b/net/qrtr/qrtr.h index dc2b67f179271..3f2d28696062a 100644 --- a/net/qrtr/qrtr.h +++ b/net/qrtr/qrtr.h @@ -29,7 +29,7 @@ void qrtr_endpoint_unregister(struct qrtr_endpoint *ep); int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len); -void qrtr_ns_init(void); +int qrtr_ns_init(void); void qrtr_ns_remove(void); -- GitLab From 445c6198fe7be03b7d38e66fe8d4b3187bc251d4 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 5 Jan 2021 20:15:15 +1100 Subject: [PATCH 1444/1894] net: ethernet: fs_enet: Add missing MODULE_LICENSE Since commit 1d6cd3929360 ("modpost: turn missing MODULE_LICENSE() into error") the ppc32_allmodconfig build fails with: ERROR: modpost: missing MODULE_LICENSE() in drivers/net/ethernet/freescale/fs_enet/mii-fec.o ERROR: modpost: missing MODULE_LICENSE() in drivers/net/ethernet/freescale/fs_enet/mii-bitbang.o Add the missing MODULE_LICENSEs to fix the build. Both files include a copyright header indicating they are GPL v2. Signed-off-by: Michael Ellerman Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c | 1 + drivers/net/ethernet/freescale/fs_enet/mii-fec.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c b/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c index c8e5d889bd81f..21de56345503f 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c +++ b/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c @@ -223,3 +223,4 @@ static struct platform_driver fs_enet_bb_mdio_driver = { }; module_platform_driver(fs_enet_bb_mdio_driver); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/freescale/fs_enet/mii-fec.c b/drivers/net/ethernet/freescale/fs_enet/mii-fec.c index 8b51ee142fa3c..152f4d83765aa 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mii-fec.c +++ b/drivers/net/ethernet/freescale/fs_enet/mii-fec.c @@ -224,3 +224,4 @@ static struct platform_driver fs_enet_fec_mdio_driver = { }; module_platform_driver(fs_enet_fec_mdio_driver); +MODULE_LICENSE("GPL"); -- GitLab From 3503ee6c0bec5f173d606359e6384a5ef85492fb Mon Sep 17 00:00:00 2001 From: Po-Hsu Lin Date: Tue, 5 Jan 2021 18:17:40 +0800 Subject: [PATCH 1445/1894] selftests: fix the return value for UDP GRO test The udpgro.sh will always return 0 (unless the bpf selftest was not build first) even if there are some failed sub test-cases. Therefore the kselftest framework will report this case is OK. Check and return the exit status of each test to make it easier to spot real failures. Signed-off-by: Po-Hsu Lin Signed-off-by: David S. Miller --- tools/testing/selftests/net/udpgro.sh | 34 +++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh index ac2a30be9b325..f8a19f548ae9d 100755 --- a/tools/testing/selftests/net/udpgro.sh +++ b/tools/testing/selftests/net/udpgro.sh @@ -5,6 +5,14 @@ readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" +# set global exit status, but never reset nonzero one. +check_err() +{ + if [ $ret -eq 0 ]; then + ret=$1 + fi +} + cleanup() { local -r jobs="$(jobs -p)" local -r ns="$(ip netns list|grep $PEER_NS)" @@ -44,7 +52,9 @@ run_one() { # Hack: let bg programs complete the startup sleep 0.1 ./udpgso_bench_tx ${tx_args} + ret=$? wait $(jobs -p) + return $ret } run_test() { @@ -87,8 +97,10 @@ run_one_nat() { sleep 0.1 ./udpgso_bench_tx ${tx_args} + ret=$? kill -INT $pid wait $(jobs -p) + return $ret } run_one_2sock() { @@ -110,7 +122,9 @@ run_one_2sock() { sleep 0.1 # first UDP GSO socket should be closed at this point ./udpgso_bench_tx ${tx_args} + ret=$? wait $(jobs -p) + return $ret } run_nat_test() { @@ -131,36 +145,54 @@ run_all() { local -r core_args="-l 4" local -r ipv4_args="${core_args} -4 -D 192.168.1.1" local -r ipv6_args="${core_args} -6 -D 2001:db8::1" + ret=0 echo "ipv4" run_test "no GRO" "${ipv4_args} -M 10 -s 1400" "-4 -n 10 -l 1400" + check_err $? # explicitly check we are not receiving UDP_SEGMENT cmsg (-S -1) # when GRO does not take place run_test "no GRO chk cmsg" "${ipv4_args} -M 10 -s 1400" "-4 -n 10 -l 1400 -S -1" + check_err $? # the GSO packets are aggregated because: # * veth schedule napi after each xmit # * segmentation happens in BH context, veth napi poll is delayed after # the transmission of the last segment run_test "GRO" "${ipv4_args} -M 1 -s 14720 -S 0 " "-4 -n 1 -l 14720" + check_err $? run_test "GRO chk cmsg" "${ipv4_args} -M 1 -s 14720 -S 0 " "-4 -n 1 -l 14720 -S 1472" + check_err $? run_test "GRO with custom segment size" "${ipv4_args} -M 1 -s 14720 -S 500 " "-4 -n 1 -l 14720" + check_err $? run_test "GRO with custom segment size cmsg" "${ipv4_args} -M 1 -s 14720 -S 500 " "-4 -n 1 -l 14720 -S 500" + check_err $? run_nat_test "bad GRO lookup" "${ipv4_args} -M 1 -s 14720 -S 0" "-n 10 -l 1472" + check_err $? run_2sock_test "multiple GRO socks" "${ipv4_args} -M 1 -s 14720 -S 0 " "-4 -n 1 -l 14720 -S 1472" + check_err $? echo "ipv6" run_test "no GRO" "${ipv6_args} -M 10 -s 1400" "-n 10 -l 1400" + check_err $? run_test "no GRO chk cmsg" "${ipv6_args} -M 10 -s 1400" "-n 10 -l 1400 -S -1" + check_err $? run_test "GRO" "${ipv6_args} -M 1 -s 14520 -S 0" "-n 1 -l 14520" + check_err $? run_test "GRO chk cmsg" "${ipv6_args} -M 1 -s 14520 -S 0" "-n 1 -l 14520 -S 1452" + check_err $? run_test "GRO with custom segment size" "${ipv6_args} -M 1 -s 14520 -S 500" "-n 1 -l 14520" + check_err $? run_test "GRO with custom segment size cmsg" "${ipv6_args} -M 1 -s 14520 -S 500" "-n 1 -l 14520 -S 500" + check_err $? run_nat_test "bad GRO lookup" "${ipv6_args} -M 1 -s 14520 -S 0" "-n 10 -l 1452" + check_err $? run_2sock_test "multiple GRO socks" "${ipv6_args} -M 1 -s 14520 -S 0 " "-n 1 -l 14520 -S 1452" + check_err $? + return $ret } if [ ! -f ../bpf/xdp_dummy.o ]; then @@ -180,3 +212,5 @@ elif [[ $1 == "__subprocess_2sock" ]]; then shift run_one_2sock $@ fi + +exit $? -- GitLab From 67208692802ce3cacfa00fe586dc0cb1bef0a51c Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 6 Jan 2021 00:42:19 +0100 Subject: [PATCH 1446/1894] tools/resolve_btfids: Warn when having multiple IDs for single type The kernel image can contain multiple types (structs/unions) with the same name. This causes distinct type hierarchies in BTF data and makes resolve_btfids fail with error like: BTFIDS vmlinux FAILED unresolved symbol udp6_sock as reported by Qais Yousef [1]. This change adds warning when multiple types of the same name are detected: BTFIDS vmlinux WARN: multiple IDs found for 'file': 526, 113351 - using 526 WARN: multiple IDs found for 'sk_buff': 2744, 113958 - using 2744 We keep the lower ID for the given type instance and let the build continue. Also changing the 'nr' variable name to 'nr_types' to avoid confusion. [1] https://lore.kernel.org/lkml/20201229151352.6hzmjvu3qh6p2qgg@e107158-lin/ Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210105234219.970039-1-jolsa@kernel.org --- tools/bpf/resolve_btfids/main.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index e3ea569ee1253..7409d7860aa6c 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -139,6 +139,8 @@ int eprintf(int level, int var, const char *fmt, ...) #define pr_debug2(fmt, ...) pr_debugN(2, pr_fmt(fmt), ##__VA_ARGS__) #define pr_err(fmt, ...) \ eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__) +#define pr_info(fmt, ...) \ + eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__) static bool is_btf_id(const char *name) { @@ -472,7 +474,7 @@ static int symbols_resolve(struct object *obj) int nr_funcs = obj->nr_funcs; int err, type_id; struct btf *btf; - __u32 nr; + __u32 nr_types; btf = btf__parse(obj->btf ?: obj->path, NULL); err = libbpf_get_error(btf); @@ -483,12 +485,12 @@ static int symbols_resolve(struct object *obj) } err = -1; - nr = btf__get_nr_types(btf); + nr_types = btf__get_nr_types(btf); /* * Iterate all the BTF types and search for collected symbol IDs. */ - for (type_id = 1; type_id <= nr; type_id++) { + for (type_id = 1; type_id <= nr_types; type_id++) { const struct btf_type *type; struct rb_root *root; struct btf_id *id; @@ -526,8 +528,13 @@ static int symbols_resolve(struct object *obj) id = btf_id__find(root, str); if (id) { - id->id = type_id; - (*nr)--; + if (id->id) { + pr_info("WARN: multiple IDs found for '%s': %d, %d - using %d\n", + str, id->id, type_id, id->id); + } else { + id->id = type_id; + (*nr)--; + } } } -- GitLab From 1d53864c3617f5235f891ca0fbe9347c4cd35d46 Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Tue, 22 Dec 2020 15:29:04 +0800 Subject: [PATCH 1447/1894] scsi: ufs: Fix possible power drain during system suspend Currently if device needs to do flush or BKOP operations, the device VCC power is kept during runtime-suspend period. However, if system suspend is happening while device is runtime-suspended, such power may not be disabled successfully. The reasons may be, 1. If current PM level is the same as SPM level, device will keep runtime-suspended by ufshcd_system_suspend(). 2. Flush recheck work may not be scheduled successfully during system suspend period. If it can wake up the system, this is also not the intention of the recheck work. To fix this issue, simply runtime-resume the device if the flush is allowed during runtime suspend period. Flush capability will be disabled while leaving runtime suspend, and also not be allowed in system suspend period. Link: https://lore.kernel.org/r/20201222072905.32221-2-stanley.chu@mediatek.com Fixes: 51dd905bd2f6 ("scsi: ufs: Fix WriteBooster flush during runtime suspend") Reviewed-by: Chaotian Jing Reviewed-by: Can Guo Signed-off-by: Stanley Chu Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 82ad31781bc9e..9db348650f17c 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -8938,7 +8938,8 @@ int ufshcd_system_suspend(struct ufs_hba *hba) if ((ufs_get_pm_lvl_to_dev_pwr_mode(hba->spm_lvl) == hba->curr_dev_pwr_mode) && (ufs_get_pm_lvl_to_link_pwr_state(hba->spm_lvl) == - hba->uic_link_state)) + hba->uic_link_state) && + !hba->dev_info.b_rpm_dev_flush_capable) goto out; if (pm_runtime_suspended(hba->dev)) { -- GitLab From 21acf4601cc63cf564c6fc1a74d81b191313c929 Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Tue, 22 Dec 2020 15:29:05 +0800 Subject: [PATCH 1448/1894] scsi: ufs: Relax the condition of UFSHCI_QUIRK_SKIP_MANUAL_WB_FLUSH_CTRL UFSHCI_QUIRK_SKIP_MANUAL_WB_FLUSH_CTRL is intended to skip enabling fWriteBoosterBufferFlushEn while WriteBooster is initializing. Therefore it is better to apply the checking during WriteBooster initialization only. Link: https://lore.kernel.org/r/20201222072905.32221-3-stanley.chu@mediatek.com Reviewed-by: Can Guo Signed-off-by: Stanley Chu Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 9db348650f17c..04ab6f2ccac6d 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -289,7 +289,8 @@ static inline void ufshcd_wb_config(struct ufs_hba *hba) if (ret) dev_err(hba->dev, "%s: En WB flush during H8: failed: %d\n", __func__, ret); - ufshcd_wb_toggle_flush(hba, true); + if (!(hba->quirks & UFSHCI_QUIRK_SKIP_MANUAL_WB_FLUSH_CTRL)) + ufshcd_wb_toggle_flush(hba, true); } static void ufshcd_scsi_unblock_requests(struct ufs_hba *hba) @@ -5436,9 +5437,6 @@ static int ufshcd_wb_toggle_flush_during_h8(struct ufs_hba *hba, bool set) static inline void ufshcd_wb_toggle_flush(struct ufs_hba *hba, bool enable) { - if (hba->quirks & UFSHCI_QUIRK_SKIP_MANUAL_WB_FLUSH_CTRL) - return; - if (enable) ufshcd_wb_buf_flush_enable(hba); else -- GitLab From 4ceb06e7c336f4a8d3f3b6ac9a4fea2e9c97dc07 Mon Sep 17 00:00:00 2001 From: Colin Xu Date: Tue, 1 Dec 2020 14:03:29 +0800 Subject: [PATCH 1449/1894] drm/i915/gvt: Fix vfio_edid issue for BXT/APL BXT/APL has different isr/irr/hpd regs compared with other GEN9. If not setting these regs bits correctly according to the emulated monitor (currently a DP on PORT_B), although gvt still triggers a virtual HPD event, the guest driver won't detect a valid HPD pulse thus no full display detection will be executed to read the updated EDID. With this patch, the vfio_edid is enabled again on BXT/APL, which is previously disabled. Fixes: 642403e3599e ("drm/i915/gvt: Temporarily disable vfio_edid for BXT/APL") Signed-off-by: Colin Xu Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20201201060329.142375-1-colin.xu@intel.com Reviewed-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/display.c | 81 +++++++++++++++++++++--------- drivers/gpu/drm/i915/gvt/vgpu.c | 5 +- 2 files changed, 59 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index a15f875396576..62a5b0dd2003b 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -217,6 +217,15 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) DDI_BUF_CTL_ENABLE); vgpu_vreg_t(vgpu, DDI_BUF_CTL(port)) |= DDI_BUF_IS_IDLE; } + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &= + ~(PORTA_HOTPLUG_ENABLE | PORTA_HOTPLUG_STATUS_MASK); + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &= + ~(PORTB_HOTPLUG_ENABLE | PORTB_HOTPLUG_STATUS_MASK); + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &= + ~(PORTC_HOTPLUG_ENABLE | PORTC_HOTPLUG_STATUS_MASK); + /* No hpd_invert set in vgpu vbt, need to clear invert mask */ + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &= ~BXT_DDI_HPD_INVERT_MASK; + vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) &= ~BXT_DE_PORT_HOTPLUG_MASK; vgpu_vreg_t(vgpu, BXT_P_CR_GT_DISP_PWRON) &= ~(BIT(0) | BIT(1)); vgpu_vreg_t(vgpu, BXT_PORT_CL1CM_DW0(DPIO_PHY0)) &= @@ -273,6 +282,8 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_EDP)) |= (TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST | TRANS_DDI_FUNC_ENABLE); + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |= + PORTA_HOTPLUG_ENABLE; vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |= GEN8_DE_PORT_HOTPLUG(HPD_PORT_A); } @@ -301,6 +312,8 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) (TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST | (PORT_B << TRANS_DDI_PORT_SHIFT) | TRANS_DDI_FUNC_ENABLE); + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |= + PORTB_HOTPLUG_ENABLE; vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |= GEN8_DE_PORT_HOTPLUG(HPD_PORT_B); } @@ -329,6 +342,8 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) (TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST | (PORT_B << TRANS_DDI_PORT_SHIFT) | TRANS_DDI_FUNC_ENABLE); + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |= + PORTC_HOTPLUG_ENABLE; vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |= GEN8_DE_PORT_HOTPLUG(HPD_PORT_C); } @@ -661,44 +676,62 @@ void intel_vgpu_emulate_hotplug(struct intel_vgpu *vgpu, bool connected) PORTD_HOTPLUG_STATUS_MASK; intel_vgpu_trigger_virtual_event(vgpu, DP_D_HOTPLUG); } else if (IS_BROXTON(i915)) { - if (connected) { - if (intel_vgpu_has_monitor_on_port(vgpu, PORT_A)) { + if (intel_vgpu_has_monitor_on_port(vgpu, PORT_A)) { + if (connected) { vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |= GEN8_DE_PORT_HOTPLUG(HPD_PORT_A); + } else { + vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) &= + ~GEN8_DE_PORT_HOTPLUG(HPD_PORT_A); } - if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) { - vgpu_vreg_t(vgpu, SFUSE_STRAP) |= - SFUSE_STRAP_DDIB_DETECTED; + vgpu_vreg_t(vgpu, GEN8_DE_PORT_IIR) |= + GEN8_DE_PORT_HOTPLUG(HPD_PORT_A); + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &= + ~PORTA_HOTPLUG_STATUS_MASK; + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |= + PORTA_HOTPLUG_LONG_DETECT; + intel_vgpu_trigger_virtual_event(vgpu, DP_A_HOTPLUG); + } + if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) { + if (connected) { vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |= GEN8_DE_PORT_HOTPLUG(HPD_PORT_B); - } - if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) { vgpu_vreg_t(vgpu, SFUSE_STRAP) |= - SFUSE_STRAP_DDIC_DETECTED; - vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |= - GEN8_DE_PORT_HOTPLUG(HPD_PORT_C); - } - } else { - if (intel_vgpu_has_monitor_on_port(vgpu, PORT_A)) { + SFUSE_STRAP_DDIB_DETECTED; + } else { vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) &= - ~GEN8_DE_PORT_HOTPLUG(HPD_PORT_A); - } - if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) { + ~GEN8_DE_PORT_HOTPLUG(HPD_PORT_B); vgpu_vreg_t(vgpu, SFUSE_STRAP) &= ~SFUSE_STRAP_DDIB_DETECTED; - vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) &= - ~GEN8_DE_PORT_HOTPLUG(HPD_PORT_B); } - if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) { - vgpu_vreg_t(vgpu, SFUSE_STRAP) &= - ~SFUSE_STRAP_DDIC_DETECTED; + vgpu_vreg_t(vgpu, GEN8_DE_PORT_IIR) |= + GEN8_DE_PORT_HOTPLUG(HPD_PORT_B); + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &= + ~PORTB_HOTPLUG_STATUS_MASK; + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |= + PORTB_HOTPLUG_LONG_DETECT; + intel_vgpu_trigger_virtual_event(vgpu, DP_B_HOTPLUG); + } + if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) { + if (connected) { + vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |= + GEN8_DE_PORT_HOTPLUG(HPD_PORT_C); + vgpu_vreg_t(vgpu, SFUSE_STRAP) |= + SFUSE_STRAP_DDIC_DETECTED; + } else { vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) &= ~GEN8_DE_PORT_HOTPLUG(HPD_PORT_C); + vgpu_vreg_t(vgpu, SFUSE_STRAP) &= + ~SFUSE_STRAP_DDIC_DETECTED; } + vgpu_vreg_t(vgpu, GEN8_DE_PORT_IIR) |= + GEN8_DE_PORT_HOTPLUG(HPD_PORT_C); + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &= + ~PORTC_HOTPLUG_STATUS_MASK; + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |= + PORTC_HOTPLUG_LONG_DETECT; + intel_vgpu_trigger_virtual_event(vgpu, DP_C_HOTPLUG); } - vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |= - PORTB_HOTPLUG_STATUS_MASK; - intel_vgpu_trigger_virtual_event(vgpu, DP_B_HOTPLUG); } } diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index e49944fde3339..cbe5931906e0a 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -437,10 +437,9 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, if (ret) goto out_clean_sched_policy; - if (IS_BROADWELL(dev_priv)) + if (IS_BROADWELL(dev_priv) || IS_BROXTON(dev_priv)) ret = intel_gvt_hypervisor_set_edid(vgpu, PORT_B); - /* FixMe: Re-enable APL/BXT once vfio_edid enabled */ - else if (!IS_BROXTON(dev_priv)) + else ret = intel_gvt_hypervisor_set_edid(vgpu, PORT_D); if (ret) goto out_clean_sched_policy; -- GitLab From 6948a96a0d69b7e8203758f44849ce4ab06ff788 Mon Sep 17 00:00:00 2001 From: Kiwoong Kim Date: Sat, 19 Dec 2020 15:40:39 +0900 Subject: [PATCH 1450/1894] scsi: ufs: Relocate flush of exceptional event The current flush location does not guarantee disabling BKOPS for the case of requesting device power off. 1) The exceptional event handler is queued 2) ufs suspend starts with a request of device power off 3) BKOPS is disabled in ufs suspend 4) The queued work for the handler is done and BKOPS is re-enabled Relocate the flush statement to ensure BKOPS remain disabled. Link: https://lore.kernel.org/r/1608360039-16390-1-git-send-email-kwmad.kim@samsung.com Reviewed-by: Can Guo Reviewed-by: Stanley Chu Signed-off-by: Kiwoong Kim Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 04ab6f2ccac6d..00ee8cabf8e00 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -8696,6 +8696,8 @@ static int ufshcd_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) ufshcd_wb_need_flush(hba)); } + flush_work(&hba->eeh_work); + if (req_dev_pwr_mode != hba->curr_dev_pwr_mode) { if (!ufshcd_is_runtime_pm(pm_op)) /* ensure that bkops is disabled */ @@ -8708,8 +8710,6 @@ static int ufshcd_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) } } - flush_work(&hba->eeh_work); - /* * In the case of DeepSleep, the device is expected to remain powered * with the link off, so do not check for bkops. -- GitLab From 35fc4cd34426c242ab015ef280853b7bff101f48 Mon Sep 17 00:00:00 2001 From: Can Guo Date: Mon, 28 Dec 2020 04:04:36 -0800 Subject: [PATCH 1451/1894] scsi: ufs: Correct the LUN used in eh_device_reset_handler() callback Users can initiate resets to specific SCSI device/target/host through IOCTL. When this happens, the SCSI cmd passed to eh_device/target/host _reset_handler() callbacks is initialized with a request whose tag is -1. In this case it is not right for eh_device_reset_handler() callback to count on the LUN get from hba->lrb[-1]. Fix it by getting LUN from the SCSI device associated with the SCSI cmd. Link: https://lore.kernel.org/r/1609157080-26283-1-git-send-email-cang@codeaurora.org Reviewed-by: Avri Altman Reviewed-by: Stanley Chu Signed-off-by: Can Guo Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 00ee8cabf8e00..e31d2c5c7b23b 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -6659,19 +6659,16 @@ static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd) { struct Scsi_Host *host; struct ufs_hba *hba; - unsigned int tag; u32 pos; int err; - u8 resp = 0xF; - struct ufshcd_lrb *lrbp; + u8 resp = 0xF, lun; unsigned long flags; host = cmd->device->host; hba = shost_priv(host); - tag = cmd->request->tag; - lrbp = &hba->lrb[tag]; - err = ufshcd_issue_tm_cmd(hba, lrbp->lun, 0, UFS_LOGICAL_RESET, &resp); + lun = ufshcd_scsi_to_upiu_lun(cmd->device->lun); + err = ufshcd_issue_tm_cmd(hba, lun, 0, UFS_LOGICAL_RESET, &resp); if (err || resp != UPIU_TASK_MANAGEMENT_FUNC_COMPL) { if (!err) err = resp; @@ -6680,7 +6677,7 @@ static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd) /* clear the commands that were pending for corresponding LUN */ for_each_set_bit(pos, &hba->outstanding_reqs, hba->nutrs) { - if (hba->lrb[pos].lun == lrbp->lun) { + if (hba->lrb[pos].lun == lun) { err = ufshcd_clear_cmd(hba, pos); if (err) break; -- GitLab From d50c7986fbf0e2167279e110a2ed5bd8e811c660 Mon Sep 17 00:00:00 2001 From: Nilesh Javali Date: Thu, 17 Dec 2020 02:51:44 -0800 Subject: [PATCH 1452/1894] scsi: qedi: Correct max length of CHAP secret The CHAP secret displayed garbage characters causing iSCSI login authentication failure. Correct the CHAP password max length. Link: https://lore.kernel.org/r/20201217105144.8055-1-njavali@marvell.com Reviewed-by: Lee Duncan Signed-off-by: Nilesh Javali Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index f5fc7f518f8af..47ad64b066236 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -2245,7 +2245,7 @@ qedi_show_boot_tgt_info(struct qedi_ctx *qedi, int type, chap_name); break; case ISCSI_BOOT_TGT_CHAP_SECRET: - rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN, + rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_CHAP_PWD_MAX_LEN, chap_secret); break; case ISCSI_BOOT_TGT_REV_CHAP_NAME: @@ -2253,7 +2253,7 @@ qedi_show_boot_tgt_info(struct qedi_ctx *qedi, int type, mchap_name); break; case ISCSI_BOOT_TGT_REV_CHAP_SECRET: - rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN, + rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_CHAP_PWD_MAX_LEN, mchap_secret); break; case ISCSI_BOOT_TGT_FLAGS: -- GitLab From 39718fe7adb1a79f78be23f058299bc038cbe161 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 17 Dec 2020 17:20:19 +0000 Subject: [PATCH 1453/1894] scsi: mpt3sas: Fix spelling mistake in Kconfig "compatiblity" -> "compatibility" There is a spelling mistake in the Kconfig help text. Fix it. Link: https://lore.kernel.org/r/20201217172019.57768-1-colin.king@canonical.com Signed-off-by: Colin Ian King Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/mpt3sas/Kconfig b/drivers/scsi/mpt3sas/Kconfig index 86209455172d6..c299f7e078fb9 100644 --- a/drivers/scsi/mpt3sas/Kconfig +++ b/drivers/scsi/mpt3sas/Kconfig @@ -79,5 +79,5 @@ config SCSI_MPT2SAS select SCSI_MPT3SAS depends on PCI && SCSI help - Dummy config option for backwards compatiblity: configure the MPT3SAS + Dummy config option for backwards compatibility: configure the MPT3SAS driver instead. -- GitLab From 3b01d7ea4dae907d34fa0eeb3f17bacd714c6d0c Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Sat, 26 Dec 2020 14:15:03 +0800 Subject: [PATCH 1454/1894] scsi: scsi_debug: Fix memleak in scsi_debug_init() When sdeb_zbc_model does not match BLK_ZONED_NONE, BLK_ZONED_HA or BLK_ZONED_HM, we should free sdebug_q_arr to prevent memleak. Also there is no need to execute sdebug_erase_store() on failure of sdeb_zbc_model_str(). Link: https://lore.kernel.org/r/20201226061503.20050-1-dinghao.liu@zju.edu.cn Acked-by: Douglas Gilbert Signed-off-by: Dinghao Liu Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 24c0f7ec03511..4a08c450b756f 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -6740,7 +6740,7 @@ static int __init scsi_debug_init(void) k = sdeb_zbc_model_str(sdeb_zbc_model_s); if (k < 0) { ret = k; - goto free_vm; + goto free_q_arr; } sdeb_zbc_model = k; switch (sdeb_zbc_model) { @@ -6753,7 +6753,8 @@ static int __init scsi_debug_init(void) break; default: pr_err("Invalid ZBC model\n"); - return -EINVAL; + ret = -EINVAL; + goto free_q_arr; } } if (sdeb_zbc_model != BLK_ZONED_NONE) { -- GitLab From e5cc9002caafacbaa8dab878d17a313192c3b03b Mon Sep 17 00:00:00 2001 From: "Ewan D. Milne" Date: Mon, 7 Dec 2020 17:10:21 -0500 Subject: [PATCH 1455/1894] scsi: sd: Suppress spurious errors when WRITE SAME is being disabled The block layer code will split a large zeroout request into multiple bios and if WRITE SAME is disabled because the storage device reports that it does not support it (or support the length used), we can get an error message from the block layer despite the setting of RQF_QUIET on the first request. This is because more than one request may have already been submitted. Fix this by setting RQF_QUIET when BLK_STS_TARGET is returned to fail the request early, we don't need to log a message because we did not actually submit the command to the device, and the block layer code will handle the error by submitting individual write bios. Link: https://lore.kernel.org/r/20201207221021.28243-1-emilne@redhat.com Reviewed-by: Christoph Hellwig Signed-off-by: Ewan D. Milne Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 679c2c0250476..b766ad54e4a54 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -984,8 +984,10 @@ static blk_status_t sd_setup_write_zeroes_cmnd(struct scsi_cmnd *cmd) } } - if (sdp->no_write_same) + if (sdp->no_write_same) { + rq->rq_flags |= RQF_QUIET; return BLK_STS_TARGET; + } if (sdkp->ws16 || lba > 0xffffffff || nr_blocks > 0xffff) return sd_setup_write_same16_cmnd(cmd, false); -- GitLab From be2553358cd40c0db11d1aa96f819c07413b2aae Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Mon, 14 Dec 2020 10:54:24 +0100 Subject: [PATCH 1456/1894] scsi: sd: Remove obsolete variable in sd_remove() Commit 996e509bbc95 ("sd: use __register_blkdev to avoid a modprobe for an unregistered dev_t") removed blk_register_region(devt, ...) in sd_remove() and since then, devt is unused in sd_remove(). Hence, make W=1 warns: drivers/scsi/sd.c:3516:8: warning: variable 'devt' set but not used [-Wunused-but-set-variable] Simply remove this obsolete variable. [mkp: fixed commit sha] Link: https://lore.kernel.org/r/20201214095424.12479-1-lukas.bulwahn@gmail.com Reviewed-by: Christoph Hellwig Reviewed-by: Nathan Chancellor Acked-by: Martin K. Petersen Signed-off-by: Lukas Bulwahn Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index b766ad54e4a54..a3d2d4bc4a3dc 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3512,10 +3512,8 @@ static int sd_probe(struct device *dev) static int sd_remove(struct device *dev) { struct scsi_disk *sdkp; - dev_t devt; sdkp = dev_get_drvdata(dev); - devt = disk_devt(sdkp->disk); scsi_autopm_get_device(sdkp->device); async_synchronize_full_domain(&scsi_sd_pm_domain); -- GitLab From 19fce0470f05031e6af36e49ce222d0f0050d432 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 1 Dec 2020 17:52:43 -0800 Subject: [PATCH 1457/1894] nvme-fc: avoid calling _nvme_fc_abort_outstanding_ios from interrupt context Recent patches changed calling sequences. nvme_fc_abort_outstanding_ios used to be called from a timeout or work context. Now it is being called in an io completion context, which can be an interrupt handler. Unfortunately, the abort outstanding ios routine attempts to stop nvme queues and nested routines that may try to sleep, which is in conflict with the interrupt handler. Correct replacing the direct call with a work element scheduling, and the abort outstanding ios routine will be called in the work element. Fixes: 95ced8a2c72d ("nvme-fc: eliminate terminate_io use by nvme_fc_error_recovery") Signed-off-by: James Smart Reported-by: Daniel Wagner Tested-by: Daniel Wagner Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 38373a0e86efb..5f36cfa8136c0 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -166,6 +166,7 @@ struct nvme_fc_ctrl { struct blk_mq_tag_set admin_tag_set; struct blk_mq_tag_set tag_set; + struct work_struct ioerr_work; struct delayed_work connect_work; struct kref ref; @@ -1888,6 +1889,15 @@ __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl, } } +static void +nvme_fc_ctrl_ioerr_work(struct work_struct *work) +{ + struct nvme_fc_ctrl *ctrl = + container_of(work, struct nvme_fc_ctrl, ioerr_work); + + nvme_fc_error_recovery(ctrl, "transport detected io error"); +} + static void nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) { @@ -2046,7 +2056,7 @@ done: check_error: if (terminate_assoc) - nvme_fc_error_recovery(ctrl, "transport detected io error"); + queue_work(nvme_reset_wq, &ctrl->ioerr_work); } static int @@ -3233,6 +3243,7 @@ nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl) { struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); + cancel_work_sync(&ctrl->ioerr_work); cancel_delayed_work_sync(&ctrl->connect_work); /* * kill the association on the link side. this will block @@ -3449,6 +3460,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work); INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work); + INIT_WORK(&ctrl->ioerr_work, nvme_fc_ctrl_ioerr_work); spin_lock_init(&ctrl->lock); /* io queue count */ @@ -3540,6 +3552,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, fail_ctrl: nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING); + cancel_work_sync(&ctrl->ioerr_work); cancel_work_sync(&ctrl->ctrl.reset_work); cancel_delayed_work_sync(&ctrl->connect_work); -- GitLab From 2b54996b7d56badc563755840838614f2fa9c4de Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 7 Dec 2020 12:29:40 -0800 Subject: [PATCH 1458/1894] nvme-fcloop: Fix sscanf type and list_first_entry_or_null warnings Kernel robot had the following warnings: >> fcloop.c:1506:6: warning: %x in format string (no. 1) requires >> 'unsigned int *' but the argument type is 'signed int *'. >> [invalidScanfArgType_int] >> if (sscanf(buf, "%x:%d:%d", &opcode, &starting, &amount) != 3) >> ^ Resolve by changing opcode from and int to an unsigned int and >> fcloop.c:1632:32: warning: Uninitialized variable: lport [uninitvar] >> ret = __wait_localport_unreg(lport); >> ^ >> fcloop.c:1615:28: warning: Uninitialized variable: nport [uninitvar] >> ret = __remoteport_unreg(nport, rport); >> ^ These aren't actual issues as the values are assigned prior to use. It appears the tool doesn't understand list_first_entry_or_null(). Regardless, quiet the tool by initializing the pointers to NULL at declaration. Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fcloop.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index 733d9363900e4..68213f0a052bb 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -1501,7 +1501,8 @@ static ssize_t fcloop_set_cmd_drop(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - int opcode, starting, amount; + unsigned int opcode; + int starting, amount; if (sscanf(buf, "%x:%d:%d", &opcode, &starting, &amount) != 3) return -EBADRQC; @@ -1588,8 +1589,8 @@ out_destroy_class: static void __exit fcloop_exit(void) { - struct fcloop_lport *lport; - struct fcloop_nport *nport; + struct fcloop_lport *lport = NULL; + struct fcloop_nport *nport = NULL; struct fcloop_tport *tport; struct fcloop_rport *rport; unsigned long flags; -- GitLab From 7ee5c78ca3895d44e918c38332921983ed678be0 Mon Sep 17 00:00:00 2001 From: Gopal Tiwari Date: Fri, 4 Dec 2020 21:46:57 +0530 Subject: [PATCH 1459/1894] nvme-pci: mark Samsung PM1725a as IGNORE_DEV_SUBNQN A system with more than one of these SSDs will only have one usable. Hence the kernel fails to detect nvme devices due to duplicate cntlids. [ 6.274554] nvme nvme1: Duplicate cntlid 33 with nvme0, rejecting [ 6.274566] nvme nvme1: Removing after probe failure status: -22 Adding the NVME_QUIRK_IGNORE_DEV_SUBNQN quirk to resolves the issue. Signed-off-by: Gopal Tiwari Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index b4385cb0ff609..553871e6962b8 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3196,7 +3196,8 @@ static const struct pci_device_id nvme_id_table[] = { { PCI_DEVICE(0x144d, 0xa821), /* Samsung PM1725 */ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, { PCI_DEVICE(0x144d, 0xa822), /* Samsung PM1725a */ - .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, + .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY | + NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE(0x1d1d, 0x1f1f), /* LighNVM qemu device */ .driver_data = NVME_QUIRK_LIGHTNVM, }, { PCI_DEVICE(0x1d1d, 0x2807), /* CNEX WL */ -- GitLab From 5c11f7d9f843bdd24cd29b95401938bc3f168070 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 21 Dec 2020 00:03:39 -0800 Subject: [PATCH 1460/1894] nvme-tcp: Fix possible race of io_work and direct send We may send a request (with or without its data) from two paths: 1. From our I/O context nvme_tcp_io_work which is triggered from: - queue_rq - r2t reception - socket data_ready and write_space callbacks 2. Directly from queue_rq if the send_list is empty (because we want to save the context switch associated with scheduling our io_work). However, given that now we have the send_mutex, we may run into a race condition where none of these contexts will send the pending payload to the controller. Both io_work send path and queue_rq send path opportunistically attempt to acquire the send_mutex however queue_rq only attempts to send a single request, and if io_work context fails to acquire the send_mutex it will complete without rescheduling itself. The race can trigger with the following sequence: 1. queue_rq sends request (no incapsule data) and blocks 2. RX path receives r2t - prepares data PDU to send, adds h2cdata PDU to the send_list and schedules io_work 3. io_work triggers and cannot acquire the send_mutex - because of (1), ends without self rescheduling 4. queue_rq completes the send, and completes ==> no context will send the h2cdata - timeout. Fix this by having queue_rq sending as much as it can from the send_list such that if it still has any left, its because the socket buffer is full and the socket write_space callback will trigger, thus guaranteeing that a context will be scheduled to send the h2cdata PDU. Fixes: db5ad6b7f8cd ("nvme-tcp: try to send request in queue_rq context") Reported-by: Potnuri Bharat Teja Reported-by: Samuel Jones Signed-off-by: Sagi Grimberg Tested-by: Potnuri Bharat Teja Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 1ba6599274427..979ee31b8dd1c 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -262,6 +262,16 @@ static inline void nvme_tcp_advance_req(struct nvme_tcp_request *req, } } +static inline void nvme_tcp_send_all(struct nvme_tcp_queue *queue) +{ + int ret; + + /* drain the send queue as much as we can... */ + do { + ret = nvme_tcp_try_send(queue); + } while (ret > 0); +} + static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req, bool sync, bool last) { @@ -279,7 +289,7 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req, if (queue->io_cpu == smp_processor_id() && sync && empty && mutex_trylock(&queue->send_mutex)) { queue->more_requests = !last; - nvme_tcp_try_send(queue); + nvme_tcp_send_all(queue); queue->more_requests = false; mutex_unlock(&queue->send_mutex); } else if (last) { -- GitLab From 62df80165d7f197c9c0652e7416164f294a96661 Mon Sep 17 00:00:00 2001 From: Lalithambika Krishnakumar Date: Wed, 23 Dec 2020 14:09:00 -0800 Subject: [PATCH 1461/1894] nvme: avoid possible double fetch in handling CQE While handling the completion queue, keep a local copy of the command id from the DMA-accessible completion entry. This silences a time-of-check to time-of-use (TOCTOU) warning from KF/x[1], with respect to a Thunderclap[2] vulnerability analysis. The double-read impact appears benign. There may be a theoretical window for @command_id to be used as an adversary-controlled array-index-value for mounting a speculative execution attack, but that mitigation is saved for a potential follow-on. A man-in-the-middle attack on the data payload is out of scope for this analysis and is hopefully mitigated by filesystem integrity mechanisms. [1] https://github.com/intel/kernel-fuzzer-for-xen-project [2] http://thunderclap.io/thunderclap-paper-ndss2019.pdf Signed-off-by: Lalithambika Krishna Kumar Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 553871e6962b8..50d9a20568a28 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -967,6 +967,7 @@ static inline struct blk_mq_tags *nvme_queue_tagset(struct nvme_queue *nvmeq) static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) { struct nvme_completion *cqe = &nvmeq->cqes[idx]; + __u16 command_id = READ_ONCE(cqe->command_id); struct request *req; /* @@ -975,17 +976,17 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) * aborts. We don't even bother to allocate a struct request * for them but rather special case them here. */ - if (unlikely(nvme_is_aen_req(nvmeq->qid, cqe->command_id))) { + if (unlikely(nvme_is_aen_req(nvmeq->qid, command_id))) { nvme_complete_async_event(&nvmeq->dev->ctrl, cqe->status, &cqe->result); return; } - req = blk_mq_tag_to_rq(nvme_queue_tagset(nvmeq), cqe->command_id); + req = blk_mq_tag_to_rq(nvme_queue_tagset(nvmeq), command_id); if (unlikely(!req)) { dev_warn(nvmeq->dev->ctrl.device, "invalid id %d completed on queue %d\n", - cqe->command_id, le16_to_cpu(cqe->sq_id)); + command_id, le16_to_cpu(cqe->sq_id)); return; } -- GitLab From 9b66fc02bec0ca613bc6d4c1d0049f727a95567d Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Wed, 30 Dec 2020 20:22:44 +0900 Subject: [PATCH 1462/1894] nvme: unexport functions with no external caller There are no callers for nvme_reset_ctrl_sync() and nvme_alloc_request_qid() so that we keep the symbols exported. Unexport those functions, mark them static and update the header file respectively. Signed-off-by: Minwoo Im Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 6 ++---- drivers/nvme/host/nvme.h | 3 --- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index ce1b615194413..70a63d7c1d028 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -179,7 +179,7 @@ int nvme_reset_ctrl(struct nvme_ctrl *ctrl) } EXPORT_SYMBOL_GPL(nvme_reset_ctrl); -int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl) +static int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl) { int ret; @@ -192,7 +192,6 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl) return ret; } -EXPORT_SYMBOL_GPL(nvme_reset_ctrl_sync); static void nvme_do_delete_ctrl(struct nvme_ctrl *ctrl) { @@ -578,7 +577,7 @@ struct request *nvme_alloc_request(struct request_queue *q, } EXPORT_SYMBOL_GPL(nvme_alloc_request); -struct request *nvme_alloc_request_qid(struct request_queue *q, +static struct request *nvme_alloc_request_qid(struct request_queue *q, struct nvme_command *cmd, blk_mq_req_flags_t flags, int qid) { struct request *req; @@ -589,7 +588,6 @@ struct request *nvme_alloc_request_qid(struct request_queue *q, nvme_init_request(req, cmd); return req; } -EXPORT_SYMBOL_GPL(nvme_alloc_request_qid); static int nvme_toggle_streams(struct nvme_ctrl *ctrl, bool enable) { diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 7e49f61f81df8..9c4fbfe44c006 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -610,8 +610,6 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl); #define NVME_QID_ANY -1 struct request *nvme_alloc_request(struct request_queue *q, struct nvme_command *cmd, blk_mq_req_flags_t flags); -struct request *nvme_alloc_request_qid(struct request_queue *q, - struct nvme_command *cmd, blk_mq_req_flags_t flags, int qid); void nvme_cleanup_cmd(struct request *req); blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req, struct nvme_command *cmd); @@ -630,7 +628,6 @@ int nvme_get_features(struct nvme_ctrl *dev, unsigned int fid, int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count); void nvme_stop_keep_alive(struct nvme_ctrl *ctrl); int nvme_reset_ctrl(struct nvme_ctrl *ctrl); -int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl); int nvme_try_sched_reset(struct nvme_ctrl *ctrl); int nvme_delete_ctrl(struct nvme_ctrl *ctrl); -- GitLab From 9ceb7863537748c67fa43ac4f2f565819bbd36e4 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Tue, 5 Jan 2021 10:46:54 +0200 Subject: [PATCH 1463/1894] nvmet-rdma: Fix list_del corruption on queue establishment failure When a queue is in NVMET_RDMA_Q_CONNECTING state, it may has some requests at rsp_wait_list. In case a disconnect occurs at this state, no one will empty this list and will return the requests to free_rsps list. Normally nvmet_rdma_queue_established() free those requests after moving the queue to NVMET_RDMA_Q_LIVE state, but in this case __nvmet_rdma_queue_disconnect() is called before. The crash happens at nvmet_rdma_free_rsps() when calling list_del(&rsp->free_list), because the request exists only at the wait list. To fix the issue, simply clear rsp_wait_list when destroying the queue. Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/target/rdma.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 5c1e7cb7fe0de..bdfc22eb2a10f 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -1641,6 +1641,16 @@ static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue) spin_lock_irqsave(&queue->state_lock, flags); switch (queue->state) { case NVMET_RDMA_Q_CONNECTING: + while (!list_empty(&queue->rsp_wait_list)) { + struct nvmet_rdma_rsp *rsp; + + rsp = list_first_entry(&queue->rsp_wait_list, + struct nvmet_rdma_rsp, + wait_list); + list_del(&rsp->wait_list); + nvmet_rdma_put_rsp(rsp); + } + fallthrough; case NVMET_RDMA_Q_LIVE: queue->state = NVMET_RDMA_Q_DISCONNECTING; disconnect = true; -- GitLab From 2b59787a223b79228fed9ade1bf6936194ddb8cd Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Tue, 5 Jan 2021 10:34:02 +0000 Subject: [PATCH 1464/1894] nvme: remove the unused status argument from nvme_trace_bio_complete The only used argument in this function is the "req". Signed-off-by: Max Gurtovoy Reviewed-by: Minwoo Im Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- drivers/nvme/host/nvme.h | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 70a63d7c1d028..f320273fc6727 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -330,7 +330,7 @@ static inline void nvme_end_req(struct request *req) req->__sector = nvme_lba_to_sect(req->q->queuedata, le64_to_cpu(nvme_req(req)->result.u64)); - nvme_trace_bio_complete(req, status); + nvme_trace_bio_complete(req); blk_mq_end_request(req, status); } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 9c4fbfe44c006..88a6b97247f50 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -672,8 +672,7 @@ static inline void nvme_mpath_check_last_path(struct nvme_ns *ns) kblockd_schedule_work(&head->requeue_work); } -static inline void nvme_trace_bio_complete(struct request *req, - blk_status_t status) +static inline void nvme_trace_bio_complete(struct request *req) { struct nvme_ns *ns = req->q->queuedata; @@ -728,8 +727,7 @@ static inline void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl) static inline void nvme_mpath_check_last_path(struct nvme_ns *ns) { } -static inline void nvme_trace_bio_complete(struct request *req, - blk_status_t status) +static inline void nvme_trace_bio_complete(struct request *req) { } static inline int nvme_mpath_init(struct nvme_ctrl *ctrl, -- GitLab From 3ce47d95b7346dcafd9bed3556a8d072cb2b8571 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 4 Jan 2021 13:59:53 -0700 Subject: [PATCH 1465/1894] powerpc: Handle .text.{hot,unlikely}.* in linker script Commit eff8728fe698 ("vmlinux.lds.h: Add PGO and AutoFDO input sections") added ".text.unlikely.*" and ".text.hot.*" due to an LLVM change [1]. After another LLVM change [2], these sections are seen in some PowerPC builds, where there is a orphan section warning then build failure: $ make -skj"$(nproc)" \ ARCH=powerpc CROSS_COMPILE=powerpc64le-linux-gnu- LLVM=1 O=out \ distclean powernv_defconfig zImage.epapr ld.lld: warning: kernel/built-in.a(panic.o):(.text.unlikely.) is being placed in '.text.unlikely.' ... ld.lld: warning: address (0xc000000000009314) of section .text is not a multiple of alignment (256) ... ERROR: start_text address is c000000000009400, should be c000000000008000 ERROR: try to enable LD_HEAD_STUB_CATCH config option ERROR: see comments in arch/powerpc/tools/head_check.sh ... Explicitly handle these sections like in the main linker script so there is no more build failure. [1]: https://reviews.llvm.org/D79600 [2]: https://reviews.llvm.org/D92493 Fixes: 83a092cf95f2 ("powerpc: Link warning for orphan sections") Cc: stable@vger.kernel.org Signed-off-by: Nathan Chancellor Signed-off-by: Michael Ellerman Link: https://github.com/ClangBuiltLinux/linux/issues/1218 Link: https://lore.kernel.org/r/20210104205952.1399409-1-natechancellor@gmail.com --- arch/powerpc/kernel/vmlinux.lds.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 0318ba436f34e..8e0b1298bf19b 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -85,7 +85,7 @@ SECTIONS ALIGN_FUNCTION(); #endif /* careful! __ftr_alt_* sections need to be close to .text */ - *(.text.hot TEXT_MAIN .text.fixup .text.unlikely .fixup __ftr_alt_* .ref.text); + *(.text.hot .text.hot.* TEXT_MAIN .text.fixup .text.unlikely .text.unlikely.* .fixup __ftr_alt_* .ref.text); #ifdef CONFIG_PPC64 *(.tramp.ftrace.text); #endif -- GitLab From ad0a6bad44758afa3b440c254a24999a0c7e35d5 Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Tue, 5 Jan 2021 17:50:43 +0000 Subject: [PATCH 1466/1894] x86/hyperv: check cpu mask after interrupt has been disabled We've observed crashes due to an empty cpu mask in hyperv_flush_tlb_others. Obviously the cpu mask in question is changed between the cpumask_empty call at the beginning of the function and when it is actually used later. One theory is that an interrupt comes in between and a code path ends up changing the mask. Move the check after interrupt has been disabled to see if it fixes the issue. Signed-off-by: Wei Liu Cc: stable@kernel.org Link: https://lore.kernel.org/r/20210105175043.28325-1-wei.liu@kernel.org Reviewed-by: Michael Kelley --- arch/x86/hyperv/mmu.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c index 5208ba49c89a9..2c87350c1fb09 100644 --- a/arch/x86/hyperv/mmu.c +++ b/arch/x86/hyperv/mmu.c @@ -66,11 +66,17 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus, if (!hv_hypercall_pg) goto do_native; - if (cpumask_empty(cpus)) - return; - local_irq_save(flags); + /* + * Only check the mask _after_ interrupt has been disabled to avoid the + * mask changing under our feet. + */ + if (cpumask_empty(cpus)) { + local_irq_restore(flags); + return; + } + flush_pcpu = (struct hv_tlb_flush **) this_cpu_ptr(hyperv_pcpu_input_arg); -- GitLab From cb7f4a8b1fb426a175d1708f05581939c61329d4 Mon Sep 17 00:00:00 2001 From: Ying-Tsun Huang Date: Tue, 15 Dec 2020 15:07:20 +0800 Subject: [PATCH 1467/1894] x86/mtrr: Correct the range check before performing MTRR type lookups In mtrr_type_lookup(), if the input memory address region is not in the MTRR, over 4GB, and not over the top of memory, a write-back attribute is returned. These condition checks are for ensuring the input memory address region is actually mapped to the physical memory. However, if the end address is just aligned with the top of memory, the condition check treats the address is over the top of memory, and write-back attribute is not returned. And this hits in a real use case with NVDIMM: the nd_pmem module tries to map NVDIMMs as cacheable memories when NVDIMMs are connected. If a NVDIMM is the last of the DIMMs, the performance of this NVDIMM becomes very low since it is aligned with the top of memory and its memory type is uncached-minus. Move the input end address change to inclusive up into mtrr_type_lookup(), before checking for the top of memory in either mtrr_type_lookup_{variable,fixed}() helpers. [ bp: Massage commit message. ] Fixes: 0cc705f56e40 ("x86/mm/mtrr: Clean up mtrr_type_lookup()") Signed-off-by: Ying-Tsun Huang Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20201215070721.4349-1-ying-tsun.huang@amd.com --- arch/x86/kernel/cpu/mtrr/generic.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 23ad8e953dfb1..a29997e6cf9e6 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -167,9 +167,6 @@ static u8 mtrr_type_lookup_variable(u64 start, u64 end, u64 *partial_end, *repeat = 0; *uniform = 1; - /* Make end inclusive instead of exclusive */ - end--; - prev_match = MTRR_TYPE_INVALID; for (i = 0; i < num_var_ranges; ++i) { unsigned short start_state, end_state, inclusive; @@ -261,6 +258,9 @@ u8 mtrr_type_lookup(u64 start, u64 end, u8 *uniform) int repeat; u64 partial_end; + /* Make end inclusive instead of exclusive */ + end--; + if (!mtrr_state_set) return MTRR_TYPE_INVALID; -- GitLab From 3e2224c5867fead6c0b94b84727cc676ac6353a3 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 6 Jan 2021 16:09:26 +0000 Subject: [PATCH 1468/1894] io_uring: Fix return value from alloc_fixed_file_ref_node alloc_fixed_file_ref_node() currently returns an ERR_PTR on failure. io_sqe_files_unregister() expects it to return NULL and since it can only return -ENOMEM, it makes more sense to change alloc_fixed_file_ref_node() to behave that way. Fixes: 1ffc54220c44 ("io_uring: fix io_sqe_files_unregister() hangs") Reported-by: Dan Carpenter Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Jens Axboe --- fs/io_uring.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index dc92ca5090a39..27a8c226abf8c 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7696,12 +7696,12 @@ static struct fixed_file_ref_node *alloc_fixed_file_ref_node( ref_node = kzalloc(sizeof(*ref_node), GFP_KERNEL); if (!ref_node) - return ERR_PTR(-ENOMEM); + return NULL; if (percpu_ref_init(&ref_node->refs, io_file_data_ref_zero, 0, GFP_KERNEL)) { kfree(ref_node); - return ERR_PTR(-ENOMEM); + return NULL; } INIT_LIST_HEAD(&ref_node->node); INIT_LIST_HEAD(&ref_node->file_list); @@ -7795,9 +7795,9 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, } ref_node = alloc_fixed_file_ref_node(ctx); - if (IS_ERR(ref_node)) { + if (!ref_node) { io_sqe_files_unregister(ctx); - return PTR_ERR(ref_node); + return -ENOMEM; } io_sqe_files_set_node(file_data, ref_node); @@ -7897,8 +7897,8 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, return -EINVAL; ref_node = alloc_fixed_file_ref_node(ctx); - if (IS_ERR(ref_node)) - return PTR_ERR(ref_node); + if (!ref_node) + return -ENOMEM; done = 0; fds = u64_to_user_ptr(up->fds); -- GitLab From 00b8c557d096f0930d5c07df618223d3d06902d6 Mon Sep 17 00:00:00 2001 From: Matthias Maennich Date: Wed, 6 Jan 2021 15:52:01 +0000 Subject: [PATCH 1469/1894] staging: ION: remove some references to CONFIG_ION With commit e722a295cf49 ("staging: ion: remove from the tree"), ION and its corresponding config CONFIG_ION is gone. Remove stale references from drivers/staging/media/atomisp/pci and from the recommended Android kernel config. Fixes: e722a295cf49 ("staging: ion: remove from the tree") Cc: Hridya Valsaraju Cc: Rob Herring Cc: linux-media@vger.kernel.org Cc: devel@driverdev.osuosl.org Signed-off-by: Matthias Maennich Link: https://lore.kernel.org/r/20210106155201.2845319-1-maennich@google.com Signed-off-by: Greg Kroah-Hartman --- .../media/atomisp/pci/atomisp_subdev.c | 20 ------------------- kernel/configs/android-recommended.config | 1 - 2 files changed, 21 deletions(-) diff --git a/drivers/staging/media/atomisp/pci/atomisp_subdev.c b/drivers/staging/media/atomisp/pci/atomisp_subdev.c index 52b9fb18c87f0..b666cb23e5ca1 100644 --- a/drivers/staging/media/atomisp/pci/atomisp_subdev.c +++ b/drivers/staging/media/atomisp/pci/atomisp_subdev.c @@ -1062,26 +1062,6 @@ static const struct v4l2_ctrl_config ctrl_select_isp_version = { .def = 0, }; -#if 0 /* #ifdef CONFIG_ION */ -/* - * Control for ISP ion device fd - * - * userspace will open ion device and pass the fd to kernel. - * this fd will be used to map shared fd to buffer. - */ -/* V4L2_CID_ATOMISP_ION_DEVICE_FD is not defined */ -static const struct v4l2_ctrl_config ctrl_ion_dev_fd = { - .ops = &ctrl_ops, - .id = V4L2_CID_ATOMISP_ION_DEVICE_FD, - .type = V4L2_CTRL_TYPE_INTEGER, - .name = "Ion Device Fd", - .min = -1, - .max = 1024, - .step = 1, - .def = ION_FD_UNSET -}; -#endif - static void atomisp_init_subdev_pipe(struct atomisp_sub_device *asd, struct atomisp_video_pipe *pipe, enum v4l2_buf_type buf_type) { diff --git a/kernel/configs/android-recommended.config b/kernel/configs/android-recommended.config index 53d688bdd894c..eb0029c9a6a63 100644 --- a/kernel/configs/android-recommended.config +++ b/kernel/configs/android-recommended.config @@ -81,7 +81,6 @@ CONFIG_INPUT_JOYSTICK=y CONFIG_INPUT_MISC=y CONFIG_INPUT_TABLET=y CONFIG_INPUT_UINPUT=y -CONFIG_ION=y CONFIG_JOYSTICK_XPAD=y CONFIG_JOYSTICK_XPAD_FF=y CONFIG_JOYSTICK_XPAD_LEDS=y -- GitLab From 3d1a90ab0ed93362ec8ac85cf291243c87260c21 Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Fri, 11 Dec 2020 05:12:51 -0500 Subject: [PATCH 1470/1894] NFS4: Fix use-after-free in trace_event_raw_event_nfs4_set_lock It is only safe to call the tracepoint before rpc_put_task() because 'data' is freed inside nfs4_lock_release (rpc_release). Fixes: 48c9579a1afe ("Adding stateid information to tracepoints") Signed-off-by: Dave Wysochanski Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0ce04e0e5d829..14acd2f791072 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7111,9 +7111,9 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f data->arg.new_lock_owner, ret); } else data->cancelled = true; + trace_nfs4_set_lock(fl, state, &data->res.stateid, cmd, ret); rpc_put_task(task); dprintk("%s: done, ret = %d!\n", __func__, ret); - trace_nfs4_set_lock(fl, state, &data->res.stateid, cmd, ret); return ret; } -- GitLab From 0e61f09af48beb41be0954e7be7d3ba2d18c9946 Mon Sep 17 00:00:00 2001 From: Xiaojian Du Date: Mon, 14 Dec 2020 17:05:55 +0800 Subject: [PATCH 1471/1894] drm/amd/pm: correct the sensor value of power for vangogh This patch is to correct the sensor value of power for vangogh. Signed-off-by: Xiaojian Du Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index 8cb4fcee9a2c3..5c1482d4ca43e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -252,7 +252,8 @@ static int vangogh_get_smu_metrics_data(struct smu_context *smu, *value = metrics->UvdActivity; break; case METRICS_AVERAGE_SOCKETPOWER: - *value = metrics->CurrentSocketPower; + *value = (metrics->CurrentSocketPower << 8) / + 1000 ; break; case METRICS_TEMPERATURE_EDGE: *value = metrics->GfxTemperature / 100 * -- GitLab From 37030aba0f362cf8b16eb2347c7430b2e9ef719e Mon Sep 17 00:00:00 2001 From: Xiaojian Du Date: Fri, 18 Dec 2020 14:32:02 +0800 Subject: [PATCH 1472/1894] drm/amd/pm: improve the fine grain tuning function for RV/RV2/PCO This patch is to improve the fine grain tuning function for RV/RV2/PCO. This patch adds two new commands: "restore" and "commit". This function uses the pp_od_clk_voltage sysfs file to configure the min and max value of gfx clock frequency manually or restore the default value. Command guide: echo "s level value" > pp_od_clk_voltage "s" - set the sclk frequency "level" - 0 or 1, "0" represents the min value, "1" represents the max value "value" - the target value of sclk frequency, it should be limited in the safe range echo "r" > pp_od_clk_voltage "r" - reset the sclk frequency, restore the default value instantly echo "c" > pp_od_clk_voltage "c" - commit the min and max value of sclk frequency to the system only after the commit command, the target values set by "s" command will take effect. Example: 1)change power profile from "auto" to "manual" $ cat power_dpm_force_performance_level auto $ echo "manual" > power_dpm_force_performance_level $ cat power_dpm_force_performance_level manual 2)check the default sclk frequency $ cat pp_od_clk_voltage OD_SCLK: 0: 200Mhz 1: 1400Mhz OD_RANGE: SCLK: 200MHz 1400MHz 3)use "s" -- set command to configure the min and max sclk frequency $ echo "s 0 600" > pp_od_clk_voltage $ echo "s 1 1000" > pp_od_clk_voltage $ echo "c" > pp_od_clk_voltage $ cat pp_od_clk_voltage OD_SCLK: 0: 600Mhz 1: 1000Mhz OD_RANGE: SCLK: 200MHz 1400MHz 4)use "r" -- reset command to restore the min or max sclk frequency $ echo "r" > pp_od_clk_voltage $ cat pp_od_clk_voltage OD_SCLK: 0: 200Mhz 1: 1400Mhz OD_RANGE: SCLK: 200MHz 1400MHz Signed-off-by: Xiaojian Du Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- .../drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c | 114 +++++++++++++++--- .../drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h | 1 + 2 files changed, 98 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c index e57e64bbacdc2..0cf899566e31c 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c @@ -251,7 +251,7 @@ static int smu10_set_hard_min_gfxclk_by_freq(struct pp_hwmgr *hwmgr, uint32_t cl smu10_data->gfx_actual_soft_min_freq = clock; smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinGfxClk, - smu10_data->gfx_actual_soft_min_freq, + clock, NULL); } return 0; @@ -948,6 +948,8 @@ static int smu10_print_clock_levels(struct pp_hwmgr *hwmgr, struct smu10_voltage_dependency_table *mclk_table = data->clock_vol_info.vdd_dep_on_fclk; uint32_t i, now, size = 0; + uint32_t min_freq, max_freq = 0; + uint32_t ret = 0; switch (type) { case PP_SCLK: @@ -983,18 +985,28 @@ static int smu10_print_clock_levels(struct pp_hwmgr *hwmgr, break; case OD_SCLK: if (hwmgr->od_enabled) { - size = sprintf(buf, "%s:\n", "OD_SCLK"); + ret = smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &min_freq); + if (ret) + return ret; + ret = smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &max_freq); + if (ret) + return ret; + size = sprintf(buf, "%s:\n", "OD_SCLK"); size += sprintf(buf + size, "0: %10uMhz\n", - (data->gfx_actual_soft_min_freq > 0) ? data->gfx_actual_soft_min_freq : data->gfx_min_freq_limit/100); - size += sprintf(buf + size, "1: %10uMhz\n", data->gfx_max_freq_limit/100); + (data->gfx_actual_soft_min_freq > 0) ? data->gfx_actual_soft_min_freq : min_freq); + size += sprintf(buf + size, "1: %10uMhz\n", + (data->gfx_actual_soft_max_freq > 0) ? data->gfx_actual_soft_max_freq : max_freq); } break; case OD_RANGE: if (hwmgr->od_enabled) { - uint32_t min_freq, max_freq = 0; - smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &min_freq); - smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &max_freq); + ret = smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &min_freq); + if (ret) + return ret; + ret = smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &max_freq); + if (ret) + return ret; size = sprintf(buf, "%s:\n", "OD_RANGE"); size += sprintf(buf + size, "SCLK: %7uMHz %10uMHz\n", @@ -1414,23 +1426,91 @@ static int smu10_set_fine_grain_clk_vol(struct pp_hwmgr *hwmgr, enum PP_OD_DPM_TABLE_COMMAND type, long *input, uint32_t size) { + uint32_t min_freq, max_freq = 0; + struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend); + int ret = 0; + if (!hwmgr->od_enabled) { pr_err("Fine grain not support\n"); return -EINVAL; } - if (size != 2) { - pr_err("Input parameter number not correct\n"); - return -EINVAL; - } - if (type == PP_OD_EDIT_SCLK_VDDC_TABLE) { - if (input[0] == 0) - smu10_set_hard_min_gfxclk_by_freq(hwmgr, input[1]); - else if (input[0] == 1) - smu10_set_soft_max_gfxclk_by_freq(hwmgr, input[1]); - else + if (size != 2) { + pr_err("Input parameter number not correct\n"); + return -EINVAL; + } + + if (input[0] == 0) { + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &min_freq); + if (input[1] < min_freq) { + pr_err("Fine grain setting minimum sclk (%ld) MHz is less than the minimum allowed (%d) MHz\n", + input[1], min_freq); + return -EINVAL; + } + smu10_data->gfx_actual_soft_min_freq = input[1]; + } else if (input[0] == 1) { + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &max_freq); + if (input[1] > max_freq) { + pr_err("Fine grain setting maximum sclk (%ld) MHz is greater than the maximum allowed (%d) MHz\n", + input[1], max_freq); + return -EINVAL; + } + smu10_data->gfx_actual_soft_max_freq = input[1]; + } else { + return -EINVAL; + } + } else if (type == PP_OD_RESTORE_DEFAULT_TABLE) { + if (size != 0) { + pr_err("Input parameter number not correct\n"); + return -EINVAL; + } + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &min_freq); + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &max_freq); + + smu10_data->gfx_actual_soft_min_freq = min_freq; + smu10_data->gfx_actual_soft_max_freq = max_freq; + + ret = smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetHardMinGfxClk, + min_freq, + NULL); + if (ret) + return ret; + + ret = smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetSoftMaxGfxClk, + max_freq, + NULL); + if (ret) + return ret; + } else if (type == PP_OD_COMMIT_DPM_TABLE) { + if (size != 0) { + pr_err("Input parameter number not correct\n"); + return -EINVAL; + } + + if (smu10_data->gfx_actual_soft_min_freq > smu10_data->gfx_actual_soft_max_freq) { + pr_err("The setting minimun sclk (%d) MHz is greater than the setting maximum sclk (%d) MHz\n", + smu10_data->gfx_actual_soft_min_freq, smu10_data->gfx_actual_soft_max_freq); return -EINVAL; + } + + ret = smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetHardMinGfxClk, + smu10_data->gfx_actual_soft_min_freq, + NULL); + if (ret) + return ret; + + ret = smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetSoftMaxGfxClk, + smu10_data->gfx_actual_soft_max_freq, + NULL); + if (ret) + return ret; + } else { + return -EINVAL; } return 0; diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h index 6c9b5f060902b..28d86d354d50b 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h @@ -283,6 +283,7 @@ struct smu10_hwmgr { uint32_t vclk_soft_min; uint32_t dclk_soft_min; uint32_t gfx_actual_soft_min_freq; + uint32_t gfx_actual_soft_max_freq; uint32_t gfx_min_freq_limit; uint32_t gfx_max_freq_limit; /* in 10Khz*/ -- GitLab From fc996f952df1c63b57e3a08ac612db53bf8abadc Mon Sep 17 00:00:00 2001 From: John Clements Date: Fri, 25 Dec 2020 12:22:51 +0800 Subject: [PATCH 1473/1894] drm/amd/pm: updated PM to I2C controller port on sienna cichlid sienna cichlid interfaces with RAS eeprom on I2C controller port 1 Reviewed-by: Hawking Zhang Signed-off-by: John Clements Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 9608745d732fb..12b36eb0ff6a5 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -2372,7 +2372,7 @@ static void sienna_cichlid_fill_i2c_req(SwI2cRequest_t *req, bool write, { int i; - req->I2CcontrollerPort = 0; + req->I2CcontrollerPort = 1; req->I2CSpeed = 2; req->SlaveAddress = address; req->NumCmds = numbytes; -- GitLab From a7b5d9dd57298333e6e9f4c167f01385d922bbfb Mon Sep 17 00:00:00 2001 From: Kevin Wang Date: Tue, 29 Dec 2020 14:10:28 +0800 Subject: [PATCH 1474/1894] drm/amd/display: fix sysfs amdgpu_current_backlight_pwm NULL pointer issue fix NULL pointer issue when read sysfs amdgpu_current_backlight_pwm sysfs node. Call Trace: [ 248.273833] BUG: kernel NULL pointer dereference, address: 0000000000000130 [ 248.273930] #PF: supervisor read access in kernel mode [ 248.273993] #PF: error_code(0x0000) - not-present page [ 248.274054] PGD 0 P4D 0 [ 248.274092] Oops: 0000 [#1] SMP PTI [ 248.274138] CPU: 2 PID: 1377 Comm: cat Tainted: G OE 5.9.0-rc5-drm-next-5.9+ #1 [ 248.274233] Hardware name: System manufacturer System Product Name/Z170-A, BIOS 3802 03/15/2018 [ 248.274641] RIP: 0010:dc_link_get_backlight_level+0x5/0x70 [amdgpu] [ 248.274718] Code: 67 ff ff ff 41 b9 03 00 00 00 e9 45 ff ff ff d1 ea e9 55 ff ff ff 0f 1f 44 00 00 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 <48> 8b 87 30 01 00 00 48 8b 00 48 8b 88 88 03 00 00 48 8d 81 e8 01 [ 248.274919] RSP: 0018:ffffb5ad809b3df0 EFLAGS: 00010203 [ 248.274982] RAX: ffffa0f77d1c0010 RBX: ffffa0f793ae9168 RCX: 0000000000000001 [ 248.275064] RDX: ffffa0f79753db00 RSI: 0000000000000001 RDI: 0000000000000000 [ 248.275145] RBP: ffffb5ad809b3e00 R08: ffffb5ad809b3da0 R09: 0000000000000000 [ 248.275225] R10: ffffb5ad809b3e68 R11: 0000000000000000 R12: ffffa0f793ae9190 [ 248.275306] R13: ffffb5ad809b3ef0 R14: 0000000000000001 R15: ffffa0f793ae9168 [ 248.275388] FS: 00007f5f1ec4d540(0000) GS:ffffa0f79ec80000(0000) knlGS:0000000000000000 [ 248.275480] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 248.275547] CR2: 0000000000000130 CR3: 000000042a03c005 CR4: 00000000003706e0 [ 248.275628] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 248.275708] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 248.275789] Call Trace: [ 248.276124] ? current_backlight_read+0x24/0x40 [amdgpu] [ 248.276194] seq_read+0xc3/0x3f0 [ 248.276240] full_proxy_read+0x5c/0x90 [ 248.276290] vfs_read+0xa7/0x190 [ 248.276334] ksys_read+0xa7/0xe0 [ 248.276379] __x64_sys_read+0x1a/0x20 [ 248.276429] do_syscall_64+0x37/0x80 [ 248.276477] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 248.276538] RIP: 0033:0x7f5f1e75c191 [ 248.276585] Code: fe ff ff 48 8d 3d b7 9d 0a 00 48 83 ec 08 e8 46 4d 02 00 66 0f 1f 44 00 00 48 8d 05 71 07 2e 00 8b 00 85 c0 75 13 31 c0 0f 05 <48> 3d 00 f0 ff ff 77 57 f3 c3 0f 1f 44 00 00 41 54 55 49 89 d4 53Hw [ 248.276784] RSP: 002b:00007ffcb1fc3f38 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [ 248.276872] RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007f5f1e75c191 [ 248.276953] RDX: 0000000000020000 RSI: 00007f5f1ec2b000 RDI: 0000000000000003 [ 248.277034] RBP: 0000000000020000 R08: 00000000ffffffff R09: 0000000000000000 [ 248.277115] R10: 0000000000000022 R11: 0000000000000246 R12: 00007f5f1ec2b000 [ 248.277195] R13: 0000000000000003 R14: 00007f5f1ec2b00f R15: 0000000000020000 [ 248.277279] Modules linked in: amdgpu(OE) iommu_v2 gpu_sched ttm(OE) drm_kms_helper cec drm i2c_algo_bit fb_sys_fops syscopyarea sysfillrect sysimgblt rpcsec_gss_krb5 auth_rpcgss nfsv4 nfs lockd grace fscache nls_iso8859_1 snd_hda_codec_realtek snd_hda_codec_hdmi snd_hda_codec_generic ledtrig_audio intel_rapl_msr intel_rapl_common snd_hda_intel snd_intel_dspcfg x86_pkg_temp_thermal intel_powerclamp snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_seq_midi snd_seq_midi_event mei_hdcp coretemp snd_rawmidi snd_seq kvm_intel kvm snd_seq_device snd_timer irqbypass joydev snd input_leds soundcore crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel crypto_simd cryptd glue_helper rapl intel_cstate mac_hid mei_me serio_raw mei eeepc_wmi wmi_bmof asus_wmi mxm_wmi intel_wmi_thunderbolt acpi_pad sparse_keymap efi_pstore sch_fq_codel parport_pc ppdev lp parport sunrpc ip_tables x_tables autofs4 hid_logitech_hidpp hid_logitech_dj hid_generic usbhid hid e1000e psmouse ahci libahci wmi video [ 248.278211] CR2: 0000000000000130 [ 248.278221] ---[ end trace 1fbe72fe6f91091d ]--- [ 248.357226] RIP: 0010:dc_link_get_backlight_level+0x5/0x70 [amdgpu] [ 248.357272] Code: 67 ff ff ff 41 b9 03 00 00 00 e9 45 ff ff ff d1 ea e9 55 ff ff ff 0f 1f 44 00 00 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 <48> 8b 87 30 01 00 00 48 8b 00 48 8b 88 88 03 00 00 48 8d 81 e8 01 Signed-off-by: Kevin Wang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 9e1071b2181ff..f4a2088ab1792 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -2487,9 +2487,14 @@ enum dc_status dc_link_validate_mode_timing( static struct abm *get_abm_from_stream_res(const struct dc_link *link) { int i; - struct dc *dc = link->ctx->dc; + struct dc *dc = NULL; struct abm *abm = NULL; + if (!link || !link->ctx) + return NULL; + + dc = link->ctx->dc; + for (i = 0; i < MAX_PIPES; i++) { struct pipe_ctx pipe_ctx = dc->current_state->res_ctx.pipe_ctx[i]; struct dc_stream_state *stream = pipe_ctx.stream; -- GitLab From 8ae291cc95e49011b736b641b0cfad502b7a1526 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 5 Jan 2021 13:13:27 +0200 Subject: [PATCH 1475/1894] RDMA/ucma: Do not miss ctx destruction steps in some cases The destruction flow is very complicated here because the cm_id can be destroyed from the event handler at any time if the device is hot-removed. This leaves behind a partial ctx with no cm_id in the xarray, and will let user space leak memory. Make everything consistent in this flow in all places: - Return the xarray back to XA_ZERO_ENTRY before beginning any destruction. The thread that reaches this first is responsible to kfree, everyone else does nothing. - Test the xarray during the special hot-removal case to block the queue_work, this has much simpler locking and doesn't require a 'destroying' - Fix the ref initialization so that it is only positive if cm_id != NULL, then rely on that to guide the destruction process in all cases. Now the new ucma_destroy_private_ctx() can be called in all places that want to free the ctx, including all the error unwinds, and none of the details are missed. Fixes: a1d33b70dbbc ("RDMA/ucma: Rework how new connections are passed through event delivery") Link: https://lore.kernel.org/r/20210105111327.230270-1-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/ucma.c | 135 ++++++++++++++++++--------------- 1 file changed, 72 insertions(+), 63 deletions(-) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 7dab9a27a145a..da2512c30ffd5 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -95,8 +95,6 @@ struct ucma_context { u64 uid; struct list_head list; - /* sync between removal event and id destroy, protected by file mut */ - int destroying; struct work_struct close_work; }; @@ -122,7 +120,7 @@ static DEFINE_XARRAY_ALLOC(ctx_table); static DEFINE_XARRAY_ALLOC(multicast_table); static const struct file_operations ucma_fops; -static int __destroy_id(struct ucma_context *ctx); +static int ucma_destroy_private_ctx(struct ucma_context *ctx); static inline struct ucma_context *_ucma_find_context(int id, struct ucma_file *file) @@ -179,19 +177,14 @@ static void ucma_close_id(struct work_struct *work) /* once all inflight tasks are finished, we close all underlying * resources. The context is still alive till its explicit destryoing - * by its creator. + * by its creator. This puts back the xarray's reference. */ ucma_put_ctx(ctx); wait_for_completion(&ctx->comp); /* No new events will be generated after destroying the id. */ rdma_destroy_id(ctx->cm_id); - /* - * At this point ctx->ref is zero so the only place the ctx can be is in - * a uevent or in __destroy_id(). Since the former doesn't touch - * ctx->cm_id and the latter sync cancels this, there is no races with - * this store. - */ + /* Reading the cm_id without holding a positive ref is not allowed */ ctx->cm_id = NULL; } @@ -204,7 +197,6 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file) return NULL; INIT_WORK(&ctx->close_work, ucma_close_id); - refcount_set(&ctx->ref, 1); init_completion(&ctx->comp); /* So list_del() will work if we don't do ucma_finish_ctx() */ INIT_LIST_HEAD(&ctx->list); @@ -218,6 +210,13 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file) return ctx; } +static void ucma_set_ctx_cm_id(struct ucma_context *ctx, + struct rdma_cm_id *cm_id) +{ + refcount_set(&ctx->ref, 1); + ctx->cm_id = cm_id; +} + static void ucma_finish_ctx(struct ucma_context *ctx) { lockdep_assert_held(&ctx->file->mut); @@ -303,7 +302,7 @@ static int ucma_connect_event_handler(struct rdma_cm_id *cm_id, ctx = ucma_alloc_ctx(listen_ctx->file); if (!ctx) goto err_backlog; - ctx->cm_id = cm_id; + ucma_set_ctx_cm_id(ctx, cm_id); uevent = ucma_create_uevent(listen_ctx, event); if (!uevent) @@ -321,8 +320,7 @@ static int ucma_connect_event_handler(struct rdma_cm_id *cm_id, return 0; err_alloc: - xa_erase(&ctx_table, ctx->id); - kfree(ctx); + ucma_destroy_private_ctx(ctx); err_backlog: atomic_inc(&listen_ctx->backlog); /* Returning error causes the new ID to be destroyed */ @@ -356,8 +354,12 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id, wake_up_interruptible(&ctx->file->poll_wait); } - if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL && !ctx->destroying) - queue_work(system_unbound_wq, &ctx->close_work); + if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) { + xa_lock(&ctx_table); + if (xa_load(&ctx_table, ctx->id) == ctx) + queue_work(system_unbound_wq, &ctx->close_work); + xa_unlock(&ctx_table); + } return 0; } @@ -461,13 +463,12 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf, ret = PTR_ERR(cm_id); goto err1; } - ctx->cm_id = cm_id; + ucma_set_ctx_cm_id(ctx, cm_id); resp.id = ctx->id; if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) { - xa_erase(&ctx_table, ctx->id); - __destroy_id(ctx); + ucma_destroy_private_ctx(ctx); return -EFAULT; } @@ -477,8 +478,7 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf, return 0; err1: - xa_erase(&ctx_table, ctx->id); - kfree(ctx); + ucma_destroy_private_ctx(ctx); return ret; } @@ -516,68 +516,73 @@ static void ucma_cleanup_mc_events(struct ucma_multicast *mc) rdma_unlock_handler(mc->ctx->cm_id); } -/* - * ucma_free_ctx is called after the underlying rdma CM-ID is destroyed. At - * this point, no new events will be reported from the hardware. However, we - * still need to cleanup the UCMA context for this ID. Specifically, there - * might be events that have not yet been consumed by the user space software. - * mutex. After that we release them as needed. - */ -static int ucma_free_ctx(struct ucma_context *ctx) +static int ucma_cleanup_ctx_events(struct ucma_context *ctx) { int events_reported; struct ucma_event *uevent, *tmp; LIST_HEAD(list); - ucma_cleanup_multicast(ctx); - - /* Cleanup events not yet reported to the user. */ + /* Cleanup events not yet reported to the user.*/ mutex_lock(&ctx->file->mut); list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) { - if (uevent->ctx == ctx || uevent->conn_req_ctx == ctx) + if (uevent->ctx != ctx) + continue; + + if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST && + xa_cmpxchg(&ctx_table, uevent->conn_req_ctx->id, + uevent->conn_req_ctx, XA_ZERO_ENTRY, + GFP_KERNEL) == uevent->conn_req_ctx) { list_move_tail(&uevent->list, &list); + continue; + } + list_del(&uevent->list); + kfree(uevent); } list_del(&ctx->list); events_reported = ctx->events_reported; mutex_unlock(&ctx->file->mut); /* - * If this was a listening ID then any connections spawned from it - * that have not been delivered to userspace are cleaned up too. - * Must be done outside any locks. + * If this was a listening ID then any connections spawned from it that + * have not been delivered to userspace are cleaned up too. Must be done + * outside any locks. */ list_for_each_entry_safe(uevent, tmp, &list, list) { - list_del(&uevent->list); - if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST && - uevent->conn_req_ctx != ctx) - __destroy_id(uevent->conn_req_ctx); + ucma_destroy_private_ctx(uevent->conn_req_ctx); kfree(uevent); } - - mutex_destroy(&ctx->mutex); - kfree(ctx); return events_reported; } -static int __destroy_id(struct ucma_context *ctx) +/* + * When this is called the xarray must have a XA_ZERO_ENTRY in the ctx->id (ie + * the ctx is not public to the user). This either because: + * - ucma_finish_ctx() hasn't been called + * - xa_cmpxchg() succeed to remove the entry (only one thread can succeed) + */ +static int ucma_destroy_private_ctx(struct ucma_context *ctx) { + int events_reported; + /* - * If the refcount is already 0 then ucma_close_id() has already - * destroyed the cm_id, otherwise holding the refcount keeps cm_id - * valid. Prevent queue_work() from being called. + * Destroy the underlying cm_id. New work queuing is prevented now by + * the removal from the xarray. Once the work is cancled ref will either + * be 0 because the work ran to completion and consumed the ref from the + * xarray, or it will be positive because we still have the ref from the + * xarray. This can also be 0 in cases where cm_id was never set */ - if (refcount_inc_not_zero(&ctx->ref)) { - rdma_lock_handler(ctx->cm_id); - ctx->destroying = 1; - rdma_unlock_handler(ctx->cm_id); - ucma_put_ctx(ctx); - } - cancel_work_sync(&ctx->close_work); - /* At this point it's guaranteed that there is no inflight closing task */ - if (ctx->cm_id) + if (refcount_read(&ctx->ref)) ucma_close_id(&ctx->close_work); - return ucma_free_ctx(ctx); + + events_reported = ucma_cleanup_ctx_events(ctx); + ucma_cleanup_multicast(ctx); + + WARN_ON(xa_cmpxchg(&ctx_table, ctx->id, XA_ZERO_ENTRY, NULL, + GFP_KERNEL) != NULL); + mutex_destroy(&ctx->mutex); + kfree(ctx); + return events_reported; } static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf, @@ -596,14 +601,17 @@ static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf, xa_lock(&ctx_table); ctx = _ucma_find_context(cmd.id, file); - if (!IS_ERR(ctx)) - __xa_erase(&ctx_table, ctx->id); + if (!IS_ERR(ctx)) { + if (__xa_cmpxchg(&ctx_table, ctx->id, ctx, XA_ZERO_ENTRY, + GFP_KERNEL) != ctx) + ctx = ERR_PTR(-ENOENT); + } xa_unlock(&ctx_table); if (IS_ERR(ctx)) return PTR_ERR(ctx); - resp.events_reported = __destroy_id(ctx); + resp.events_reported = ucma_destroy_private_ctx(ctx); if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) ret = -EFAULT; @@ -1777,15 +1785,16 @@ static int ucma_close(struct inode *inode, struct file *filp) * prevented by this being a FD release function. The list_add_tail() in * ucma_connect_event_handler() can run concurrently, however it only * adds to the list *after* a listening ID. By only reading the first of - * the list, and relying on __destroy_id() to block + * the list, and relying on ucma_destroy_private_ctx() to block * ucma_connect_event_handler(), no additional locking is needed. */ while (!list_empty(&file->ctx_list)) { struct ucma_context *ctx = list_first_entry( &file->ctx_list, struct ucma_context, list); - xa_erase(&ctx_table, ctx->id); - __destroy_id(ctx); + WARN_ON(xa_cmpxchg(&ctx_table, ctx->id, ctx, XA_ZERO_ENTRY, + GFP_KERNEL) != ctx); + ucma_destroy_private_ctx(ctx); } kfree(file); return 0; -- GitLab From ed1df58585632dff96cc01e14857175dfdf67376 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Thu, 31 Dec 2020 13:05:09 +0800 Subject: [PATCH 1476/1894] drm/amdgpu: switched to cached noretry setting for vangogh global noretry setting is cached to gmc.noretry Signed-off-by: Hawking Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c index b72c8e4ca36bd..07104a1de3082 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c @@ -310,7 +310,7 @@ static void mmhub_v2_3_setup_vmid_config(struct amdgpu_device *adev) /* Send no-retry XNACK on fault to suppress VM fault storm. */ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, - !amdgpu_noretry); + !adev->gmc.noretry); WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL, i * hub->ctx_distance, tmp); WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, -- GitLab From 9a029a3facc4d333100308a8e283d9210a36b94c Mon Sep 17 00:00:00 2001 From: Dennis Li Date: Wed, 30 Dec 2020 10:27:42 +0800 Subject: [PATCH 1477/1894] drm/amdgpu: fix a memory protection fault when remove amdgpu device ASD and TA share the same firmware in SIENNA_CICHLID and only TA firmware is requested during boot, so only need release TA firmware when remove device. [ 83.877150] general protection fault, probably for non-canonical address 0x1269f97e6ed04095: 0000 [#1] SMP PTI [ 83.888076] CPU: 0 PID: 1312 Comm: modprobe Tainted: G W OE 5.9.0-rc5-deli-amd-vangogh-0.0.6.6-114-gdd99d5669a96-dirty #2 [ 83.901160] Hardware name: System manufacturer System Product Name/TUF Z370-PLUS GAMING II, BIOS 0411 09/21/2018 [ 83.912353] RIP: 0010:free_fw_priv+0xc/0x120 [ 83.917531] Code: e8 99 cd b0 ff b8 a1 ff ff ff eb 9f 4c 89 f7 e8 8a cd b0 ff b8 f4 ff ff ff eb 90 0f 1f 00 0f 1f 44 00 00 55 48 89 e5 41 54 53 <4c> 8b 67 18 48 89 fb 4c 89 e7 e8 45 94 41 00 b8 ff ff ff ff f0 0f [ 83.937576] RSP: 0018:ffffbc34c13a3ce0 EFLAGS: 00010206 [ 83.943699] RAX: ffffffffbb681850 RBX: ffffa047f117eb60 RCX: 0000000080800055 [ 83.951879] RDX: ffffbc34c1d5f000 RSI: 0000000080800055 RDI: 1269f97e6ed04095 [ 83.959955] RBP: ffffbc34c13a3cf0 R08: 0000000000000000 R09: 0000000000000001 [ 83.968107] R10: ffffbc34c13a3cc8 R11: 00000000ffffff00 R12: ffffa047d6b23378 [ 83.976166] R13: ffffa047d6b23338 R14: ffffa047d6b240c8 R15: 0000000000000000 [ 83.984295] FS: 00007f74f6712540(0000) GS:ffffa047fbe00000(0000) knlGS:0000000000000000 [ 83.993323] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 84.000056] CR2: 0000556a1cca4e18 CR3: 000000021faa8004 CR4: 00000000003706f0 [ 84.008128] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 84.016155] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 84.024174] Call Trace: [ 84.027514] release_firmware.part.11+0x4b/0x70 [ 84.033017] release_firmware+0x13/0x20 [ 84.037803] psp_sw_fini+0x77/0xb0 [amdgpu] [ 84.042857] amdgpu_device_fini+0x38c/0x5d0 [amdgpu] [ 84.048815] amdgpu_driver_unload_kms+0x43/0x70 [amdgpu] [ 84.055055] drm_dev_unregister+0x73/0xb0 [drm] [ 84.060499] drm_dev_unplug+0x28/0x30 [drm] [ 84.065598] amdgpu_dev_uninit+0x1b/0x40 [amdgpu] [ 84.071223] amdgpu_pci_remove+0x4e/0x70 [amdgpu] [ 84.076835] pci_device_remove+0x3e/0xc0 [ 84.081609] device_release_driver_internal+0xfb/0x1c0 [ 84.087558] driver_detach+0x4d/0xa0 [ 84.092041] bus_remove_driver+0x5f/0xe0 [ 84.096854] driver_unregister+0x2f/0x50 [ 84.101594] pci_unregister_driver+0x22/0xa0 [ 84.106806] amdgpu_exit+0x15/0x2b [amdgpu] Signed-off-by: Dennis Li Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 523d22db094b6..5d6fc369e32c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -563,7 +563,7 @@ static int psp_asd_load(struct psp_context *psp) * add workaround to bypass it for sriov now. * TODO: add version check to make it common */ - if (amdgpu_sriov_vf(psp->adev) || !psp->asd_fw) + if (amdgpu_sriov_vf(psp->adev) || !psp->asd_ucode_size) return 0; cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); @@ -2589,11 +2589,10 @@ static int parse_ta_bin_descriptor(struct psp_context *psp, switch (desc->fw_type) { case TA_FW_TYPE_PSP_ASD: - psp->asd_fw_version = le32_to_cpu(desc->fw_version); + psp->asd_fw_version = le32_to_cpu(desc->fw_version); psp->asd_feature_version = le32_to_cpu(desc->fw_version); - psp->asd_ucode_size = le32_to_cpu(desc->size_bytes); + psp->asd_ucode_size = le32_to_cpu(desc->size_bytes); psp->asd_start_addr = ucode_start_addr; - psp->asd_fw = psp->ta_fw; break; case TA_FW_TYPE_PSP_XGMI: psp->ta_xgmi_ucode_version = le32_to_cpu(desc->fw_version); -- GitLab From 88e21af1b3f887d217f2fb14fc7e7d3cd87ebf57 Mon Sep 17 00:00:00 2001 From: Dennis Li Date: Wed, 30 Dec 2020 19:45:15 +0800 Subject: [PATCH 1478/1894] drm/amdgpu: fix a GPU hang issue when remove device When GFXOFF is enabled and GPU is idle, driver will fail to access some registers. Therefore change to disable power gating before all access registers with MMIO. Dmesg log is as following: amdgpu 0000:03:00.0: amdgpu: amdgpu: finishing device. amdgpu: cp queue pipe 4 queue 0 preemption failed amdgpu 0000:03:00.0: amdgpu: failed to write reg 2890 wait reg 28a2 amdgpu 0000:03:00.0: amdgpu: failed to write reg 1a6f4 wait reg 1a706 amdgpu 0000:03:00.0: amdgpu: failed to write reg 2890 wait reg 28a2 amdgpu 0000:03:00.0: amdgpu: failed to write reg 1a6f4 wait reg 1a706 Signed-off-by: Dennis Li Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 1cb7d73f7317b..b69c34074d8d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2548,11 +2548,11 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) if (adev->gmc.xgmi.num_physical_nodes > 1) amdgpu_xgmi_remove_device(adev); - amdgpu_amdkfd_device_fini(adev); - amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); + amdgpu_amdkfd_device_fini(adev); + /* need to disable SMC first */ for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.hw) -- GitLab From 44cb39e19a05ca711bcb6e776e0a4399223204a0 Mon Sep 17 00:00:00 2001 From: Xiaojian Du Date: Wed, 30 Dec 2020 18:08:23 +0800 Subject: [PATCH 1479/1894] drm/amd/pm: fix the failure when change power profile for renoir This patch is to fix the failure when change power profile to "profile_peak" for renoir. Signed-off-by: Xiaojian Du Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c | 1 + drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c index dc75db8af3715..f743685a20e8a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c @@ -188,6 +188,7 @@ static int renoir_get_dpm_clk_limited(struct smu_context *smu, enum smu_clk_type return -EINVAL; *freq = clk_table->SocClocks[dpm_level].Freq; break; + case SMU_UCLK: case SMU_MCLK: if (dpm_level >= NUM_FCLK_DPM_LEVELS) return -EINVAL; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c index 522d550046555..06abf2a7ce9e9 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c @@ -225,6 +225,7 @@ int smu_v12_0_set_soft_freq_limited_range(struct smu_context *smu, enum smu_clk_ break; case SMU_FCLK: case SMU_MCLK: + case SMU_UCLK: ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetHardMinFclkByFreq, min, NULL); if (ret) return ret; -- GitLab From 98b64762080b96b0f8608da5fe161f1a7ab6f5de Mon Sep 17 00:00:00 2001 From: Xiaojian Du Date: Tue, 29 Dec 2020 17:19:37 +0800 Subject: [PATCH 1480/1894] drm/amd/pm: improve the fine grain tuning function for RV/RV2/PCO This patch is to improve the fine grain tuning function for RV/RV2/PCO. The fine grain tuning function uses the sysfs node -- pp_od_clk_voltage to config gfxclk. Meanwhile, another sysfs node -- power_dpm_force_perfomance_level also affects the gfx clk. It will cause confusion when these two sysfs nodes works together. So this patch adds one flag to avoid this confusion, the flag will make these two sysfs nodes work separately. The flag is set as "disabled" by default, so the fine grain tuning function will be disabled by default. Only when power_dpm_force_perfomance_level is changed to "manual" mode, the flag will be set as "enabled", and the fine grain tuning function will be enabled. In other profile modes, including "auto", "high", "low", "profile_peak", "profile_standard", "profile_min_sclk", "profile_min_mclk", the flag will be set as "disabled", and the od range of fine grain tuning function will be restored default value. Signed-off-by: Xiaojian Du Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- .../drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c | 58 ++++++++++++++++++- .../drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h | 2 + 2 files changed, 59 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c index 0cf899566e31c..88322781e447b 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c @@ -558,7 +558,8 @@ static int smu10_hwmgr_backend_init(struct pp_hwmgr *hwmgr) /* enable the pp_od_clk_voltage sysfs file */ hwmgr->od_enabled = 1; - + /* disabled fine grain tuning function by default */ + data->fine_grain_enabled = 0; return result; } @@ -597,6 +598,7 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, uint32_t min_mclk = hwmgr->display_config->min_mem_set_clock/100; uint32_t index_fclk = data->clock_vol_info.vdd_dep_on_fclk->count - 1; uint32_t index_socclk = data->clock_vol_info.vdd_dep_on_socclk->count - 1; + uint32_t fine_grain_min_freq = 0, fine_grain_max_freq = 0; if (hwmgr->smu_version < 0x1E3700) { pr_info("smu firmware version too old, can not set dpm level\n"); @@ -613,6 +615,14 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, switch (level) { case AMD_DPM_FORCED_LEVEL_HIGH: case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK: + data->fine_grain_enabled = 0; + + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &fine_grain_min_freq); + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &fine_grain_max_freq); + + data->gfx_actual_soft_min_freq = fine_grain_min_freq; + data->gfx_actual_soft_max_freq = fine_grain_max_freq; + smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinGfxClk, data->gfx_max_freq_limit/100, @@ -648,6 +658,14 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, NULL); break; case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK: + data->fine_grain_enabled = 0; + + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &fine_grain_min_freq); + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &fine_grain_max_freq); + + data->gfx_actual_soft_min_freq = fine_grain_min_freq; + data->gfx_actual_soft_max_freq = fine_grain_max_freq; + smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinGfxClk, min_sclk, @@ -658,6 +676,14 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, NULL); break; case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK: + data->fine_grain_enabled = 0; + + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &fine_grain_min_freq); + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &fine_grain_max_freq); + + data->gfx_actual_soft_min_freq = fine_grain_min_freq; + data->gfx_actual_soft_max_freq = fine_grain_max_freq; + smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinFclkByFreq, min_mclk, @@ -668,6 +694,14 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, NULL); break; case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD: + data->fine_grain_enabled = 0; + + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &fine_grain_min_freq); + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &fine_grain_max_freq); + + data->gfx_actual_soft_min_freq = fine_grain_min_freq; + data->gfx_actual_soft_max_freq = fine_grain_max_freq; + smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinGfxClk, SMU10_UMD_PSTATE_GFXCLK, @@ -703,6 +737,14 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, NULL); break; case AMD_DPM_FORCED_LEVEL_AUTO: + data->fine_grain_enabled = 0; + + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &fine_grain_min_freq); + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &fine_grain_max_freq); + + data->gfx_actual_soft_min_freq = fine_grain_min_freq; + data->gfx_actual_soft_max_freq = fine_grain_max_freq; + smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinGfxClk, min_sclk, @@ -741,6 +783,14 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, NULL); break; case AMD_DPM_FORCED_LEVEL_LOW: + data->fine_grain_enabled = 0; + + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &fine_grain_min_freq); + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &fine_grain_max_freq); + + data->gfx_actual_soft_min_freq = fine_grain_min_freq; + data->gfx_actual_soft_max_freq = fine_grain_max_freq; + smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinGfxClk, data->gfx_min_freq_limit/100, @@ -759,6 +809,7 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, NULL); break; case AMD_DPM_FORCED_LEVEL_MANUAL: + data->fine_grain_enabled = 1; case AMD_DPM_FORCED_LEVEL_PROFILE_EXIT: default: break; @@ -1435,6 +1486,11 @@ static int smu10_set_fine_grain_clk_vol(struct pp_hwmgr *hwmgr, return -EINVAL; } + if (!smu10_data->fine_grain_enabled) { + pr_err("Fine grain not started\n"); + return -EINVAL; + } + if (type == PP_OD_EDIT_SCLK_VDDC_TABLE) { if (size != 2) { pr_err("Input parameter number not correct\n"); diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h index 28d86d354d50b..808e0ecbe1f09 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h @@ -300,6 +300,8 @@ struct smu10_hwmgr { bool need_min_deep_sleep_dcefclk; uint32_t deep_sleep_dcefclk; uint32_t num_active_display; + + bool fine_grain_enabled; }; struct pp_hwmgr; -- GitLab From 4f6a05501eb9c57fb4c9efed70840aee523a393b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Jan 2021 15:02:32 +0100 Subject: [PATCH 1481/1894] drm/amd/display: Fix unused variable warning Some of the newly added code is hidden inside of #ifdef blocks, but one variable is unused when debugfs is disabled: drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:8370:8: error: unused variable 'configure_crc' [-Werror,-Wunused-variable] Change the #ifdef to an if(IS_ENABLED()) check to fix the warning and avoid adding more #ifdefs. Fixes: c920888c604d ("drm/amd/display: Expose new CRC window property") Reviewed-by: Wayne Lin Signed-off-by: Arnd Bergmann Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 +--- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 519080e9a2338..3fb6baf9b0bae 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -8379,8 +8379,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) acrtc->dm_irq_params.stream = dm_new_crtc_state->stream; manage_dm_interrupts(adev, acrtc, true); } -#ifdef CONFIG_DEBUG_FS - if (new_crtc_state->active && + if (IS_ENABLED(CONFIG_DEBUG_FS) && new_crtc_state->active && amdgpu_dm_is_valid_crc_source(dm_new_crtc_state->crc_src)) { /** * Frontend may have changed so reapply the CRC capture @@ -8401,7 +8400,6 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) amdgpu_dm_crtc_configure_crc_source( crtc, dm_new_crtc_state, dm_new_crtc_state->crc_src); } -#endif } for_each_new_crtc_in_state(state, crtc, new_crtc_state, j) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h index 0235bfb246e5d..eba2f1d35d075 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h @@ -46,13 +46,13 @@ static inline bool amdgpu_dm_is_valid_crc_source(enum amdgpu_dm_pipe_crc_source } /* amdgpu_dm_crc.c */ -#ifdef CONFIG_DEBUG_FS bool amdgpu_dm_crc_window_is_default(struct dm_crtc_state *dm_crtc_state); bool amdgpu_dm_crc_window_changed(struct dm_crtc_state *dm_new_crtc_state, struct dm_crtc_state *dm_old_crtc_state); int amdgpu_dm_crtc_configure_crc_source(struct drm_crtc *crtc, struct dm_crtc_state *dm_crtc_state, enum amdgpu_dm_pipe_crc_source source); +#ifdef CONFIG_DEBUG_FS int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name); int amdgpu_dm_crtc_verify_crc_source(struct drm_crtc *crtc, const char *src_name, -- GitLab From e6d5c64efaa34aae3815a9afeb1314a976142e83 Mon Sep 17 00:00:00 2001 From: Jiawei Gu Date: Tue, 29 Dec 2020 20:35:33 +0800 Subject: [PATCH 1482/1894] drm/amdgpu: fix potential memory leak during navi12 deinitialization Navi12 HDCP & DTM deinitialization needs continue to free bo if already created though initialized flag is not set. Reviewed-by: Alex Deucher Signed-off-by: Jiawei Gu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 5d6fc369e32c1..347fec6694248 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -1315,8 +1315,12 @@ static int psp_hdcp_terminate(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; - if (!psp->hdcp_context.hdcp_initialized) - return 0; + if (!psp->hdcp_context.hdcp_initialized) { + if (psp->hdcp_context.hdcp_shared_buf) + goto out; + else + return 0; + } ret = psp_hdcp_unload(psp); if (ret) @@ -1324,6 +1328,7 @@ static int psp_hdcp_terminate(struct psp_context *psp) psp->hdcp_context.hdcp_initialized = false; +out: /* free hdcp shared memory */ amdgpu_bo_free_kernel(&psp->hdcp_context.hdcp_shared_bo, &psp->hdcp_context.hdcp_shared_mc_addr, @@ -1462,8 +1467,12 @@ static int psp_dtm_terminate(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; - if (!psp->dtm_context.dtm_initialized) - return 0; + if (!psp->dtm_context.dtm_initialized) { + if (psp->dtm_context.dtm_shared_buf) + goto out; + else + return 0; + } ret = psp_dtm_unload(psp); if (ret) @@ -1471,6 +1480,7 @@ static int psp_dtm_terminate(struct psp_context *psp) psp->dtm_context.dtm_initialized = false; +out: /* free hdcp shared memory */ amdgpu_bo_free_kernel(&psp->dtm_context.dtm_shared_bo, &psp->dtm_context.dtm_shared_mc_addr, -- GitLab From 8a82b347e8732fd2b68d26a6e9f0d9a1c397560d Mon Sep 17 00:00:00 2001 From: Dennis Li Date: Tue, 5 Jan 2021 08:37:21 +0800 Subject: [PATCH 1483/1894] drm/amdgpu: fix no bad_pages issue after umc ue injection old code wrongly used the bad page status as the function return value, which cause amdgpu_ras_badpages_read always return failed. Signed-off-by: Dennis Li Reviewed-by: Guchun Chen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index c136bd4497446..82e952696d24f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1518,7 +1518,7 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev, struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_err_handler_data *data; int i = 0; - int ret = 0; + int ret = 0, status; if (!con || !con->eh_data || !bps || !count) return -EINVAL; @@ -1543,12 +1543,12 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev, .size = AMDGPU_GPU_PAGE_SIZE, .flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED, }; - ret = amdgpu_vram_mgr_query_page_status( + status = amdgpu_vram_mgr_query_page_status( ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM), data->bps[i].retired_page); - if (ret == -EBUSY) + if (status == -EBUSY) (*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_PENDING; - else if (ret == -ENOENT) + else if (status == -ENOENT) (*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_FAULT; } -- GitLab From 3851c90b7aa8f0c275d14636f0e7ccca69a2bf84 Mon Sep 17 00:00:00 2001 From: John Clements Date: Tue, 5 Jan 2021 14:53:14 +0800 Subject: [PATCH 1484/1894] drm/amdgpu: enable ras eeprom support for sienna cichlid added I2C address and asic support flag Reviewed-by: Hawking Zhang Signed-off-by: John Clements Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 1dd040166c635..19d9aa76cfbfb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -30,6 +30,7 @@ #define EEPROM_I2C_TARGET_ADDR_VEGA20 0xA0 #define EEPROM_I2C_TARGET_ADDR_ARCTURUS 0xA8 #define EEPROM_I2C_TARGET_ADDR_ARCTURUS_D342 0xA0 +#define EEPROM_I2C_TARGET_ADDR_SIENNA_CICHLID 0xA0 /* * The 2 macros bellow represent the actual size in bytes that @@ -62,7 +63,8 @@ static bool __is_ras_eeprom_supported(struct amdgpu_device *adev) { if ((adev->asic_type == CHIP_VEGA20) || - (adev->asic_type == CHIP_ARCTURUS)) + (adev->asic_type == CHIP_ARCTURUS) || + (adev->asic_type == CHIP_SIENNA_CICHLID)) return true; return false; @@ -100,6 +102,10 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev, case CHIP_ARCTURUS: return __get_eeprom_i2c_addr_arct(adev, i2c_addr); + case CHIP_SIENNA_CICHLID: + *i2c_addr = EEPROM_I2C_TARGET_ADDR_SIENNA_CICHLID; + break; + default: return false; } -- GitLab From c241ed2f0ea549c18cff62a3708b43846b84dae3 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 4 Jan 2021 11:24:20 -0500 Subject: [PATCH 1485/1894] drm/amdgpu/display: drop DCN support for aarch64 From Ard: "Simply disabling -mgeneral-regs-only left and right is risky, given that the standard AArch64 ABI permits the use of FP/SIMD registers anywhere, and GCC is known to use SIMD registers for spilling, and may invent other uses of the FP/SIMD register file that have nothing to do with the floating point code in question. Note that putting kernel_neon_begin() and kernel_neon_end() around the code that does use FP is not sufficient here, the problem is in all the other code that may be emitted with references to SIMD registers in it. So the only way to do this properly is to put all floating point code in a separate compilation unit, and only compile that unit with -mgeneral-regs-only." Disable support until the code can be properly refactored to support this properly on aarch64. Acked-by: Will Deacon Reported-by: Ard Biesheuvel Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/Kconfig | 2 +- drivers/gpu/drm/amd/display/dc/calcs/Makefile | 4 ---- .../gpu/drm/amd/display/dc/clk_mgr/Makefile | 21 ------------------- drivers/gpu/drm/amd/display/dc/dcn10/Makefile | 7 ------- .../drm/amd/display/dc/dcn10/dcn10_resource.c | 7 ------- drivers/gpu/drm/amd/display/dc/dcn20/Makefile | 4 ---- drivers/gpu/drm/amd/display/dc/dcn21/Makefile | 4 ---- drivers/gpu/drm/amd/display/dc/dcn30/Makefile | 5 ----- .../gpu/drm/amd/display/dc/dcn301/Makefile | 4 ---- .../gpu/drm/amd/display/dc/dcn302/Makefile | 4 ---- drivers/gpu/drm/amd/display/dc/dml/Makefile | 4 ---- drivers/gpu/drm/amd/display/dc/dsc/Makefile | 4 ---- drivers/gpu/drm/amd/display/dc/os_types.h | 4 ---- 13 files changed, 1 insertion(+), 73 deletions(-) diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig index 797b5d4b43e5e..e509a175ed175 100644 --- a/drivers/gpu/drm/amd/display/Kconfig +++ b/drivers/gpu/drm/amd/display/Kconfig @@ -6,7 +6,7 @@ config DRM_AMD_DC bool "AMD DC - Enable new display engine" default y select SND_HDA_COMPONENT if SND_HDA_CORE - select DRM_AMD_DC_DCN if (X86 || PPC64 || (ARM64 && KERNEL_MODE_NEON)) && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS) + select DRM_AMD_DC_DCN if (X86 || PPC64) && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS) help Choose this option if you want to use the new display engine support for AMDGPU. This adds required support for Vega and diff --git a/drivers/gpu/drm/amd/display/dc/calcs/Makefile b/drivers/gpu/drm/amd/display/dc/calcs/Makefile index 64f515d744103..f3c00f479e1cb 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/Makefile +++ b/drivers/gpu/drm/amd/display/dc/calcs/Makefile @@ -33,10 +33,6 @@ ifdef CONFIG_PPC64 calcs_ccflags := -mhard-float -maltivec endif -ifdef CONFIG_ARM64 -calcs_rcflags := -mgeneral-regs-only -endif - ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile index d59b380e7b7fb..ff96bee57bfc5 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile @@ -104,13 +104,6 @@ ifdef CONFIG_PPC64 CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn21/rn_clk_mgr.o := $(call cc-option,-mno-gnu-attribute) endif -# prevent build errors: -# ...: '-mgeneral-regs-only' is incompatible with the use of floating-point types -# this file is unused on arm64, just like on ppc64 -ifdef CONFIG_ARM64 -CFLAGS_REMOVE_$(AMDDALPATH)/dc/clk_mgr/dcn21/rn_clk_mgr.o := -mgeneral-regs-only -endif - AMD_DAL_CLK_MGR_DCN21 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn21/,$(CLK_MGR_DCN21)) AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN21) @@ -125,13 +118,6 @@ ifdef CONFIG_PPC64 CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn30/dcn30_clk_mgr.o := $(call cc-option,-mno-gnu-attribute) endif -# prevent build errors: -# ...: '-mgeneral-regs-only' is incompatible with the use of floating-point types -# this file is unused on arm64, just like on ppc64 -ifdef CONFIG_ARM64 -CFLAGS_REMOVE_$(AMDDALPATH)/dc/clk_mgr/dcn30/dcn30_clk_mgr.o := -mgeneral-regs-only -endif - AMD_DAL_CLK_MGR_DCN30 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn30/,$(CLK_MGR_DCN30)) AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN30) @@ -146,13 +132,6 @@ ifdef CONFIG_PPC64 CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn301/vg_clk_mgr.o := $(call cc-option,-mno-gnu-attribute) endif -# prevent build errors: -# ...: '-mgeneral-regs-only' is incompatible with the use of floating-point types -# this file is unused on arm64, just like on ppc64 -ifdef CONFIG_ARM64 -CFLAGS_REMOVE_$(AMDDALPATH)/dc/clk_mgr/dcn301/vg_clk_mgr.o := -mgeneral-regs-only -endif - AMD_DAL_CLK_MGR_DCN301 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn301/,$(CLK_MGR_DCN301)) AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN301) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile index 733e6e6e43bd6..62ad1a11bff9c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile @@ -31,11 +31,4 @@ DCN10 = dcn10_init.o dcn10_resource.o dcn10_ipp.o dcn10_hw_sequencer.o \ AMD_DAL_DCN10 = $(addprefix $(AMDDALPATH)/dc/dcn10/,$(DCN10)) -# fix: -# ...: '-mgeneral-regs-only' is incompatible with the use of floating-point types -# aarch64 does not support soft-float, so use hard-float and handle this in code -ifdef CONFIG_ARM64 -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn10/dcn10_resource.o := -mgeneral-regs-only -endif - AMD_DISPLAY_FILES += $(AMD_DAL_DCN10) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index bdc37831535e4..36745193c391c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -1534,15 +1534,8 @@ static bool dcn10_resource_construct( memcpy(dc->dcn_ip, &dcn10_ip_defaults, sizeof(dcn10_ip_defaults)); memcpy(dc->dcn_soc, &dcn10_soc_defaults, sizeof(dcn10_soc_defaults)); -#if defined(CONFIG_ARM64) - /* Aarch64 does not support -msoft-float/-mfloat-abi=soft */ - DC_FP_START(); - dcn10_resource_construct_fp(dc); - DC_FP_END(); -#else /* Other architectures we build for build this with soft-float */ dcn10_resource_construct_fp(dc); -#endif pool->base.pp_smu = dcn10_pp_smu_create(ctx); diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile index 624cb1341ef14..5fcaf78334ff9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile @@ -17,10 +17,6 @@ ifdef CONFIG_PPC64 CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o := -mhard-float -maltivec endif -ifdef CONFIG_ARM64 -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o := -mgeneral-regs-only -endif - ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile index 1ee5fc03b7b3d..bb8c951410822 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile @@ -13,10 +13,6 @@ ifdef CONFIG_PPC64 CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o := -mhard-float -maltivec endif -ifdef CONFIG_ARM64 -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o := -mgeneral-regs-only -endif - ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile index 248c2711aacef..c20331eb62e01 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile @@ -41,11 +41,6 @@ CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o := -mhard-float -maltivec CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o := -mhard-float -maltivec endif -ifdef CONFIG_ARM64 -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o := -mgeneral-regs-only -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o := -mgeneral-regs-only -endif - ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/Makefile b/drivers/gpu/drm/amd/display/dc/dcn301/Makefile index 2fd5d34e4ba6f..3ca7d911d25c4 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn301/Makefile @@ -21,10 +21,6 @@ ifdef CONFIG_PPC64 CFLAGS_$(AMDDALPATH)/dc/dcn301/dcn301_resource.o := -mhard-float -maltivec endif -ifdef CONFIG_ARM64 -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn301/dcn301_resource.o := -mgeneral-regs-only -endif - ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/Makefile b/drivers/gpu/drm/amd/display/dc/dcn302/Makefile index 36e44e1b07faf..8d4924b7dc221 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn302/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn302/Makefile @@ -20,10 +20,6 @@ ifdef CONFIG_PPC64 CFLAGS_$(AMDDALPATH)/dc/dcn302/dcn302_resource.o := -mhard-float -maltivec endif -ifdef CONFIG_ARM64 -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn302/dcn302_resource.o := -mgeneral-regs-only -endif - ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index a02a33dcd70bd..6bb7f2905821b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -33,10 +33,6 @@ ifdef CONFIG_PPC64 dml_ccflags := -mhard-float -maltivec endif -ifdef CONFIG_ARM64 -dml_rcflags := -mgeneral-regs-only -endif - ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 diff --git a/drivers/gpu/drm/amd/display/dc/dsc/Makefile b/drivers/gpu/drm/amd/display/dc/dsc/Makefile index f2624a1156e5c..8d31eb75c6a6e 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dsc/Makefile @@ -10,10 +10,6 @@ ifdef CONFIG_PPC64 dsc_ccflags := -mhard-float -maltivec endif -ifdef CONFIG_ARM64 -dsc_rcflags := -mgeneral-regs-only -endif - ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 diff --git a/drivers/gpu/drm/amd/display/dc/os_types.h b/drivers/gpu/drm/amd/display/dc/os_types.h index 95cb56929e79e..126c2f3a4dd3b 100644 --- a/drivers/gpu/drm/amd/display/dc/os_types.h +++ b/drivers/gpu/drm/amd/display/dc/os_types.h @@ -55,10 +55,6 @@ #include #define DC_FP_START() kernel_fpu_begin() #define DC_FP_END() kernel_fpu_end() -#elif defined(CONFIG_ARM64) -#include -#define DC_FP_START() kernel_neon_begin() -#define DC_FP_END() kernel_neon_end() #elif defined(CONFIG_PPC64) #include #include -- GitLab From 5efc1f4b454c6179d35e7b0c3eda0ad5763a00fc Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 5 Jan 2021 11:42:08 -0500 Subject: [PATCH 1486/1894] Revert "drm/amd/display: Fix memory leaks in S3 resume" This reverts commit a135a1b4c4db1f3b8cbed9676a40ede39feb3362. This leads to blank screens on some boards after replugging a display. Revert until we understand the root cause and can fix both the leak and the blank screen after replug. Bug: https://bugzilla.kernel.org/show_bug.cgi?id=211033 Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1427 Cc: Stylon Wang Cc: Harry Wentland Cc: Nicholas Kazlauskas Cc: Andre Tomt Cc: Oleksandr Natalenko Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 3fb6baf9b0bae..146486071d01d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2386,8 +2386,7 @@ void amdgpu_dm_update_connector_after_detect( drm_connector_update_edid_property(connector, aconnector->edid); - aconnector->num_modes = drm_add_edid_modes(connector, aconnector->edid); - drm_connector_list_update(connector); + drm_add_edid_modes(connector, aconnector->edid); if (aconnector->dc_link->aux_mode) drm_dp_cec_set_edid(&aconnector->dm_dp_aux.aux, -- GitLab From 67a5a68013056cbcf0a647e36cb6f4622fb6a470 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valdis=20Kl=C4=93tnieks?= Date: Sat, 26 Dec 2020 13:21:58 -0500 Subject: [PATCH 1487/1894] gcc-plugins: fix gcc 11 indigestion with plugins... Fedora Rawhide has started including gcc 11,and the g++ compiler throws a wobbly when it hits scripts/gcc-plugins: HOSTCXX scripts/gcc-plugins/latent_entropy_plugin.so In file included from /usr/include/c++/11/type_traits:35, from /usr/lib/gcc/x86_64-redhat-linux/11/plugin/include/system.h:244, from /usr/lib/gcc/x86_64-redhat-linux/11/plugin/include/gcc-plugin.h:28, from scripts/gcc-plugins/gcc-common.h:7, from scripts/gcc-plugins/latent_entropy_plugin.c:78: /usr/include/c++/11/bits/c++0x_warning.h:32:2: error: #error This file requires compiler and library support for the ISO C++ 2011 standard. This support must be enabled with the -std=c++11 or -std=gnu++11 compiler options. 32 | #error This file requires compiler and library support \ In fact, it works just fine with c++11, which has been in gcc since 4.8, and we now require 4.9 as a minimum. Signed-off-by: Valdis Kletnieks Acked-by: Josh Poimboeuf Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/82487.1609006918@turing-police --- scripts/gcc-plugins/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/gcc-plugins/Makefile b/scripts/gcc-plugins/Makefile index d66949bfeba45..b5487cce69e8e 100644 --- a/scripts/gcc-plugins/Makefile +++ b/scripts/gcc-plugins/Makefile @@ -22,9 +22,9 @@ always-y += $(GCC_PLUGIN) GCC_PLUGINS_DIR = $(shell $(CC) -print-file-name=plugin) plugin_cxxflags = -Wp,-MMD,$(depfile) $(KBUILD_HOSTCXXFLAGS) -fPIC \ - -I $(GCC_PLUGINS_DIR)/include -I $(obj) -std=gnu++98 \ + -I $(GCC_PLUGINS_DIR)/include -I $(obj) -std=gnu++11 \ -fno-rtti -fno-exceptions -fasynchronous-unwind-tables \ - -ggdb -Wno-narrowing -Wno-unused-variable -Wno-c++11-compat \ + -ggdb -Wno-narrowing -Wno-unused-variable \ -Wno-format-diag plugin_ldflags = -shared -- GitLab From 6f02b540d7597f357bc6ee711346761045d4e108 Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Wed, 6 Jan 2021 15:59:06 +0000 Subject: [PATCH 1488/1894] bpftool: Fix compilation failure for net.o with older glibc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For older glibc ~2.17, #include'ing both linux/if.h and net/if.h fails due to complaints about redefinition of interface flags: CC net.o In file included from net.c:13:0: /usr/include/linux/if.h:71:2: error: redeclaration of enumerator ‘IFF_UP’ IFF_UP = 1<<0, /* sysfs */ ^ /usr/include/net/if.h:44:5: note: previous definition of ‘IFF_UP’ was here IFF_UP = 0x1, /* Interface is up. */ The issue was fixed in kernel headers in [1], but since compilation of net.c picks up system headers the problem can recur. Dropping #include resolves the issue and it is not needed for compilation anyhow. [1] https://lore.kernel.org/netdev/1461512707-23058-1-git-send-email-mikko.rapeli__34748.27880641$1462831734$gmane$org@iki.fi/ Fixes: f6f3bac08ff9 ("tools/bpf: bpftool: add net support") Signed-off-by: Alan Maguire Signed-off-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/1609948746-15369-1-git-send-email-alan.maguire@oracle.com --- tools/bpf/bpftool/net.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c index 3fae61ef63396..ff3aa0cf39978 100644 --- a/tools/bpf/bpftool/net.c +++ b/tools/bpf/bpftool/net.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include -- GitLab From fcc42338375a1e67b8568dbb558f8b784d0f3b01 Mon Sep 17 00:00:00 2001 From: Akilesh Kailash Date: Mon, 28 Dec 2020 07:14:07 +0000 Subject: [PATCH 1489/1894] dm snapshot: flush merged data before committing metadata If the origin device has a volatile write-back cache and the following events occur: 1: After finishing merge operation of one set of exceptions, merge_callback() is invoked. 2: Update the metadata in COW device tracking the merge completion. This update to COW device is flushed cleanly. 3: System crashes and the origin device's cache where the recent merge was completed has not been flushed. During the next cycle when we read the metadata from the COW device, we will skip reading those metadata whose merge was completed in step (1). This will lead to data loss/corruption. To address this, flush the origin device post merge IO before updating the metadata. Cc: stable@vger.kernel.org Signed-off-by: Akilesh Kailash Signed-off-by: Mike Snitzer --- drivers/md/dm-snap.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 4668b2cd98f4e..11890db71f3fe 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -141,6 +141,11 @@ struct dm_snapshot { * for them to be committed. */ struct bio_list bios_queued_during_merge; + + /* + * Flush data after merge. + */ + struct bio flush_bio; }; /* @@ -1121,6 +1126,17 @@ shut: static void error_bios(struct bio *bio); +static int flush_data(struct dm_snapshot *s) +{ + struct bio *flush_bio = &s->flush_bio; + + bio_reset(flush_bio); + bio_set_dev(flush_bio, s->origin->bdev); + flush_bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; + + return submit_bio_wait(flush_bio); +} + static void merge_callback(int read_err, unsigned long write_err, void *context) { struct dm_snapshot *s = context; @@ -1134,6 +1150,11 @@ static void merge_callback(int read_err, unsigned long write_err, void *context) goto shut; } + if (flush_data(s) < 0) { + DMERR("Flush after merge failed: shutting down merge"); + goto shut; + } + if (s->store->type->commit_merge(s->store, s->num_merging_chunks) < 0) { DMERR("Write error in exception store: shutting down merge"); @@ -1318,6 +1339,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) s->first_merging_chunk = 0; s->num_merging_chunks = 0; bio_list_init(&s->bios_queued_during_merge); + bio_init(&s->flush_bio, NULL, 0); /* Allocate hash table for COW data */ if (init_hash_tables(s)) { @@ -1504,6 +1526,8 @@ static void snapshot_dtr(struct dm_target *ti) dm_exception_store_destroy(s->store); + bio_uninit(&s->flush_bio); + dm_put_device(ti, s->cow); dm_put_device(ti, s->origin); -- GitLab From 0d136f5cd9a7ba6ded7f8ff17e8b1ba680f37625 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Tue, 5 Jan 2021 18:23:33 +0100 Subject: [PATCH 1490/1894] net: mvneta: fix error message when MTU too large for XDP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The error message says that "Jumbo frames are not supported on XDP", but the code checks for mtu > MVNETA_MAX_RX_BUF_SIZE, not mtu > 1500. Fix this error message. Signed-off-by: Marek Behún Fixes: 0db51da7a8e9 ("net: mvneta: add basic XDP support") Cc: Lorenzo Bianconi Cc: Thomas Petazzoni Link: https://lore.kernel.org/r/20210105172333.21613-1-kabel@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/mvneta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 3369ec717a51c..bc4d8d1444019 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -4432,7 +4432,7 @@ static int mvneta_xdp_setup(struct net_device *dev, struct bpf_prog *prog, struct bpf_prog *old_prog; if (prog && dev->mtu > MVNETA_MAX_RX_BUF_SIZE) { - NL_SET_ERR_MSG_MOD(extack, "Jumbo frames not supported on XDP"); + NL_SET_ERR_MSG_MOD(extack, "MTU too large for XDP"); return -EOPNOTSUPP; } -- GitLab From 94bcfdbff0c210b17b27615f4952cc6ece7d5f5f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 5 Jan 2021 11:07:25 -0800 Subject: [PATCH 1491/1894] net: bareudp: add missing error handling for bareudp_link_config() .dellink does not get called after .newlink fails, bareudp_newlink() must undo what bareudp_configure() has done if bareudp_link_config() fails. v2: call bareudp_dellink(), like bareudp_dev_create() does Fixes: 571912c69f0e ("net: UDP tunnel encapsulation module for tunnelling different protocols like MPLS, IP, NSH etc.") Link: https://lore.kernel.org/r/20210105190725.1736246-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/bareudp.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index 708171c0d6281..85de5f96c02bb 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -645,11 +645,20 @@ static int bareudp_link_config(struct net_device *dev, return 0; } +static void bareudp_dellink(struct net_device *dev, struct list_head *head) +{ + struct bareudp_dev *bareudp = netdev_priv(dev); + + list_del(&bareudp->next); + unregister_netdevice_queue(dev, head); +} + static int bareudp_newlink(struct net *net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct bareudp_conf conf; + LIST_HEAD(list_kill); int err; err = bareudp2info(data, &conf, extack); @@ -662,17 +671,14 @@ static int bareudp_newlink(struct net *net, struct net_device *dev, err = bareudp_link_config(dev, tb); if (err) - return err; + goto err_unconfig; return 0; -} - -static void bareudp_dellink(struct net_device *dev, struct list_head *head) -{ - struct bareudp_dev *bareudp = netdev_priv(dev); - list_del(&bareudp->next); - unregister_netdevice_queue(dev, head); +err_unconfig: + bareudp_dellink(dev, &list_kill); + unregister_netdevice_many(&list_kill); + return err; } static size_t bareudp_get_size(const struct net_device *dev) -- GitLab From 7f847db3040897f3ee25ce97265c545b5561f6c2 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 5 Jan 2021 18:18:15 -0800 Subject: [PATCH 1492/1894] net: dsa: fix led_classdev build errors Fix build errors when LEDS_CLASS=m and NET_DSA_HIRSCHMANN_HELLCREEK=y. This limits the latter to =m when LEDS_CLASS=m. microblaze-linux-ld: drivers/net/dsa/hirschmann/hellcreek_ptp.o: in function `hellcreek_ptp_setup': (.text+0xf80): undefined reference to `led_classdev_register_ext' microblaze-linux-ld: (.text+0xf94): undefined reference to `led_classdev_register_ext' microblaze-linux-ld: drivers/net/dsa/hirschmann/hellcreek_ptp.o: in function `hellcreek_ptp_free': (.text+0x1018): undefined reference to `led_classdev_unregister' microblaze-linux-ld: (.text+0x1024): undefined reference to `led_classdev_unregister' Signed-off-by: Randy Dunlap Reported-by: kernel test robot Link: lore.kernel.org/r/202101060655.iUvMJqS2-lkp@intel.com Cc: Kurt Kanzenbach Link: https://lore.kernel.org/r/20210106021815.31796-1-rdunlap@infradead.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/hirschmann/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/dsa/hirschmann/Kconfig b/drivers/net/dsa/hirschmann/Kconfig index 222dd35e2c9d8..e01191107a4ba 100644 --- a/drivers/net/dsa/hirschmann/Kconfig +++ b/drivers/net/dsa/hirschmann/Kconfig @@ -4,6 +4,7 @@ config NET_DSA_HIRSCHMANN_HELLCREEK depends on HAS_IOMEM depends on NET_DSA depends on PTP_1588_CLOCK + depends on LEDS_CLASS select NET_DSA_TAG_HELLCREEK help This driver adds support for Hirschmann Hellcreek TSN switches. -- GitLab From 1f685e6adbbe3c7b1bd9053be771b898d9efa655 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 5 Jan 2021 20:25:31 -0800 Subject: [PATCH 1493/1894] ptp: ptp_ines: prevent build when HAS_IOMEM is not set ptp_ines.c uses devm_platform_ioremap_resource(), which is only built/available when CONFIG_HAS_IOMEM is enabled. CONFIG_HAS_IOMEM is not enabled for arch/s390/, so builds on S390 have a build error: s390-linux-ld: drivers/ptp/ptp_ines.o: in function `ines_ptp_ctrl_probe': ptp_ines.c:(.text+0x17e6): undefined reference to `devm_platform_ioremap_resource' Prevent builds of ptp_ines.c when HAS_IOMEM is not set. Fixes: bad1eaa6ac31 ("ptp: Add a driver for InES time stamping IP core.") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Link: lore.kernel.org/r/202101031125.ZEFCUiKi-lkp@intel.com Acked-by: Richard Cochran Link: https://lore.kernel.org/r/20210106042531.1351-1-rdunlap@infradead.org Signed-off-by: Jakub Kicinski --- drivers/ptp/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig index d2bf05ccbbe20..f2edef0df40f5 100644 --- a/drivers/ptp/Kconfig +++ b/drivers/ptp/Kconfig @@ -79,6 +79,7 @@ config DP83640_PHY config PTP_1588_CLOCK_INES tristate "ZHAW InES PTP time stamping IP core" depends on NETWORK_PHY_TIMESTAMPING + depends on HAS_IOMEM depends on PHYLIB depends on PTP_1588_CLOCK help -- GitLab From c4aec381ab98c9189d47b935832541d520f1f67f Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Tue, 15 Dec 2020 11:32:37 +0100 Subject: [PATCH 1494/1894] can: m_can: m_can_class_unregister(): remove erroneous m_can_clk_stop() In m_can_class_register() the clock is started, but stopped on exit. When calling m_can_class_unregister(), the clock is stopped a second time. This patch removes the erroneous m_can_clk_stop() in m_can_class_unregister(). Fixes: f524f829b75a ("can: m_can: Create a m_can platform framework") Cc: Dan Murphy Cc: Sriram Dash Reviewed-by: Sean Nyekjaer Link: https://lore.kernel.org/r/20201215103238.524029-2-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 2c9f124012760..da551fd0f5026 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -1852,8 +1852,6 @@ EXPORT_SYMBOL_GPL(m_can_class_register); void m_can_class_unregister(struct m_can_classdev *cdev) { unregister_candev(cdev->net); - - m_can_clk_stop(cdev); } EXPORT_SYMBOL_GPL(m_can_class_unregister); -- GitLab From aee2b3ccc8a63d1cd7da6a8a153d1f3712d40826 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Tue, 15 Dec 2020 11:32:38 +0100 Subject: [PATCH 1495/1894] can: tcan4x5x: fix bittiming const, use common bittiming from m_can driver According to the TCAN4550 datasheet "SLLSF91 - DECEMBER 2018" the tcan4x5x has the same bittiming constants as a m_can revision 3.2.x/3.3.0. The tcan4x5x chip I'm using identifies itself as m_can revision 3.2.1, so remove the tcan4x5x specific bittiming values and rely on the values in the m_can driver, which are selected according to core revision. Fixes: 5443c226ba91 ("can: tcan4x5x: Add tcan4x5x driver to the kernel") Cc: Dan Murphy Reviewed-by: Sean Nyekjaer Link: https://lore.kernel.org/r/20201215103238.524029-3-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/tcan4x5x.c | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/drivers/net/can/m_can/tcan4x5x.c b/drivers/net/can/m_can/tcan4x5x.c index 24c737c4fc446..970f0e9d19bfd 100644 --- a/drivers/net/can/m_can/tcan4x5x.c +++ b/drivers/net/can/m_can/tcan4x5x.c @@ -131,30 +131,6 @@ static inline struct tcan4x5x_priv *cdev_to_priv(struct m_can_classdev *cdev) } -static struct can_bittiming_const tcan4x5x_bittiming_const = { - .name = DEVICE_NAME, - .tseg1_min = 2, - .tseg1_max = 31, - .tseg2_min = 2, - .tseg2_max = 16, - .sjw_max = 16, - .brp_min = 1, - .brp_max = 32, - .brp_inc = 1, -}; - -static struct can_bittiming_const tcan4x5x_data_bittiming_const = { - .name = DEVICE_NAME, - .tseg1_min = 1, - .tseg1_max = 32, - .tseg2_min = 1, - .tseg2_max = 16, - .sjw_max = 16, - .brp_min = 1, - .brp_max = 32, - .brp_inc = 1, -}; - static void tcan4x5x_check_wake(struct tcan4x5x_priv *priv) { int wake_state = 0; @@ -469,8 +445,6 @@ static int tcan4x5x_can_probe(struct spi_device *spi) mcan_class->dev = &spi->dev; mcan_class->ops = &tcan4x5x_ops; mcan_class->is_peripheral = true; - mcan_class->bit_timing = &tcan4x5x_bittiming_const; - mcan_class->data_timing = &tcan4x5x_data_bittiming_const; mcan_class->net->irq = spi->irq; spi_set_drvdata(spi, priv); -- GitLab From a876e7e2a8e62712425be178d483ffdff09f0853 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Tue, 15 Dec 2020 06:54:54 -0800 Subject: [PATCH 1496/1894] HID: uclogic: remove h from printk format specifier See Documentation/core-api/printk-formats.rst. h should no longer be used in the format specifier for printk. Signed-off-by: Tom Rix Signed-off-by: Jiri Kosina --- drivers/hid/hid-uclogic-params.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-uclogic-params.c b/drivers/hid/hid-uclogic-params.c index d26d8cd98efcf..56406cee401ff 100644 --- a/drivers/hid/hid-uclogic-params.c +++ b/drivers/hid/hid-uclogic-params.c @@ -90,7 +90,7 @@ static int uclogic_params_get_str_desc(__u8 **pbuf, struct hid_device *hdev, goto cleanup; } else if (rc < 0) { hid_err(hdev, - "failed retrieving string descriptor #%hhu: %d\n", + "failed retrieving string descriptor #%u: %d\n", idx, rc); goto cleanup; } -- GitLab From 4d2b71634b5ad142617e430bc6ef659331a576d0 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Tue, 15 Dec 2020 06:59:28 -0800 Subject: [PATCH 1497/1894] HID: wiimote: remove h from printk format specifier See Documentation/core-api/printk-formats.rst. h should no longer be used in the format specifier for printk. Signed-off-by: Tom Rix Signed-off-by: Jiri Kosina --- drivers/hid/hid-wiimote-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-wiimote-core.c b/drivers/hid/hid-wiimote-core.c index 41012681cafd1..4399d6c6afef2 100644 --- a/drivers/hid/hid-wiimote-core.c +++ b/drivers/hid/hid-wiimote-core.c @@ -1482,7 +1482,7 @@ static void handler_return(struct wiimote_data *wdata, const __u8 *payload) wdata->state.cmd_err = err; wiimote_cmd_complete(wdata); } else if (err) { - hid_warn(wdata->hdev, "Remote error %hhu on req %hhu\n", err, + hid_warn(wdata->hdev, "Remote error %u on req %u\n", err, cmd); } } -- GitLab From 6086f02a18aeae795a61a3fc6566920891ea3b52 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Tue, 5 Jan 2021 22:41:37 +0100 Subject: [PATCH 1498/1894] can: mcp251xfd: mcp251xfd_handle_tefif(): fix TEF vs. TX race condition The mcp251xfd driver uses a TX FIFO for sending CAN frames and a TX Event FIFO (TEF) for completed TX-requests. The TEF event handling in the mcp251xfd_handle_tefif() function has a race condition. It first increments the tx-ring's tail counter to signal that there's room in the TX and TEF FIFO, then it increments the TEF FIFO in hardware. A running mcp251xfd_start_xmit() on a different CPU might not stop the txqueue (as the tx-ring still shows free space). The next mcp251xfd_start_xmit() will push a message into the chip and the TX complete event might overflow the TEF FIFO. This patch changes the order to fix the problem. Fixes: 68c0c1c7f966 ("can: mcp251xfd: tef-path: reduce number of SPI core requests to set UINC bit") Link: https://lore.kernel.org/r/20210105214138.3150886-2-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 77129d5f410b6..85a1a8b7c0e7a 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -1368,13 +1368,10 @@ static int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv) struct mcp251xfd_tx_ring *tx_ring = priv->tx; struct spi_transfer *last_xfer; - tx_ring->tail += len; - /* Increment the TEF FIFO tail pointer 'len' times in * a single SPI message. - */ - - /* Note: + * + * Note: * * "cs_change == 1" on the last transfer results in an * active chip select after the complete SPI @@ -1391,6 +1388,8 @@ static int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv) if (err) return err; + tx_ring->tail += len; + err = mcp251xfd_check_tef_tail(priv); if (err) return err; -- GitLab From 2fbb397f584077e3c90abd06829f5a1f66fdd5f4 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Tue, 5 Jan 2021 22:41:38 +0100 Subject: [PATCH 1499/1894] can: mcp251xfd: mcp251xfd_handle_rxif_ring(): first increment RX tail pointer in HW, then in driver The previous patch fixes a TEF vs. TX race condition, by first updating the TEF tail pointer in hardware, and then updating the driver internal pointer. The same pattern exists in the RX-path, too. This should be no problem, as the driver accesses the RX-FIFO from the interrupt handler only, thus the access is properly serialized. Fix the order here, too, so that the TEF- and RX-path look similar. Fixes: 1f652bb6bae7 ("can: mcp25xxfd: rx-path: reduce number of SPI core requests to set UINC bit") Link: https://lore.kernel.org/r/20210105214138.3150886-3-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 85a1a8b7c0e7a..36235afb0bc66 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -1552,10 +1552,8 @@ mcp251xfd_handle_rxif_ring(struct mcp251xfd_priv *priv, /* Increment the RX FIFO tail pointer 'len' times in a * single SPI message. - */ - ring->tail += len; - - /* Note: + * + * Note: * * "cs_change == 1" on the last transfer results in an * active chip select after the complete SPI @@ -1571,6 +1569,8 @@ mcp251xfd_handle_rxif_ring(struct mcp251xfd_priv *priv, last_xfer->cs_change = 1; if (err) return err; + + ring->tail += len; } return 0; -- GitLab From 1169ec8f5d71044082a9898bbd1f1bf4a690c5a4 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Mon, 4 Jan 2021 09:03:27 +0000 Subject: [PATCH 1500/1894] can: rcar: Kconfig: update help description for CAN_RCAR config The rcar_can driver also supports RZ/G SoC's, update the description to reflect this. Signed-off-by: Lad Prabhakar Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20210104090327.6547-1-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/rcar/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/rcar/Kconfig b/drivers/net/can/rcar/Kconfig index 8d36101b78e3e..29cabc20109e5 100644 --- a/drivers/net/can/rcar/Kconfig +++ b/drivers/net/can/rcar/Kconfig @@ -1,10 +1,10 @@ # SPDX-License-Identifier: GPL-2.0 config CAN_RCAR - tristate "Renesas R-Car CAN controller" + tristate "Renesas R-Car and RZ/G CAN controller" depends on ARCH_RENESAS || ARM help Say Y here if you want to use CAN controller found on Renesas R-Car - SoCs. + or RZ/G SoCs. To compile this driver as a module, choose M here: the module will be called rcar_can. -- GitLab From 6ee49118f87cf02b36f68812bc49855b7b627a2b Mon Sep 17 00:00:00 2001 From: Sriram Dash Date: Mon, 4 Jan 2021 18:01:34 +0530 Subject: [PATCH 1501/1894] MAINTAINERS: Update MCAN MMIO device driver maintainer Update Pankaj Sharma as maintainer for mcan mmio device driver as I will be moving to a different role. Signed-off-by: Sriram Dash Acked-by: Pankaj Sharma Link: https://lore.kernel.org/r/20210104123134.16930-1-sriram.dash@samsung.com Signed-off-by: Marc Kleine-Budde --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 7c1e45c416b17..b15514a770e3c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10847,7 +10847,7 @@ F: drivers/media/radio/radio-maxiradio* MCAN MMIO DEVICE DRIVER M: Dan Murphy -M: Sriram Dash +M: Pankaj Sharma L: linux-can@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/net/can/bosch,m_can.yaml -- GitLab From 91bc156817a3c2007332b64b4f85c32aafbbbea6 Mon Sep 17 00:00:00 2001 From: Jeremy Szu Date: Wed, 6 Jan 2021 21:05:46 +0800 Subject: [PATCH 1502/1894] ALSA: hda/realtek: fix right sounds and mute/micmute LEDs for HP machines * The HP ZBook Fury 15/17 G7 Mobile Workstation are using ALC285 codec which is using 0x04 to control mute LED and 0x01 to control micmute LED. * The right channel speaker is no sound and it needs to expose GPIO1 for initialing AMP. Add quirks to support them. Signed-off-by: Jeremy Szu Cc: Link: https://lore.kernel.org/r/20210106130549.100532-1-jeremy.szu@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 3c1d2a3fb1a4f..dd82ff2bd5d65 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7970,6 +7970,10 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8760, "HP", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x877a, "HP", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x877d, "HP", ALC236_FIXUP_HP_MUTE_LED), + SND_PCI_QUIRK(0x103c, 0x8780, "HP ZBook Fury 17 G7 Mobile Workstation", + ALC285_FIXUP_HP_GPIO_AMP_INIT), + SND_PCI_QUIRK(0x103c, 0x8783, "HP ZBook Fury 15 G7 Mobile Workstation", + ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x87c8, "HP", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87f4, "HP", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87f5, "HP", ALC287_FIXUP_HP_GPIO_LED), -- GitLab From 83b5bd628f65e6b4d1924b307d6a88a57827bdb0 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 4 Jan 2021 12:36:14 +0000 Subject: [PATCH 1503/1894] arm64: Move PSTATE.TCO setting to separate functions For consistency with __uaccess_{disable,enable}_hw_pan(), move the PSTATE.TCO setting into dedicated __uaccess_{disable,enable}_tco() functions. Signed-off-by: Catalin Marinas Acked-by: Vincenzo Frascino Acked-by: Mark Rutland --- arch/arm64/include/asm/uaccess.h | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 6f986e09a7815..f0fe0cc6abe0b 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -176,10 +176,21 @@ static inline void __uaccess_enable_hw_pan(void) * The Tag check override (TCO) bit disables temporarily the tag checking * preventing the issue. */ -static inline void uaccess_disable_privileged(void) +static inline void __uaccess_disable_tco(void) { asm volatile(ALTERNATIVE("nop", SET_PSTATE_TCO(0), ARM64_MTE, CONFIG_KASAN_HW_TAGS)); +} + +static inline void __uaccess_enable_tco(void) +{ + asm volatile(ALTERNATIVE("nop", SET_PSTATE_TCO(1), + ARM64_MTE, CONFIG_KASAN_HW_TAGS)); +} + +static inline void uaccess_disable_privileged(void) +{ + __uaccess_disable_tco(); if (uaccess_ttbr0_disable()) return; @@ -189,8 +200,7 @@ static inline void uaccess_disable_privileged(void) static inline void uaccess_enable_privileged(void) { - asm volatile(ALTERNATIVE("nop", SET_PSTATE_TCO(1), - ARM64_MTE, CONFIG_KASAN_HW_TAGS)); + __uaccess_enable_tco(); if (uaccess_ttbr0_enable()) return; -- GitLab From 05cd84691eafcd7959a1e120d5e72c0dd98c5d91 Mon Sep 17 00:00:00 2001 From: Charan Teja Reddy Date: Tue, 5 Jan 2021 20:06:39 +0530 Subject: [PATCH 1504/1894] dmabuf: fix use-after-free of dmabuf's file->f_inode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is observed 'use-after-free' on the dmabuf's file->f_inode with the race between closing the dmabuf file and reading the dmabuf's debug info. Consider the below scenario where P1 is closing the dma_buf file and P2 is reading the dma_buf's debug info in the system: P1 P2 dma_buf_debug_show() dma_buf_put() __fput() file->f_op->release() dput() .... dentry_unlink_inode() iput(dentry->d_inode) (where the inode is freed) mutex_lock(&db_list.lock) read 'dma_buf->file->f_inode' (the same inode is freed by P1) mutex_unlock(&db_list.lock) dentry->d_op->d_release()--> dma_buf_release() ..... mutex_lock(&db_list.lock) removes the dmabuf from the list mutex_unlock(&db_list.lock) In the above scenario, when dma_buf_put() is called on a dma_buf, it first frees the dma_buf's file->f_inode(=dentry->d_inode) and then removes this dma_buf from the system db_list. In between P2 traversing the db_list tries to access this dma_buf's file->f_inode that was freed by P1 which is a use-after-free case. Since, __fput() calls f_op->release first and then later calls the d_op->d_release, move the dma_buf's db_list removal from d_release() to f_op->release(). This ensures that dma_buf's file->f_inode is not accessed after it is released. Cc: # 5.4.x- Fixes: 4ab59c3c638c ("dma-buf: Move dma_buf_release() from fops to dentry_ops") Acked-by: Christian König Signed-off-by: Charan Teja Reddy Signed-off-by: Sumit Semwal Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/1609857399-31549-1-git-send-email-charante@codeaurora.org --- drivers/dma-buf/dma-buf.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index e63684d4cd904..9ad6397aaa97e 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -76,10 +76,6 @@ static void dma_buf_release(struct dentry *dentry) dmabuf->ops->release(dmabuf); - mutex_lock(&db_list.lock); - list_del(&dmabuf->list_node); - mutex_unlock(&db_list.lock); - if (dmabuf->resv == (struct dma_resv *)&dmabuf[1]) dma_resv_fini(dmabuf->resv); @@ -88,6 +84,22 @@ static void dma_buf_release(struct dentry *dentry) kfree(dmabuf); } +static int dma_buf_file_release(struct inode *inode, struct file *file) +{ + struct dma_buf *dmabuf; + + if (!is_dma_buf_file(file)) + return -EINVAL; + + dmabuf = file->private_data; + + mutex_lock(&db_list.lock); + list_del(&dmabuf->list_node); + mutex_unlock(&db_list.lock); + + return 0; +} + static const struct dentry_operations dma_buf_dentry_ops = { .d_dname = dmabuffs_dname, .d_release = dma_buf_release, @@ -413,6 +425,7 @@ static void dma_buf_show_fdinfo(struct seq_file *m, struct file *file) } static const struct file_operations dma_buf_fops = { + .release = dma_buf_file_release, .mmap = dma_buf_mmap_internal, .llseek = dma_buf_llseek, .poll = dma_buf_poll, -- GitLab From e89eed02a5f1b864fa5abafc8e8e71bd9fd66d1f Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 5 Jan 2021 20:53:42 +0100 Subject: [PATCH 1505/1894] kcov, usb: hide in_serving_softirq checks in __usb_hcd_giveback_urb Done opencode in_serving_softirq() checks in in_serving_softirq() to avoid cluttering the code, hide them in kcov helpers instead. Fixes: aee9ddb1d371 ("kcov, usb: only collect coverage from __usb_hcd_giveback_urb in softirq") Signed-off-by: Andrey Konovalov Link: https://lore.kernel.org/r/aeb430c5bb90b0ccdf1ec302c70831c1a47b9c45.1609876340.git.andreyknvl@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 8 +++----- include/linux/kcov.h | 21 +++++++++++++++++++++ 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 60886a7464c3a..ad5a0f405a75c 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1649,14 +1649,12 @@ static void __usb_hcd_giveback_urb(struct urb *urb) urb->status = status; /* * This function can be called in task context inside another remote - * coverage collection section, but KCOV doesn't support that kind of + * coverage collection section, but kcov doesn't support that kind of * recursion yet. Only collect coverage in softirq context for now. */ - if (in_serving_softirq()) - kcov_remote_start_usb((u64)urb->dev->bus->busnum); + kcov_remote_start_usb_softirq((u64)urb->dev->bus->busnum); urb->complete(urb); - if (in_serving_softirq()) - kcov_remote_stop(); + kcov_remote_stop_softirq(); usb_anchor_resume_wakeups(anchor); atomic_dec(&urb->use_count); diff --git a/include/linux/kcov.h b/include/linux/kcov.h index a10e84707d820..4e3037dc12048 100644 --- a/include/linux/kcov.h +++ b/include/linux/kcov.h @@ -52,6 +52,25 @@ static inline void kcov_remote_start_usb(u64 id) kcov_remote_start(kcov_remote_handle(KCOV_SUBSYSTEM_USB, id)); } +/* + * The softirq flavor of kcov_remote_*() functions is introduced as a temporary + * work around for kcov's lack of nested remote coverage sections support in + * task context. Adding suport for nested sections is tracked in: + * https://bugzilla.kernel.org/show_bug.cgi?id=210337 + */ + +static inline void kcov_remote_start_usb_softirq(u64 id) +{ + if (in_serving_softirq()) + kcov_remote_start_usb(id); +} + +static inline void kcov_remote_stop_softirq(void) +{ + if (in_serving_softirq()) + kcov_remote_stop(); +} + #else static inline void kcov_task_init(struct task_struct *t) {} @@ -66,6 +85,8 @@ static inline u64 kcov_common_handle(void) } static inline void kcov_remote_start_common(u64 id) {} static inline void kcov_remote_start_usb(u64 id) {} +static inline void kcov_remote_start_usb_softirq(u64 id) {} +static inline void kcov_remote_stop_softirq(void) {} #endif /* CONFIG_KCOV */ #endif /* _LINUX_KCOV_H */ -- GitLab From e2459108b5a0604c4b472cae2b3cb8d3444c77fb Mon Sep 17 00:00:00 2001 From: "taehyun.cho" Date: Thu, 7 Jan 2021 00:46:25 +0900 Subject: [PATCH 1506/1894] usb: gadget: enable super speed plus Enable Super speed plus in configfs to support USB3.1 Gen2. This ensures that when a USB gadget is plugged in, it is enumerated as Gen 2 and connected at 10 Gbps if the host and cable are capable of it. Many in-tree gadget functions (fs, midi, acm, ncm, mass_storage, etc.) already have SuperSpeed Plus support. Tested: plugged gadget into Linux host and saw: [284907.385986] usb 8-2: new SuperSpeedPlus Gen 2 USB device number 3 using xhci_hcd Tested-by: Lorenzo Colitti Acked-by: Felipe Balbi Signed-off-by: taehyun.cho Signed-off-by: Lorenzo Colitti Link: https://lore.kernel.org/r/20210106154625.2801030-1-lorenzo@google.com Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/configfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index 408a5332a975b..36ffb43f9c1a0 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -1543,7 +1543,7 @@ static const struct usb_gadget_driver configfs_driver_template = { .suspend = configfs_composite_suspend, .resume = configfs_composite_resume, - .max_speed = USB_SPEED_SUPER, + .max_speed = USB_SPEED_SUPER_PLUS, .driver = { .owner = THIS_MODULE, .name = "configfs-gadget", @@ -1583,7 +1583,7 @@ static struct config_group *gadgets_make( gi->composite.unbind = configfs_do_nothing; gi->composite.suspend = NULL; gi->composite.resume = NULL; - gi->composite.max_speed = USB_SPEED_SUPER; + gi->composite.max_speed = USB_SPEED_SUPER_PLUS; spin_lock_init(&gi->spinlock); mutex_init(&gi->lock); -- GitLab From 41952a66015466c3208aac96b14ffd92e0943589 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Wed, 6 Jan 2021 00:16:05 +0000 Subject: [PATCH 1507/1894] usb: typec: Fix copy paste error for NVIDIA alt-mode description The name of the module for the NVIDIA alt-mode is incorrect as it looks to be a copy-paste error from the entry above, update it to the correct typec_nvidia module name. Cc: Ajay Gupta Cc: Heikki Krogerus Signed-off-by: Peter Robinson Link: https://lore.kernel.org/r/20210106001605.167917-1-pbrobinson@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/altmodes/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/typec/altmodes/Kconfig b/drivers/usb/typec/altmodes/Kconfig index 187690fd1a5bd..60d375e9c3c7c 100644 --- a/drivers/usb/typec/altmodes/Kconfig +++ b/drivers/usb/typec/altmodes/Kconfig @@ -20,6 +20,6 @@ config TYPEC_NVIDIA_ALTMODE to enable support for VirtualLink devices with NVIDIA GPUs. To compile this driver as a module, choose M here: the - module will be called typec_displayport. + module will be called typec_nvidia. endmenu -- GitLab From 6c75c2bad36cfb43b144e6a0a76a69993c72097f Mon Sep 17 00:00:00 2001 From: Prashant Malani Date: Wed, 6 Jan 2021 19:49:04 -0800 Subject: [PATCH 1508/1894] usb: typec: Send uevent for num_altmodes update Generate a change uevent when the "number_of_alternate_modes" sysfs file for partners and plugs is updated by a port driver. Cc: Heikki Krogerus Cc: Benson Leung Signed-off-by: Prashant Malani Link: https://lore.kernel.org/r/20210107034904.4112029-1-pmalani@chromium.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/class.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c index ebfd3113a9a80..8f77669f9cf4f 100644 --- a/drivers/usb/typec/class.c +++ b/drivers/usb/typec/class.c @@ -766,6 +766,7 @@ int typec_partner_set_num_altmodes(struct typec_partner *partner, int num_altmod return ret; sysfs_notify(&partner->dev.kobj, NULL, "number_of_alternate_modes"); + kobject_uevent(&partner->dev.kobj, KOBJ_CHANGE); return 0; } @@ -923,6 +924,7 @@ int typec_plug_set_num_altmodes(struct typec_plug *plug, int num_altmodes) return ret; sysfs_notify(&plug->dev.kobj, NULL, "number_of_alternate_modes"); + kobject_uevent(&plug->dev.kobj, KOBJ_CHANGE); return 0; } -- GitLab From a5c7682aaaa10e42928d73de1c9e1e02d2b14c2e Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Mon, 4 Jan 2021 22:42:39 -0800 Subject: [PATCH 1509/1894] usb: dwc3: gadget: Clear wait flag on dequeue If an active transfer is dequeued, then the endpoint is freed to start a new transfer. Make sure to clear the endpoint's transfer wait flag for this case. Fixes: e0d19563eb6c ("usb: dwc3: gadget: Wait for transfer completion") Cc: stable@vger.kernel.org Acked-by: Felipe Balbi Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/b81cd5b5281cfbfdadb002c4bcf5c9be7c017cfd.1609828485.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 25f654b79e480..ee44321fee386 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1763,6 +1763,8 @@ static int dwc3_gadget_ep_dequeue(struct usb_ep *ep, list_for_each_entry_safe(r, t, &dep->started_list, list) dwc3_gadget_move_cancelled_request(r); + dep->flags &= ~DWC3_EP_WAIT_TRANSFER_COMPLETE; + goto out; } } -- GitLab From e0658f970a7f3d85431c6803b7d5169444fb11b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 5 Jan 2021 18:55:47 +0100 Subject: [PATCH 1510/1894] drm/radeon: stop re-init the TTM page pool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drivers are not supposed to init the page pool directly any more. Signed-off-by: Christian König Reviewed-by: Huang Rui Link: https://patchwork.freedesktop.org/patch/412153/ --- drivers/gpu/drm/radeon/radeon_ttm.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index d59ef6e92a402..23195d5d4e919 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -730,9 +730,6 @@ int radeon_ttm_init(struct radeon_device *rdev) } rdev->mman.initialized = true; - ttm_pool_init(&rdev->mman.bdev.pool, rdev->dev, rdev->need_swiotlb, - dma_addressing_limited(&rdev->pdev->dev)); - r = radeon_ttm_init_vram(rdev); if (r) { DRM_ERROR("Failed initializing VRAM heap.\n"); -- GitLab From a73858ef4d5e1d425e171f0f6a52864176a6a979 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 5 Jan 2021 18:56:56 +0100 Subject: [PATCH 1511/1894] drm/ttm: unexport ttm_pool_init/fini MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drivers are not supposed to use this directly any more. Signed-off-by: Christian König Reviewed-by: Huang Rui Link: https://patchwork.freedesktop.org/patch/412156/ --- drivers/gpu/drm/ttm/ttm_pool.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 7b2f60616750d..a00b7ab9c14cf 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -507,7 +507,6 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev, ttm_pool_type_init(&pool->caching[i].orders[j], pool, i, j); } -EXPORT_SYMBOL(ttm_pool_init); /** * ttm_pool_fini - Cleanup a pool @@ -525,7 +524,6 @@ void ttm_pool_fini(struct ttm_pool *pool) for (j = 0; j < MAX_ORDER; ++j) ttm_pool_type_fini(&pool->caching[i].orders[j]); } -EXPORT_SYMBOL(ttm_pool_fini); #ifdef CONFIG_DEBUG_FS /* Count the number of pages available in a pool_type */ -- GitLab From 1efd17e7acb6692bffc6c58718f41f27fdfd62f5 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 31 Dec 2020 08:53:19 +0800 Subject: [PATCH 1512/1894] iommu/vt-d: Fix misuse of ALIGN in qi_flush_piotlb() Use IS_ALIGNED() instead. Otherwise, an unaligned address will be ignored. Fixes: 33cd6e642d6a ("iommu/vt-d: Flush PASID-based iotlb for iova over first level") Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20201231005323.2178523-1-baolu.lu@linux.intel.com Signed-off-by: Will Deacon --- drivers/iommu/intel/dmar.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index b46dbfa6d0ed6..004feaed3c72c 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -1461,8 +1461,8 @@ void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr, int mask = ilog2(__roundup_pow_of_two(npages)); unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask)); - if (WARN_ON_ONCE(!ALIGN(addr, align))) - addr &= ~(align - 1); + if (WARN_ON_ONCE(!IS_ALIGNED(addr, align))) + addr = ALIGN_DOWN(addr, align); desc.qw0 = QI_EIOTLB_PASID(pasid) | QI_EIOTLB_DID(did) | -- GitLab From 4df7b2268ad81a74168130e1fb04550a8bc980e1 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 31 Dec 2020 08:53:22 +0800 Subject: [PATCH 1513/1894] Revert "iommu: Add quirk for Intel graphic devices in map_sg" This reverts commit 65f746e8285f0a67d43517d86fedb9e29ead49f2. As commit 8a473dbadccf ("drm/i915: Fix DMA mapped scatterlist walks") and commit 934941ed5a30 ("drm/i915: Fix DMA mapped scatterlist lookup") fixed the DMA scatterlist limitations in the i915 driver, remove this temporary workaround. Cc: Tvrtko Ursulin Cc: Tom Murphy Cc: Logan Gunthorpe Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20201231005323.2178523-4-baolu.lu@linux.intel.com Signed-off-by: Will Deacon --- drivers/iommu/dma-iommu.c | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index f0305e6aac1b8..4078358ed66ea 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -863,33 +863,6 @@ static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents, unsigned int cur_len = 0, max_len = dma_get_max_seg_size(dev); int i, count = 0; - /* - * The Intel graphic driver is used to assume that the returned - * sg list is not combound. This blocks the efforts of converting - * Intel IOMMU driver to dma-iommu api's. Add this quirk to make the - * device driver work and should be removed once it's fixed in i915 - * driver. - */ - if (IS_ENABLED(CONFIG_DRM_I915) && dev_is_pci(dev) && - to_pci_dev(dev)->vendor == PCI_VENDOR_ID_INTEL && - (to_pci_dev(dev)->class >> 16) == PCI_BASE_CLASS_DISPLAY) { - for_each_sg(sg, s, nents, i) { - unsigned int s_iova_off = sg_dma_address(s); - unsigned int s_length = sg_dma_len(s); - unsigned int s_iova_len = s->length; - - s->offset += s_iova_off; - s->length = s_length; - sg_dma_address(s) = dma_addr + s_iova_off; - sg_dma_len(s) = s_length; - dma_addr += s_iova_len; - - pr_info_once("sg combining disabled due to i915 driver\n"); - } - - return nents; - } - for_each_sg(sg, s, nents, i) { /* Restore this segment's original unaligned fields first */ unsigned int s_iova_off = sg_dma_address(s); -- GitLab From 420d42f6f9db27d88bc4f83e3e668fcdacbf7e29 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 31 Dec 2020 08:53:23 +0800 Subject: [PATCH 1514/1894] iommu/vt-d: Fix lockdep splat in sva bind()/unbind() Lock(&iommu->lock) without disabling irq causes lockdep warnings. ======================================================== WARNING: possible irq lock inversion dependency detected 5.11.0-rc1+ #828 Not tainted -------------------------------------------------------- kworker/0:1H/120 just changed the state of lock: ffffffffad9ea1b8 (device_domain_lock){..-.}-{2:2}, at: iommu_flush_dev_iotlb.part.0+0x32/0x120 but this lock took another, SOFTIRQ-unsafe lock in the past: (&iommu->lock){+.+.}-{2:2} and interrupts could create inverse lock ordering between them. other info that might help us debug this: Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&iommu->lock); local_irq_disable(); lock(device_domain_lock); lock(&iommu->lock); lock(device_domain_lock); *** DEADLOCK *** Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20201231005323.2178523-5-baolu.lu@linux.intel.com Signed-off-by: Will Deacon --- drivers/iommu/intel/svm.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 4fa248b98031c..9bcedd360235d 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -281,6 +281,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, struct dmar_domain *dmar_domain; struct device_domain_info *info; struct intel_svm *svm = NULL; + unsigned long iflags; int ret = 0; if (WARN_ON(!iommu) || !data) @@ -381,12 +382,12 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, * each bind of a new device even with an existing PASID, we need to * call the nested mode setup function here. */ - spin_lock(&iommu->lock); + spin_lock_irqsave(&iommu->lock, iflags); ret = intel_pasid_setup_nested(iommu, dev, (pgd_t *)(uintptr_t)data->gpgd, data->hpasid, &data->vendor.vtd, dmar_domain, data->addr_width); - spin_unlock(&iommu->lock); + spin_unlock_irqrestore(&iommu->lock, iflags); if (ret) { dev_err_ratelimited(dev, "Failed to set up PASID %llu in nested mode, Err %d\n", data->hpasid, ret); @@ -486,6 +487,7 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, struct device_domain_info *info; struct intel_svm_dev *sdev; struct intel_svm *svm = NULL; + unsigned long iflags; int pasid_max; int ret; @@ -605,14 +607,14 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, } } - spin_lock(&iommu->lock); + spin_lock_irqsave(&iommu->lock, iflags); ret = intel_pasid_setup_first_level(iommu, dev, mm ? mm->pgd : init_mm.pgd, svm->pasid, FLPT_DEFAULT_DID, (mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) | (cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0)); - spin_unlock(&iommu->lock); + spin_unlock_irqrestore(&iommu->lock, iflags); if (ret) { if (mm) mmu_notifier_unregister(&svm->notifier, mm); @@ -632,14 +634,14 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, * Binding a new device with existing PASID, need to setup * the PASID entry. */ - spin_lock(&iommu->lock); + spin_lock_irqsave(&iommu->lock, iflags); ret = intel_pasid_setup_first_level(iommu, dev, mm ? mm->pgd : init_mm.pgd, svm->pasid, FLPT_DEFAULT_DID, (mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) | (cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0)); - spin_unlock(&iommu->lock); + spin_unlock_irqrestore(&iommu->lock, iflags); if (ret) { kfree(sdev); goto out; -- GitLab From aded8c7c2b72f846a07a2c736b8e75bb8cf50a87 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Tue, 5 Jan 2021 16:50:38 -0800 Subject: [PATCH 1515/1894] iommu/arm-smmu-qcom: Initialize SCTLR of the bypass context On SM8150 it's occasionally observed that the boot hangs in between the writing of SMEs and context banks in arm_smmu_device_reset(). The problem seems to coincide with a display refresh happening after updating the stream mapping, but before clearing - and there by disabling translation - the context bank picked to emulate translation bypass. Resolve this by explicitly disabling the bypass context already in cfg_probe. Fixes: f9081b8ff593 ("iommu/arm-smmu-qcom: Implement S2CR quirk") Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20210106005038.4152731-1-bjorn.andersson@linaro.org Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 5dff7ffbef119..1b83d140742f3 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -196,6 +196,8 @@ static int qcom_smmu_cfg_probe(struct arm_smmu_device *smmu) set_bit(qsmmu->bypass_cbndx, smmu->context_map); + arm_smmu_cb_write(smmu, qsmmu->bypass_cbndx, ARM_SMMU_CB_SCTLR, 0); + reg = FIELD_PREP(ARM_SMMU_CBAR_TYPE, CBAR_TYPE_S1_TRANS_S2_BYPASS); arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBAR(qsmmu->bypass_cbndx), reg); } -- GitLab From 9ad9f45b3b91162b33abfe175ae75ab65718dbf5 Mon Sep 17 00:00:00 2001 From: Liu Yi L Date: Thu, 7 Jan 2021 00:03:55 +0800 Subject: [PATCH 1516/1894] iommu/vt-d: Move intel_iommu info from struct intel_svm to struct intel_svm_dev 'struct intel_svm' is shared by all devices bound to a give process, but records only a single pointer to a 'struct intel_iommu'. Consequently, cache invalidations may only be applied to a single DMAR unit, and are erroneously skipped for the other devices. In preparation for fixing this, rework the structures so that the iommu pointer resides in 'struct intel_svm_dev', allowing 'struct intel_svm' to track them in its device list. Fixes: 1c4f88b7f1f9 ("iommu/vt-d: Shared virtual address in scalable mode") Cc: Lu Baolu Cc: Jacob Pan Cc: Raj Ashok Cc: David Woodhouse Reported-by: Guo Kaijie Reported-by: Xin Zeng Signed-off-by: Guo Kaijie Signed-off-by: Xin Zeng Signed-off-by: Liu Yi L Tested-by: Guo Kaijie Cc: stable@vger.kernel.org # v5.0+ Acked-by: Lu Baolu Link: https://lore.kernel.org/r/1609949037-25291-2-git-send-email-yi.l.liu@intel.com Signed-off-by: Will Deacon --- drivers/iommu/intel/svm.c | 9 +++++---- include/linux/intel-iommu.h | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 9bcedd360235d..790ef3497e7e3 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -142,7 +142,7 @@ static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_d } desc.qw2 = 0; desc.qw3 = 0; - qi_submit_sync(svm->iommu, &desc, 1, 0); + qi_submit_sync(sdev->iommu, &desc, 1, 0); if (sdev->dev_iotlb) { desc.qw0 = QI_DEV_EIOTLB_PASID(svm->pasid) | @@ -166,7 +166,7 @@ static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_d } desc.qw2 = 0; desc.qw3 = 0; - qi_submit_sync(svm->iommu, &desc, 1, 0); + qi_submit_sync(sdev->iommu, &desc, 1, 0); } } @@ -211,7 +211,7 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) */ rcu_read_lock(); list_for_each_entry_rcu(sdev, &svm->devs, list) - intel_pasid_tear_down_entry(svm->iommu, sdev->dev, + intel_pasid_tear_down_entry(sdev->iommu, sdev->dev, svm->pasid, true); rcu_read_unlock(); @@ -364,6 +364,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, } sdev->dev = dev; sdev->sid = PCI_DEVID(info->bus, info->devfn); + sdev->iommu = iommu; /* Only count users if device has aux domains */ if (iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX)) @@ -548,6 +549,7 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, goto out; } sdev->dev = dev; + sdev->iommu = iommu; ret = intel_iommu_enable_pasid(iommu, dev); if (ret) { @@ -577,7 +579,6 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, kfree(sdev); goto out; } - svm->iommu = iommu; if (pasid_max > intel_pasid_max_id) pasid_max = intel_pasid_max_id; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index d956987ed032d..94522685a0d94 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -758,6 +758,7 @@ struct intel_svm_dev { struct list_head list; struct rcu_head rcu; struct device *dev; + struct intel_iommu *iommu; struct svm_dev_ops *ops; struct iommu_sva sva; u32 pasid; @@ -771,7 +772,6 @@ struct intel_svm { struct mmu_notifier notifier; struct mm_struct *mm; - struct intel_iommu *iommu; unsigned int flags; u32 pasid; int gpasid; /* In case that guest PASID is different from host PASID */ -- GitLab From 18abda7a2d555783d28ea1701f3ec95e96237a86 Mon Sep 17 00:00:00 2001 From: Liu Yi L Date: Thu, 7 Jan 2021 00:03:56 +0800 Subject: [PATCH 1517/1894] iommu/vt-d: Fix general protection fault in aux_detach_device() The aux-domain attach/detach are not tracked, some data structures might be used after free. This causes general protection faults when multiple subdevices are created and assigned to a same guest machine: | general protection fault, probably for non-canonical address 0xdead000000000100: 0000 [#1] SMP NOPTI | RIP: 0010:intel_iommu_aux_detach_device+0x12a/0x1f0 | [...] | Call Trace: | iommu_aux_detach_device+0x24/0x70 | vfio_mdev_detach_domain+0x3b/0x60 | ? vfio_mdev_set_domain+0x50/0x50 | iommu_group_for_each_dev+0x4f/0x80 | vfio_iommu_detach_group.isra.0+0x22/0x30 | vfio_iommu_type1_detach_group.cold+0x71/0x211 | ? find_exported_symbol_in_section+0x4a/0xd0 | ? each_symbol_section+0x28/0x50 | __vfio_group_unset_container+0x4d/0x150 | vfio_group_try_dissolve_container+0x25/0x30 | vfio_group_put_external_user+0x13/0x20 | kvm_vfio_group_put_external_user+0x27/0x40 [kvm] | kvm_vfio_destroy+0x45/0xb0 [kvm] | kvm_put_kvm+0x1bb/0x2e0 [kvm] | kvm_vm_release+0x22/0x30 [kvm] | __fput+0xcc/0x260 | ____fput+0xe/0x10 | task_work_run+0x8f/0xb0 | do_exit+0x358/0xaf0 | ? wake_up_state+0x10/0x20 | ? signal_wake_up_state+0x1a/0x30 | do_group_exit+0x47/0xb0 | __x64_sys_exit_group+0x18/0x20 | do_syscall_64+0x57/0x1d0 | entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fix the crash by tracking the subdevices when attaching and detaching aux-domains. Fixes: 67b8e02b5e76 ("iommu/vt-d: Aux-domain specific domain attach/detach") Co-developed-by: Xin Zeng Signed-off-by: Xin Zeng Signed-off-by: Liu Yi L Acked-by: Lu Baolu Link: https://lore.kernel.org/r/1609949037-25291-3-git-send-email-yi.l.liu@intel.com Signed-off-by: Will Deacon --- drivers/iommu/intel/iommu.c | 95 +++++++++++++++++++++++++++---------- include/linux/intel-iommu.h | 16 +++++-- 2 files changed, 82 insertions(+), 29 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 788119c5b021b..d7720a8362682 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1877,6 +1877,7 @@ static struct dmar_domain *alloc_domain(int flags) domain->flags |= DOMAIN_FLAG_USE_FIRST_LEVEL; domain->has_iotlb_device = false; INIT_LIST_HEAD(&domain->devices); + INIT_LIST_HEAD(&domain->subdevices); return domain; } @@ -2547,7 +2548,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, info->iommu = iommu; info->pasid_table = NULL; info->auxd_enabled = 0; - INIT_LIST_HEAD(&info->auxiliary_domains); + INIT_LIST_HEAD(&info->subdevices); if (dev && dev_is_pci(dev)) { struct pci_dev *pdev = to_pci_dev(info->dev); @@ -4475,33 +4476,61 @@ is_aux_domain(struct device *dev, struct iommu_domain *domain) domain->type == IOMMU_DOMAIN_UNMANAGED; } -static void auxiliary_link_device(struct dmar_domain *domain, - struct device *dev) +static inline struct subdev_domain_info * +lookup_subdev_info(struct dmar_domain *domain, struct device *dev) +{ + struct subdev_domain_info *sinfo; + + if (!list_empty(&domain->subdevices)) { + list_for_each_entry(sinfo, &domain->subdevices, link_domain) { + if (sinfo->pdev == dev) + return sinfo; + } + } + + return NULL; +} + +static int auxiliary_link_device(struct dmar_domain *domain, + struct device *dev) { struct device_domain_info *info = get_domain_info(dev); + struct subdev_domain_info *sinfo = lookup_subdev_info(domain, dev); assert_spin_locked(&device_domain_lock); if (WARN_ON(!info)) - return; + return -EINVAL; + + if (!sinfo) { + sinfo = kzalloc(sizeof(*sinfo), GFP_ATOMIC); + sinfo->domain = domain; + sinfo->pdev = dev; + list_add(&sinfo->link_phys, &info->subdevices); + list_add(&sinfo->link_domain, &domain->subdevices); + } - domain->auxd_refcnt++; - list_add(&domain->auxd, &info->auxiliary_domains); + return ++sinfo->users; } -static void auxiliary_unlink_device(struct dmar_domain *domain, - struct device *dev) +static int auxiliary_unlink_device(struct dmar_domain *domain, + struct device *dev) { struct device_domain_info *info = get_domain_info(dev); + struct subdev_domain_info *sinfo = lookup_subdev_info(domain, dev); + int ret; assert_spin_locked(&device_domain_lock); - if (WARN_ON(!info)) - return; + if (WARN_ON(!info || !sinfo || sinfo->users <= 0)) + return -EINVAL; - list_del(&domain->auxd); - domain->auxd_refcnt--; + ret = --sinfo->users; + if (!ret) { + list_del(&sinfo->link_phys); + list_del(&sinfo->link_domain); + kfree(sinfo); + } - if (!domain->auxd_refcnt && domain->default_pasid > 0) - ioasid_put(domain->default_pasid); + return ret; } static int aux_domain_add_dev(struct dmar_domain *domain, @@ -4530,6 +4559,19 @@ static int aux_domain_add_dev(struct dmar_domain *domain, } spin_lock_irqsave(&device_domain_lock, flags); + ret = auxiliary_link_device(domain, dev); + if (ret <= 0) + goto link_failed; + + /* + * Subdevices from the same physical device can be attached to the + * same domain. For such cases, only the first subdevice attachment + * needs to go through the full steps in this function. So if ret > + * 1, just goto out. + */ + if (ret > 1) + goto out; + /* * iommu->lock must be held to attach domain to iommu and setup the * pasid entry for second level translation. @@ -4548,10 +4590,9 @@ static int aux_domain_add_dev(struct dmar_domain *domain, domain->default_pasid); if (ret) goto table_failed; - spin_unlock(&iommu->lock); - - auxiliary_link_device(domain, dev); + spin_unlock(&iommu->lock); +out: spin_unlock_irqrestore(&device_domain_lock, flags); return 0; @@ -4560,8 +4601,10 @@ table_failed: domain_detach_iommu(domain, iommu); attach_failed: spin_unlock(&iommu->lock); + auxiliary_unlink_device(domain, dev); +link_failed: spin_unlock_irqrestore(&device_domain_lock, flags); - if (!domain->auxd_refcnt && domain->default_pasid > 0) + if (list_empty(&domain->subdevices) && domain->default_pasid > 0) ioasid_put(domain->default_pasid); return ret; @@ -4581,14 +4624,18 @@ static void aux_domain_remove_dev(struct dmar_domain *domain, info = get_domain_info(dev); iommu = info->iommu; - auxiliary_unlink_device(domain, dev); - - spin_lock(&iommu->lock); - intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid, false); - domain_detach_iommu(domain, iommu); - spin_unlock(&iommu->lock); + if (!auxiliary_unlink_device(domain, dev)) { + spin_lock(&iommu->lock); + intel_pasid_tear_down_entry(iommu, dev, + domain->default_pasid, false); + domain_detach_iommu(domain, iommu); + spin_unlock(&iommu->lock); + } spin_unlock_irqrestore(&device_domain_lock, flags); + + if (list_empty(&domain->subdevices) && domain->default_pasid > 0) + ioasid_put(domain->default_pasid); } static int prepare_domain_attach_device(struct iommu_domain *domain, diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 94522685a0d94..09c6a0bf38928 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -533,11 +533,10 @@ struct dmar_domain { /* Domain ids per IOMMU. Use u16 since * domain ids are 16 bit wide according * to VT-d spec, section 9.3 */ - unsigned int auxd_refcnt; /* Refcount of auxiliary attaching */ bool has_iotlb_device; struct list_head devices; /* all devices' list */ - struct list_head auxd; /* link to device's auxiliary list */ + struct list_head subdevices; /* all subdevices' list */ struct iova_domain iovad; /* iova's that belong to this domain */ struct dma_pte *pgd; /* virtual address */ @@ -610,14 +609,21 @@ struct intel_iommu { struct dmar_drhd_unit *drhd; }; +/* Per subdevice private data */ +struct subdev_domain_info { + struct list_head link_phys; /* link to phys device siblings */ + struct list_head link_domain; /* link to domain siblings */ + struct device *pdev; /* physical device derived from */ + struct dmar_domain *domain; /* aux-domain */ + int users; /* user count */ +}; + /* PCI domain-device relationship */ struct device_domain_info { struct list_head link; /* link to domain siblings */ struct list_head global; /* link to global list */ struct list_head table; /* link to pasid table */ - struct list_head auxiliary_domains; /* auxiliary domains - * attached to this device - */ + struct list_head subdevices; /* subdevices sibling */ u32 segment; /* PCI segment number */ u8 bus; /* PCI bus number */ u8 devfn; /* PCI devfn number */ -- GitLab From 7c29ada5e70083805bc3a68daa23441df421fbee Mon Sep 17 00:00:00 2001 From: Liu Yi L Date: Thu, 7 Jan 2021 00:03:57 +0800 Subject: [PATCH 1518/1894] iommu/vt-d: Fix ineffective devTLB invalidation for subdevices iommu_flush_dev_iotlb() is called to invalidate caches on a device but only loops over the devices which are fully-attached to the domain. For sub-devices, this is ineffective and can result in invalid caching entries left on the device. Fix the missing invalidation by adding a loop over the subdevices and ensuring that 'domain->has_iotlb_device' is updated when attaching to subdevices. Fixes: 67b8e02b5e76 ("iommu/vt-d: Aux-domain specific domain attach/detach") Signed-off-by: Liu Yi L Acked-by: Lu Baolu Link: https://lore.kernel.org/r/1609949037-25291-4-git-send-email-yi.l.liu@intel.com Signed-off-by: Will Deacon --- drivers/iommu/intel/iommu.c | 53 ++++++++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index d7720a8362682..65cf06d70bf4e 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -719,6 +719,8 @@ static int domain_update_device_node(struct dmar_domain *domain) return nid; } +static void domain_update_iotlb(struct dmar_domain *domain); + /* Some capabilities may be different across iommus */ static void domain_update_iommu_cap(struct dmar_domain *domain) { @@ -744,6 +746,8 @@ static void domain_update_iommu_cap(struct dmar_domain *domain) domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw - 1); else domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw); + + domain_update_iotlb(domain); } struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, @@ -1464,17 +1468,22 @@ static void domain_update_iotlb(struct dmar_domain *domain) assert_spin_locked(&device_domain_lock); - list_for_each_entry(info, &domain->devices, link) { - struct pci_dev *pdev; - - if (!info->dev || !dev_is_pci(info->dev)) - continue; - - pdev = to_pci_dev(info->dev); - if (pdev->ats_enabled) { + list_for_each_entry(info, &domain->devices, link) + if (info->ats_enabled) { has_iotlb_device = true; break; } + + if (!has_iotlb_device) { + struct subdev_domain_info *sinfo; + + list_for_each_entry(sinfo, &domain->subdevices, link_domain) { + info = get_domain_info(sinfo->pdev); + if (info && info->ats_enabled) { + has_iotlb_device = true; + break; + } + } } domain->has_iotlb_device = has_iotlb_device; @@ -1555,25 +1564,37 @@ static void iommu_disable_dev_iotlb(struct device_domain_info *info) #endif } +static void __iommu_flush_dev_iotlb(struct device_domain_info *info, + u64 addr, unsigned int mask) +{ + u16 sid, qdep; + + if (!info || !info->ats_enabled) + return; + + sid = info->bus << 8 | info->devfn; + qdep = info->ats_qdep; + qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, + qdep, addr, mask); +} + static void iommu_flush_dev_iotlb(struct dmar_domain *domain, u64 addr, unsigned mask) { - u16 sid, qdep; unsigned long flags; struct device_domain_info *info; + struct subdev_domain_info *sinfo; if (!domain->has_iotlb_device) return; spin_lock_irqsave(&device_domain_lock, flags); - list_for_each_entry(info, &domain->devices, link) { - if (!info->ats_enabled) - continue; + list_for_each_entry(info, &domain->devices, link) + __iommu_flush_dev_iotlb(info, addr, mask); - sid = info->bus << 8 | info->devfn; - qdep = info->ats_qdep; - qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, - qdep, addr, mask); + list_for_each_entry(sinfo, &domain->subdevices, link_domain) { + info = get_domain_info(sinfo->pdev); + __iommu_flush_dev_iotlb(info, addr, mask); } spin_unlock_irqrestore(&device_domain_lock, flags); } -- GitLab From 80c18e4ac20c9cde420cb3ffab48c936147cf07d Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 7 Jan 2021 03:15:41 +0000 Subject: [PATCH 1519/1894] io_uring: trigger eventfd for IOPOLL Make sure io_iopoll_complete() tries to wake up eventfd, which currently is skipped together with io_cqring_ev_posted() for non-SQPOLL IOPOLL. Add an iopoll version of io_cqring_ev_posted(), duplicates a bit of code, but they actually use different sets of wait queues may be for better. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 27a8c226abf8c..91e517ad1421c 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1713,6 +1713,16 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx) eventfd_signal(ctx->cq_ev_fd, 1); } +static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx) +{ + if (ctx->flags & IORING_SETUP_SQPOLL) { + if (waitqueue_active(&ctx->wait)) + wake_up(&ctx->wait); + } + if (io_should_trigger_evfd(ctx)) + eventfd_signal(ctx->cq_ev_fd, 1); +} + /* Returns true if there are no backlogged entries after the flush */ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, struct task_struct *tsk, @@ -2428,8 +2438,7 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events, } io_commit_cqring(ctx); - if (ctx->flags & IORING_SETUP_SQPOLL) - io_cqring_ev_posted(ctx); + io_cqring_ev_posted_iopoll(ctx); io_req_free_batch_finish(ctx, &rb); if (!list_empty(&again)) -- GitLab From 4aa84f2ffa81f71e15e5cffc2cc6090dbee78f8e Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 7 Jan 2021 03:15:42 +0000 Subject: [PATCH 1520/1894] io_uring: dont kill fasync under completion_lock CPU0 CPU1 ---- ---- lock(&new->fa_lock); local_irq_disable(); lock(&ctx->completion_lock); lock(&new->fa_lock); lock(&ctx->completion_lock); *** DEADLOCK *** Move kill_fasync() out of io_commit_cqring() to io_cqring_ev_posted(), so it doesn't hold completion_lock while doing it. That saves from the reported deadlock, and it's just nice to shorten the locking time and untangle nested locks (compl_lock -> wq_head::lock). Reported-by: syzbot+91ca3f25bd7f795f019c@syzkaller.appspotmail.com Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 91e517ad1421c..401316fe2ae2a 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1345,11 +1345,6 @@ static void __io_commit_cqring(struct io_ring_ctx *ctx) /* order cqe stores with ring update */ smp_store_release(&rings->cq.tail, ctx->cached_cq_tail); - - if (wq_has_sleeper(&ctx->cq_wait)) { - wake_up_interruptible(&ctx->cq_wait); - kill_fasync(&ctx->cq_fasync, SIGIO, POLL_IN); - } } static void io_put_identity(struct io_uring_task *tctx, struct io_kiocb *req) @@ -1711,6 +1706,10 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx) wake_up(&ctx->sq_data->wait); if (io_should_trigger_evfd(ctx)) eventfd_signal(ctx->cq_ev_fd, 1); + if (wq_has_sleeper(&ctx->cq_wait)) { + wake_up_interruptible(&ctx->cq_wait); + kill_fasync(&ctx->cq_fasync, SIGIO, POLL_IN); + } } static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx) @@ -1721,6 +1720,10 @@ static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx) } if (io_should_trigger_evfd(ctx)) eventfd_signal(ctx->cq_ev_fd, 1); + if (wq_has_sleeper(&ctx->cq_wait)) { + wake_up_interruptible(&ctx->cq_wait); + kill_fasync(&ctx->cq_fasync, SIGIO, POLL_IN); + } } /* Returns true if there are no backlogged entries after the flush */ -- GitLab From b1445e59cc9a10fdb8f83810ae1f4feb941ab36b Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 7 Jan 2021 03:15:43 +0000 Subject: [PATCH 1521/1894] io_uring: synchronise ev_posted() with waitqueues waitqueue_active() needs smp_mb() to be in sync with waitqueues modification, but we miss it in io_cqring_ev_posted*() apart from cq_wait() case. Take an smb_mb() out of wq_has_sleeper() making it waitqueue_active(), and place it a few lines before, so it can synchronise other waitqueue_active() as well. The patch doesn't add any additional overhead, so even if there are no problems currently, it's just safer to have it this way. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 401316fe2ae2a..cb57e0360fcbb 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1700,13 +1700,16 @@ static inline unsigned __io_cqring_events(struct io_ring_ctx *ctx) static void io_cqring_ev_posted(struct io_ring_ctx *ctx) { + /* see waitqueue_active() comment */ + smp_mb(); + if (waitqueue_active(&ctx->wait)) wake_up(&ctx->wait); if (ctx->sq_data && waitqueue_active(&ctx->sq_data->wait)) wake_up(&ctx->sq_data->wait); if (io_should_trigger_evfd(ctx)) eventfd_signal(ctx->cq_ev_fd, 1); - if (wq_has_sleeper(&ctx->cq_wait)) { + if (waitqueue_active(&ctx->cq_wait)) { wake_up_interruptible(&ctx->cq_wait); kill_fasync(&ctx->cq_fasync, SIGIO, POLL_IN); } @@ -1714,13 +1717,16 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx) static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx) { + /* see waitqueue_active() comment */ + smp_mb(); + if (ctx->flags & IORING_SETUP_SQPOLL) { if (waitqueue_active(&ctx->wait)) wake_up(&ctx->wait); } if (io_should_trigger_evfd(ctx)) eventfd_signal(ctx->cq_ev_fd, 1); - if (wq_has_sleeper(&ctx->cq_wait)) { + if (waitqueue_active(&ctx->cq_wait)) { wake_up_interruptible(&ctx->cq_wait); kill_fasync(&ctx->cq_fasync, SIGIO, POLL_IN); } -- GitLab From 71008734d27f2276fcef23a5e546d358430f2d52 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 16 Dec 2020 11:18:44 -0500 Subject: [PATCH 1522/1894] btrfs: print the actual offset in btrfs_root_name We're supposed to print the root_key.offset in btrfs_root_name in the case of a reloc root, not the objectid. Fix this helper to take the key so we have access to the offset when we need it. Fixes: 457f1864b569 ("btrfs: pretty print leaked root name") Reviewed-by: Qu Wenruo Reviewed-by: Nikolay Borisov Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 2 +- fs/btrfs/print-tree.c | 10 +++++----- fs/btrfs/print-tree.h | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1dfd4b2d0e1e8..6b35b7e881369 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1457,7 +1457,7 @@ void btrfs_check_leaked_roots(struct btrfs_fs_info *fs_info) root = list_first_entry(&fs_info->allocated_roots, struct btrfs_root, leak_list); btrfs_err(fs_info, "leaked root %s refcount %d", - btrfs_root_name(root->root_key.objectid, buf), + btrfs_root_name(&root->root_key, buf), refcount_read(&root->refs)); while (refcount_read(&root->refs) > 1) btrfs_put_root(root); diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index fe5e0026129d5..aae1027bd76a1 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -26,22 +26,22 @@ static const struct root_name_map root_map[] = { { BTRFS_DATA_RELOC_TREE_OBJECTID, "DATA_RELOC_TREE" }, }; -const char *btrfs_root_name(u64 objectid, char *buf) +const char *btrfs_root_name(const struct btrfs_key *key, char *buf) { int i; - if (objectid == BTRFS_TREE_RELOC_OBJECTID) { + if (key->objectid == BTRFS_TREE_RELOC_OBJECTID) { snprintf(buf, BTRFS_ROOT_NAME_BUF_LEN, - "TREE_RELOC offset=%llu", objectid); + "TREE_RELOC offset=%llu", key->offset); return buf; } for (i = 0; i < ARRAY_SIZE(root_map); i++) { - if (root_map[i].id == objectid) + if (root_map[i].id == key->objectid) return root_map[i].name; } - snprintf(buf, BTRFS_ROOT_NAME_BUF_LEN, "%llu", objectid); + snprintf(buf, BTRFS_ROOT_NAME_BUF_LEN, "%llu", key->objectid); return buf; } diff --git a/fs/btrfs/print-tree.h b/fs/btrfs/print-tree.h index 78b99385a503f..8c3e9319ec4ef 100644 --- a/fs/btrfs/print-tree.h +++ b/fs/btrfs/print-tree.h @@ -11,6 +11,6 @@ void btrfs_print_leaf(struct extent_buffer *l); void btrfs_print_tree(struct extent_buffer *c, bool follow); -const char *btrfs_root_name(u64 objectid, char *buf); +const char *btrfs_root_name(const struct btrfs_key *key, char *buf); #endif -- GitLab From 29b665cc51e8b602bf2a275734349494776e3dbc Mon Sep 17 00:00:00 2001 From: Su Yue Date: Sun, 3 Jan 2021 17:28:03 +0800 Subject: [PATCH 1523/1894] btrfs: prevent NULL pointer dereference in extent_io_tree_panic Some extent io trees are initialized with NULL private member (e.g. btrfs_device::alloc_state and btrfs_fs_info::excluded_extents). Dereference of a NULL tree->private as inode pointer will cause panic. Pass tree->fs_info as it's known to be valid in all cases. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=208929 Fixes: 05912a3c04eb ("btrfs: drop extent_io_ops::tree_fs_info callback") CC: stable@vger.kernel.org # 4.19+ Reviewed-by: Anand Jain Signed-off-by: Su Yue Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 6e3b72e63e422..c9cee458e001b 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -676,9 +676,7 @@ alloc_extent_state_atomic(struct extent_state *prealloc) static void extent_io_tree_panic(struct extent_io_tree *tree, int err) { - struct inode *inode = tree->private_data; - - btrfs_panic(btrfs_sb(inode->i_sb), err, + btrfs_panic(tree->fs_info, err, "locking error: extent tree was modified by another thread while locked"); } -- GitLab From 347fb0cfc9bab5195c6701e62eda488310d7938f Mon Sep 17 00:00:00 2001 From: Su Yue Date: Sun, 3 Jan 2021 17:28:04 +0800 Subject: [PATCH 1524/1894] btrfs: tree-checker: check if chunk item end overflows While mounting a crafted image provided by user, kernel panics due to the invalid chunk item whose end is less than start. [66.387422] loop: module loaded [66.389773] loop0: detected capacity change from 262144 to 0 [66.427708] BTRFS: device fsid a62e00e8-e94e-4200-8217-12444de93c2e devid 1 transid 12 /dev/loop0 scanned by mount (613) [66.431061] BTRFS info (device loop0): disk space caching is enabled [66.431078] BTRFS info (device loop0): has skinny extents [66.437101] BTRFS error: insert state: end < start 29360127 37748736 [66.437136] ------------[ cut here ]------------ [66.437140] WARNING: CPU: 16 PID: 613 at fs/btrfs/extent_io.c:557 insert_state.cold+0x1a/0x46 [btrfs] [66.437369] CPU: 16 PID: 613 Comm: mount Tainted: G O 5.11.0-rc1-custom #45 [66.437374] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ArchLinux 1.14.0-1 04/01/2014 [66.437378] RIP: 0010:insert_state.cold+0x1a/0x46 [btrfs] [66.437420] RSP: 0018:ffff93e5414c3908 EFLAGS: 00010286 [66.437427] RAX: 0000000000000000 RBX: 0000000001bfffff RCX: 0000000000000000 [66.437431] RDX: 0000000000000000 RSI: ffffffffb90d4660 RDI: 00000000ffffffff [66.437434] RBP: ffff93e5414c3938 R08: 0000000000000001 R09: 0000000000000001 [66.437438] R10: ffff93e5414c3658 R11: 0000000000000000 R12: ffff8ec782d72aa0 [66.437441] R13: ffff8ec78bc71628 R14: 0000000000000000 R15: 0000000002400000 [66.437447] FS: 00007f01386a8580(0000) GS:ffff8ec809000000(0000) knlGS:0000000000000000 [66.437451] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [66.437455] CR2: 00007f01382fa000 CR3: 0000000109a34000 CR4: 0000000000750ee0 [66.437460] PKRU: 55555554 [66.437464] Call Trace: [66.437475] set_extent_bit+0x652/0x740 [btrfs] [66.437539] set_extent_bits_nowait+0x1d/0x20 [btrfs] [66.437576] add_extent_mapping+0x1e0/0x2f0 [btrfs] [66.437621] read_one_chunk+0x33c/0x420 [btrfs] [66.437674] btrfs_read_chunk_tree+0x6a4/0x870 [btrfs] [66.437708] ? kvm_sched_clock_read+0x18/0x40 [66.437739] open_ctree+0xb32/0x1734 [btrfs] [66.437781] ? bdi_register_va+0x1b/0x20 [66.437788] ? super_setup_bdi_name+0x79/0xd0 [66.437810] btrfs_mount_root.cold+0x12/0xeb [btrfs] [66.437854] ? __kmalloc_track_caller+0x217/0x3b0 [66.437873] legacy_get_tree+0x34/0x60 [66.437880] vfs_get_tree+0x2d/0xc0 [66.437888] vfs_kern_mount.part.0+0x78/0xc0 [66.437897] vfs_kern_mount+0x13/0x20 [66.437902] btrfs_mount+0x11f/0x3c0 [btrfs] [66.437940] ? kfree+0x5ff/0x670 [66.437944] ? __kmalloc_track_caller+0x217/0x3b0 [66.437962] legacy_get_tree+0x34/0x60 [66.437974] vfs_get_tree+0x2d/0xc0 [66.437983] path_mount+0x48c/0xd30 [66.437998] __x64_sys_mount+0x108/0x140 [66.438011] do_syscall_64+0x38/0x50 [66.438018] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [66.438023] RIP: 0033:0x7f0138827f6e [66.438033] RSP: 002b:00007ffecd79edf8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5 [66.438040] RAX: ffffffffffffffda RBX: 00007f013894c264 RCX: 00007f0138827f6e [66.438044] RDX: 00005593a4a41360 RSI: 00005593a4a33690 RDI: 00005593a4a3a6c0 [66.438047] RBP: 00005593a4a33440 R08: 0000000000000000 R09: 0000000000000001 [66.438050] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 [66.438054] R13: 00005593a4a3a6c0 R14: 00005593a4a41360 R15: 00005593a4a33440 [66.438078] irq event stamp: 18169 [66.438082] hardirqs last enabled at (18175): [] console_unlock+0x4ff/0x5f0 [66.438088] hardirqs last disabled at (18180): [] console_unlock+0x467/0x5f0 [66.438092] softirqs last enabled at (16910): [] asm_call_irq_on_stack+0x12/0x20 [66.438097] softirqs last disabled at (16905): [] asm_call_irq_on_stack+0x12/0x20 [66.438103] ---[ end trace e114b111db64298b ]--- [66.438107] BTRFS error: found node 12582912 29360127 on insert of 37748736 29360127 [66.438127] BTRFS critical: panic in extent_io_tree_panic:679: locking error: extent tree was modified by another thread while locked (errno=-17 Object already exists) [66.441069] ------------[ cut here ]------------ [66.441072] kernel BUG at fs/btrfs/extent_io.c:679! [66.442064] invalid opcode: 0000 [#1] PREEMPT SMP NOPTI [66.443018] CPU: 16 PID: 613 Comm: mount Tainted: G W O 5.11.0-rc1-custom #45 [66.444538] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ArchLinux 1.14.0-1 04/01/2014 [66.446223] RIP: 0010:extent_io_tree_panic.isra.0+0x23/0x25 [btrfs] [66.450878] RSP: 0018:ffff93e5414c3948 EFLAGS: 00010246 [66.451840] RAX: 0000000000000000 RBX: 0000000001bfffff RCX: 0000000000000000 [66.453141] RDX: 0000000000000000 RSI: ffffffffb90d4660 RDI: 00000000ffffffff [66.454445] RBP: ffff93e5414c3948 R08: 0000000000000001 R09: 0000000000000001 [66.455743] R10: ffff93e5414c3658 R11: 0000000000000000 R12: ffff8ec782d728c0 [66.457055] R13: ffff8ec78bc71628 R14: ffff8ec782d72aa0 R15: 0000000002400000 [66.458356] FS: 00007f01386a8580(0000) GS:ffff8ec809000000(0000) knlGS:0000000000000000 [66.459841] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [66.460895] CR2: 00007f01382fa000 CR3: 0000000109a34000 CR4: 0000000000750ee0 [66.462196] PKRU: 55555554 [66.462692] Call Trace: [66.463139] set_extent_bit.cold+0x30/0x98 [btrfs] [66.464049] set_extent_bits_nowait+0x1d/0x20 [btrfs] [66.490466] add_extent_mapping+0x1e0/0x2f0 [btrfs] [66.514097] read_one_chunk+0x33c/0x420 [btrfs] [66.534976] btrfs_read_chunk_tree+0x6a4/0x870 [btrfs] [66.555718] ? kvm_sched_clock_read+0x18/0x40 [66.575758] open_ctree+0xb32/0x1734 [btrfs] [66.595272] ? bdi_register_va+0x1b/0x20 [66.614638] ? super_setup_bdi_name+0x79/0xd0 [66.633809] btrfs_mount_root.cold+0x12/0xeb [btrfs] [66.652938] ? __kmalloc_track_caller+0x217/0x3b0 [66.671925] legacy_get_tree+0x34/0x60 [66.690300] vfs_get_tree+0x2d/0xc0 [66.708221] vfs_kern_mount.part.0+0x78/0xc0 [66.725808] vfs_kern_mount+0x13/0x20 [66.742730] btrfs_mount+0x11f/0x3c0 [btrfs] [66.759350] ? kfree+0x5ff/0x670 [66.775441] ? __kmalloc_track_caller+0x217/0x3b0 [66.791750] legacy_get_tree+0x34/0x60 [66.807494] vfs_get_tree+0x2d/0xc0 [66.823349] path_mount+0x48c/0xd30 [66.838753] __x64_sys_mount+0x108/0x140 [66.854412] do_syscall_64+0x38/0x50 [66.869673] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [66.885093] RIP: 0033:0x7f0138827f6e [66.945613] RSP: 002b:00007ffecd79edf8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5 [66.977214] RAX: ffffffffffffffda RBX: 00007f013894c264 RCX: 00007f0138827f6e [66.994266] RDX: 00005593a4a41360 RSI: 00005593a4a33690 RDI: 00005593a4a3a6c0 [67.011544] RBP: 00005593a4a33440 R08: 0000000000000000 R09: 0000000000000001 [67.028836] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 [67.045812] R13: 00005593a4a3a6c0 R14: 00005593a4a41360 R15: 00005593a4a33440 [67.216138] ---[ end trace e114b111db64298c ]--- [67.237089] RIP: 0010:extent_io_tree_panic.isra.0+0x23/0x25 [btrfs] [67.325317] RSP: 0018:ffff93e5414c3948 EFLAGS: 00010246 [67.347946] RAX: 0000000000000000 RBX: 0000000001bfffff RCX: 0000000000000000 [67.371343] RDX: 0000000000000000 RSI: ffffffffb90d4660 RDI: 00000000ffffffff [67.394757] RBP: ffff93e5414c3948 R08: 0000000000000001 R09: 0000000000000001 [67.418409] R10: ffff93e5414c3658 R11: 0000000000000000 R12: ffff8ec782d728c0 [67.441906] R13: ffff8ec78bc71628 R14: ffff8ec782d72aa0 R15: 0000000002400000 [67.465436] FS: 00007f01386a8580(0000) GS:ffff8ec809000000(0000) knlGS:0000000000000000 [67.511660] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [67.535047] CR2: 00007f01382fa000 CR3: 0000000109a34000 CR4: 0000000000750ee0 [67.558449] PKRU: 55555554 [67.581146] note: mount[613] exited with preempt_count 2 The image has a chunk item which has a logical start 37748736 and length 18446744073701163008 (-8M). The calculated end 29360127 overflows. EEXIST was caught by insert_state() because of the duplicate end and extent_io_tree_panic() was called. Add overflow check of chunk item end to tree checker so it can be detected early at mount time. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=208929 CC: stable@vger.kernel.org # 4.19+ Reviewed-by: Anand Jain Signed-off-by: Su Yue Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/tree-checker.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 028e733e42f3b..582061c7b5471 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -760,6 +760,7 @@ int btrfs_check_chunk_valid(struct extent_buffer *leaf, { struct btrfs_fs_info *fs_info = leaf->fs_info; u64 length; + u64 chunk_end; u64 stripe_len; u16 num_stripes; u16 sub_stripes; @@ -814,6 +815,12 @@ int btrfs_check_chunk_valid(struct extent_buffer *leaf, "invalid chunk length, have %llu", length); return -EUCLEAN; } + if (unlikely(check_add_overflow(logical, length, &chunk_end))) { + chunk_err(leaf, chunk, logical, +"invalid chunk logical start and length, have logical start %llu length %llu", + logical, length); + return -EUCLEAN; + } if (unlikely(!is_power_of_2(stripe_len) || stripe_len != BTRFS_STRIPE_LEN)) { chunk_err(leaf, chunk, logical, "invalid chunk stripe length: %llu", -- GitLab From 50e31ef486afe60f128d42fb9620e2a63172c15c Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 29 Dec 2020 21:29:34 +0800 Subject: [PATCH 1525/1894] btrfs: reloc: fix wrong file extent type check to avoid false ENOENT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [BUG] There are several bug reports about recent kernel unable to relocate certain data block groups. Sometimes the error just goes away, but there is one reporter who can reproduce it reliably. The dmesg would look like: [438.260483] BTRFS info (device dm-10): balance: start -dvrange=34625344765952..34625344765953 [438.269018] BTRFS info (device dm-10): relocating block group 34625344765952 flags data|raid1 [450.439609] BTRFS info (device dm-10): found 167 extents, stage: move data extents [463.501781] BTRFS info (device dm-10): balance: ended with status: -2 [CAUSE] The ENOENT error is returned from the following call chain: add_data_references() |- delete_v1_space_cache(); |- if (!found) return -ENOENT; The variable @found is set to true if we find a data extent whose disk bytenr matches parameter @data_bytes. With extra debugging, the offending tree block looks like this: leaf bytenr = 42676709441536, data_bytenr = 34626327621632 ctime 1567904822.739884119 (2019-09-08 03:07:02) mtime 0.0 (1970-01-01 01:00:00) otime 0.0 (1970-01-01 01:00:00) item 27 key (51933 EXTENT_DATA 0) itemoff 9854 itemsize 53 generation 1517381 type 2 (prealloc) prealloc data disk byte 34626327621632 nr 262144 <<< prealloc data offset 0 nr 262144 item 28 key (52262 ROOT_ITEM 0) itemoff 9415 itemsize 439 generation 2618893 root_dirid 256 bytenr 42677048360960 level 3 refs 1 lastsnap 2618893 byte_limit 0 bytes_used 5557338112 flags 0x0(none) uuid d0d4361f-d231-6d40-8901-fe506e4b2b53 Although item 27 has disk bytenr 34626327621632, which matches the data_bytenr, its type is prealloc, not reg. This makes the existing code skip that item, and return ENOENT. [FIX] The code is modified in commit 19b546d7a1b2 ("btrfs: relocation: Use btrfs_find_all_leafs to locate data extent parent tree leaves"), before that commit, we use something like "if (type == BTRFS_FILE_EXTENT_INLINE) continue;" But in that offending commit, we use (type == BTRFS_FILE_EXTENT_REG), ignoring BTRFS_FILE_EXTENT_PREALLOC. Fix it by also checking BTRFS_FILE_EXTENT_PREALLOC. Reported-by: Stéphane Lesimple Link: https://lore.kernel.org/linux-btrfs/505cabfa88575ed6dbe7cb922d8914fb@lesimple.fr Fixes: 19b546d7a1b2 ("btrfs: relocation: Use btrfs_find_all_leafs to locate data extent parent tree leaves") CC: stable@vger.kernel.org # 5.6+ Tested-By: Stéphane Lesimple Reviewed-by: Su Yue Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/relocation.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 19b7db8b21171..df63ef64c5c0d 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2975,11 +2975,16 @@ static int delete_v1_space_cache(struct extent_buffer *leaf, return 0; for (i = 0; i < btrfs_header_nritems(leaf); i++) { + u8 type; + btrfs_item_key_to_cpu(leaf, &key, i); if (key.type != BTRFS_EXTENT_DATA_KEY) continue; ei = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); - if (btrfs_file_extent_type(leaf, ei) == BTRFS_FILE_EXTENT_REG && + type = btrfs_file_extent_type(leaf, ei); + + if ((type == BTRFS_FILE_EXTENT_REG || + type == BTRFS_FILE_EXTENT_PREALLOC) && btrfs_file_extent_disk_bytenr(leaf, ei) == data_bytenr) { found = true; space_cache_ino = key.objectid; -- GitLab From 04a6a536bc3fd1436fc78c546c6b3ecdccbfaf6d Mon Sep 17 00:00:00 2001 From: Satya Tangirala Date: Thu, 24 Dec 2020 04:49:54 +0000 Subject: [PATCH 1526/1894] fs: Fix freeze_bdev()/thaw_bdev() accounting of bd_fsfreeze_sb freeze/thaw_bdev() currently use bdev->bd_fsfreeze_count to infer whether or not bdev->bd_fsfreeze_sb is valid (it's valid iff bd_fsfreeze_count is non-zero). thaw_bdev() doesn't nullify bd_fsfreeze_sb. But this means a freeze_bdev() call followed by a thaw_bdev() call can leave bd_fsfreeze_sb with a non-null value, while bd_fsfreeze_count is zero. If freeze_bdev() is called again, and this time get_active_super() returns NULL (e.g. because the FS is unmounted), we'll end up with bd_fsfreeze_count > 0, but bd_fsfreeze_sb is *untouched* - it stays the same (now garbage) value. A subsequent thaw_bdev() will decide that the bd_fsfreeze_sb value is legitimate (since bd_fsfreeze_count > 0), and attempt to use it. Fix this by always setting bd_fsfreeze_sb to NULL when bd_fsfreeze_count is successfully decremented to 0 in thaw_sb(). Alternatively, we could set bd_fsfreeze_sb to whatever get_active_super() returns in freeze_bdev() whenever bd_fsfreeze_count is successfully incremented to 1 from 0 (which can be achieved cleanly by moving the line currently setting bd_fsfreeze_sb to immediately after the "sync:" label, but it might be a little too subtle/easily overlooked in future). This fixes the currently panicking xfstests generic/085. Fixes: 040f04bd2e82 ("fs: simplify freeze_bdev/thaw_bdev") Signed-off-by: Satya Tangirala Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Jens Axboe --- fs/block_dev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/block_dev.c b/fs/block_dev.c index 3e5b02f6606c4..e454c5a810436 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -605,6 +605,8 @@ int thaw_bdev(struct block_device *bdev) error = thaw_super(sb); if (error) bdev->bd_fsfreeze_count++; + else + bdev->bd_fsfreeze_sb = NULL; out: mutex_unlock(&bdev->bd_fsfreeze_mutex); return error; -- GitLab From 17ffd35809c34b9564edb10727d02eb62958ba5c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 5 Jan 2021 19:20:29 +0100 Subject: [PATCH 1527/1894] cpufreq: intel_pstate: Use HWP capabilities in intel_cpufreq_adjust_perf() If turbo P-states cannot be used, either due to the configuration of the processor, or because intel_pstate is not allowed to used them, the maximum available P-state with HWP enabled corresponds to the HWP_CAP.GUARANTEED value which is not static. It can be adjusted by an out-of-band agent or during an Intel Speed Select performance level change, so long as it remains less than or equal to HWP_CAP.MAX. However, if turbo P-states cannot be used, intel_cpufreq_adjust_perf() always uses pstate.max_pstate (set during the initialization of the driver only) as the maximum available P-state, so it may miss a change of the HWP_CAP.GUARANTEED value. Prevent that from happening by modifyig intel_cpufreq_adjust_perf() to always read the "guaranteed" and "maximum turbo" performance levels from the cached HWP_CAP value. Fixes: a365ab6b9dfb ("cpufreq: intel_pstate: Implement the ->adjust_perf() callback") Signed-off-by: Rafael J. Wysocki Acked-by: Srinivas Pandruvada --- drivers/cpufreq/intel_pstate.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 1a660466dd756..32bc11851b8d3 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2653,12 +2653,13 @@ static void intel_cpufreq_adjust_perf(unsigned int cpunum, unsigned long capacity) { struct cpudata *cpu = all_cpu_data[cpunum]; + u64 hwp_cap = READ_ONCE(cpu->hwp_cap_cached); int old_pstate = cpu->pstate.current_pstate; int cap_pstate, min_pstate, max_pstate, target_pstate; update_turbo_state(); - cap_pstate = global.turbo_disabled ? cpu->pstate.max_pstate : - cpu->pstate.turbo_pstate; + cap_pstate = global.turbo_disabled ? HWP_GUARANTEED_PERF(hwp_cap) : + HWP_HIGHEST_PERF(hwp_cap); /* Optimization: Avoid unnecessary divisions. */ -- GitLab From 943bdd0cecad06da8392a33093230e30e501eccc Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 5 Jan 2021 10:19:57 +0000 Subject: [PATCH 1528/1894] cpufreq: powernow-k8: pass policy rather than use cpufreq_cpu_get() Currently there is an unlikely case where cpufreq_cpu_get() returns a NULL policy and this will cause a NULL pointer dereference later on. Fix this by passing the policy to transition_frequency_fidvid() from the caller and hence eliminating the need for the cpufreq_cpu_get() and cpufreq_cpu_put(). Thanks to Viresh Kumar for suggesting the fix. Addresses-Coverity: ("Dereference null return") Fixes: b43a7ffbf33b ("cpufreq: Notify all policy->cpus in cpufreq_notify_transition()") Suggested-by: Viresh Kumar Signed-off-by: Colin Ian King Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/powernow-k8.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c index 0acc9e241cd7d..b9ccb6a3dad98 100644 --- a/drivers/cpufreq/powernow-k8.c +++ b/drivers/cpufreq/powernow-k8.c @@ -878,9 +878,9 @@ static int get_transition_latency(struct powernow_k8_data *data) /* Take a frequency, and issue the fid/vid transition command */ static int transition_frequency_fidvid(struct powernow_k8_data *data, - unsigned int index) + unsigned int index, + struct cpufreq_policy *policy) { - struct cpufreq_policy *policy; u32 fid = 0; u32 vid = 0; int res; @@ -912,9 +912,6 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, freqs.old = find_khz_freq_from_fid(data->currfid); freqs.new = find_khz_freq_from_fid(fid); - policy = cpufreq_cpu_get(smp_processor_id()); - cpufreq_cpu_put(policy); - cpufreq_freq_transition_begin(policy, &freqs); res = transition_fid_vid(data, fid, vid); cpufreq_freq_transition_end(policy, &freqs, res); @@ -969,7 +966,7 @@ static long powernowk8_target_fn(void *arg) powernow_k8_acpi_pst_values(data, newstate); - ret = transition_frequency_fidvid(data, newstate); + ret = transition_frequency_fidvid(data, newstate, pol); if (ret) { pr_err("transition frequency failed\n"); -- GitLab From aa7a1bb02bb44399be69b0a1cbb6495d9eec29fc Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 5 Jan 2021 19:19:18 +0100 Subject: [PATCH 1529/1894] ACPI: PM: s2idle: Drop unused local variables and related code Two local variables in drivers/acpi/x86/s2idle.c are never read, so drop them along with the code updating their values (in vain). Fixes: fef98671194b ("ACPI: PM: s2idle: Move x86-specific code to the x86 directory") Signed-off-by: Rafael J. Wysocki --- drivers/acpi/x86/s2idle.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c index 25fea34b544c6..2b69536cdccba 100644 --- a/drivers/acpi/x86/s2idle.c +++ b/drivers/acpi/x86/s2idle.c @@ -105,18 +105,8 @@ static void lpi_device_get_constraints_amd(void) for (i = 0; i < out_obj->package.count; i++) { union acpi_object *package = &out_obj->package.elements[i]; - struct lpi_device_info_amd info = { }; - if (package->type == ACPI_TYPE_INTEGER) { - switch (i) { - case 0: - info.revision = package->integer.value; - break; - case 1: - info.count = package->integer.value; - break; - } - } else if (package->type == ACPI_TYPE_PACKAGE) { + if (package->type == ACPI_TYPE_PACKAGE) { lpi_constraints_table = kcalloc(package->package.count, sizeof(*lpi_constraints_table), GFP_KERNEL); @@ -135,12 +125,10 @@ static void lpi_device_get_constraints_amd(void) for (k = 0; k < info_obj->package.count; ++k) { union acpi_object *obj = &info_obj->package.elements[k]; - union acpi_object *obj_new; list = &lpi_constraints_table[lpi_constraints_table_size]; list->min_dstate = -1; - obj_new = &obj[k]; switch (k) { case 0: dev_info.enabled = obj->integer.value; -- GitLab From ee61cfd955a64a58ed35cbcfc54068fcbd486945 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Thu, 31 Dec 2020 19:35:25 +0800 Subject: [PATCH 1530/1894] ACPI: scan: add stub acpi_create_platform_device() for !CONFIG_ACPI It adds a stub acpi_create_platform_device() for !CONFIG_ACPI build, so that caller doesn't have to deal with !CONFIG_ACPI build issue. Reported-by: kernel test robot Signed-off-by: Shawn Guo Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 2630c2e953f73..053bf05fb1f76 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -885,6 +885,13 @@ static inline int acpi_device_modalias(struct device *dev, return -ENODEV; } +static inline struct platform_device * +acpi_create_platform_device(struct acpi_device *adev, + struct property_entry *properties) +{ + return NULL; +} + static inline bool acpi_dma_supported(struct acpi_device *adev) { return false; -- GitLab From 240bdc605e6a9d0309bd003de3413f6f729eca18 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Tue, 29 Dec 2020 11:17:59 +0000 Subject: [PATCH 1531/1894] ACPI: Update Kconfig help text for items that are no longer modular The CONTAINER and HOTPLUG_MEMORY options mention modules but are bool only, so if selected are always built in. Drop the help text about modules. Signed-off-by: Peter Robinson Signed-off-by: Rafael J. Wysocki --- drivers/acpi/Kconfig | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index edf1558c11052..ebcf534514be3 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -395,9 +395,6 @@ config ACPI_CONTAINER This helps support hotplug of nodes, CPUs, and memory. - To compile this driver as a module, choose M here: - the module will be called container. - config ACPI_HOTPLUG_MEMORY bool "Memory Hotplug" depends on MEMORY_HOTPLUG @@ -411,9 +408,6 @@ config ACPI_HOTPLUG_MEMORY removing memory devices at runtime, you need not enable this driver. - To compile this driver as a module, choose M here: - the module will be called acpi_memhotplug. - config ACPI_HOTPLUG_IOAPIC bool depends on PCI -- GitLab From 47f4469970d8861bc06d2d4d45ac8200ff07c693 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Tue, 5 Jan 2021 17:11:45 +0800 Subject: [PATCH 1532/1894] Revert "device property: Keep secondary firmware node secondary by type" While commit d5dcce0c414f ("device property: Keep secondary firmware node secondary by type") describes everything correct in its commit message, the change it made does the opposite and original commit c15e1bdda436 ("device property: Fix the secondary firmware node handling in set_primary_fwnode()") was fully correct. Revert the former one here and improve documentation in the next patch. Fixes: d5dcce0c414f ("device property: Keep secondary firmware node secondary by type") Signed-off-by: Bard Liao Reviewed-by: Andy Shevchenko Reviewed-by: Heikki Krogerus Cc: 5.10+ # 5.10+ Signed-off-by: Rafael J. Wysocki --- drivers/base/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 25e08e5f40bd9..51b9545a050b7 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -4433,7 +4433,7 @@ void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode) if (fwnode_is_primary(fn)) { dev->fwnode = fn->secondary; if (!(parent && fn == parent->fwnode)) - fn->secondary = ERR_PTR(-ENODEV); + fn->secondary = NULL; } else { dev->fwnode = NULL; } -- GitLab From 3f7bddaf5d5a83aa2eb1e6d72db221d3ec43c813 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Tue, 5 Jan 2021 17:11:46 +0800 Subject: [PATCH 1533/1894] device property: add description of fwnode cases There are only four valid fwnode cases which are - primary --> secondary --> -ENODEV - primary --> NULL - secondary --> -ENODEV - NULL dev->fwnode should be converted between the 4 cases above no matter how/when set_primary_fwnode() and set_secondary_fwnode() are called. Describe it in the code so people will keep it in mind. Signed-off-by: Bard Liao Reviewed-by: Andy Shevchenko Reviewed-by: Heikki Krogerus [ rjw: Comment edit ] Signed-off-by: Rafael J. Wysocki --- drivers/base/core.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/base/core.c b/drivers/base/core.c index 51b9545a050b7..14f1658167425 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -4414,6 +4414,12 @@ static inline bool fwnode_is_primary(struct fwnode_handle *fwnode) * * Set the device's firmware node pointer to @fwnode, but if a secondary * firmware node of the device is present, preserve it. + * + * Valid fwnode cases are: + * - primary --> secondary --> -ENODEV + * - primary --> NULL + * - secondary --> -ENODEV + * - NULL */ void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode) { @@ -4432,6 +4438,7 @@ void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode) } else { if (fwnode_is_primary(fn)) { dev->fwnode = fn->secondary; + /* Set fn->secondary = NULL, so fn remains the primary fwnode */ if (!(parent && fn == parent->fwnode)) fn->secondary = NULL; } else { -- GitLab From 2b5f09cadfc576817c0450e01d454f750909b103 Mon Sep 17 00:00:00 2001 From: Kuogee Hsieh Date: Fri, 18 Dec 2020 09:53:40 -0800 Subject: [PATCH 1534/1894] drm/msm/dp: postpone irq_hpd event during connection pending state irq_hpd event can only be executed at connected state. Therefore irq_hpd event should be postponed if it happened at connection pending state. This patch also make sure both link rate and lane are valid before start link training. Signed-off-by: Kuogee Hsieh Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/dp/dp_display.c | 7 +++++++ drivers/gpu/drm/msm/dp/dp_panel.c | 12 +++++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 6e971d552911f..3bc7ed21de286 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -693,6 +693,13 @@ static int dp_irq_hpd_handle(struct dp_display_private *dp, u32 data) return 0; } + if (state == ST_CONNECT_PENDING) { + /* wait until ST_CONNECTED */ + dp_add_event(dp, EV_IRQ_HPD_INT, 0, 1); /* delay = 1 */ + mutex_unlock(&dp->event_mutex); + return 0; + } + ret = dp_display_usbpd_attention_cb(&dp->pdev->dev); if (ret == -ECONNRESET) { /* cable unplugged */ dp->core_initialized = false; diff --git a/drivers/gpu/drm/msm/dp/dp_panel.c b/drivers/gpu/drm/msm/dp/dp_panel.c index 97dca3e378b7b..d1780bcac8cc8 100644 --- a/drivers/gpu/drm/msm/dp/dp_panel.c +++ b/drivers/gpu/drm/msm/dp/dp_panel.c @@ -167,12 +167,18 @@ int dp_panel_read_sink_caps(struct dp_panel *dp_panel, panel = container_of(dp_panel, struct dp_panel_private, dp_panel); rc = dp_panel_read_dpcd(dp_panel); + if (rc) { + DRM_ERROR("read dpcd failed %d\n", rc); + return rc; + } + bw_code = drm_dp_link_rate_to_bw_code(dp_panel->link_info.rate); - if (rc || !is_link_rate_valid(bw_code) || + if (!is_link_rate_valid(bw_code) || !is_lane_count_valid(dp_panel->link_info.num_lanes) || (bw_code > dp_panel->max_bw_code)) { - DRM_ERROR("read dpcd failed %d\n", rc); - return rc; + DRM_ERROR("Illegal link rate=%d lane=%d\n", dp_panel->link_info.rate, + dp_panel->link_info.num_lanes); + return -EINVAL; } if (dp_panel->dfp_present) { -- GitLab From d863f0c7b536288e2bd40cbc01c10465dd226b11 Mon Sep 17 00:00:00 2001 From: Craig Tatlor Date: Wed, 30 Dec 2020 17:29:42 +0200 Subject: [PATCH 1535/1894] drm/msm: Call msm_init_vram before binding the gpu vram.size is needed when binding a gpu without an iommu and is defined in msm_init_vram(), so run that before binding it. Signed-off-by: Craig Tatlor Reviewed-by: Brian Masney Tested-by: Alexey Minnekhanov Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_drv.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index ce9bb6e929c22..549ffb60e9ca1 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -457,14 +457,14 @@ static int msm_drm_init(struct device *dev, struct drm_driver *drv) drm_mode_config_init(ddev); - /* Bind all our sub-components: */ - ret = component_bind_all(dev, ddev); + ret = msm_init_vram(ddev); if (ret) goto err_destroy_mdss; - ret = msm_init_vram(ddev); + /* Bind all our sub-components: */ + ret = component_bind_all(dev, ddev); if (ret) - goto err_msm_uninit; + goto err_destroy_mdss; dma_set_max_seg_size(dev, UINT_MAX); -- GitLab From 3f7759e7b7585a0bffda06d4eddc6b0b850ef6c3 Mon Sep 17 00:00:00 2001 From: Iskren Chernev Date: Wed, 30 Dec 2020 17:29:43 +0200 Subject: [PATCH 1536/1894] drm/msm: Add modparam to allow vram carveout Using the GPU with a VRAM Carveout is a security vulnerability. Nevertheless it is sometimes required, especially when no IOMMU implementation is available for a certain platform. Signed-off-by: Iskren Chernev Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a2xx_gpu.c | 6 ++++-- drivers/gpu/drm/msm/adreno/a3xx_gpu.c | 6 ++++-- drivers/gpu/drm/msm/adreno/a4xx_gpu.c | 6 ++++-- drivers/gpu/drm/msm/adreno/adreno_device.c | 4 ++++ drivers/gpu/drm/msm/adreno/adreno_gpu.h | 1 + 5 files changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a2xx_gpu.c b/drivers/gpu/drm/msm/adreno/a2xx_gpu.c index 7e82c41a85f1a..bdc989183c648 100644 --- a/drivers/gpu/drm/msm/adreno/a2xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a2xx_gpu.c @@ -534,8 +534,10 @@ struct msm_gpu *a2xx_gpu_init(struct drm_device *dev) if (!gpu->aspace) { dev_err(dev->dev, "No memory protection without MMU\n"); - ret = -ENXIO; - goto fail; + if (!allow_vram_carveout) { + ret = -ENXIO; + goto fail; + } } return gpu; diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c index 93da6683a8661..4534633fe7cdb 100644 --- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c @@ -564,8 +564,10 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev) * implement a cmdstream validator. */ DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n"); - ret = -ENXIO; - goto fail; + if (!allow_vram_carveout) { + ret = -ENXIO; + goto fail; + } } icc_path = devm_of_icc_get(&pdev->dev, "gfx-mem"); diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c index c0be3a0f36b2c..82bebb40234de 100644 --- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c @@ -692,8 +692,10 @@ struct msm_gpu *a4xx_gpu_init(struct drm_device *dev) * implement a cmdstream validator. */ DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n"); - ret = -ENXIO; - goto fail; + if (!allow_vram_carveout) { + ret = -ENXIO; + goto fail; + } } icc_path = devm_of_icc_get(&pdev->dev, "gfx-mem"); diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index 87c8b033ad1a6..12e75ba360f95 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -18,6 +18,10 @@ bool snapshot_debugbus = false; MODULE_PARM_DESC(snapshot_debugbus, "Include debugbus sections in GPU devcoredump (if not fused off)"); module_param_named(snapshot_debugbus, snapshot_debugbus, bool, 0600); +bool allow_vram_carveout = false; +MODULE_PARM_DESC(allow_vram_carveout, "Allow using VRAM Carveout, in place of IOMMU"); +module_param_named(allow_vram_carveout, allow_vram_carveout, bool, 0600); + static const struct adreno_info gpulist[] = { { .rev = ADRENO_REV(2, 0, 0, 0), diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index c3775f79525a7..fe5444a1482ae 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -18,6 +18,7 @@ #include "adreno_pm4.xml.h" extern bool snapshot_debugbus; +extern bool allow_vram_carveout; enum { ADRENO_FW_PM4 = 0, -- GitLab From c4151604f0603d5700072183a05828ff87d764e4 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Mon, 21 Dec 2020 06:13:20 +0100 Subject: [PATCH 1537/1894] cpufreq: intel_pstate: remove obsolete functions percent_fp() was used in intel_pstate_pid_reset(), which was removed in commit 9d0ef7af1f2d ("cpufreq: intel_pstate: Do not use PID-based P-state selection") and hence, percent_fp() is unused since then. percent_ext_fp() was last used in intel_pstate_update_perf_limits(), which was refactored in commit 1a4fe38add8b ("cpufreq: intel_pstate: Remove max/min fractions to limit performance"), and hence, percent_ext_fp() is unused since then. make CC=clang W=1 points us those unused functions: drivers/cpufreq/intel_pstate.c:79:23: warning: unused function 'percent_fp' [-Wunused-function] static inline int32_t percent_fp(int percent) ^ drivers/cpufreq/intel_pstate.c:94:23: warning: unused function 'percent_ext_fp' [-Wunused-function] static inline int32_t percent_ext_fp(int percent) ^ Remove those obsolete functions. Signed-off-by: Lukas Bulwahn Reviewed-by: Nathan Chancellor Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 32bc11851b8d3..be05e038d956c 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -76,11 +76,6 @@ static inline int ceiling_fp(int32_t x) return ret; } -static inline int32_t percent_fp(int percent) -{ - return div_fp(percent, 100); -} - static inline u64 mul_ext_fp(u64 x, u64 y) { return (x * y) >> EXT_FRAC_BITS; @@ -91,11 +86,6 @@ static inline u64 div_ext_fp(u64 x, u64 y) return div64_u64(x << EXT_FRAC_BITS, y); } -static inline int32_t percent_ext_fp(int percent) -{ - return div_ext_fp(percent, 100); -} - /** * struct sample - Store performance sample * @core_avg_perf: Ratio of APERF/MPERF which is the actual average -- GitLab From 00fd44a1a4700718d5d962432b55c09820f7e709 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Mon, 4 Jan 2021 20:30:41 +0100 Subject: [PATCH 1538/1894] drm/msm: Only enable A6xx LLCC code on A6xx Using this code on A5xx (and probably older too) causes a smmu bug. Fixes: 474dadb8b0d5 ("drm/msm/a6xx: Add support for using system cache(LLC)") Signed-off-by: Konrad Dybcio Tested-by: AngeloGioacchino Del Regno Reviewed-by: Jordan Crouse Reviewed-by: Sai Prakash Ranjan Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 21 ++++++++++++--------- drivers/gpu/drm/msm/adreno/adreno_gpu.h | 5 +++++ 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 6cf9975e951ed..f09175698827a 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -191,8 +191,6 @@ adreno_iommu_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); - struct io_pgtable_domain_attr pgtbl_cfg; struct iommu_domain *iommu; struct msm_mmu *mmu; struct msm_gem_address_space *aspace; @@ -202,13 +200,18 @@ adreno_iommu_create_address_space(struct msm_gpu *gpu, if (!iommu) return NULL; - /* - * This allows GPU to set the bus attributes required to use system - * cache on behalf of the iommu page table walker. - */ - if (!IS_ERR(a6xx_gpu->htw_llc_slice)) { - pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_ARM_OUTER_WBWA; - iommu_domain_set_attr(iommu, DOMAIN_ATTR_IO_PGTABLE_CFG, &pgtbl_cfg); + + if (adreno_is_a6xx(adreno_gpu)) { + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + struct io_pgtable_domain_attr pgtbl_cfg; + /* + * This allows GPU to set the bus attributes required to use system + * cache on behalf of the iommu page table walker. + */ + if (!IS_ERR(a6xx_gpu->htw_llc_slice)) { + pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_ARM_OUTER_WBWA; + iommu_domain_set_attr(iommu, DOMAIN_ATTR_IO_PGTABLE_CFG, &pgtbl_cfg); + } } mmu = msm_iommu_new(&pdev->dev, iommu); diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index fe5444a1482ae..b3d9a333591b2 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -212,6 +212,11 @@ static inline int adreno_is_a540(struct adreno_gpu *gpu) return gpu->revn == 540; } +static inline bool adreno_is_a6xx(struct adreno_gpu *gpu) +{ + return ((gpu->revn < 700 && gpu->revn > 599)); +} + static inline int adreno_is_a618(struct adreno_gpu *gpu) { return gpu->revn == 618; -- GitLab From 3c638cdb8ecc0442552156e0fed8708dd2c7f35b Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 16 Dec 2020 12:07:53 +0200 Subject: [PATCH 1539/1894] RDMA/restrack: Don't treat as an error allocation ID wrapping xa_alloc_cyclic() call returns positive number if ID allocation succeeded but wrapped. It is not an error, so normalize the "ret" variable to zero as marker of not-an-error. drivers/infiniband/core/restrack.c:261 rdma_restrack_add() warn: 'ret' can be either negative or positive Fixes: fd47c2f99f04 ("RDMA/restrack: Convert internal DB from hash to XArray") Link: https://lore.kernel.org/r/20201216100753.1127638-1-leon@kernel.org Reported-by: Dan Carpenter Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/restrack.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index e0a41c8670023..ff1551b3cf619 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -254,6 +254,7 @@ void rdma_restrack_add(struct rdma_restrack_entry *res) } else { ret = xa_alloc_cyclic(&rt->xa, &res->id, res, xa_limit_32b, &rt->next_id, GFP_KERNEL); + ret = (ret < 0) ? ret : 0; } out: -- GitLab From afded6d83aa7b35dab675c730528109cc58d6847 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 28 Dec 2020 20:43:13 +0200 Subject: [PATCH 1540/1894] misc: pvpanic: Check devm_ioport_map() for NULL Inconveniently devm_ioport_map() and devm_ioremap_resource() return errors differently, i.e. former uses simply NULL pointer, while the latter an error pointer. Due to this, we have to check each of them separately. Fixes: f104060813fe ("misc: pvpanic: Combine ACPI and platform drivers") Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20201228184313.57610-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/pvpanic.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/misc/pvpanic.c b/drivers/misc/pvpanic.c index 951b37da5e3ca..41cab297d66e7 100644 --- a/drivers/misc/pvpanic.c +++ b/drivers/misc/pvpanic.c @@ -55,12 +55,23 @@ static int pvpanic_mmio_probe(struct platform_device *pdev) struct resource *res; res = platform_get_mem_or_io(pdev, 0); - if (res && resource_type(res) == IORESOURCE_IO) + if (!res) + return -EINVAL; + + switch (resource_type(res)) { + case IORESOURCE_IO: base = devm_ioport_map(dev, res->start, resource_size(res)); - else + if (!base) + return -ENOMEM; + break; + case IORESOURCE_MEM: base = devm_ioremap_resource(dev, res); - if (IS_ERR(base)) - return PTR_ERR(base); + if (IS_ERR(base)) + return PTR_ERR(base); + break; + default: + return -EINVAL; + } atomic_notifier_chain_register(&panic_notifier_list, &pvpanic_panic_nb); -- GitLab From d8f5c29653c3f6995e8979be5623d263e92f6b86 Mon Sep 17 00:00:00 2001 From: Sean Tranchetti Date: Tue, 5 Jan 2021 16:22:25 -0800 Subject: [PATCH 1541/1894] net: ipv6: fib: flush exceptions when purging route Route removal is handled by two code paths. The main removal path is via fib6_del_route() which will handle purging any PMTU exceptions from the cache, removing all per-cpu copies of the DST entry used by the route, and releasing the fib6_info struct. The second removal location is during fib6_add_rt2node() during a route replacement operation. This path also calls fib6_purge_rt() to handle cleaning up the per-cpu copies of the DST entries and releasing the fib6_info associated with the older route, but it does not flush any PMTU exceptions that the older route had. Since the older route is removed from the tree during the replacement, we lose any way of accessing it again. As these lingering DSTs and the fib6_info struct are holding references to the underlying netdevice struct as well, unregistering that device from the kernel can never complete. Fixes: 2b760fcf5cfb3 ("ipv6: hook up exception table to store dst cache") Signed-off-by: Sean Tranchetti Reviewed-by: David Ahern Link: https://lore.kernel.org/r/1609892546-11389-1-git-send-email-stranche@quicinc.com Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_fib.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 605cdd38a919a..f43e275557251 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1025,6 +1025,8 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn, { struct fib6_table *table = rt->fib6_table; + /* Flush all cached dst in exception table */ + rt6_flush_exceptions(rt); fib6_drop_pcpu_from(rt, table); if (rt->nh && !list_empty(&rt->nh_list)) @@ -1927,9 +1929,6 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, net->ipv6.rt6_stats->fib_rt_entries--; net->ipv6.rt6_stats->fib_discarded_routes++; - /* Flush all cached dst in exception table */ - rt6_flush_exceptions(rt); - /* Reset round-robin state, if necessary */ if (rcu_access_pointer(fn->rr_ptr) == rt) fn->rr_ptr = NULL; -- GitLab From 5316a7c0130acf09bfc8bb0092407006010fcccc Mon Sep 17 00:00:00 2001 From: Sean Tranchetti Date: Tue, 5 Jan 2021 16:22:26 -0800 Subject: [PATCH 1542/1894] tools: selftests: add test for changing routes with PTMU exceptions Adds new 2 new tests to the PTMU script: pmtu_ipv4/6_route_change. These tests explicitly test for a recently discovered problem in the IPv6 routing framework where PMTU exceptions were not properly released when replacing a route via "ip route change ...". After creating PMTU exceptions, the route from the device A to R1 will be replaced with a new route, then device A will be deleted. If the PMTU exceptions were properly cleaned up by the kernel, this device deletion will succeed. Otherwise, the unregistration of the device will stall, and messages such as the following will be logged in dmesg: unregister_netdevice: waiting for veth_A-R1 to become free. Usage count = 4 Signed-off-by: Sean Tranchetti Reviewed-by: David Ahern Link: https://lore.kernel.org/r/1609892546-11389-2-git-send-email-stranche@quicinc.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/pmtu.sh | 71 ++++++++++++++++++++++++++++- 1 file changed, 69 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 464e31eabc733..64cd2e23c5687 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -162,7 +162,15 @@ # - list_flush_ipv6_exception # Using the same topology as in pmtu_ipv6, create exceptions, and check # they are shown when listing exception caches, gone after flushing them - +# +# - pmtu_ipv4_route_change +# Use the same topology as in pmtu_ipv4, but issue a route replacement +# command and delete the corresponding device afterward. This tests for +# proper cleanup of the PMTU exceptions by the route replacement path. +# Device unregistration should complete successfully +# +# - pmtu_ipv6_route_change +# Same as above but with IPv6 # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 @@ -224,7 +232,9 @@ tests=" cleanup_ipv4_exception ipv4: cleanup of cached exceptions 1 cleanup_ipv6_exception ipv6: cleanup of cached exceptions 1 list_flush_ipv4_exception ipv4: list and flush cached exceptions 1 - list_flush_ipv6_exception ipv6: list and flush cached exceptions 1" + list_flush_ipv6_exception ipv6: list and flush cached exceptions 1 + pmtu_ipv4_route_change ipv4: PMTU exception w/route replace 1 + pmtu_ipv6_route_change ipv6: PMTU exception w/route replace 1" NS_A="ns-A" NS_B="ns-B" @@ -1782,6 +1792,63 @@ test_list_flush_ipv6_exception() { return ${fail} } +test_pmtu_ipvX_route_change() { + family=${1} + + setup namespaces routing || return 2 + trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ + "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \ + "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \ + "${ns_r2}" veth_R2-B "${ns_b}" veth_B-R2 + + if [ ${family} -eq 4 ]; then + ping=ping + dst1="${prefix4}.${b_r1}.1" + dst2="${prefix4}.${b_r2}.1" + gw="${prefix4}.${a_r1}.2" + else + ping=${ping6} + dst1="${prefix6}:${b_r1}::1" + dst2="${prefix6}:${b_r2}::1" + gw="${prefix6}:${a_r1}::2" + fi + + # Set up initial MTU values + mtu "${ns_a}" veth_A-R1 2000 + mtu "${ns_r1}" veth_R1-A 2000 + mtu "${ns_r1}" veth_R1-B 1400 + mtu "${ns_b}" veth_B-R1 1400 + + mtu "${ns_a}" veth_A-R2 2000 + mtu "${ns_r2}" veth_R2-A 2000 + mtu "${ns_r2}" veth_R2-B 1500 + mtu "${ns_b}" veth_B-R2 1500 + + # Create route exceptions + run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst1} + run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst2} + + # Check that exceptions have been created with the correct PMTU + pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})" + check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1 + pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})" + check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1 + + # Replace the route from A to R1 + run_cmd ${ns_a} ip route change default via ${gw} + + # Delete the device in A + run_cmd ${ns_a} ip link del "veth_A-R1" +} + +test_pmtu_ipv4_route_change() { + test_pmtu_ipvX_route_change 4 +} + +test_pmtu_ipv6_route_change() { + test_pmtu_ipvX_route_change 6 +} + usage() { echo echo "$0 [OPTIONS] [TEST]..." -- GitLab From 384b77fd48fd683a82760bc88bef8611cba997fc Mon Sep 17 00:00:00 2001 From: Amanoel Dawod Date: Sat, 26 Dec 2020 18:58:40 -0500 Subject: [PATCH 1543/1894] Fonts: font_ter16x32: Update font with new upstream Terminus release This is just a maintenance patch to update font_ter16x32.c with changes and minor fixes added in new upstream Terminus v4.49. >From release notes of new version 4.49, this brings: - Altered ascii grave in some sizes to be more useful as a back quote. - Fixed 21B5, added 21B2 and 21B3. Just as my initial submission of the font, above changes were obtained from new ter-i32b.psf font source. Terminus font sources are available for download at SourceForge: https://sourceforge.net/projects/terminus-font/files/terminus-font-4.49/ Simply running `make` in source directory will build the .psf font files. Signed-off-by: Amanoel Dawod Link: https://lore.kernel.org/r/20201226235840.26290-1-kernel@amanoeldawod.com Signed-off-by: Greg Kroah-Hartman --- lib/fonts/font_ter16x32.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/fonts/font_ter16x32.c b/lib/fonts/font_ter16x32.c index 1955d624177cf..5baedc573dd6b 100644 --- a/lib/fonts/font_ter16x32.c +++ b/lib/fonts/font_ter16x32.c @@ -774,8 +774,8 @@ static const struct font_data fontdata_ter16x32 = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0xfc, 0x7f, 0xfc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 95 */ - 0x00, 0x00, 0x1c, 0x00, 0x0e, 0x00, 0x07, 0x00, - 0x03, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x0e, 0x00, + 0x07, 0x00, 0x03, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -1169,7 +1169,7 @@ static const struct font_data fontdata_ter16x32 = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x7f, 0xf8, 0x7f, 0xfc, 0x03, 0x9e, 0x03, 0x8e, + 0x7e, 0xf8, 0x7f, 0xfc, 0x03, 0x9e, 0x03, 0x8e, 0x03, 0x8e, 0x3f, 0x8e, 0x7f, 0xfe, 0xf3, 0xfe, 0xe3, 0x80, 0xe3, 0x80, 0xe3, 0x80, 0xf3, 0xce, 0x7f, 0xfe, 0x3e, 0xfc, 0x00, 0x00, 0x00, 0x00, -- GitLab From a306aba9c8d869b1fdfc8ad9237f1ed718ea55e6 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Sat, 26 Dec 2020 15:42:48 +0800 Subject: [PATCH 1544/1894] RDMA/usnic: Fix memleak in find_free_vf_and_create_qp_grp If usnic_ib_qp_grp_create() fails at the first call, dev_list will not be freed on error, which leads to memleak. Fixes: e3cf00d0a87f ("IB/usnic: Add Cisco VIC low-level hardware driver") Link: https://lore.kernel.org/r/20201226074248.2893-1-dinghao.liu@zju.edu.cn Signed-off-by: Dinghao Liu Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index 38a37770c0162..3705c6b8b2237 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -214,6 +214,7 @@ find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev, } usnic_uiom_free_dev_list(dev_list); + dev_list = NULL; } /* Try to find resources on an unused vf */ @@ -239,6 +240,8 @@ find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev, qp_grp_check: if (IS_ERR_OR_NULL(qp_grp)) { usnic_err("Failed to allocate qp_grp\n"); + if (usnic_ib_share_vf) + usnic_uiom_free_dev_list(dev_list); return ERR_PTR(qp_grp ? PTR_ERR(qp_grp) : -ENOMEM); } return qp_grp; -- GitLab From abf8ef953a43e74aac3c54a94975f21bd483199b Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Mon, 30 Nov 2020 04:38:11 +0200 Subject: [PATCH 1545/1894] net/mlx5: Check if lag is supported before creating one MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes a memleak issue by preventing to create a lag and add PFs if lag is not supported. comm “python3”, pid 349349, jiffies 4296985507 (age 1446.976s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ……………. 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ……………. backtrace: [<000000005b216ae7>] mlx5_lag_add+0x1d5/0×3f0 [mlx5_core] [<000000000445aa55>] mlx5e_nic_enable+0x66/0×1b0 [mlx5_core] [<00000000c56734c3>] mlx5e_attach_netdev+0x16e/0×200 [mlx5_core] [<0000000030439d1f>] mlx5e_attach+0x5c/0×90 [mlx5_core] [<0000000018fd8615>] mlx5e_add+0x1a4/0×410 [mlx5_core] [<0000000068bc504b>] mlx5_add_device+0x72/0×120 [mlx5_core] [<000000009fce51f9>] mlx5_register_device+0x77/0xb0 [mlx5_core] [<00000000d0d81ff3>] mlx5_load_one+0xc58/0×1eb0 [mlx5_core] [<0000000045077adc>] init_one+0x3ea/0×920 [mlx5_core] [<0000000043287674>] pci_device_probe+0xcd/0×150 [<00000000dafd3279>] really_probe+0x1c9/0×4b0 [<00000000f06bdd84>] driver_probe_device+0x5d/0×140 [<00000000e3d508b6>] device_driver_attach+0x4f/0×60 [<0000000084fba0f0>] bind_store+0xbf/0×120 [<00000000bf6622b3>] kernfs_fop_write+0x114/0×1b0 Fixes: 9b412cc35f00 ("net/mlx5e: Add LAG warning if bond slave is not lag master") Signed-off-by: Mark Zhang Reviewed-by: Leon Romanovsky Reviewed-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lag.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index f3d45ef082cde..83a05371e2aa1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -564,7 +564,9 @@ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev) struct mlx5_core_dev *tmp_dev; int i, err; - if (!MLX5_CAP_GEN(dev, vport_group_manager)) + if (!MLX5_CAP_GEN(dev, vport_group_manager) || + !MLX5_CAP_GEN(dev, lag_master) || + MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS) return; tmp_dev = mlx5_get_next_phys_dev(dev); @@ -582,12 +584,9 @@ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev) if (mlx5_lag_dev_add_pf(ldev, dev, netdev) < 0) return; - for (i = 0; i < MLX5_MAX_PORTS; i++) { - tmp_dev = ldev->pf[i].dev; - if (!tmp_dev || !MLX5_CAP_GEN(tmp_dev, lag_master) || - MLX5_CAP_GEN(tmp_dev, num_lag_ports) != MLX5_MAX_PORTS) + for (i = 0; i < MLX5_MAX_PORTS; i++) + if (!ldev->pf[i].dev) break; - } if (i >= MLX5_MAX_PORTS) ldev->flags |= MLX5_LAG_FLAG_READY; -- GitLab From 9c9be85f6b59d80efe4705109c0396df18d4e11d Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 24 Nov 2020 22:16:23 +0200 Subject: [PATCH 1546/1894] net/mlx5e: Add missing capability check for uplink follow Expose firmware indication that it supports setting eswitch uplink state to follow (follow the physical link). Condition setting the eswitch uplink admin-state with this capability bit. Older FW may not support the uplink state setting. Fixes: 7d0314b11cdd ("net/mlx5e: Modify uplink state on interface up/down") Signed-off-by: Aya Levin Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 3 ++- include/linux/mlx5/mlx5_ifc.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 7a79d330c0751..6a852b4901aa0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3161,7 +3161,8 @@ static void mlx5e_modify_admin_state(struct mlx5_core_dev *mdev, mlx5_set_port_admin_status(mdev, state); - if (mlx5_eswitch_mode(mdev) != MLX5_ESWITCH_LEGACY) + if (mlx5_eswitch_mode(mdev) == MLX5_ESWITCH_OFFLOADS || + !MLX5_CAP_GEN(mdev, uplink_follow)) return; if (state == MLX5_PORT_UP) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 8fbddec26eb8c..442c0160caab5 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1280,7 +1280,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 ece_support[0x1]; u8 reserved_at_a4[0x7]; u8 log_max_srq[0x5]; - u8 reserved_at_b0[0x2]; + u8 reserved_at_b0[0x1]; + u8 uplink_follow[0x1]; u8 ts_cqe_to_dest_cqn[0x1]; u8 reserved_at_b3[0xd]; -- GitLab From 0f2dcade69f2af56b74bce432e48ff3957830ce2 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Mon, 14 Dec 2020 03:38:40 +0200 Subject: [PATCH 1547/1894] net/mlx5: Use port_num 1 instead of 0 when delete a RoCE address In multi-port mode, FW reports syndrome 0x2ea48 (invalid vhca_port_number) if the port_num is not 1 or 2. Fixes: 80f09dfc237f ("net/mlx5: Eswitch, enable RoCE loopback traffic") Signed-off-by: Mark Zhang Reviewed-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c index 0fc7de4aa572f..8e0dddc6383f0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c @@ -116,7 +116,7 @@ free: static void mlx5_rdma_del_roce_addr(struct mlx5_core_dev *dev) { mlx5_core_roce_gid_set(dev, 0, 0, 0, - NULL, NULL, false, 0, 0); + NULL, NULL, false, 0, 1); } static void mlx5_rdma_make_default_gid(struct mlx5_core_dev *dev, union ib_gid *gid) -- GitLab From eed38eeee734756596e2cc163bdc7dac3be501b1 Mon Sep 17 00:00:00 2001 From: Oz Shlomo Date: Mon, 7 Dec 2020 08:15:18 +0000 Subject: [PATCH 1548/1894] net/mlx5e: CT: Use per flow counter when CT flow accounting is enabled Connection counters may be shared for both directions when the counter is used for connection aging purposes. However, if TC flow accounting is enabled then a unique counter is required per direction. Instantiate a unique counter per direction if the conntrack accounting extension is enabled. Use a shared counter when the connection accounting extension is disabled. Fixes: 1edae2335adf ("net/mlx5e: CT: Use the same counter for both directions") Signed-off-by: Oz Shlomo Reported-by: Marcelo Ricardo Leitner Reviewed-by: Roi Dayan Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en/tc_ct.c | 77 ++++++++++++------- 1 file changed, 49 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index e521254d886ef..072363e73f1ce 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -118,16 +118,17 @@ struct mlx5_ct_tuple { u16 zone; }; -struct mlx5_ct_shared_counter { +struct mlx5_ct_counter { struct mlx5_fc *counter; refcount_t refcount; + bool is_shared; }; struct mlx5_ct_entry { struct rhash_head node; struct rhash_head tuple_node; struct rhash_head tuple_nat_node; - struct mlx5_ct_shared_counter *shared_counter; + struct mlx5_ct_counter *counter; unsigned long cookie; unsigned long restore_cookie; struct mlx5_ct_tuple tuple; @@ -394,13 +395,14 @@ mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, } static void -mlx5_tc_ct_shared_counter_put(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry) +mlx5_tc_ct_counter_put(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry) { - if (!refcount_dec_and_test(&entry->shared_counter->refcount)) + if (entry->counter->is_shared && + !refcount_dec_and_test(&entry->counter->refcount)) return; - mlx5_fc_destroy(ct_priv->dev, entry->shared_counter->counter); - kfree(entry->shared_counter); + mlx5_fc_destroy(ct_priv->dev, entry->counter->counter); + kfree(entry->counter); } static void @@ -699,7 +701,7 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, attr->dest_ft = ct_priv->post_ct; attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct; attr->outer_match_level = MLX5_MATCH_L4; - attr->counter = entry->shared_counter->counter; + attr->counter = entry->counter->counter; attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT; mlx5_tc_ct_set_tuple_match(netdev_priv(ct_priv->netdev), spec, flow_rule); @@ -732,13 +734,34 @@ err_attr: return err; } -static struct mlx5_ct_shared_counter * +static struct mlx5_ct_counter * +mlx5_tc_ct_counter_create(struct mlx5_tc_ct_priv *ct_priv) +{ + struct mlx5_ct_counter *counter; + int ret; + + counter = kzalloc(sizeof(*counter), GFP_KERNEL); + if (!counter) + return ERR_PTR(-ENOMEM); + + counter->is_shared = false; + counter->counter = mlx5_fc_create(ct_priv->dev, true); + if (IS_ERR(counter->counter)) { + ct_dbg("Failed to create counter for ct entry"); + ret = PTR_ERR(counter->counter); + kfree(counter); + return ERR_PTR(ret); + } + + return counter; +} + +static struct mlx5_ct_counter * mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry) { struct mlx5_ct_tuple rev_tuple = entry->tuple; - struct mlx5_ct_shared_counter *shared_counter; - struct mlx5_core_dev *dev = ct_priv->dev; + struct mlx5_ct_counter *shared_counter; struct mlx5_ct_entry *rev_entry; __be16 tmp_port; int ret; @@ -767,25 +790,20 @@ mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv, rev_entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, &rev_tuple, tuples_ht_params); if (rev_entry) { - if (refcount_inc_not_zero(&rev_entry->shared_counter->refcount)) { + if (refcount_inc_not_zero(&rev_entry->counter->refcount)) { mutex_unlock(&ct_priv->shared_counter_lock); - return rev_entry->shared_counter; + return rev_entry->counter; } } mutex_unlock(&ct_priv->shared_counter_lock); - shared_counter = kzalloc(sizeof(*shared_counter), GFP_KERNEL); - if (!shared_counter) - return ERR_PTR(-ENOMEM); - - shared_counter->counter = mlx5_fc_create(dev, true); - if (IS_ERR(shared_counter->counter)) { - ct_dbg("Failed to create counter for ct entry"); - ret = PTR_ERR(shared_counter->counter); - kfree(shared_counter); + shared_counter = mlx5_tc_ct_counter_create(ct_priv); + if (IS_ERR(shared_counter)) { + ret = PTR_ERR(shared_counter); return ERR_PTR(ret); } + shared_counter->is_shared = true; refcount_set(&shared_counter->refcount, 1); return shared_counter; } @@ -798,10 +816,13 @@ mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv, { int err; - entry->shared_counter = mlx5_tc_ct_shared_counter_get(ct_priv, entry); - if (IS_ERR(entry->shared_counter)) { - err = PTR_ERR(entry->shared_counter); - ct_dbg("Failed to create counter for ct entry"); + if (nf_ct_acct_enabled(dev_net(ct_priv->netdev))) + entry->counter = mlx5_tc_ct_counter_create(ct_priv); + else + entry->counter = mlx5_tc_ct_shared_counter_get(ct_priv, entry); + + if (IS_ERR(entry->counter)) { + err = PTR_ERR(entry->counter); return err; } @@ -820,7 +841,7 @@ mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv, err_nat: mlx5_tc_ct_entry_del_rule(ct_priv, entry, false); err_orig: - mlx5_tc_ct_shared_counter_put(ct_priv, entry); + mlx5_tc_ct_counter_put(ct_priv, entry); return err; } @@ -918,7 +939,7 @@ mlx5_tc_ct_del_ft_entry(struct mlx5_tc_ct_priv *ct_priv, rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node, tuples_ht_params); mutex_unlock(&ct_priv->shared_counter_lock); - mlx5_tc_ct_shared_counter_put(ct_priv, entry); + mlx5_tc_ct_counter_put(ct_priv, entry); } @@ -956,7 +977,7 @@ mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft, if (!entry) return -ENOENT; - mlx5_fc_query_cached(entry->shared_counter->counter, &bytes, &packets, &lastuse); + mlx5_fc_query_cached(entry->counter->counter, &bytes, &packets, &lastuse); flow_stats_update(&f->stats, bytes, packets, 0, lastuse, FLOW_ACTION_HW_STATS_DELAYED); -- GitLab From b544011f0e58ce43c40105468d6dc67f980a0c7a Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Fri, 13 Nov 2020 06:06:28 +0200 Subject: [PATCH 1549/1894] net/mlx5e: Fix SWP offsets when vlan inserted by driver In case WQE includes inline header the vlan is inserted by driver even if vlan offload is set. On geneve over vlan interface where software parser is used the SWP offsets should be updated according to the added vlan. Fixes: e3cfc7e6b7bd ("net/mlx5e: TX, Add geneve tunnel stateless offload support") Signed-off-by: Moshe Shemesh Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h | 9 +++++++++ .../net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h | 8 +++++--- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 9 +++++---- 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index 7943eb30b837e..4880f21792730 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -371,6 +371,15 @@ struct mlx5e_swp_spec { u8 tun_l4_proto; }; +static inline void mlx5e_eseg_swp_offsets_add_vlan(struct mlx5_wqe_eth_seg *eseg) +{ + /* SWP offsets are in 2-bytes words */ + eseg->swp_outer_l3_offset += VLAN_HLEN / 2; + eseg->swp_outer_l4_offset += VLAN_HLEN / 2; + eseg->swp_inner_l3_offset += VLAN_HLEN / 2; + eseg->swp_inner_l4_offset += VLAN_HLEN / 2; +} + static inline void mlx5e_set_eseg_swp(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg, struct mlx5e_swp_spec *swp_spec) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h index 899b98aca0d3f..1fae7fab8297e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h @@ -51,7 +51,7 @@ static inline bool mlx5_geneve_tx_allowed(struct mlx5_core_dev *mdev) } static inline void -mlx5e_tx_tunnel_accel(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg) +mlx5e_tx_tunnel_accel(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg, u16 ihs) { struct mlx5e_swp_spec swp_spec = {}; unsigned int offset = 0; @@ -85,6 +85,8 @@ mlx5e_tx_tunnel_accel(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg) } mlx5e_set_eseg_swp(skb, eseg, &swp_spec); + if (skb_vlan_tag_present(skb) && ihs) + mlx5e_eseg_swp_offsets_add_vlan(eseg); } #else @@ -163,7 +165,7 @@ static inline unsigned int mlx5e_accel_tx_ids_len(struct mlx5e_txqsq *sq, static inline bool mlx5e_accel_tx_eseg(struct mlx5e_priv *priv, struct sk_buff *skb, - struct mlx5_wqe_eth_seg *eseg) + struct mlx5_wqe_eth_seg *eseg, u16 ihs) { #ifdef CONFIG_MLX5_EN_IPSEC if (xfrm_offload(skb)) @@ -172,7 +174,7 @@ static inline bool mlx5e_accel_tx_eseg(struct mlx5e_priv *priv, #if IS_ENABLED(CONFIG_GENEVE) if (skb->encapsulation) - mlx5e_tx_tunnel_accel(skb, eseg); + mlx5e_tx_tunnel_accel(skb, eseg, ihs); #endif return true; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index e47e2a0059d0a..61ed671fe741b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -682,9 +682,9 @@ void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq) static bool mlx5e_txwqe_build_eseg(struct mlx5e_priv *priv, struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5e_accel_tx_state *accel, - struct mlx5_wqe_eth_seg *eseg) + struct mlx5_wqe_eth_seg *eseg, u16 ihs) { - if (unlikely(!mlx5e_accel_tx_eseg(priv, skb, eseg))) + if (unlikely(!mlx5e_accel_tx_eseg(priv, skb, eseg, ihs))) return false; mlx5e_txwqe_build_eseg_csum(sq, skb, accel, eseg); @@ -714,7 +714,8 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) if (mlx5e_tx_skb_supports_mpwqe(skb, &attr)) { struct mlx5_wqe_eth_seg eseg = {}; - if (unlikely(!mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &eseg))) + if (unlikely(!mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &eseg, + attr.ihs))) return NETDEV_TX_OK; mlx5e_sq_xmit_mpwqe(sq, skb, &eseg, netdev_xmit_more()); @@ -731,7 +732,7 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) /* May update the WQE, but may not post other WQEs. */ mlx5e_accel_tx_finish(sq, wqe, &accel, (struct mlx5_wqe_inline_seg *)(wqe->data + wqe_attr.ds_cnt_inl)); - if (unlikely(!mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &wqe->eth))) + if (unlikely(!mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &wqe->eth, attr.ihs))) return NETDEV_TX_OK; mlx5e_sq_xmit_wqe(sq, skb, &attr, &wqe_attr, wqe, pi, netdev_xmit_more()); -- GitLab From 25c904b59aaf4816337acd415514b0c47715f604 Mon Sep 17 00:00:00 2001 From: Alaa Hleihel Date: Mon, 4 Jan 2021 12:54:40 +0200 Subject: [PATCH 1550/1894] net/mlx5: E-Switch, fix changing vf VLANID Adding vf VLANID for the first time, or after having cleared previously defined VLANID works fine, however, attempting to change an existing vf VLANID clears the rules on the firmware, but does not add new rules for the new vf VLANID. Fix this by changing the logic in function esw_acl_egress_lgcy_setup() so that it will always configure egress rules. Fixes: ea651a86d468 ("net/mlx5: E-Switch, Refactor eswitch egress acl codes") Signed-off-by: Alaa Hleihel Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/esw/acl/egress_lgcy.c | 27 +++++++++---------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c index 2b85d4777303a..3e19b1721303f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c @@ -95,22 +95,21 @@ int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw, return 0; } - if (!IS_ERR_OR_NULL(vport->egress.acl)) - return 0; - - vport->egress.acl = esw_acl_table_create(esw, vport->vport, - MLX5_FLOW_NAMESPACE_ESW_EGRESS, - table_size); - if (IS_ERR(vport->egress.acl)) { - err = PTR_ERR(vport->egress.acl); - vport->egress.acl = NULL; - goto out; + if (!vport->egress.acl) { + vport->egress.acl = esw_acl_table_create(esw, vport->vport, + MLX5_FLOW_NAMESPACE_ESW_EGRESS, + table_size); + if (IS_ERR(vport->egress.acl)) { + err = PTR_ERR(vport->egress.acl); + vport->egress.acl = NULL; + goto out; + } + + err = esw_acl_egress_lgcy_groups_create(esw, vport); + if (err) + goto out; } - err = esw_acl_egress_lgcy_groups_create(esw, vport); - if (err) - goto out; - esw_debug(esw->dev, "vport[%d] configure egress rules, vlan(%d) qos(%d)\n", vport->vport, vport->info.vlan, vport->info.qos); -- GitLab From e13ed0ac064dd6ee964155ba9fdc2f3c3785934c Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Mon, 14 Dec 2020 13:53:03 +0200 Subject: [PATCH 1551/1894] net/mlx5e: In skb build skip setting mark in switchdev mode sop_drop_qpn field in the cqe is used by two features, in SWITCHDEV mode to restore the chain id in case of a miss and in LEGACY mode to support skbedit mark action. In build RX skb, the skb mark field is set regardless of the configured mode which cause a corruption of the mark field in case of switchdev mode. Fix by overriding the mark value back to 0 in the representor tc update skb flow. Fixes: 8f1e0b97cc70 ("net/mlx5: E-Switch, Mark miss packets with new chain id mapping") Signed-off-by: Maor Dickman Reviewed-by: Raed Salem Reviewed-by: Oz Shlomo Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c index d29af7b9c695a..76177f7c5ec29 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@ -626,6 +626,11 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe, if (!reg_c0) return true; + /* If reg_c0 is not equal to the default flow tag then skb->mark + * is not supported and must be reset back to 0. + */ + skb->mark = 0; + priv = netdev_priv(skb->dev); esw = priv->mdev->priv.eswitch; -- GitLab From b1c0aca3d3ddeebeec57ada9c2df9ed647939249 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Sun, 27 Dec 2020 16:33:19 +0200 Subject: [PATCH 1552/1894] net/mlx5e: ethtool, Fix restriction of autoneg with 56G Prior to this patch, configuring speed to 50G with autoneg off over devices supporting 50G per lane failed. Support for 50G per lane introduced a new set of link-modes, on which driver always performed a speed validation as if only legacy link-modes were configured. Fix driver speed validation to force setting autoneg over 56G only if in legacy link-mode. Fixes: 3d7cadae51f1 ("net/mlx5e: ethtool, Fix analysis of speed setting") Signed-off-by: Aya Levin Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en_ethtool.c | 24 ++++++++++++++----- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index d9076d543104f..2d37742a888c1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1010,6 +1010,22 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev, return mlx5e_ethtool_get_link_ksettings(priv, link_ksettings); } +static int mlx5e_speed_validate(struct net_device *netdev, bool ext, + const unsigned long link_modes, u8 autoneg) +{ + /* Extended link-mode has no speed limitations. */ + if (ext) + return 0; + + if ((link_modes & MLX5E_PROT_MASK(MLX5E_56GBASE_R4)) && + autoneg != AUTONEG_ENABLE) { + netdev_err(netdev, "%s: 56G link speed requires autoneg enabled\n", + __func__); + return -EINVAL; + } + return 0; +} + static u32 mlx5e_ethtool2ptys_adver_link(const unsigned long *link_modes) { u32 i, ptys_modes = 0; @@ -1103,13 +1119,9 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, link_modes = autoneg == AUTONEG_ENABLE ? ethtool2ptys_adver_func(adver) : mlx5e_port_speed2linkmodes(mdev, speed, !ext); - if ((link_modes & MLX5E_PROT_MASK(MLX5E_56GBASE_R4)) && - autoneg != AUTONEG_ENABLE) { - netdev_err(priv->netdev, "%s: 56G link speed requires autoneg enabled\n", - __func__); - err = -EINVAL; + err = mlx5e_speed_validate(priv->netdev, ext, link_modes, autoneg); + if (err) goto out; - } link_modes = link_modes & eproto.cap; if (!link_modes) { -- GitLab From 4d8be21112f6fa2ac4b8a13f35866ad65b11d48c Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 4 Jan 2021 10:08:36 +0200 Subject: [PATCH 1553/1894] net/mlx5: Release devlink object if adev fails Add missed freeing previously allocated devlink object. Fixes: a925b5e309c9 ("net/mlx5: Register mlx5 devices to auxiliary virtual bus") Signed-off-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index c08315b51fd3a..ca6f2fc39ea0a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1368,8 +1368,10 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *id) MLX5_COREDEV_VF : MLX5_COREDEV_PF; dev->priv.adev_idx = mlx5_adev_idx_alloc(); - if (dev->priv.adev_idx < 0) - return dev->priv.adev_idx; + if (dev->priv.adev_idx < 0) { + err = dev->priv.adev_idx; + goto adev_init_err; + } err = mlx5_mdev_init(dev, prof_sel); if (err) @@ -1403,6 +1405,7 @@ pci_init_err: mlx5_mdev_uninit(dev); mdev_init_err: mlx5_adev_idx_free(dev->priv.adev_idx); +adev_init_err: mlx5_devlink_free(devlink); return err; -- GitLab From 7a6eb072a9548492ead086f3e820e9aac71c7138 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Mon, 28 Dec 2020 16:48:40 +0800 Subject: [PATCH 1554/1894] net/mlx5e: Fix two double free cases mlx5e_create_ttc_table_groups() frees ft->g on failure of kvzalloc(), but such failure will be caught by its caller in mlx5e_create_ttc_table() and ft->g will be freed again in mlx5e_destroy_flow_table(). The same issue also occurs in mlx5e_create_ttc_table_groups(). Set ft->g to NULL after kfree() to avoid double free. Fixes: 7b3722fa9ef6 ("net/mlx5e: Support RSS for GRE tunneled packets") Fixes: 33cfaaa8f36f ("net/mlx5e: Split the main flow steering table") Signed-off-by: Dinghao Liu Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index fa8149f6eb088..44a2dfbc38534 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -942,6 +942,7 @@ static int mlx5e_create_ttc_table_groups(struct mlx5e_ttc_table *ttc, in = kvzalloc(inlen, GFP_KERNEL); if (!in) { kfree(ft->g); + ft->g = NULL; return -ENOMEM; } @@ -1087,6 +1088,7 @@ static int mlx5e_create_inner_ttc_table_groups(struct mlx5e_ttc_table *ttc) in = kvzalloc(inlen, GFP_KERNEL); if (!in) { kfree(ft->g); + ft->g = NULL; return -ENOMEM; } -- GitLab From 5b0bb12c58ac7d22e05b5bfdaa30a116c8c32e32 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Mon, 21 Dec 2020 19:27:31 +0800 Subject: [PATCH 1555/1894] net/mlx5e: Fix memleak in mlx5e_create_l2_table_groups When mlx5_create_flow_group() fails, ft->g should be freed just like when kvzalloc() fails. The caller of mlx5e_create_l2_table_groups() does not catch this issue on failure, which leads to memleak. Fixes: 33cfaaa8f36f ("net/mlx5e: Split the main flow steering table") Signed-off-by: Dinghao Liu Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 44a2dfbc38534..e02e5895703d5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -1392,6 +1392,7 @@ err_destroy_groups: ft->g[ft->num_groups] = NULL; mlx5e_destroy_groups(ft); kvfree(in); + kfree(ft->g); return err; } -- GitLab From f2bc3af6353cb2a33dfa9d270d999d839eef54cb Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Tue, 29 Dec 2020 18:46:53 -0800 Subject: [PATCH 1556/1894] RDMA/ocrdma: Fix use after free in ocrdma_dealloc_ucontext_pd() In ocrdma_dealloc_ucontext_pd() uctx->cntxt_pd is assigned to the variable pd and then after uctx->cntxt_pd is freed, the variable pd is passed to function _ocrdma_dealloc_pd() which dereferences pd directly or through its call to ocrdma_mbx_dealloc_pd(). Reorder the free using the variable pd. Cc: stable@vger.kernel.org Fixes: 21a428a019c9 ("RDMA: Handle PD allocations by IB/core") Link: https://lore.kernel.org/r/20201230024653.1516495-1-trix@redhat.com Signed-off-by: Tom Rix Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index bc98bd950d99f..3acb5c10b1553 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -434,9 +434,9 @@ static void ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext *uctx) pr_err("%s(%d) Freeing in use pdid=0x%x.\n", __func__, dev->id, pd->id); } - kfree(uctx->cntxt_pd); uctx->cntxt_pd = NULL; _ocrdma_dealloc_pd(dev, pd); + kfree(pd); } static struct ocrdma_pd *ocrdma_get_ucontext_pd(struct ocrdma_ucontext *uctx) -- GitLab From f3562f5e00bbae2a6b292941ec76a9140aa3b7dd Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Wed, 6 Jan 2021 17:17:35 +0100 Subject: [PATCH 1557/1894] docs: octeontx2: tune rst markup Commit 80b9414832a1 ("docs: octeontx2: Add Documentation for NPA health reporters") added new documentation with improper formatting for rst, and caused a few new warnings for make htmldocs in octeontx2.rst:169--202. Tune markup and formatting for better presentation in the HTML view. Signed-off-by: Lukas Bulwahn Acked-by: Randy Dunlap Acked-by: George Cherian Link: https://lore.kernel.org/r/20210106161735.21751-1-lukas.bulwahn@gmail.com Signed-off-by: Jakub Kicinski --- .../ethernet/marvell/octeontx2.rst | 62 +++++++++++-------- 1 file changed, 36 insertions(+), 26 deletions(-) diff --git a/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst b/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst index d3fcf536d14e1..61e850460e18f 100644 --- a/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst +++ b/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst @@ -164,46 +164,56 @@ Devlink health reporters NPA Reporters ------------- -The NPA reporters are responsible for reporting and recovering the following group of errors +The NPA reporters are responsible for reporting and recovering the following group of errors: + 1. GENERAL events + - Error due to operation of unmapped PF. - Error due to disabled alloc/free for other HW blocks (NIX, SSO, TIM, DPI and AURA). + 2. ERROR events + - Fault due to NPA_AQ_INST_S read or NPA_AQ_RES_S write. - AQ Doorbell Error. + 3. RAS events + - RAS Error Reporting for NPA_AQ_INST_S/NPA_AQ_RES_S. + 4. RVU events + - Error due to unmapped slot. -Sample Output -------------- -~# devlink health -pci/0002:01:00.0: - reporter hw_npa_intr - state healthy error 2872 recover 2872 last_dump_date 2020-12-10 last_dump_time 09:39:09 grace_period 0 auto_recover true auto_dump true - reporter hw_npa_gen - state healthy error 2872 recover 2872 last_dump_date 2020-12-11 last_dump_time 04:43:04 grace_period 0 auto_recover true auto_dump true - reporter hw_npa_err - state healthy error 2871 recover 2871 last_dump_date 2020-12-10 last_dump_time 09:39:17 grace_period 0 auto_recover true auto_dump true - reporter hw_npa_ras - state healthy error 0 recover 0 last_dump_date 2020-12-10 last_dump_time 09:32:40 grace_period 0 auto_recover true auto_dump true +Sample Output:: + + ~# devlink health + pci/0002:01:00.0: + reporter hw_npa_intr + state healthy error 2872 recover 2872 last_dump_date 2020-12-10 last_dump_time 09:39:09 grace_period 0 auto_recover true auto_dump true + reporter hw_npa_gen + state healthy error 2872 recover 2872 last_dump_date 2020-12-11 last_dump_time 04:43:04 grace_period 0 auto_recover true auto_dump true + reporter hw_npa_err + state healthy error 2871 recover 2871 last_dump_date 2020-12-10 last_dump_time 09:39:17 grace_period 0 auto_recover true auto_dump true + reporter hw_npa_ras + state healthy error 0 recover 0 last_dump_date 2020-12-10 last_dump_time 09:32:40 grace_period 0 auto_recover true auto_dump true Each reporter dumps the + - Error Type - Error Register value - Reason in words -For eg: -~# devlink health dump show pci/0002:01:00.0 reporter hw_npa_gen - NPA_AF_GENERAL: - NPA General Interrupt Reg : 1 - NIX0: free disabled RX -~# devlink health dump show pci/0002:01:00.0 reporter hw_npa_intr - NPA_AF_RVU: - NPA RVU Interrupt Reg : 1 - Unmap Slot Error -~# devlink health dump show pci/0002:01:00.0 reporter hw_npa_err - NPA_AF_ERR: - NPA Error Interrupt Reg : 4096 - AQ Doorbell Error +For example:: + + ~# devlink health dump show pci/0002:01:00.0 reporter hw_npa_gen + NPA_AF_GENERAL: + NPA General Interrupt Reg : 1 + NIX0: free disabled RX + ~# devlink health dump show pci/0002:01:00.0 reporter hw_npa_intr + NPA_AF_RVU: + NPA RVU Interrupt Reg : 1 + Unmap Slot Error + ~# devlink health dump show pci/0002:01:00.0 reporter hw_npa_err + NPA_AF_ERR: + NPA Error Interrupt Reg : 4096 + AQ Doorbell Error -- GitLab From 0ef597c3ac49a62e1a2c1c10f88dd76fde1e1636 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Tue, 5 Jan 2021 06:58:15 +0100 Subject: [PATCH 1558/1894] docs: remove mention of ENABLE_MUST_CHECK We removed ENABLE_MUST_CHECK in 196793946264 ("Compiler Attributes: remove CONFIG_ENABLE_MUST_CHECK"), so let's remove docs' mentions. At the same time, fix the outdated text related to ENABLE_WARN_DEPRECATED that wasn't removed in 3337d5cfe5e08 ("configs: get rid of obsolete CONFIG_ENABLE_WARN_DEPRECATED"). Finally, reflow the paragraph. Signed-off-by: Miguel Ojeda Reviewed-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Link: https://lore.kernel.org/r/20210105055815.GA5173@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/process/4.Coding.rst | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Documentation/process/4.Coding.rst b/Documentation/process/4.Coding.rst index c27e59d2f7029..0825dc496f22d 100644 --- a/Documentation/process/4.Coding.rst +++ b/Documentation/process/4.Coding.rst @@ -249,10 +249,8 @@ features; most of these are found in the "kernel hacking" submenu. Several of these options should be turned on for any kernel used for development or testing purposes. In particular, you should turn on: - - ENABLE_MUST_CHECK and FRAME_WARN to get an - extra set of warnings for problems like the use of deprecated interfaces - or ignoring an important return value from a function. The output - generated by these warnings can be verbose, but one need not worry about + - FRAME_WARN to get warnings for stack frames larger than a given amount. + The output generated can be verbose, but one need not worry about warnings from other parts of the kernel. - DEBUG_OBJECTS will add code to track the lifetime of various objects -- GitLab From a734a7235ef3768dd3c9b7034f663ae6b260375f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Fri, 1 Jan 2021 22:14:47 +0100 Subject: [PATCH 1559/1894] docs: binfmt-misc: Fix .rst formatting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit "name below" is not part of the /proc path and should not be formatted in monospace. "doesn``t" is rendered in HTML with a double backtick. Revert it back to "doesn't". Signed-off-by: Jonathan Neuschäfer Link: https://lore.kernel.org/r/20210101211447.1021412-1-j.neuschaefer@gmx.net Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/binfmt-misc.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/binfmt-misc.rst b/Documentation/admin-guide/binfmt-misc.rst index 7a864131e5ea7..59cd902e35497 100644 --- a/Documentation/admin-guide/binfmt-misc.rst +++ b/Documentation/admin-guide/binfmt-misc.rst @@ -23,7 +23,7 @@ Here is what the fields mean: - ``name`` is an identifier string. A new /proc file will be created with this - ``name below /proc/sys/fs/binfmt_misc``; cannot contain slashes ``/`` for + name below ``/proc/sys/fs/binfmt_misc``; cannot contain slashes ``/`` for obvious reasons. - ``type`` is the type of recognition. Give ``M`` for magic and ``E`` for extension. @@ -83,7 +83,7 @@ Here is what the fields mean: ``F`` - fix binary The usual behaviour of binfmt_misc is to spawn the binary lazily when the misc format file is invoked. However, - this doesn``t work very well in the face of mount namespaces and + this doesn't work very well in the face of mount namespaces and changeroots, so the ``F`` mode opens the binary as soon as the emulation is installed and uses the opened image to spawn the emulator, meaning it is always available once installed, -- GitLab From 25942e5ecbac33918ec2f0869ca9a374dbb023f2 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 31 Dec 2020 20:08:31 -0800 Subject: [PATCH 1560/1894] Documentation/admin-guide: kernel-parameters: hyphenate comma-separated Hyphenate "comma separated" when it is used as a compound adjective. hyphenated. Signed-off-by: Randy Dunlap Link: https://lore.kernel.org/r/20210101040831.4148-1-rdunlap@infradead.org Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/kernel-parameters.txt | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index c722ec19cd004..9e3cdb271d06c 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1385,7 +1385,7 @@ ftrace_filter=[function-list] [FTRACE] Limit the functions traced by the function - tracer at boot up. function-list is a comma separated + tracer at boot up. function-list is a comma-separated list of functions. This list can be changed at run time by the set_ftrace_filter file in the debugfs tracing directory. @@ -1399,13 +1399,13 @@ ftrace_graph_filter=[function-list] [FTRACE] Limit the top level callers functions traced by the function graph tracer at boot up. - function-list is a comma separated list of functions + function-list is a comma-separated list of functions that can be changed at run time by the set_graph_function file in the debugfs tracing directory. ftrace_graph_notrace=[function-list] [FTRACE] Do not trace from the functions specified in - function-list. This list is a comma separated list of + function-list. This list is a comma-separated list of functions that can be changed at run time by the set_graph_notrace file in the debugfs tracing directory. @@ -2421,7 +2421,7 @@ when set. Format: - libata.force= [LIBATA] Force configurations. The format is comma + libata.force= [LIBATA] Force configurations. The format is comma- separated list of "[ID:]VAL" where ID is PORT[.DEVICE]. PORT and DEVICE are decimal numbers matching port, link or device. Basically, it matches @@ -5145,7 +5145,7 @@ stacktrace_filter=[function-list] [FTRACE] Limit the functions that the stack tracer - will trace at boot up. function-list is a comma separated + will trace at boot up. function-list is a comma-separated list of functions. This list can be changed at run time by the stack_trace_filter file in the debugfs tracing directory. Note, this enables stack tracing @@ -5348,7 +5348,7 @@ trace_event=[event-list] [FTRACE] Set and start specified trace events in order to facilitate early boot debugging. The event-list is a - comma separated list of trace events to enable. See + comma-separated list of trace events to enable. See also Documentation/trace/events.rst trace_options=[option-list] -- GitLab From 9d54ee78aef62c29b15ae2f58a70b1d1cd63a8f0 Mon Sep 17 00:00:00 2001 From: Bhaskar Chowdhury Date: Thu, 7 Jan 2021 18:26:10 +0530 Subject: [PATCH 1561/1894] docs: admin-guide: bootconfig: Fix feils to fails s/feils/fails/p Signed-off-by: Bhaskar Chowdhury Acked-by: Randy Dunlap Link: https://lore.kernel.org/r/20210107125610.1576368-1-unixbhaskar@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/bootconfig.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/admin-guide/bootconfig.rst b/Documentation/admin-guide/bootconfig.rst index 9b90efcc3a35e..452b7dcd7f6be 100644 --- a/Documentation/admin-guide/bootconfig.rst +++ b/Documentation/admin-guide/bootconfig.rst @@ -154,7 +154,7 @@ get the boot configuration data. Because of this "piggyback" method, there is no need to change or update the boot loader and the kernel image itself as long as the boot loader passes the correct initrd file size. If by any chance, the boot -loader passes a longer size, the kernel feils to find the bootconfig data. +loader passes a longer size, the kernel fails to find the bootconfig data. To do this operation, Linux kernel provides "bootconfig" command under tools/bootconfig, which allows admin to apply or delete the config file -- GitLab From bb12433bf56e76789c6b08b36c546f745a6aa6e1 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 6 Jan 2021 12:34:36 -0800 Subject: [PATCH 1562/1894] ARC: unbork 5.11 bootup: fix snafu in _TIF_NOTIFY_SIGNAL handling Linux 5.11.rcX was failing to boot on ARC HSDK board. Turns out we have a couple of issues, this being the first one, and I'm to blame as I didn't pay attention during review. TIF_NOTIFY_SIGNAL support requires checking multiple TIF_* bits in kernel return code path. Old code only needed to check a single bit so BBIT0 worked. New code needs to check multiple bits so AND instruction. So needs to use bit mask variant _TIF_SIGPENDING Cc: Jens Axboe Fixes: 53855e12588743ea128 ("arc: add support for TIF_NOTIFY_SIGNAL") Link: https://github.com/foss-for-synopsys-dwc-arc-processors/linux/issues/34 Signed-off-by: Vineet Gupta --- arch/arc/kernel/entry.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S index 1f5308abf36d6..1743506081da6 100644 --- a/arch/arc/kernel/entry.S +++ b/arch/arc/kernel/entry.S @@ -307,7 +307,7 @@ resume_user_mode_begin: mov r0, sp ; pt_regs for arg to do_signal()/do_notify_resume() GET_CURR_THR_INFO_FLAGS r9 - and.f 0, r9, TIF_SIGPENDING|TIF_NOTIFY_SIGNAL + and.f 0, r9, _TIF_SIGPENDING|_TIF_NOTIFY_SIGNAL bz .Lchk_notify_resume ; Normal Trap/IRQ entry only saves Scratch (caller-saved) regs -- GitLab From 9e7a67dee27902fedab880b9af909bd4acd0fba9 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 6 Jan 2021 00:15:21 +0100 Subject: [PATCH 1563/1894] selftests: netfilter: add selftest for ipip pmtu discovery with enabled connection tracking Convert Christians bug description into a reproducer. Cc: Shuah Khan Reported-by: Christian Perle Signed-off-by: Florian Westphal Acked-by: Pablo Neira Ayuso Signed-off-by: Jakub Kicinski --- tools/testing/selftests/netfilter/Makefile | 3 +- .../selftests/netfilter/ipip-conntrack-mtu.sh | 206 ++++++++++++++++++ 2 files changed, 208 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index a374e10ef5065..3006a8e5b41a1 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -4,7 +4,8 @@ TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \ conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \ nft_concat_range.sh nft_conntrack_helper.sh \ - nft_queue.sh nft_meta.sh + nft_queue.sh nft_meta.sh \ + ipip-conntrack-mtu.sh LDLIBS = -lmnl TEST_GEN_FILES = nf-queue diff --git a/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh b/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh new file mode 100755 index 0000000000000..4a6f5c3b32155 --- /dev/null +++ b/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh @@ -0,0 +1,206 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +# Conntrack needs to reassemble fragments in order to have complete +# packets for rule matching. Reassembly can lead to packet loss. + +# Consider the following setup: +# +--------+ +---------+ +--------+ +# |Router A|-------|Wanrouter|-------|Router B| +# | |.IPIP..| |..IPIP.| | +# +--------+ +---------+ +--------+ +# / mtu 1400 \ +# / \ +#+--------+ +--------+ +#|Client A| |Client B| +#| | | | +#+--------+ +--------+ + +# Router A and Router B use IPIP tunnel interfaces to tunnel traffic +# between Client A and Client B over WAN. Wanrouter has MTU 1400 set +# on its interfaces. + +rnd=$(mktemp -u XXXXXXXX) +rx=$(mktemp) + +r_a="ns-ra-$rnd" +r_b="ns-rb-$rnd" +r_w="ns-rw-$rnd" +c_a="ns-ca-$rnd" +c_b="ns-cb-$rnd" + +checktool (){ + if ! $1 > /dev/null 2>&1; then + echo "SKIP: Could not $2" + exit $ksft_skip + fi +} + +checktool "iptables --version" "run test without iptables" +checktool "ip -Version" "run test without ip tool" +checktool "which nc" "run test without nc (netcat)" +checktool "ip netns add ${r_a}" "create net namespace" + +for n in ${r_b} ${r_w} ${c_a} ${c_b};do + ip netns add ${n} +done + +cleanup() { + for n in ${r_a} ${r_b} ${r_w} ${c_a} ${c_b};do + ip netns del ${n} + done + rm -f ${rx} +} + +trap cleanup EXIT + +test_path() { + msg="$1" + + ip netns exec ${c_b} nc -n -w 3 -q 3 -u -l -p 5000 > ${rx} < /dev/null & + + sleep 1 + for i in 1 2 3; do + head -c1400 /dev/zero | tr "\000" "a" | ip netns exec ${c_a} nc -n -w 1 -u 192.168.20.2 5000 + done + + wait + + bytes=$(wc -c < ${rx}) + + if [ $bytes -eq 1400 ];then + echo "OK: PMTU $msg connection tracking" + else + echo "FAIL: PMTU $msg connection tracking: got $bytes, expected 1400" + exit 1 + fi +} + +# Detailed setup for Router A +# --------------------------- +# Interfaces: +# eth0: 10.2.2.1/24 +# eth1: 192.168.10.1/24 +# ipip0: No IP address, local 10.2.2.1 remote 10.4.4.1 +# Routes: +# 192.168.20.0/24 dev ipip0 (192.168.20.0/24 is subnet of Client B) +# 10.4.4.1 via 10.2.2.254 (Router B via Wanrouter) +# No iptables rules at all. + +ip link add veth0 netns ${r_a} type veth peer name veth0 netns ${r_w} +ip link add veth1 netns ${r_a} type veth peer name veth0 netns ${c_a} + +l_addr="10.2.2.1" +r_addr="10.4.4.1" +ip netns exec ${r_a} ip link add ipip0 type ipip local ${l_addr} remote ${r_addr} mode ipip || exit $ksft_skip + +for dev in lo veth0 veth1 ipip0; do + ip -net ${r_a} link set $dev up +done + +ip -net ${r_a} addr add 10.2.2.1/24 dev veth0 +ip -net ${r_a} addr add 192.168.10.1/24 dev veth1 + +ip -net ${r_a} route add 192.168.20.0/24 dev ipip0 +ip -net ${r_a} route add 10.4.4.0/24 via 10.2.2.254 + +ip netns exec ${r_a} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null + +# Detailed setup for Router B +# --------------------------- +# Interfaces: +# eth0: 10.4.4.1/24 +# eth1: 192.168.20.1/24 +# ipip0: No IP address, local 10.4.4.1 remote 10.2.2.1 +# Routes: +# 192.168.10.0/24 dev ipip0 (192.168.10.0/24 is subnet of Client A) +# 10.2.2.1 via 10.4.4.254 (Router A via Wanrouter) +# No iptables rules at all. + +ip link add veth0 netns ${r_b} type veth peer name veth1 netns ${r_w} +ip link add veth1 netns ${r_b} type veth peer name veth0 netns ${c_b} + +l_addr="10.4.4.1" +r_addr="10.2.2.1" + +ip netns exec ${r_b} ip link add ipip0 type ipip local ${l_addr} remote ${r_addr} mode ipip || exit $ksft_skip + +for dev in lo veth0 veth1 ipip0; do + ip -net ${r_b} link set $dev up +done + +ip -net ${r_b} addr add 10.4.4.1/24 dev veth0 +ip -net ${r_b} addr add 192.168.20.1/24 dev veth1 + +ip -net ${r_b} route add 192.168.10.0/24 dev ipip0 +ip -net ${r_b} route add 10.2.2.0/24 via 10.4.4.254 +ip netns exec ${r_b} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null + +# Client A +ip -net ${c_a} addr add 192.168.10.2/24 dev veth0 +ip -net ${c_a} link set dev lo up +ip -net ${c_a} link set dev veth0 up +ip -net ${c_a} route add default via 192.168.10.1 + +# Client A +ip -net ${c_b} addr add 192.168.20.2/24 dev veth0 +ip -net ${c_b} link set dev veth0 up +ip -net ${c_b} link set dev lo up +ip -net ${c_b} route add default via 192.168.20.1 + +# Wan +ip -net ${r_w} addr add 10.2.2.254/24 dev veth0 +ip -net ${r_w} addr add 10.4.4.254/24 dev veth1 + +ip -net ${r_w} link set dev lo up +ip -net ${r_w} link set dev veth0 up mtu 1400 +ip -net ${r_w} link set dev veth1 up mtu 1400 + +ip -net ${r_a} link set dev veth0 mtu 1400 +ip -net ${r_b} link set dev veth0 mtu 1400 + +ip netns exec ${r_w} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null + +# Path MTU discovery +# ------------------ +# Running tracepath from Client A to Client B shows PMTU discovery is working +# as expected: +# +# clienta:~# tracepath 192.168.20.2 +# 1?: [LOCALHOST] pmtu 1500 +# 1: 192.168.10.1 0.867ms +# 1: 192.168.10.1 0.302ms +# 2: 192.168.10.1 0.312ms pmtu 1480 +# 2: no reply +# 3: 192.168.10.1 0.510ms pmtu 1380 +# 3: 192.168.20.2 2.320ms reached +# Resume: pmtu 1380 hops 3 back 3 + +# ip netns exec ${c_a} traceroute --mtu 192.168.20.2 + +# Router A has learned PMTU (1400) to Router B from Wanrouter. +# Client A has learned PMTU (1400 - IPIP overhead = 1380) to Client B +# from Router A. + +#Send large UDP packet +#--------------------- +#Now we send a 1400 bytes UDP packet from Client A to Client B: + +# clienta:~# head -c1400 /dev/zero | tr "\000" "a" | nc -u 192.168.20.2 5000 +test_path "without" + +# The IPv4 stack on Client A already knows the PMTU to Client B, so the +# UDP packet is sent as two fragments (1380 + 20). Router A forwards the +# fragments between eth1 and ipip0. The fragments fit into the tunnel and +# reach their destination. + +#When sending the large UDP packet again, Router A now reassembles the +#fragments before routing the packet over ipip0. The resulting IPIP +#packet is too big (1400) for the tunnel PMTU (1380) to Router B, it is +#dropped on Router A before sending. + +ip netns exec ${r_a} iptables -A FORWARD -m conntrack --ctstate NEW +test_path "with" -- GitLab From 50c661670f6a3908c273503dfa206dfc7aa54c07 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 6 Jan 2021 00:15:22 +0100 Subject: [PATCH 1564/1894] net: fix pmtu check in nopmtudisc mode For some reason ip_tunnel insist on setting the DF bit anyway when the inner header has the DF bit set, EVEN if the tunnel was configured with 'nopmtudisc'. This means that the script added in the previous commit cannot be made to work by adding the 'nopmtudisc' flag to the ip tunnel configuration. Doing so breaks connectivity even for the without-conntrack/netfilter scenario. When nopmtudisc is set, the tunnel will skip the mtu check, so no icmp error is sent to client. Then, because inner header has DF set, the outer header gets added with DF bit set as well. IP stack then sends an error to itself because the packet exceeds the device MTU. Fixes: 23a3647bc4f93 ("ip_tunnels: Use skb-len to PMTU check.") Cc: Stefano Brivio Signed-off-by: Florian Westphal Acked-by: Pablo Neira Ayuso Signed-off-by: Jakub Kicinski --- net/ipv4/ip_tunnel.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index ee65c9225178d..64594aa755f05 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -759,8 +759,11 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, goto tx_error; } - if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph, - 0, 0, false)) { + df = tnl_params->frag_off; + if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df) + df |= (inner_iph->frag_off & htons(IP_DF)); + + if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, 0, 0, false)) { ip_rt_put(rt); goto tx_error; } @@ -788,10 +791,6 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, ttl = ip4_dst_hoplimit(&rt->dst); } - df = tnl_params->frag_off; - if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df) - df |= (inner_iph->frag_off&htons(IP_DF)); - max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr) + rt->dst.header_len + ip_encap_hlen(&tunnel->encap); if (max_headroom > dev->needed_headroom) -- GitLab From bb4cc1a18856a73f0ff5137df0c2a31f4c50f6cf Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 6 Jan 2021 00:15:23 +0100 Subject: [PATCH 1565/1894] net: ip: always refragment ip defragmented packets Conntrack reassembly records the largest fragment size seen in IPCB. However, when this gets forwarded/transmitted, fragmentation will only be forced if one of the fragmented packets had the DF bit set. In that case, a flag in IPCB will force fragmentation even if the MTU is large enough. This should work fine, but this breaks with ip tunnels. Consider client that sends a UDP datagram of size X to another host. The client fragments the datagram, so two packets, of size y and z, are sent. DF bit is not set on any of these packets. Middlebox netfilter reassembles those packets back to single size-X packet, before routing decision. packet-size-vs-mtu checks in ip_forward are irrelevant, because DF bit isn't set. At output time, ip refragmentation is skipped as well because x is still smaller than the mtu of the output device. If ttransmit device is an ip tunnel, the packet size increases to x+overhead. Also, tunnel might be configured to force DF bit on outer header. In this case, packet will be dropped (exceeds MTU) and an ICMP error is generated back to sender. But sender already respects the announced MTU, all the packets that it sent did fit the announced mtu. Force refragmentation as per original sizes unconditionally so ip tunnel will encapsulate the fragments instead. The only other solution I see is to place ip refragmentation in the ip_tunnel code to handle this case. Fixes: d6b915e29f4ad ("ip_fragment: don't forward defragmented DF packet") Reported-by: Christian Perle Signed-off-by: Florian Westphal Acked-by: Pablo Neira Ayuso Signed-off-by: Jakub Kicinski --- net/ipv4/ip_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 89fff5f59eea4..2ed0b01f72f01 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -302,7 +302,7 @@ static int __ip_finish_output(struct net *net, struct sock *sk, struct sk_buff * if (skb_is_gso(skb)) return ip_finish_output_gso(net, sk, skb, mtu); - if (skb->len > mtu || (IPCB(skb)->flags & IPSKB_FRAG_PMTU)) + if (skb->len > mtu || IPCB(skb)->frag_max_size) return ip_fragment(net, sk, skb, mtu, ip_finish_output2); return ip_finish_output2(net, sk, skb); -- GitLab From 2aa078932ff6c66bf10cc5b3144440dbfa7d813d Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 17 Dec 2020 16:31:36 -0800 Subject: [PATCH 1566/1894] KVM: x86/mmu: Use -1 to flag an undefined spte in get_mmio_spte() Return -1 from the get_walk() helpers if the shadow walk doesn't fill at least one spte, which can theoretically happen if the walk hits a not-present PDPTR. Returning the root level in such a case will cause get_mmio_spte() to return garbage (uninitialized stack data). In practice, such a scenario should be impossible as KVM shouldn't get a reserved-bit page fault with a not-present PDPTR. Note, using mmu->root_level in get_walk() is wrong for other reasons, too, but that's now a moot point. Fixes: 95fb5b0258b7 ("kvm: x86/mmu: Support MMIO in the TDP MMU") Cc: Ben Gardon Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20201218003139.2167891-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 7 ++++++- arch/x86/kvm/mmu/tdp_mmu.c | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 7a6ae9e90bd70..a48cd12c01d75 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3488,7 +3488,7 @@ static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct) static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes) { struct kvm_shadow_walk_iterator iterator; - int leaf = vcpu->arch.mmu->root_level; + int leaf = -1; u64 spte; @@ -3532,6 +3532,11 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) else leaf = get_walk(vcpu, addr, sptes); + if (unlikely(leaf < 0)) { + *sptep = 0ull; + return reserved; + } + rsvd_check = &vcpu->arch.mmu->shadow_zero_check; for (level = root; level >= leaf; level--) { diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 84c8f06bec261..50cec7a15ddb0 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -1152,8 +1152,8 @@ int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes) { struct tdp_iter iter; struct kvm_mmu *mmu = vcpu->arch.mmu; - int leaf = vcpu->arch.mmu->shadow_root_level; gfn_t gfn = addr >> PAGE_SHIFT; + int leaf = -1; tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) { leaf = iter.level; -- GitLab From 39b4d43e6003cee51cd119596d3c33d0449eb44c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 17 Dec 2020 16:31:37 -0800 Subject: [PATCH 1567/1894] KVM: x86/mmu: Get root level from walkers when retrieving MMIO SPTE Get the so called "root" level from the low level shadow page table walkers instead of manually attempting to calculate it higher up the stack, e.g. in get_mmio_spte(). When KVM is using PAE shadow paging, the starting level of the walk, from the callers perspective, is not the CR3 root but rather the PDPTR "root". Checking for reserved bits from the CR3 root causes get_mmio_spte() to consume uninitialized stack data due to indexing into sptes[] for a level that was not filled by get_walk(). This can result in false positives and/or negatives depending on what garbage happens to be on the stack. Opportunistically nuke a few extra newlines. Fixes: 95fb5b0258b7 ("kvm: x86/mmu: Support MMIO in the TDP MMU") Reported-by: Richard Herbert Cc: Ben Gardon Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20201218003139.2167891-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 15 ++++++--------- arch/x86/kvm/mmu/tdp_mmu.c | 5 ++++- arch/x86/kvm/mmu/tdp_mmu.h | 4 +++- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index a48cd12c01d75..52f36c8790862 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3485,16 +3485,16 @@ static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct) * Return the level of the lowest level SPTE added to sptes. * That SPTE may be non-present. */ -static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes) +static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level) { struct kvm_shadow_walk_iterator iterator; int leaf = -1; u64 spte; - walk_shadow_page_lockless_begin(vcpu); - for (shadow_walk_init(&iterator, vcpu, addr); + for (shadow_walk_init(&iterator, vcpu, addr), + *root_level = iterator.level; shadow_walk_okay(&iterator); __shadow_walk_next(&iterator, spte)) { leaf = iterator.level; @@ -3504,7 +3504,6 @@ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes) if (!is_shadow_present_pte(spte)) break; - } walk_shadow_page_lockless_end(vcpu); @@ -3517,9 +3516,7 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) { u64 sptes[PT64_ROOT_MAX_LEVEL]; struct rsvd_bits_validate *rsvd_check; - int root = vcpu->arch.mmu->shadow_root_level; - int leaf; - int level; + int root, leaf, level; bool reserved = false; if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) { @@ -3528,9 +3525,9 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) } if (is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa)) - leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes); + leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes, &root); else - leaf = get_walk(vcpu, addr, sptes); + leaf = get_walk(vcpu, addr, sptes, &root); if (unlikely(leaf < 0)) { *sptep = 0ull; diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 50cec7a15ddb0..a4f9447f83276 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -1148,13 +1148,16 @@ bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm, * Return the level of the lowest level SPTE added to sptes. * That SPTE may be non-present. */ -int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes) +int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, + int *root_level) { struct tdp_iter iter; struct kvm_mmu *mmu = vcpu->arch.mmu; gfn_t gfn = addr >> PAGE_SHIFT; int leaf = -1; + *root_level = vcpu->arch.mmu->shadow_root_level; + tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) { leaf = iter.level; sptes[leaf - 1] = iter.old_spte; diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h index 556e065503f69..cbbdbadd1526f 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.h +++ b/arch/x86/kvm/mmu/tdp_mmu.h @@ -44,5 +44,7 @@ void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm, bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn); -int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes); +int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, + int *root_level); + #endif /* __KVM_X86_MMU_TDP_MMU_H */ -- GitLab From dde81f9477d018a96fba991c5928c6ab8cc109f8 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 17 Dec 2020 16:31:38 -0800 Subject: [PATCH 1568/1894] KVM: x86/mmu: Use raw level to index into MMIO walks' sptes array Bump the size of the sptes array by one and use the raw level of the SPTE to index into the sptes array. Using the SPTE level directly improves readability by eliminating the need to reason out why the level is being adjusted when indexing the array. The array is on the stack and is not explicitly initialized; bumping its size is nothing more than a superficial adjustment to the stack frame. Signed-off-by: Sean Christopherson Message-Id: <20201218003139.2167891-4-seanjc@google.com> Reviewed-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 15 +++++++-------- arch/x86/kvm/mmu/tdp_mmu.c | 2 +- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 52f36c8790862..4798a4472066d 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3500,7 +3500,7 @@ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level leaf = iterator.level; spte = mmu_spte_get_lockless(iterator.sptep); - sptes[leaf - 1] = spte; + sptes[leaf] = spte; if (!is_shadow_present_pte(spte)) break; @@ -3514,7 +3514,7 @@ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level /* return true if reserved bit is detected on spte. */ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) { - u64 sptes[PT64_ROOT_MAX_LEVEL]; + u64 sptes[PT64_ROOT_MAX_LEVEL + 1]; struct rsvd_bits_validate *rsvd_check; int root, leaf, level; bool reserved = false; @@ -3537,16 +3537,15 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) rsvd_check = &vcpu->arch.mmu->shadow_zero_check; for (level = root; level >= leaf; level--) { - if (!is_shadow_present_pte(sptes[level - 1])) + if (!is_shadow_present_pte(sptes[level])) break; /* * Use a bitwise-OR instead of a logical-OR to aggregate the * reserved bit and EPT's invalid memtype/XWR checks to avoid * adding a Jcc in the loop. */ - reserved |= __is_bad_mt_xwr(rsvd_check, sptes[level - 1]) | - __is_rsvd_bits_set(rsvd_check, sptes[level - 1], - level); + reserved |= __is_bad_mt_xwr(rsvd_check, sptes[level]) | + __is_rsvd_bits_set(rsvd_check, sptes[level], level); } if (reserved) { @@ -3554,10 +3553,10 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) __func__, addr); for (level = root; level >= leaf; level--) pr_err("------ spte 0x%llx level %d.\n", - sptes[level - 1], level); + sptes[level], level); } - *sptep = sptes[leaf - 1]; + *sptep = sptes[leaf]; return reserved; } diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index a4f9447f83276..efef571806ad6 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -1160,7 +1160,7 @@ int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) { leaf = iter.level; - sptes[leaf - 1] = iter.old_spte; + sptes[leaf] = iter.old_spte; } return leaf; -- GitLab From 9aa418792f5f11ef5d6f72265e1f8ae07efd5784 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 17 Dec 2020 16:31:39 -0800 Subject: [PATCH 1569/1894] KVM: x86/mmu: Optimize not-present/MMIO SPTE check in get_mmio_spte() Check only the terminal leaf for a "!PRESENT || MMIO" SPTE when looking for reserved bits on valid, non-MMIO SPTEs. The get_walk() helpers terminate their walks if a not-present or MMIO SPTE is encountered, i.e. the non-terminal SPTEs have already been verified to be regular SPTEs. This eliminates an extra check-and-branch in a relatively hot loop. Signed-off-by: Sean Christopherson Message-Id: <20201218003139.2167891-5-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 4798a4472066d..769855f5f0a12 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3511,7 +3511,7 @@ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level return leaf; } -/* return true if reserved bit is detected on spte. */ +/* return true if reserved bit(s) are detected on a valid, non-MMIO SPTE. */ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) { u64 sptes[PT64_ROOT_MAX_LEVEL + 1]; @@ -3534,11 +3534,20 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) return reserved; } + *sptep = sptes[leaf]; + + /* + * Skip reserved bits checks on the terminal leaf if it's not a valid + * SPTE. Note, this also (intentionally) skips MMIO SPTEs, which, by + * design, always have reserved bits set. The purpose of the checks is + * to detect reserved bits on non-MMIO SPTEs. i.e. buggy SPTEs. + */ + if (!is_shadow_present_pte(sptes[leaf])) + leaf++; + rsvd_check = &vcpu->arch.mmu->shadow_zero_check; - for (level = root; level >= leaf; level--) { - if (!is_shadow_present_pte(sptes[level])) - break; + for (level = root; level >= leaf; level--) /* * Use a bitwise-OR instead of a logical-OR to aggregate the * reserved bit and EPT's invalid memtype/XWR checks to avoid @@ -3546,7 +3555,6 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) */ reserved |= __is_bad_mt_xwr(rsvd_check, sptes[level]) | __is_rsvd_bits_set(rsvd_check, sptes[level], level); - } if (reserved) { pr_err("%s: detect reserved bits on spte, addr 0x%llx, dump hierarchy:\n", @@ -3556,8 +3564,6 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) sptes[level], level); } - *sptep = sptes[leaf]; - return reserved; } -- GitLab From f65cf84ee769767536dc367acc9568ddb6e4c9f4 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 18 Dec 2020 23:37:11 -0700 Subject: [PATCH 1570/1894] KVM: SVM: Add register operand to vmsave call in sev_es_vcpu_load When using LLVM's integrated assembler (LLVM_IAS=1) while building x86_64_defconfig + CONFIG_KVM=y + CONFIG_KVM_AMD=y, the following build error occurs: $ make LLVM=1 LLVM_IAS=1 arch/x86/kvm/svm/sev.o arch/x86/kvm/svm/sev.c:2004:15: error: too few operands for instruction asm volatile(__ex("vmsave") : : "a" (__sme_page_pa(sd->save_area)) : "memory"); ^ arch/x86/kvm/svm/sev.c:28:17: note: expanded from macro '__ex' #define __ex(x) __kvm_handle_fault_on_reboot(x) ^ ./arch/x86/include/asm/kvm_host.h:1646:10: note: expanded from macro '__kvm_handle_fault_on_reboot' "666: \n\t" \ ^ :2:2: note: instantiated into assembly here vmsave ^ 1 error generated. This happens because LLVM currently does not support calling vmsave without the fixed register operand (%rax for 64-bit and %eax for 32-bit). This will be fixed in LLVM 12 but the kernel currently supports LLVM 10.0.1 and newer so this needs to be handled. Add the proper register using the _ASM_AX macro, which matches the vmsave call in vmenter.S. Fixes: 861377730aa9 ("KVM: SVM: Provide support for SEV-ES vCPU loading") Link: https://reviews.llvm.org/D93524 Link: https://github.com/ClangBuiltLinux/linux/issues/1216 Signed-off-by: Nathan Chancellor Message-Id: <20201219063711.3526947-1-natechancellor@gmail.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 9858d5ae9dddd..563ced07b0b85 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2001,7 +2001,7 @@ void sev_es_vcpu_load(struct vcpu_svm *svm, int cpu) * of which one step is to perform a VMLOAD. Since hardware does not * perform a VMSAVE on VMRUN, the host savearea must be updated. */ - asm volatile(__ex("vmsave") : : "a" (__sme_page_pa(sd->save_area)) : "memory"); + asm volatile(__ex("vmsave %0") : : "a" (__sme_page_pa(sd->save_area)) : "memory"); /* * Certain MSRs are restored on VMEXIT, only save ones that aren't -- GitLab From 52782d5b63725a6c4bf642557c83507430064110 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Sun, 20 Dec 2020 21:03:39 +0100 Subject: [PATCH 1571/1894] KVM/SVM: Remove leftover __svm_vcpu_run prototype from svm.c Commit 16809ecdc1e8a moved __svm_vcpu_run the prototype to svm.h, but forgot to remove the original from svm.c. Fixes: 16809ecdc1e8a ("KVM: SVM: Provide an updated VMRUN invocation for SEV-ES guests") Cc: Tom Lendacky Cc: Paolo Bonzini Signed-off-by: Uros Bizjak Message-Id: <20201220200339.65115-1-ubizjak@gmail.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index cce0143a6f801..6824d611dc5dd 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3677,8 +3677,6 @@ static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu) return EXIT_FASTPATH_NONE; } -void __svm_vcpu_run(unsigned long vmcb_pa, unsigned long *regs); - static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, struct vcpu_svm *svm) { -- GitLab From e42ac777d661e878c3b9bac56df11e226cab3010 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Fri, 18 Dec 2020 15:17:32 +0100 Subject: [PATCH 1572/1894] KVM: selftests: Factor out guest mode code demand_paging_test, dirty_log_test, and dirty_log_perf_test have redundant guest mode code. Factor it out. Also, while adding a new include, remove the ones we don't need. Reviewed-by: Ben Gardon Reviewed-by: Peter Xu Signed-off-by: Andrew Jones Message-Id: <20201218141734.54359-2-drjones@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile | 2 +- .../selftests/kvm/demand_paging_test.c | 107 ++++----------- .../selftests/kvm/dirty_log_perf_test.c | 121 +++++------------ tools/testing/selftests/kvm/dirty_log_test.c | 125 ++++++------------ .../selftests/kvm/include/guest_modes.h | 21 +++ tools/testing/selftests/kvm/lib/guest_modes.c | 70 ++++++++++ 6 files changed, 189 insertions(+), 257 deletions(-) create mode 100644 tools/testing/selftests/kvm/include/guest_modes.h create mode 100644 tools/testing/selftests/kvm/lib/guest_modes.c diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index c7ca4faba2721..a7286a08c3ae9 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -33,7 +33,7 @@ ifeq ($(ARCH),s390) UNAME_M := s390x endif -LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c +LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 3d96a7bfaff30..946161a9ce2d8 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -7,23 +7,20 @@ * Copyright (C) 2019, Google, Inc. */ -#define _GNU_SOURCE /* for program_invocation_name */ +#define _GNU_SOURCE /* for program_invocation_name and pipe2 */ #include #include -#include -#include -#include #include #include #include -#include -#include #include +#include -#include "perf_test_util.h" -#include "processor.h" +#include "kvm_util.h" #include "test_util.h" +#include "perf_test_util.h" +#include "guest_modes.h" #ifdef __NR_userfaultfd @@ -248,9 +245,14 @@ static int setup_demand_paging(struct kvm_vm *vm, return 0; } -static void run_test(enum vm_guest_mode mode, bool use_uffd, - useconds_t uffd_delay) +struct test_params { + bool use_uffd; + useconds_t uffd_delay; +}; + +static void run_test(enum vm_guest_mode mode, void *arg) { + struct test_params *p = arg; pthread_t *vcpu_threads; pthread_t *uffd_handler_threads = NULL; struct uffd_handler_args *uffd_args = NULL; @@ -275,7 +277,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, add_vcpus(vm, nr_vcpus, guest_percpu_mem_size); - if (use_uffd) { + if (p->use_uffd) { uffd_handler_threads = malloc(nr_vcpus * sizeof(*uffd_handler_threads)); TEST_ASSERT(uffd_handler_threads, "Memory allocation failed"); @@ -308,7 +310,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, r = setup_demand_paging(vm, &uffd_handler_threads[vcpu_id], pipefds[vcpu_id * 2], - uffd_delay, &uffd_args[vcpu_id], + p->uffd_delay, &uffd_args[vcpu_id], vcpu_hva, guest_percpu_mem_size); if (r < 0) exit(-r); @@ -339,7 +341,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, pr_info("All vCPU threads joined\n"); - if (use_uffd) { + if (p->use_uffd) { char c; /* Tell the user fault fd handler threads to quit */ @@ -362,38 +364,19 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, free(guest_data_prototype); free(vcpu_threads); - if (use_uffd) { + if (p->use_uffd) { free(uffd_handler_threads); free(uffd_args); free(pipefds); } } -struct guest_mode { - bool supported; - bool enabled; -}; -static struct guest_mode guest_modes[NUM_VM_MODES]; - -#define guest_mode_init(mode, supported, enabled) ({ \ - guest_modes[mode] = (struct guest_mode){ supported, enabled }; \ -}) - static void help(char *name) { - int i; - puts(""); printf("usage: %s [-h] [-m mode] [-u] [-d uffd_delay_usec]\n" " [-b memory] [-v vcpus]\n", name); - printf(" -m: specify the guest mode ID to test\n" - " (default: test all supported modes)\n" - " This option may be used multiple times.\n" - " Guest mode IDs:\n"); - for (i = 0; i < NUM_VM_MODES; ++i) { - printf(" %d: %s%s\n", i, vm_guest_mode_string(i), - guest_modes[i].supported ? " (supported)" : ""); - } + guest_modes_help(); printf(" -u: use User Fault FD to handle vCPU page\n" " faults.\n"); printf(" -d: add a delay in usec to the User Fault\n" @@ -410,53 +393,22 @@ static void help(char *name) int main(int argc, char *argv[]) { int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); - bool mode_selected = false; - unsigned int mode; - int opt, i; - bool use_uffd = false; - useconds_t uffd_delay = 0; - -#ifdef __x86_64__ - guest_mode_init(VM_MODE_PXXV48_4K, true, true); -#endif -#ifdef __aarch64__ - guest_mode_init(VM_MODE_P40V48_4K, true, true); - guest_mode_init(VM_MODE_P40V48_64K, true, true); - { - unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE); - - if (limit >= 52) - guest_mode_init(VM_MODE_P52V48_64K, true, true); - if (limit >= 48) { - guest_mode_init(VM_MODE_P48V48_4K, true, true); - guest_mode_init(VM_MODE_P48V48_64K, true, true); - } - } -#endif -#ifdef __s390x__ - guest_mode_init(VM_MODE_P40V48_4K, true, true); -#endif + struct test_params p = {}; + int opt; + + guest_modes_append_default(); while ((opt = getopt(argc, argv, "hm:ud:b:v:")) != -1) { switch (opt) { case 'm': - if (!mode_selected) { - for (i = 0; i < NUM_VM_MODES; ++i) - guest_modes[i].enabled = false; - mode_selected = true; - } - mode = strtoul(optarg, NULL, 10); - TEST_ASSERT(mode < NUM_VM_MODES, - "Guest mode ID %d too big", mode); - guest_modes[mode].enabled = true; + guest_modes_cmdline(optarg); break; case 'u': - use_uffd = true; + p.use_uffd = true; break; case 'd': - uffd_delay = strtoul(optarg, NULL, 0); - TEST_ASSERT(uffd_delay >= 0, - "A negative UFFD delay is not supported."); + p.uffd_delay = strtoul(optarg, NULL, 0); + TEST_ASSERT(p.uffd_delay >= 0, "A negative UFFD delay is not supported."); break; case 'b': guest_percpu_mem_size = parse_size(optarg); @@ -473,14 +425,7 @@ int main(int argc, char *argv[]) } } - for (i = 0; i < NUM_VM_MODES; ++i) { - if (!guest_modes[i].enabled) - continue; - TEST_ASSERT(guest_modes[i].supported, - "Guest mode ID %d (%s) not supported.", - i, vm_guest_mode_string(i)); - run_test(i, use_uffd, uffd_delay); - } + for_each_guest_mode(run_test, &p); return 0; } diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index 9c6a7be31e033..506741eb5d7f3 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -12,16 +12,14 @@ #include #include -#include #include #include #include -#include #include "kvm_util.h" -#include "perf_test_util.h" -#include "processor.h" #include "test_util.h" +#include "perf_test_util.h" +#include "guest_modes.h" /* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop)*/ #define TEST_HOST_LOOP_N 2UL @@ -89,9 +87,15 @@ static void *vcpu_worker(void *data) return NULL; } -static void run_test(enum vm_guest_mode mode, unsigned long iterations, - uint64_t phys_offset, int wr_fract) +struct test_params { + unsigned long iterations; + uint64_t phys_offset; + int wr_fract; +}; + +static void run_test(enum vm_guest_mode mode, void *arg) { + struct test_params *p = arg; pthread_t *vcpu_threads; struct kvm_vm *vm; unsigned long *bmap; @@ -108,7 +112,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size); - perf_test_args.wr_fract = wr_fract; + perf_test_args.wr_fract = p->wr_fract; guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm_get_page_shift(vm); guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); @@ -156,7 +160,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, pr_info("Enabling dirty logging time: %ld.%.9lds\n\n", ts_diff.tv_sec, ts_diff.tv_nsec); - while (iteration < iterations) { + while (iteration < p->iterations) { /* * Incrementing the iteration number will start the vCPUs * dirtying memory again. @@ -210,15 +214,15 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, pr_info("Disabling dirty logging time: %ld.%.9lds\n", ts_diff.tv_sec, ts_diff.tv_nsec); - avg = timespec_div(get_dirty_log_total, iterations); + avg = timespec_div(get_dirty_log_total, p->iterations); pr_info("Get dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", - iterations, get_dirty_log_total.tv_sec, + p->iterations, get_dirty_log_total.tv_sec, get_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec); if (dirty_log_manual_caps) { - avg = timespec_div(clear_dirty_log_total, iterations); + avg = timespec_div(clear_dirty_log_total, p->iterations); pr_info("Clear dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", - iterations, clear_dirty_log_total.tv_sec, + p->iterations, clear_dirty_log_total.tv_sec, clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec); } @@ -228,20 +232,8 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, kvm_vm_free(vm); } -struct guest_mode { - bool supported; - bool enabled; -}; -static struct guest_mode guest_modes[NUM_VM_MODES]; - -#define guest_mode_init(mode, supported, enabled) ({ \ - guest_modes[mode] = (struct guest_mode){ supported, enabled }; \ -}) - static void help(char *name) { - int i; - puts(""); printf("usage: %s [-h] [-i iterations] [-p offset] " "[-m mode] [-b vcpu bytes] [-v vcpus]\n", name); @@ -250,14 +242,7 @@ static void help(char *name) TEST_HOST_LOOP_N); printf(" -p: specify guest physical test memory offset\n" " Warning: a low offset can conflict with the loaded test code.\n"); - printf(" -m: specify the guest mode ID to test " - "(default: test all supported modes)\n" - " This option may be used multiple times.\n" - " Guest mode IDs:\n"); - for (i = 0; i < NUM_VM_MODES; ++i) { - printf(" %d: %s%s\n", i, vm_guest_mode_string(i), - guest_modes[i].supported ? " (supported)" : ""); - } + guest_modes_help(); printf(" -b: specify the size of the memory region which should be\n" " dirtied by each vCPU. e.g. 10M or 3G.\n" " (default: 1G)\n"); @@ -272,74 +257,43 @@ static void help(char *name) int main(int argc, char *argv[]) { - unsigned long iterations = TEST_HOST_LOOP_N; - bool mode_selected = false; - uint64_t phys_offset = 0; - unsigned int mode; - int opt, i; - int wr_fract = 1; + int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); + struct test_params p = { + .iterations = TEST_HOST_LOOP_N, + .wr_fract = 1, + }; + int opt; dirty_log_manual_caps = kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | KVM_DIRTY_LOG_INITIALLY_SET); -#ifdef __x86_64__ - guest_mode_init(VM_MODE_PXXV48_4K, true, true); -#endif -#ifdef __aarch64__ - guest_mode_init(VM_MODE_P40V48_4K, true, true); - guest_mode_init(VM_MODE_P40V48_64K, true, true); - - { - unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE); - - if (limit >= 52) - guest_mode_init(VM_MODE_P52V48_64K, true, true); - if (limit >= 48) { - guest_mode_init(VM_MODE_P48V48_4K, true, true); - guest_mode_init(VM_MODE_P48V48_64K, true, true); - } - } -#endif -#ifdef __s390x__ - guest_mode_init(VM_MODE_P40V48_4K, true, true); -#endif + guest_modes_append_default(); while ((opt = getopt(argc, argv, "hi:p:m:b:f:v:")) != -1) { switch (opt) { case 'i': - iterations = strtol(optarg, NULL, 10); + p.iterations = strtol(optarg, NULL, 10); break; case 'p': - phys_offset = strtoull(optarg, NULL, 0); + p.phys_offset = strtoull(optarg, NULL, 0); break; case 'm': - if (!mode_selected) { - for (i = 0; i < NUM_VM_MODES; ++i) - guest_modes[i].enabled = false; - mode_selected = true; - } - mode = strtoul(optarg, NULL, 10); - TEST_ASSERT(mode < NUM_VM_MODES, - "Guest mode ID %d too big", mode); - guest_modes[mode].enabled = true; + guest_modes_cmdline(optarg); break; case 'b': guest_percpu_mem_size = parse_size(optarg); break; case 'f': - wr_fract = atoi(optarg); - TEST_ASSERT(wr_fract >= 1, + p.wr_fract = atoi(optarg); + TEST_ASSERT(p.wr_fract >= 1, "Write fraction cannot be less than one"); break; case 'v': nr_vcpus = atoi(optarg); - TEST_ASSERT(nr_vcpus > 0, - "Must have a positive number of vCPUs"); - TEST_ASSERT(nr_vcpus <= MAX_VCPUS, - "This test does not currently support\n" - "more than %d vCPUs.", MAX_VCPUS); + TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus, + "Invalid number of vcpus, must be between 1 and %d", max_vcpus); break; case 'h': default: @@ -348,18 +302,11 @@ int main(int argc, char *argv[]) } } - TEST_ASSERT(iterations >= 2, "The test should have at least two iterations"); + TEST_ASSERT(p.iterations >= 2, "The test should have at least two iterations"); - pr_info("Test iterations: %"PRIu64"\n", iterations); + pr_info("Test iterations: %"PRIu64"\n", p.iterations); - for (i = 0; i < NUM_VM_MODES; ++i) { - if (!guest_modes[i].enabled) - continue; - TEST_ASSERT(guest_modes[i].supported, - "Guest mode ID %d (%s) not supported.", - i, vm_guest_mode_string(i)); - run_test(i, iterations, phys_offset, wr_fract); - } + for_each_guest_mode(run_test, &p); return 0; } diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 471baecb7772a..bb2752d78fe3a 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -9,8 +9,6 @@ #include #include -#include -#include #include #include #include @@ -20,8 +18,9 @@ #include #include -#include "test_util.h" #include "kvm_util.h" +#include "test_util.h" +#include "guest_modes.h" #include "processor.h" #define VCPU_ID 1 @@ -673,9 +672,15 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid, #define DIRTY_MEM_BITS 30 /* 1G */ #define PAGE_SHIFT_4K 12 -static void run_test(enum vm_guest_mode mode, unsigned long iterations, - unsigned long interval, uint64_t phys_offset) +struct test_params { + unsigned long iterations; + unsigned long interval; + uint64_t phys_offset; +}; + +static void run_test(enum vm_guest_mode mode, void *arg) { + struct test_params *p = arg; struct kvm_vm *vm; unsigned long *bmap; @@ -709,12 +714,12 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, host_page_size = getpagesize(); host_num_pages = vm_num_host_pages(mode, guest_num_pages); - if (!phys_offset) { + if (!p->phys_offset) { guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) * guest_page_size; guest_test_phys_mem &= ~(host_page_size - 1); } else { - guest_test_phys_mem = phys_offset; + guest_test_phys_mem = p->phys_offset; } #ifdef __s390x__ @@ -758,9 +763,9 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, pthread_create(&vcpu_thread, NULL, vcpu_worker, vm); - while (iteration < iterations) { + while (iteration < p->iterations) { /* Give the vcpu thread some time to dirty some pages */ - usleep(interval * 1000); + usleep(p->interval * 1000); log_mode_collect_dirty_pages(vm, TEST_MEM_SLOT_INDEX, bmap, host_num_pages); vm_dirty_log_verify(mode, bmap); @@ -783,20 +788,8 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, kvm_vm_free(vm); } -struct guest_mode { - bool supported; - bool enabled; -}; -static struct guest_mode guest_modes[NUM_VM_MODES]; - -#define guest_mode_init(mode, supported, enabled) ({ \ - guest_modes[mode] = (struct guest_mode){ supported, enabled }; \ -}) - static void help(char *name) { - int i; - puts(""); printf("usage: %s [-h] [-i iterations] [-I interval] " "[-p offset] [-m mode]\n", name); @@ -813,51 +806,23 @@ static void help(char *name) printf(" -M: specify the host logging mode " "(default: run all log modes). Supported modes: \n\t"); log_modes_dump(); - printf(" -m: specify the guest mode ID to test " - "(default: test all supported modes)\n" - " This option may be used multiple times.\n" - " Guest mode IDs:\n"); - for (i = 0; i < NUM_VM_MODES; ++i) { - printf(" %d: %s%s\n", i, vm_guest_mode_string(i), - guest_modes[i].supported ? " (supported)" : ""); - } + guest_modes_help(); puts(""); exit(0); } int main(int argc, char *argv[]) { - unsigned long iterations = TEST_HOST_LOOP_N; - unsigned long interval = TEST_HOST_LOOP_INTERVAL; - bool mode_selected = false; - uint64_t phys_offset = 0; - unsigned int mode; - int opt, i, j; + struct test_params p = { + .iterations = TEST_HOST_LOOP_N, + .interval = TEST_HOST_LOOP_INTERVAL, + }; + int opt, i; sem_init(&dirty_ring_vcpu_stop, 0, 0); sem_init(&dirty_ring_vcpu_cont, 0, 0); -#ifdef __x86_64__ - guest_mode_init(VM_MODE_PXXV48_4K, true, true); -#endif -#ifdef __aarch64__ - guest_mode_init(VM_MODE_P40V48_4K, true, true); - guest_mode_init(VM_MODE_P40V48_64K, true, true); - - { - unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE); - - if (limit >= 52) - guest_mode_init(VM_MODE_P52V48_64K, true, true); - if (limit >= 48) { - guest_mode_init(VM_MODE_P48V48_4K, true, true); - guest_mode_init(VM_MODE_P48V48_64K, true, true); - } - } -#endif -#ifdef __s390x__ - guest_mode_init(VM_MODE_P40V48_4K, true, true); -#endif + guest_modes_append_default(); while ((opt = getopt(argc, argv, "c:hi:I:p:m:M:")) != -1) { switch (opt) { @@ -865,24 +830,16 @@ int main(int argc, char *argv[]) test_dirty_ring_count = strtol(optarg, NULL, 10); break; case 'i': - iterations = strtol(optarg, NULL, 10); + p.iterations = strtol(optarg, NULL, 10); break; case 'I': - interval = strtol(optarg, NULL, 10); + p.interval = strtol(optarg, NULL, 10); break; case 'p': - phys_offset = strtoull(optarg, NULL, 0); + p.phys_offset = strtoull(optarg, NULL, 0); break; case 'm': - if (!mode_selected) { - for (i = 0; i < NUM_VM_MODES; ++i) - guest_modes[i].enabled = false; - mode_selected = true; - } - mode = strtoul(optarg, NULL, 10); - TEST_ASSERT(mode < NUM_VM_MODES, - "Guest mode ID %d too big", mode); - guest_modes[mode].enabled = true; + guest_modes_cmdline(optarg); break; case 'M': if (!strcmp(optarg, "all")) { @@ -911,32 +868,24 @@ int main(int argc, char *argv[]) } } - TEST_ASSERT(iterations > 2, "Iterations must be greater than two"); - TEST_ASSERT(interval > 0, "Interval must be greater than zero"); + TEST_ASSERT(p.iterations > 2, "Iterations must be greater than two"); + TEST_ASSERT(p.interval > 0, "Interval must be greater than zero"); pr_info("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n", - iterations, interval); + p.iterations, p.interval); srandom(time(0)); - for (i = 0; i < NUM_VM_MODES; ++i) { - if (!guest_modes[i].enabled) - continue; - TEST_ASSERT(guest_modes[i].supported, - "Guest mode ID %d (%s) not supported.", - i, vm_guest_mode_string(i)); - if (host_log_mode_option == LOG_MODE_ALL) { - /* Run each log mode */ - for (j = 0; j < LOG_MODE_NUM; j++) { - pr_info("Testing Log Mode '%s'\n", - log_modes[j].name); - host_log_mode = j; - run_test(i, iterations, interval, phys_offset); - } - } else { - host_log_mode = host_log_mode_option; - run_test(i, iterations, interval, phys_offset); + if (host_log_mode_option == LOG_MODE_ALL) { + /* Run each log mode */ + for (i = 0; i < LOG_MODE_NUM; i++) { + pr_info("Testing Log Mode '%s'\n", log_modes[i].name); + host_log_mode = i; + for_each_guest_mode(run_test, &p); } + } else { + host_log_mode = host_log_mode_option; + for_each_guest_mode(run_test, &p); } return 0; diff --git a/tools/testing/selftests/kvm/include/guest_modes.h b/tools/testing/selftests/kvm/include/guest_modes.h new file mode 100644 index 0000000000000..b691df33e64e1 --- /dev/null +++ b/tools/testing/selftests/kvm/include/guest_modes.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020, Red Hat, Inc. + */ +#include "kvm_util.h" + +struct guest_mode { + bool supported; + bool enabled; +}; + +extern struct guest_mode guest_modes[NUM_VM_MODES]; + +#define guest_mode_append(mode, supported, enabled) ({ \ + guest_modes[mode] = (struct guest_mode){ supported, enabled }; \ +}) + +void guest_modes_append_default(void); +void for_each_guest_mode(void (*func)(enum vm_guest_mode, void *), void *arg); +void guest_modes_help(void); +void guest_modes_cmdline(const char *arg); diff --git a/tools/testing/selftests/kvm/lib/guest_modes.c b/tools/testing/selftests/kvm/lib/guest_modes.c new file mode 100644 index 0000000000000..25bff307c71f2 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/guest_modes.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020, Red Hat, Inc. + */ +#include "guest_modes.h" + +struct guest_mode guest_modes[NUM_VM_MODES]; + +void guest_modes_append_default(void) +{ + guest_mode_append(VM_MODE_DEFAULT, true, true); + +#ifdef __aarch64__ + guest_mode_append(VM_MODE_P40V48_64K, true, true); + { + unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE); + if (limit >= 52) + guest_mode_append(VM_MODE_P52V48_64K, true, true); + if (limit >= 48) { + guest_mode_append(VM_MODE_P48V48_4K, true, true); + guest_mode_append(VM_MODE_P48V48_64K, true, true); + } + } +#endif +} + +void for_each_guest_mode(void (*func)(enum vm_guest_mode, void *), void *arg) +{ + int i; + + for (i = 0; i < NUM_VM_MODES; ++i) { + if (!guest_modes[i].enabled) + continue; + TEST_ASSERT(guest_modes[i].supported, + "Guest mode ID %d (%s) not supported.", + i, vm_guest_mode_string(i)); + func(i, arg); + } +} + +void guest_modes_help(void) +{ + int i; + + printf(" -m: specify the guest mode ID to test\n" + " (default: test all supported modes)\n" + " This option may be used multiple times.\n" + " Guest mode IDs:\n"); + for (i = 0; i < NUM_VM_MODES; ++i) { + printf(" %d: %s%s\n", i, vm_guest_mode_string(i), + guest_modes[i].supported ? " (supported)" : ""); + } +} + +void guest_modes_cmdline(const char *arg) +{ + static bool mode_selected; + unsigned int mode; + int i; + + if (!mode_selected) { + for (i = 0; i < NUM_VM_MODES; ++i) + guest_modes[i].enabled = false; + mode_selected = true; + } + + mode = strtoul(optarg, NULL, 10); + TEST_ASSERT(mode < NUM_VM_MODES, "Guest mode ID %d too big", mode); + guest_modes[mode].enabled = true; +} -- GitLab From 1133e17ea7c9929ff7b90e81d8926f9e870748e9 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Fri, 18 Dec 2020 15:17:33 +0100 Subject: [PATCH 1573/1894] KVM: selftests: Use vm_create_with_vcpus in create_vm Reviewed-by: Ben Gardon Signed-off-by: Andrew Jones Message-Id: <20201218141734.54359-3-drjones@redhat.com> Signed-off-by: Paolo Bonzini --- .../selftests/kvm/demand_paging_test.c | 2 +- .../selftests/kvm/dirty_log_perf_test.c | 2 - .../testing/selftests/kvm/include/kvm_util.h | 8 ++++ .../selftests/kvm/include/perf_test_util.h | 47 +++++-------------- tools/testing/selftests/kvm/lib/kvm_util.c | 9 +--- 5 files changed, 21 insertions(+), 47 deletions(-) diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 946161a9ce2d8..b0c41de32e9bb 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -7,7 +7,7 @@ * Copyright (C) 2019, Google, Inc. */ -#define _GNU_SOURCE /* for program_invocation_name and pipe2 */ +#define _GNU_SOURCE /* for pipe2 */ #include #include diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index 506741eb5d7f3..36bea75a8d6fa 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -8,8 +8,6 @@ * Copyright (C) 2020, Google, Inc. */ -#define _GNU_SOURCE /* for program_invocation_name */ - #include #include #include diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index dfa9d369e8fc6..149766ecd68be 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -70,6 +70,14 @@ enum vm_guest_mode { #define vm_guest_mode_string(m) vm_guest_mode_string[m] extern const char * const vm_guest_mode_string[]; +struct vm_guest_mode_params { + unsigned int pa_bits; + unsigned int va_bits; + unsigned int page_size; + unsigned int page_shift; +}; +extern const struct vm_guest_mode_params vm_guest_mode_params[]; + enum vm_mem_backing_src_type { VM_MEM_SRC_ANONYMOUS, VM_MEM_SRC_ANONYMOUS_THP, diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h index 239421e4f6b81..cd4c258f458d3 100644 --- a/tools/testing/selftests/kvm/include/perf_test_util.h +++ b/tools/testing/selftests/kvm/include/perf_test_util.h @@ -13,9 +13,6 @@ #define MAX_VCPUS 512 -#define PAGE_SHIFT_4K 12 -#define PTES_PER_4K_PT 512 - #define TEST_MEM_SLOT_INDEX 1 /* Default guest test virtual memory offset */ @@ -94,41 +91,26 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus, uint64_t vcpu_memory_bytes) { struct kvm_vm *vm; - uint64_t pages = DEFAULT_GUEST_PHY_PAGES; uint64_t guest_num_pages; - /* Account for a few pages per-vCPU for stacks */ - pages += DEFAULT_STACK_PGS * vcpus; - - /* - * Reserve twice the ammount of memory needed to map the test region and - * the page table / stacks region, at 4k, for page tables. Do the - * calculation with 4K page size: the smallest of all archs. (e.g., 64K - * page size guest will need even less memory for page tables). - */ - pages += (2 * pages) / PTES_PER_4K_PT; - pages += ((2 * vcpus * vcpu_memory_bytes) >> PAGE_SHIFT_4K) / - PTES_PER_4K_PT; - pages = vm_adjust_num_guest_pages(mode, pages); - pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); - vm = vm_create(mode, pages, O_RDWR); - kvm_vm_elf_load(vm, program_invocation_name, 0, 0); -#ifdef __x86_64__ - vm_create_irqchip(vm); -#endif - - perf_test_args.vm = vm; - perf_test_args.guest_page_size = vm_get_page_size(vm); perf_test_args.host_page_size = getpagesize(); + perf_test_args.guest_page_size = vm_guest_mode_params[mode].page_size; + guest_num_pages = vm_adjust_num_guest_pages(mode, + (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size); + + TEST_ASSERT(vcpu_memory_bytes % perf_test_args.host_page_size == 0, + "Guest memory size is not host page size aligned."); TEST_ASSERT(vcpu_memory_bytes % perf_test_args.guest_page_size == 0, "Guest memory size is not guest page size aligned."); - guest_num_pages = (vcpus * vcpu_memory_bytes) / - perf_test_args.guest_page_size; - guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); + vm = vm_create_with_vcpus(mode, vcpus, + (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size, + 0, guest_code, NULL); + + perf_test_args.vm = vm; /* * If there should be more memory in the guest test region than there @@ -140,18 +122,13 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus, guest_num_pages, vm_get_max_gfn(vm), vcpus, vcpu_memory_bytes); - TEST_ASSERT(vcpu_memory_bytes % perf_test_args.host_page_size == 0, - "Guest memory size is not host page size aligned."); - guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) * perf_test_args.guest_page_size; guest_test_phys_mem &= ~(perf_test_args.host_page_size - 1); - #ifdef __s390x__ /* Align to 1M (segment size) */ guest_test_phys_mem &= ~((1 << 20) - 1); #endif - pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); /* Add an extra memory slot for testing */ @@ -177,8 +154,6 @@ static void add_vcpus(struct kvm_vm *vm, int vcpus, uint64_t vcpu_memory_bytes) for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; - vm_vcpu_add_default(vm, vcpu_id, guest_code); - vcpu_args->vcpu_id = vcpu_id; vcpu_args->gva = guest_test_virt_mem + (vcpu_id * vcpu_memory_bytes); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 88ef7067f1e66..fa5a90e6c6f07 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -153,14 +153,7 @@ const char * const vm_guest_mode_string[] = { _Static_assert(sizeof(vm_guest_mode_string)/sizeof(char *) == NUM_VM_MODES, "Missing new mode strings?"); -struct vm_guest_mode_params { - unsigned int pa_bits; - unsigned int va_bits; - unsigned int page_size; - unsigned int page_shift; -}; - -static const struct vm_guest_mode_params vm_guest_mode_params[] = { +const struct vm_guest_mode_params vm_guest_mode_params[] = { { 52, 48, 0x1000, 12 }, { 52, 48, 0x10000, 16 }, { 48, 48, 0x1000, 12 }, -- GitLab From b268b6f0bd36322358accb15c45683a9e1220231 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Fri, 18 Dec 2020 15:17:34 +0100 Subject: [PATCH 1574/1894] KVM: selftests: Implement perf_test_util more conventionally It's not conventional C to put non-inline functions in header files. Create a source file for the functions instead. Also reduce the amount of globals and rename the functions to something less generic. Reviewed-by: Ben Gardon Reviewed-by: Peter Xu Signed-off-by: Andrew Jones Message-Id: <20201218141734.54359-4-drjones@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile | 2 +- .../selftests/kvm/demand_paging_test.c | 11 +- .../selftests/kvm/dirty_log_perf_test.c | 22 +-- .../testing/selftests/kvm/include/kvm_util.h | 1 + .../selftests/kvm/include/perf_test_util.h | 142 ++---------------- .../selftests/kvm/lib/perf_test_util.c | 134 +++++++++++++++++ 6 files changed, 166 insertions(+), 146 deletions(-) create mode 100644 tools/testing/selftests/kvm/lib/perf_test_util.c diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index a7286a08c3ae9..fe41c6a0fa67d 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -33,7 +33,7 @@ ifeq ($(ARCH),s390) UNAME_M := s390x endif -LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c +LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c lib/perf_test_util.c LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index b0c41de32e9bb..cdad1eca72f74 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -36,12 +36,14 @@ #define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__) #endif +static int nr_vcpus = 1; +static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; static char *guest_data_prototype; static void *vcpu_worker(void *data) { int ret; - struct vcpu_args *vcpu_args = (struct vcpu_args *)data; + struct perf_test_vcpu_args *vcpu_args = (struct perf_test_vcpu_args *)data; int vcpu_id = vcpu_args->vcpu_id; struct kvm_vm *vm = perf_test_args.vm; struct kvm_run *run; @@ -263,7 +265,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) int vcpu_id; int r; - vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size); + vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size); perf_test_args.wr_fract = 1; @@ -275,7 +277,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads)); TEST_ASSERT(vcpu_threads, "Memory allocation failed"); - add_vcpus(vm, nr_vcpus, guest_percpu_mem_size); + perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size); if (p->use_uffd) { uffd_handler_threads = @@ -359,8 +361,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) perf_test_args.vcpu_args[0].pages * nr_vcpus / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); - ucall_uninit(vm); - kvm_vm_free(vm); + perf_test_destroy_vm(vm); free(guest_data_prototype); free(vcpu_threads); diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index 36bea75a8d6fa..2283a0ec74a97 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -22,11 +22,14 @@ /* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop)*/ #define TEST_HOST_LOOP_N 2UL +static int nr_vcpus = 1; +static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; + /* Host variables */ static u64 dirty_log_manual_caps; static bool host_quit; static uint64_t iteration; -static uint64_t vcpu_last_completed_iteration[MAX_VCPUS]; +static uint64_t vcpu_last_completed_iteration[KVM_MAX_VCPUS]; static void *vcpu_worker(void *data) { @@ -38,7 +41,7 @@ static void *vcpu_worker(void *data) struct timespec ts_diff; struct timespec total = (struct timespec){0}; struct timespec avg; - struct vcpu_args *vcpu_args = (struct vcpu_args *)data; + struct perf_test_vcpu_args *vcpu_args = (struct perf_test_vcpu_args *)data; int vcpu_id = vcpu_args->vcpu_id; vcpu_args_set(vm, vcpu_id, 1, vcpu_id); @@ -108,7 +111,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct kvm_enable_cap cap = {}; struct timespec clear_dirty_log_total = (struct timespec){0}; - vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size); + vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size); perf_test_args.wr_fract = p->wr_fract; @@ -126,7 +129,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads)); TEST_ASSERT(vcpu_threads, "Memory allocation failed"); - add_vcpus(vm, nr_vcpus, guest_percpu_mem_size); + perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size); sync_global_to_guest(vm, perf_test_args); @@ -152,7 +155,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) /* Enable dirty logging */ clock_gettime(CLOCK_MONOTONIC, &start); - vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX, + vm_mem_region_set_flags(vm, PERF_TEST_MEM_SLOT_INDEX, KVM_MEM_LOG_DIRTY_PAGES); ts_diff = timespec_diff_now(start); pr_info("Enabling dirty logging time: %ld.%.9lds\n\n", @@ -179,7 +182,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) iteration, ts_diff.tv_sec, ts_diff.tv_nsec); clock_gettime(CLOCK_MONOTONIC, &start); - kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); + kvm_vm_get_dirty_log(vm, PERF_TEST_MEM_SLOT_INDEX, bmap); ts_diff = timespec_diff_now(start); get_dirty_log_total = timespec_add(get_dirty_log_total, @@ -189,7 +192,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) if (dirty_log_manual_caps) { clock_gettime(CLOCK_MONOTONIC, &start); - kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0, + kvm_vm_clear_dirty_log(vm, PERF_TEST_MEM_SLOT_INDEX, bmap, 0, host_num_pages); ts_diff = timespec_diff_now(start); @@ -207,7 +210,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) /* Disable dirty logging */ clock_gettime(CLOCK_MONOTONIC, &start); - vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX, 0); + vm_mem_region_set_flags(vm, PERF_TEST_MEM_SLOT_INDEX, 0); ts_diff = timespec_diff_now(start); pr_info("Disabling dirty logging time: %ld.%.9lds\n", ts_diff.tv_sec, ts_diff.tv_nsec); @@ -226,8 +229,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) free(bmap); free(vcpu_threads); - ucall_uninit(vm); - kvm_vm_free(vm); + perf_test_destroy_vm(vm); } static void help(char *name) diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 149766ecd68be..5cbb861525edf 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -16,6 +16,7 @@ #include "sparsebit.h" +#define KVM_MAX_VCPUS 512 /* * Callers of kvm_util only have an incomplete/opaque description of the diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h index cd4c258f458d3..b1188823c31b7 100644 --- a/tools/testing/selftests/kvm/include/perf_test_util.h +++ b/tools/testing/selftests/kvm/include/perf_test_util.h @@ -9,35 +9,15 @@ #define SELFTEST_KVM_PERF_TEST_UTIL_H #include "kvm_util.h" -#include "processor.h" - -#define MAX_VCPUS 512 - -#define TEST_MEM_SLOT_INDEX 1 /* Default guest test virtual memory offset */ #define DEFAULT_GUEST_TEST_MEM 0xc0000000 #define DEFAULT_PER_VCPU_MEM_SIZE (1 << 30) /* 1G */ -/* - * Guest physical memory offset of the testing memory slot. - * This will be set to the topmost valid physical address minus - * the test memory size. - */ -static uint64_t guest_test_phys_mem; - -/* - * Guest virtual memory offset of the testing memory slot. - * Must not conflict with identity mapped test code. - */ -static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; -static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; - -/* Number of VCPUs for the test */ -static int nr_vcpus = 1; +#define PERF_TEST_MEM_SLOT_INDEX 1 -struct vcpu_args { +struct perf_test_vcpu_args { uint64_t gva; uint64_t pages; @@ -51,119 +31,21 @@ struct perf_test_args { uint64_t guest_page_size; int wr_fract; - struct vcpu_args vcpu_args[MAX_VCPUS]; + struct perf_test_vcpu_args vcpu_args[KVM_MAX_VCPUS]; }; -static struct perf_test_args perf_test_args; +extern struct perf_test_args perf_test_args; /* - * Continuously write to the first 8 bytes of each page in the - * specified region. + * Guest physical memory offset of the testing memory slot. + * This will be set to the topmost valid physical address minus + * the test memory size. */ -static void guest_code(uint32_t vcpu_id) -{ - struct vcpu_args *vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; - uint64_t gva; - uint64_t pages; - int i; - - /* Make sure vCPU args data structure is not corrupt. */ - GUEST_ASSERT(vcpu_args->vcpu_id == vcpu_id); - - gva = vcpu_args->gva; - pages = vcpu_args->pages; - - while (true) { - for (i = 0; i < pages; i++) { - uint64_t addr = gva + (i * perf_test_args.guest_page_size); - - if (i % perf_test_args.wr_fract == 0) - *(uint64_t *)addr = 0x0123456789ABCDEF; - else - READ_ONCE(*(uint64_t *)addr); - } - - GUEST_SYNC(1); - } -} - -static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus, - uint64_t vcpu_memory_bytes) -{ - struct kvm_vm *vm; - uint64_t guest_num_pages; - - pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); - - perf_test_args.host_page_size = getpagesize(); - perf_test_args.guest_page_size = vm_guest_mode_params[mode].page_size; - - guest_num_pages = vm_adjust_num_guest_pages(mode, - (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size); - - TEST_ASSERT(vcpu_memory_bytes % perf_test_args.host_page_size == 0, - "Guest memory size is not host page size aligned."); - TEST_ASSERT(vcpu_memory_bytes % perf_test_args.guest_page_size == 0, - "Guest memory size is not guest page size aligned."); - - vm = vm_create_with_vcpus(mode, vcpus, - (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size, - 0, guest_code, NULL); - - perf_test_args.vm = vm; - - /* - * If there should be more memory in the guest test region than there - * can be pages in the guest, it will definitely cause problems. - */ - TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm), - "Requested more guest memory than address space allows.\n" - " guest pages: %lx max gfn: %x vcpus: %d wss: %lx]\n", - guest_num_pages, vm_get_max_gfn(vm), vcpus, - vcpu_memory_bytes); - - guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) * - perf_test_args.guest_page_size; - guest_test_phys_mem &= ~(perf_test_args.host_page_size - 1); -#ifdef __s390x__ - /* Align to 1M (segment size) */ - guest_test_phys_mem &= ~((1 << 20) - 1); -#endif - pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); - - /* Add an extra memory slot for testing */ - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, - guest_test_phys_mem, - TEST_MEM_SLOT_INDEX, - guest_num_pages, 0); - - /* Do mapping for the demand paging memory slot */ - virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0); - - ucall_init(vm, NULL); - - return vm; -} - -static void add_vcpus(struct kvm_vm *vm, int vcpus, uint64_t vcpu_memory_bytes) -{ - vm_paddr_t vcpu_gpa; - struct vcpu_args *vcpu_args; - int vcpu_id; - - for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { - vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; - - vcpu_args->vcpu_id = vcpu_id; - vcpu_args->gva = guest_test_virt_mem + - (vcpu_id * vcpu_memory_bytes); - vcpu_args->pages = vcpu_memory_bytes / - perf_test_args.guest_page_size; +extern uint64_t guest_test_phys_mem; - vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes); - pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n", - vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes); - } -} +struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus, + uint64_t vcpu_memory_bytes); +void perf_test_destroy_vm(struct kvm_vm *vm); +void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus, uint64_t vcpu_memory_bytes); #endif /* SELFTEST_KVM_PERF_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c new file mode 100644 index 0000000000000..9be1944c2d1c9 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/perf_test_util.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020, Google LLC. + */ + +#include "kvm_util.h" +#include "perf_test_util.h" +#include "processor.h" + +struct perf_test_args perf_test_args; + +uint64_t guest_test_phys_mem; + +/* + * Guest virtual memory offset of the testing memory slot. + * Must not conflict with identity mapped test code. + */ +static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; + +/* + * Continuously write to the first 8 bytes of each page in the + * specified region. + */ +static void guest_code(uint32_t vcpu_id) +{ + struct perf_test_vcpu_args *vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; + uint64_t gva; + uint64_t pages; + int i; + + /* Make sure vCPU args data structure is not corrupt. */ + GUEST_ASSERT(vcpu_args->vcpu_id == vcpu_id); + + gva = vcpu_args->gva; + pages = vcpu_args->pages; + + while (true) { + for (i = 0; i < pages; i++) { + uint64_t addr = gva + (i * perf_test_args.guest_page_size); + + if (i % perf_test_args.wr_fract == 0) + *(uint64_t *)addr = 0x0123456789ABCDEF; + else + READ_ONCE(*(uint64_t *)addr); + } + + GUEST_SYNC(1); + } +} + +struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus, + uint64_t vcpu_memory_bytes) +{ + struct kvm_vm *vm; + uint64_t guest_num_pages; + + pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); + + perf_test_args.host_page_size = getpagesize(); + perf_test_args.guest_page_size = vm_guest_mode_params[mode].page_size; + + guest_num_pages = vm_adjust_num_guest_pages(mode, + (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size); + + TEST_ASSERT(vcpu_memory_bytes % perf_test_args.host_page_size == 0, + "Guest memory size is not host page size aligned."); + TEST_ASSERT(vcpu_memory_bytes % perf_test_args.guest_page_size == 0, + "Guest memory size is not guest page size aligned."); + + vm = vm_create_with_vcpus(mode, vcpus, + (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size, + 0, guest_code, NULL); + + perf_test_args.vm = vm; + + /* + * If there should be more memory in the guest test region than there + * can be pages in the guest, it will definitely cause problems. + */ + TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm), + "Requested more guest memory than address space allows.\n" + " guest pages: %lx max gfn: %x vcpus: %d wss: %lx]\n", + guest_num_pages, vm_get_max_gfn(vm), vcpus, + vcpu_memory_bytes); + + guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) * + perf_test_args.guest_page_size; + guest_test_phys_mem &= ~(perf_test_args.host_page_size - 1); +#ifdef __s390x__ + /* Align to 1M (segment size) */ + guest_test_phys_mem &= ~((1 << 20) - 1); +#endif + pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); + + /* Add an extra memory slot for testing */ + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + guest_test_phys_mem, + PERF_TEST_MEM_SLOT_INDEX, + guest_num_pages, 0); + + /* Do mapping for the demand paging memory slot */ + virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0); + + ucall_init(vm, NULL); + + return vm; +} + +void perf_test_destroy_vm(struct kvm_vm *vm) +{ + ucall_uninit(vm); + kvm_vm_free(vm); +} + +void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus, uint64_t vcpu_memory_bytes) +{ + vm_paddr_t vcpu_gpa; + struct perf_test_vcpu_args *vcpu_args; + int vcpu_id; + + for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { + vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; + + vcpu_args->vcpu_id = vcpu_id; + vcpu_args->gva = guest_test_virt_mem + + (vcpu_id * vcpu_memory_bytes); + vcpu_args->pages = vcpu_memory_bytes / + perf_test_args.guest_page_size; + + vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes); + pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n", + vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes); + } +} -- GitLab From 2f80d502d627f30257ba7e3655e71c373b7d1a5a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 22 Dec 2020 05:20:43 -0500 Subject: [PATCH 1575/1894] KVM: x86: fix shift out of bounds reported by UBSAN Since we know that e >= s, we can reassociate the left shift, changing the shifted number from 1 to 2 in exchange for decreasing the right hand side by 1. Reported-by: syzbot+e87846c48bf72bc85311@syzkaller.appspotmail.com Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 9c4a9c8e43d90..581925e476d6c 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -49,7 +49,7 @@ static inline u64 rsvd_bits(int s, int e) if (e < s) return 0; - return ((1ULL << (e - s + 1)) - 1) << s; + return ((2ULL << (e - s)) - 1) << s; } void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 access_mask); -- GitLab From 7f0c1f1a8277de906a242a6ef907476149f006de Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 6 Jan 2021 10:29:16 -0800 Subject: [PATCH 1576/1894] MAINTAINERS: Really update email address for Sean Christopherson Use my @google.com address in MAINTAINERS, somehow only the .mailmap entry was added when the original update patch was applied. Fixes: c2b1209d852f ("MAINTAINERS: Update email address for Sean Christopherson") Cc: kvm@vger.kernel.org Reported-by: Nathan Chancellor Signed-off-by: Sean Christopherson Message-Id: <20210106182916.331743-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 281de213ef478..9d8b77332ae38 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9672,7 +9672,7 @@ F: tools/testing/selftests/kvm/s390x/ KERNEL VIRTUAL MACHINE FOR X86 (KVM/x86) M: Paolo Bonzini -R: Sean Christopherson +R: Sean Christopherson R: Vitaly Kuznetsov R: Wanpeng Li R: Jim Mattson -- GitLab From de7860c8a388e4cb757c7da26889b9e2641ffcfe Mon Sep 17 00:00:00 2001 From: Stephen Zhang Date: Fri, 18 Dec 2020 15:51:37 +0800 Subject: [PATCH 1577/1894] KVM: x86: change in pv_eoi_get_pending() to make code more readable Signed-off-by: Stephen Zhang Message-Id: <1608277897-1932-1-git-send-email-stephenzhangzsd@gmail.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 3136e05831cf3..78823227c5929 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -674,7 +674,7 @@ static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu) (unsigned long long)vcpu->arch.pv_eoi.msr_val); return false; } - return val & 0x1; + return val & KVM_PV_EOI_ENABLED; } static void pv_eoi_set_pending(struct kvm_vcpu *vcpu) -- GitLab From 88bf56d04bc3564542049ec4ec168a8b60d0b48c Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 17 Dec 2020 23:41:18 +0800 Subject: [PATCH 1578/1894] kvm: check tlbs_dirty directly In kvm_mmu_notifier_invalidate_range_start(), tlbs_dirty is used as: need_tlb_flush |= kvm->tlbs_dirty; with need_tlb_flush's type being int and tlbs_dirty's type being long. It means that tlbs_dirty is always used as int and the higher 32 bits is useless. We need to check tlbs_dirty in a correct way and this change checks it directly without propagating it to need_tlb_flush. Note: it's _extremely_ unlikely this neglecting of higher 32 bits can cause problems in practice. It would require encountering tlbs_dirty on a 4 billion count boundary, and KVM would need to be using shadow paging or be running a nested guest. Cc: stable@vger.kernel.org Fixes: a4ee1ca4a36e ("KVM: MMU: delay flush all tlbs on sync_page path") Signed-off-by: Lai Jiangshan Message-Id: <20201217154118.16497-1-jiangshanlai@gmail.com> Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 3abcb2ce5b7db..19dae28904f71 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -485,9 +485,8 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, kvm->mmu_notifier_count++; need_tlb_flush = kvm_unmap_hva_range(kvm, range->start, range->end, range->flags); - need_tlb_flush |= kvm->tlbs_dirty; /* we've to flush the tlb before the pages can be freed */ - if (need_tlb_flush) + if (need_tlb_flush || kvm->tlbs_dirty) kvm_flush_remote_tlbs(kvm); spin_unlock(&kvm->mmu_lock); -- GitLab From a889ea54b3daa63ee1463dc19ed699407d61458b Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Wed, 6 Jan 2021 16:19:34 -0800 Subject: [PATCH 1579/1894] KVM: x86/mmu: Ensure TDP MMU roots are freed after yield Many TDP MMU functions which need to perform some action on all TDP MMU roots hold a reference on that root so that they can safely drop the MMU lock in order to yield to other threads. However, when releasing the reference on the root, there is a bug: the root will not be freed even if its reference count (root_count) is reduced to 0. To simplify acquiring and releasing references on TDP MMU root pages, and to ensure that these roots are properly freed, move the get/put operations into another TDP MMU root iterator macro. Moving the get/put operations into an iterator macro also helps simplify control flow when a root does need to be freed. Note that using the list_for_each_entry_safe macro would not have been appropriate in this situation because it could keep a pointer to the next root across an MMU lock release + reacquire, during which time that root could be freed. Reported-by: Maciej S. Szmigiero Suggested-by: Paolo Bonzini Fixes: faaf05b00aec ("kvm: x86/mmu: Support zapping SPTEs in the TDP MMU") Fixes: 063afacd8730 ("kvm: x86/mmu: Support invalidate range MMU notifier for TDP MMU") Fixes: a6a0b05da9f3 ("kvm: x86/mmu: Support dirty logging for the TDP MMU") Fixes: 14881998566d ("kvm: x86/mmu: Support disabling dirty logging for the tdp MMU") Signed-off-by: Ben Gardon Message-Id: <20210107001935.3732070-1-bgardon@google.com> Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/tdp_mmu.c | 104 +++++++++++++++++-------------------- 1 file changed, 48 insertions(+), 56 deletions(-) diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 6574af2d09944..2ef8615f9dba8 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -44,7 +44,48 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots)); } -#define for_each_tdp_mmu_root(_kvm, _root) \ +static void tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root) +{ + if (kvm_mmu_put_root(kvm, root)) + kvm_tdp_mmu_free_root(kvm, root); +} + +static inline bool tdp_mmu_next_root_valid(struct kvm *kvm, + struct kvm_mmu_page *root) +{ + lockdep_assert_held(&kvm->mmu_lock); + + if (list_entry_is_head(root, &kvm->arch.tdp_mmu_roots, link)) + return false; + + kvm_mmu_get_root(kvm, root); + return true; + +} + +static inline struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm, + struct kvm_mmu_page *root) +{ + struct kvm_mmu_page *next_root; + + next_root = list_next_entry(root, link); + tdp_mmu_put_root(kvm, root); + return next_root; +} + +/* + * Note: this iterator gets and puts references to the roots it iterates over. + * This makes it safe to release the MMU lock and yield within the loop, but + * if exiting the loop early, the caller must drop the reference to the most + * recent root. (Unless keeping a live reference is desirable.) + */ +#define for_each_tdp_mmu_root_yield_safe(_kvm, _root) \ + for (_root = list_first_entry(&_kvm->arch.tdp_mmu_roots, \ + typeof(*_root), link); \ + tdp_mmu_next_root_valid(_kvm, _root); \ + _root = tdp_mmu_next_root(_kvm, _root)) + +#define for_each_tdp_mmu_root(_kvm, _root) \ list_for_each_entry(_root, &_kvm->arch.tdp_mmu_roots, link) bool is_tdp_mmu_root(struct kvm *kvm, hpa_t hpa) @@ -447,18 +488,9 @@ bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end) struct kvm_mmu_page *root; bool flush = false; - for_each_tdp_mmu_root(kvm, root) { - /* - * Take a reference on the root so that it cannot be freed if - * this thread releases the MMU lock and yields in this loop. - */ - kvm_mmu_get_root(kvm, root); - + for_each_tdp_mmu_root_yield_safe(kvm, root) flush |= zap_gfn_range(kvm, root, start, end, true); - kvm_mmu_put_root(kvm, root); - } - return flush; } @@ -619,13 +651,7 @@ static int kvm_tdp_mmu_handle_hva_range(struct kvm *kvm, unsigned long start, int ret = 0; int as_id; - for_each_tdp_mmu_root(kvm, root) { - /* - * Take a reference on the root so that it cannot be freed if - * this thread releases the MMU lock and yields in this loop. - */ - kvm_mmu_get_root(kvm, root); - + for_each_tdp_mmu_root_yield_safe(kvm, root) { as_id = kvm_mmu_page_as_id(root); slots = __kvm_memslots(kvm, as_id); kvm_for_each_memslot(memslot, slots) { @@ -647,8 +673,6 @@ static int kvm_tdp_mmu_handle_hva_range(struct kvm *kvm, unsigned long start, ret |= handler(kvm, memslot, root, gfn_start, gfn_end, data); } - - kvm_mmu_put_root(kvm, root); } return ret; @@ -838,21 +862,13 @@ bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm, struct kvm_memory_slot *slot, int root_as_id; bool spte_set = false; - for_each_tdp_mmu_root(kvm, root) { + for_each_tdp_mmu_root_yield_safe(kvm, root) { root_as_id = kvm_mmu_page_as_id(root); if (root_as_id != slot->as_id) continue; - /* - * Take a reference on the root so that it cannot be freed if - * this thread releases the MMU lock and yields in this loop. - */ - kvm_mmu_get_root(kvm, root); - spte_set |= wrprot_gfn_range(kvm, root, slot->base_gfn, slot->base_gfn + slot->npages, min_level); - - kvm_mmu_put_root(kvm, root); } return spte_set; @@ -906,21 +922,13 @@ bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm, struct kvm_memory_slot *slot) int root_as_id; bool spte_set = false; - for_each_tdp_mmu_root(kvm, root) { + for_each_tdp_mmu_root_yield_safe(kvm, root) { root_as_id = kvm_mmu_page_as_id(root); if (root_as_id != slot->as_id) continue; - /* - * Take a reference on the root so that it cannot be freed if - * this thread releases the MMU lock and yields in this loop. - */ - kvm_mmu_get_root(kvm, root); - spte_set |= clear_dirty_gfn_range(kvm, root, slot->base_gfn, slot->base_gfn + slot->npages); - - kvm_mmu_put_root(kvm, root); } return spte_set; @@ -1029,21 +1037,13 @@ bool kvm_tdp_mmu_slot_set_dirty(struct kvm *kvm, struct kvm_memory_slot *slot) int root_as_id; bool spte_set = false; - for_each_tdp_mmu_root(kvm, root) { + for_each_tdp_mmu_root_yield_safe(kvm, root) { root_as_id = kvm_mmu_page_as_id(root); if (root_as_id != slot->as_id) continue; - /* - * Take a reference on the root so that it cannot be freed if - * this thread releases the MMU lock and yields in this loop. - */ - kvm_mmu_get_root(kvm, root); - spte_set |= set_dirty_gfn_range(kvm, root, slot->base_gfn, slot->base_gfn + slot->npages); - - kvm_mmu_put_root(kvm, root); } return spte_set; } @@ -1089,21 +1089,13 @@ void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm, struct kvm_mmu_page *root; int root_as_id; - for_each_tdp_mmu_root(kvm, root) { + for_each_tdp_mmu_root_yield_safe(kvm, root) { root_as_id = kvm_mmu_page_as_id(root); if (root_as_id != slot->as_id) continue; - /* - * Take a reference on the root so that it cannot be freed if - * this thread releases the MMU lock and yields in this loop. - */ - kvm_mmu_get_root(kvm, root); - zap_collapsible_spte_range(kvm, root, slot->base_gfn, slot->base_gfn + slot->npages); - - kvm_mmu_put_root(kvm, root); } } -- GitLab From c0dba6e46825716db15c4b3a8f05c85b4a59edda Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Wed, 6 Jan 2021 16:19:35 -0800 Subject: [PATCH 1580/1894] KVM: x86/mmu: Clarify TDP MMU page list invariants The tdp_mmu_roots and tdp_mmu_pages in struct kvm_arch should only contain pages with tdp_mmu_page set to true. tdp_mmu_pages should not contain any pages with a non-zero root_count and tdp_mmu_roots should only contain pages with a positive root_count, unless a thread holds the MMU lock and is in the process of modifying the list. Various functions expect these invariants to be maintained, but they are not explictily documented. Add to the comments on both fields to document the above invariants. Signed-off-by: Ben Gardon Message-Id: <20210107001935.3732070-2-bgardon@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3ab7b46087b74..afed3da3b3a09 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1010,9 +1010,21 @@ struct kvm_arch { */ bool tdp_mmu_enabled; - /* List of struct tdp_mmu_pages being used as roots */ + /* + * List of struct kvmp_mmu_pages being used as roots. + * All struct kvm_mmu_pages in the list should have + * tdp_mmu_page set. + * All struct kvm_mmu_pages in the list should have a positive + * root_count except when a thread holds the MMU lock and is removing + * an entry from the list. + */ struct list_head tdp_mmu_roots; - /* List of struct tdp_mmu_pages not being used as roots */ + + /* + * List of struct kvmp_mmu_pages not being used as roots. + * All struct kvm_mmu_pages in the list should have + * tdp_mmu_page set and a root_count of 0. + */ struct list_head tdp_mmu_pages; }; -- GitLab From 81f76adad560dfc39cb9625cf1e00a7e2b7b88df Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 7 Jan 2021 11:38:52 +0200 Subject: [PATCH 1581/1894] KVM: nSVM: correctly restore nested_run_pending on migration The code to store it on the migration exists, but no code was restoring it. One of the side effects of fixing this is that L1->L2 injected events are no longer lost when migration happens with nested run pending. Signed-off-by: Maxim Levitsky Message-Id: <20210107093854.882483-3-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index b0b667456b2e7..a466336aab43a 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -1194,6 +1194,10 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, * in the registers, the save area of the nested state instead * contains saved L1 state. */ + + svm->nested.nested_run_pending = + !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING); + copy_vmcb_control_area(&hsave->control, &svm->vmcb->control); hsave->save = *save; -- GitLab From 56fe28de8c4f0167275c411c0daa5709e9a47bd7 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 7 Jan 2021 11:38:54 +0200 Subject: [PATCH 1582/1894] KVM: nSVM: mark vmcb as dirty when forcingly leaving the guest mode We overwrite most of vmcb fields while doing so, so we must mark it as dirty. Signed-off-by: Maxim Levitsky Message-Id: <20210107093854.882483-5-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index a466336aab43a..a622e63739b4a 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -754,6 +754,7 @@ void svm_leave_nested(struct vcpu_svm *svm) leave_guest_mode(&svm->vcpu); copy_vmcb_control_area(&vmcb->control, &hsave->control); nested_svm_uninit_mmu_context(&svm->vcpu); + vmcb_mark_all_dirty(svm->vmcb); } kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, &svm->vcpu); -- GitLab From f2c7ef3ba9556d62a7e2bb23b563c6510007d55c Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 7 Jan 2021 11:38:51 +0200 Subject: [PATCH 1583/1894] KVM: nSVM: cancel KVM_REQ_GET_NESTED_STATE_PAGES on nested vmexit It is possible to exit the nested guest mode, entered by svm_set_nested_state prior to first vm entry to it (e.g due to pending event) if the nested run was not pending during the migration. In this case we must not switch to the nested msr permission bitmap. Also add a warning to catch similar cases in the future. Fixes: a7d5c7ce41ac1 ("KVM: nSVM: delay MSR permission processing to first nested VM run") Signed-off-by: Maxim Levitsky Message-Id: <20210107093854.882483-2-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 3 +++ arch/x86/kvm/vmx/nested.c | 2 ++ arch/x86/kvm/x86.c | 4 +++- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index a622e63739b4a..cb4c6ee10029c 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -199,6 +199,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + if (!nested_svm_vmrun_msrpm(svm)) { vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = @@ -595,6 +596,8 @@ int nested_svm_vmexit(struct vcpu_svm *svm) svm->nested.vmcb12_gpa = 0; WARN_ON_ONCE(svm->nested.nested_run_pending); + kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, &svm->vcpu); + /* in case we halted in L2 */ svm->vcpu.arch.mp_state = KVM_MP_STATE_RUNNABLE; diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index e2f26564a12de..0fbb46990dfce 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4442,6 +4442,8 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, /* trying to cancel vmlaunch/vmresume is a bug */ WARN_ON_ONCE(vmx->nested.nested_run_pending); + kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu); + /* Service the TLB flush request for L2 before switching to L1. */ if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu)) kvm_vcpu_flush_tlb_current(vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e28ab76b80dc9..f6e7b25c40e23 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8789,7 +8789,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (kvm_request_pending(vcpu)) { if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) { - if (unlikely(!kvm_x86_ops.nested_ops->get_nested_state_pages(vcpu))) { + if (WARN_ON_ONCE(!is_guest_mode(vcpu))) + ; + else if (unlikely(!kvm_x86_ops.nested_ops->get_nested_state_pages(vcpu))) { r = 0; goto out; } -- GitLab From 647daca25d24fb6eadc7b6cd680ad3e6eed0f3d5 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 4 Jan 2021 14:20:01 -0600 Subject: [PATCH 1584/1894] KVM: SVM: Add support for booting APs in an SEV-ES guest Typically under KVM, an AP is booted using the INIT-SIPI-SIPI sequence, where the guest vCPU register state is updated and then the vCPU is VMRUN to begin execution of the AP. For an SEV-ES guest, this won't work because the guest register state is encrypted. Following the GHCB specification, the hypervisor must not alter the guest register state, so KVM must track an AP/vCPU boot. Should the guest want to park the AP, it must use the AP Reset Hold exit event in place of, for example, a HLT loop. First AP boot (first INIT-SIPI-SIPI sequence): Execute the AP (vCPU) as it was initialized and measured by the SEV-ES support. It is up to the guest to transfer control of the AP to the proper location. Subsequent AP boot: KVM will expect to receive an AP Reset Hold exit event indicating that the vCPU is being parked and will require an INIT-SIPI-SIPI sequence to awaken it. When the AP Reset Hold exit event is received, KVM will place the vCPU into a simulated HLT mode. Upon receiving the INIT-SIPI-SIPI sequence, KVM will make the vCPU runnable. It is again up to the guest to then transfer control of the AP to the proper location. To differentiate between an actual HLT and an AP Reset Hold, a new MP state is introduced, KVM_MP_STATE_AP_RESET_HOLD, which the vCPU is placed in upon receiving the AP Reset Hold exit event. Additionally, to communicate the AP Reset Hold exit event up to userspace (if needed), a new exit reason is introduced, KVM_EXIT_AP_RESET_HOLD. A new x86 ops function is introduced, vcpu_deliver_sipi_vector, in order to accomplish AP booting. For VMX, vcpu_deliver_sipi_vector is set to the original SIPI delivery function, kvm_vcpu_deliver_sipi_vector(). SVM adds a new function that, for non SEV-ES guests, invokes the original SIPI delivery function, kvm_vcpu_deliver_sipi_vector(), but for SEV-ES guests, implements the logic above. Signed-off-by: Tom Lendacky Message-Id: Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 3 +++ arch/x86/kvm/lapic.c | 2 +- arch/x86/kvm/svm/sev.c | 22 ++++++++++++++++++++++ arch/x86/kvm/svm/svm.c | 10 ++++++++++ arch/x86/kvm/svm/svm.h | 2 ++ arch/x86/kvm/vmx/vmx.c | 2 ++ arch/x86/kvm/x86.c | 26 +++++++++++++++++++++----- include/uapi/linux/kvm.h | 2 ++ 8 files changed, 63 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index afed3da3b3a09..3d6616f6f6ef8 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1299,6 +1299,8 @@ struct kvm_x86_ops { void (*migrate_timers)(struct kvm_vcpu *vcpu); void (*msr_filter_changed)(struct kvm_vcpu *vcpu); int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err); + + void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector); }; struct kvm_x86_nested_ops { @@ -1480,6 +1482,7 @@ int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in); int kvm_emulate_cpuid(struct kvm_vcpu *vcpu); int kvm_emulate_halt(struct kvm_vcpu *vcpu); int kvm_vcpu_halt(struct kvm_vcpu *vcpu); +int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu); int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 78823227c5929..43cceadd073ed 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2898,7 +2898,7 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu) /* evaluate pending_events before reading the vector */ smp_rmb(); sipi_vector = apic->sipi_vector; - kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector); + kvm_x86_ops.vcpu_deliver_sipi_vector(vcpu, sipi_vector); vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; } } diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 563ced07b0b85..c8ffdbc81709e 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1563,6 +1563,7 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm) goto vmgexit_err; break; case SVM_VMGEXIT_NMI_COMPLETE: + case SVM_VMGEXIT_AP_HLT_LOOP: case SVM_VMGEXIT_AP_JUMP_TABLE: case SVM_VMGEXIT_UNSUPPORTED_EVENT: break; @@ -1888,6 +1889,9 @@ int sev_handle_vmgexit(struct vcpu_svm *svm) case SVM_VMGEXIT_NMI_COMPLETE: ret = svm_invoke_exit_handler(svm, SVM_EXIT_IRET); break; + case SVM_VMGEXIT_AP_HLT_LOOP: + ret = kvm_emulate_ap_reset_hold(&svm->vcpu); + break; case SVM_VMGEXIT_AP_JUMP_TABLE: { struct kvm_sev_info *sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info; @@ -2040,3 +2044,21 @@ void sev_es_vcpu_put(struct vcpu_svm *svm) wrmsrl(host_save_user_msrs[i].index, svm->host_user_msrs[i]); } } + +void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + /* First SIPI: Use the values as initially set by the VMM */ + if (!svm->received_first_sipi) { + svm->received_first_sipi = true; + return; + } + + /* + * Subsequent SIPI: Return from an AP Reset Hold VMGEXIT, where + * the guest will set the CS and RIP. Set SW_EXIT_INFO_2 to a + * non-zero value. + */ + ghcb_set_sw_exit_info_2(svm->ghcb, 1); +} diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 6824d611dc5dd..7ef171790d02b 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4382,6 +4382,14 @@ static bool svm_apic_init_signal_blocked(struct kvm_vcpu *vcpu) (vmcb_is_intercept(&svm->vmcb->control, INTERCEPT_INIT)); } +static void svm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) +{ + if (!sev_es_guest(vcpu->kvm)) + return kvm_vcpu_deliver_sipi_vector(vcpu, vector); + + sev_vcpu_deliver_sipi_vector(vcpu, vector); +} + static void svm_vm_destroy(struct kvm *kvm) { avic_vm_destroy(kvm); @@ -4524,6 +4532,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .msr_filter_changed = svm_msr_filter_changed, .complete_emulated_msr = svm_complete_emulated_msr, + + .vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector, }; static struct kvm_x86_init_ops svm_init_ops __initdata = { diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 5431e6335e2e8..0fe874ae54982 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -185,6 +185,7 @@ struct vcpu_svm { struct vmcb_save_area *vmsa; struct ghcb *ghcb; struct kvm_host_map ghcb_map; + bool received_first_sipi; /* SEV-ES scratch area support */ void *ghcb_sa; @@ -591,6 +592,7 @@ void sev_es_init_vmcb(struct vcpu_svm *svm); void sev_es_create_vcpu(struct vcpu_svm *svm); void sev_es_vcpu_load(struct vcpu_svm *svm, int cpu); void sev_es_vcpu_put(struct vcpu_svm *svm); +void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector); /* vmenter.S */ diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 75c9c6a0a3a45..2af05d3b05909 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7707,6 +7707,8 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .msr_filter_changed = vmx_msr_filter_changed, .complete_emulated_msr = kvm_complete_insn_gp, .cpu_dirty_log_size = vmx_cpu_dirty_log_size, + + .vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector, }; static __init int hardware_setup(void) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f6e7b25c40e23..0287840b93e09 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7976,17 +7976,22 @@ void kvm_arch_exit(void) kmem_cache_destroy(x86_fpu_cache); } -int kvm_vcpu_halt(struct kvm_vcpu *vcpu) +int __kvm_vcpu_halt(struct kvm_vcpu *vcpu, int state, int reason) { ++vcpu->stat.halt_exits; if (lapic_in_kernel(vcpu)) { - vcpu->arch.mp_state = KVM_MP_STATE_HALTED; + vcpu->arch.mp_state = state; return 1; } else { - vcpu->run->exit_reason = KVM_EXIT_HLT; + vcpu->run->exit_reason = reason; return 0; } } + +int kvm_vcpu_halt(struct kvm_vcpu *vcpu) +{ + return __kvm_vcpu_halt(vcpu, KVM_MP_STATE_HALTED, KVM_EXIT_HLT); +} EXPORT_SYMBOL_GPL(kvm_vcpu_halt); int kvm_emulate_halt(struct kvm_vcpu *vcpu) @@ -8000,6 +8005,14 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_emulate_halt); +int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu) +{ + int ret = kvm_skip_emulated_instruction(vcpu); + + return __kvm_vcpu_halt(vcpu, KVM_MP_STATE_AP_RESET_HOLD, KVM_EXIT_AP_RESET_HOLD) && ret; +} +EXPORT_SYMBOL_GPL(kvm_emulate_ap_reset_hold); + #ifdef CONFIG_X86_64 static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr, unsigned long clock_type) @@ -9096,6 +9109,7 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu) kvm_apic_accept_events(vcpu); switch(vcpu->arch.mp_state) { case KVM_MP_STATE_HALTED: + case KVM_MP_STATE_AP_RESET_HOLD: vcpu->arch.pv.pv_unhalted = false; vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; @@ -9522,8 +9536,9 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, kvm_load_guest_fpu(vcpu); kvm_apic_accept_events(vcpu); - if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED && - vcpu->arch.pv.pv_unhalted) + if ((vcpu->arch.mp_state == KVM_MP_STATE_HALTED || + vcpu->arch.mp_state == KVM_MP_STATE_AP_RESET_HOLD) && + vcpu->arch.pv.pv_unhalted) mp_state->mp_state = KVM_MP_STATE_RUNNABLE; else mp_state->mp_state = vcpu->arch.mp_state; @@ -10154,6 +10169,7 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) kvm_set_segment(vcpu, &cs, VCPU_SREG_CS); kvm_rip_write(vcpu, 0); } +EXPORT_SYMBOL_GPL(kvm_vcpu_deliver_sipi_vector); int kvm_arch_hardware_enable(void) { diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 886802b8ffba3..374c67875cdbd 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -251,6 +251,7 @@ struct kvm_hyperv_exit { #define KVM_EXIT_X86_RDMSR 29 #define KVM_EXIT_X86_WRMSR 30 #define KVM_EXIT_DIRTY_RING_FULL 31 +#define KVM_EXIT_AP_RESET_HOLD 32 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -573,6 +574,7 @@ struct kvm_vapic_addr { #define KVM_MP_STATE_CHECK_STOP 6 #define KVM_MP_STATE_OPERATING 7 #define KVM_MP_STATE_LOAD 8 +#define KVM_MP_STATE_AP_RESET_HOLD 9 struct kvm_mp_state { __u32 mp_state; -- GitLab From 717df0f4cdc9044c415431a3522b3e9ccca5b4a3 Mon Sep 17 00:00:00 2001 From: Ayush Sawal Date: Wed, 6 Jan 2021 09:59:06 +0530 Subject: [PATCH 1585/1894] chtls: Fix hardware tid leak send_abort_rpl() is not calculating cpl_abort_req_rss offset and ends up sending wrong TID with abort_rpl WR causng tid leaks. Replaced send_abort_rpl() with chtls_send_abort_rpl() as it is redundant. Fixes: cc35c88ae4db ("crypto : chtls - CPL handler definition") Signed-off-by: Rohit Maheshwari Signed-off-by: Ayush Sawal Signed-off-by: Jakub Kicinski --- .../chelsio/inline_crypto/chtls/chtls_cm.c | 39 ++----------------- 1 file changed, 3 insertions(+), 36 deletions(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c index a0e0d8a83681b..561b5f2273afa 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c @@ -1997,39 +1997,6 @@ static void t4_defer_reply(struct sk_buff *skb, struct chtls_dev *cdev, spin_unlock_bh(&cdev->deferq.lock); } -static void send_abort_rpl(struct sock *sk, struct sk_buff *skb, - struct chtls_dev *cdev, int status, int queue) -{ - struct cpl_abort_req_rss *req = cplhdr(skb); - struct sk_buff *reply_skb; - struct chtls_sock *csk; - - csk = rcu_dereference_sk_user_data(sk); - - reply_skb = alloc_skb(sizeof(struct cpl_abort_rpl), - GFP_KERNEL); - - if (!reply_skb) { - req->status = (queue << 1); - t4_defer_reply(skb, cdev, send_defer_abort_rpl); - return; - } - - set_abort_rpl_wr(reply_skb, GET_TID(req), status); - kfree_skb(skb); - - set_wr_txq(reply_skb, CPL_PRIORITY_DATA, queue); - if (csk_conn_inline(csk)) { - struct l2t_entry *e = csk->l2t_entry; - - if (e && sk->sk_state != TCP_SYN_RECV) { - cxgb4_l2t_send(csk->egress_dev, reply_skb, e); - return; - } - } - cxgb4_ofld_send(cdev->lldi->ports[0], reply_skb); -} - static void chtls_send_abort_rpl(struct sock *sk, struct sk_buff *skb, struct chtls_dev *cdev, int status, int queue) @@ -2079,8 +2046,8 @@ static void bl_abort_syn_rcv(struct sock *lsk, struct sk_buff *skb) skb->sk = NULL; do_abort_syn_rcv(child, lsk); - send_abort_rpl(child, skb, BLOG_SKB_CB(skb)->cdev, - CPL_ABORT_NO_RST, queue); + chtls_send_abort_rpl(child, skb, BLOG_SKB_CB(skb)->cdev, + CPL_ABORT_NO_RST, queue); } static int abort_syn_rcv(struct sock *sk, struct sk_buff *skb) @@ -2111,7 +2078,7 @@ static int abort_syn_rcv(struct sock *sk, struct sk_buff *skb) int queue = csk->txq_idx; do_abort_syn_rcv(sk, psk); - send_abort_rpl(sk, skb, cdev, CPL_ABORT_NO_RST, queue); + chtls_send_abort_rpl(sk, skb, cdev, CPL_ABORT_NO_RST, queue); } else { skb->sk = sk; BLOG_SKB_CB(skb)->backlog_rcv = bl_abort_syn_rcv; -- GitLab From 827d329105bfde6701f0077e34a09c4a86e27145 Mon Sep 17 00:00:00 2001 From: Ayush Sawal Date: Wed, 6 Jan 2021 09:59:07 +0530 Subject: [PATCH 1586/1894] chtls: Remove invalid set_tcb call At the time of SYN_RECV, connection information is not initialized at FW, updating tcb flag over uninitialized connection causes adapter crash. We don't need to update the flag during SYN_RECV state, so avoid this. Fixes: cc35c88ae4db ("crypto : chtls - CPL handler definition") Signed-off-by: Rohit Maheshwari Signed-off-by: Ayush Sawal Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c index 561b5f2273afa..431d1e3844ab6 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c @@ -2096,9 +2096,6 @@ static void chtls_abort_req_rss(struct sock *sk, struct sk_buff *skb) int queue = csk->txq_idx; if (is_neg_adv(req->status)) { - if (sk->sk_state == TCP_SYN_RECV) - chtls_set_tcb_tflag(sk, 0, 0); - kfree_skb(skb); return; } -- GitLab From 5a5fac9966bb6d513198634b0b1357be7e8447d2 Mon Sep 17 00:00:00 2001 From: Ayush Sawal Date: Wed, 6 Jan 2021 09:59:08 +0530 Subject: [PATCH 1587/1894] chtls: Fix panic when route to peer not configured If route to peer is not configured, we might get non tls devices from dst_neigh_lookup() which is invalid, adding a check to avoid it. Fixes: cc35c88ae4db ("crypto : chtls - CPL handler definition") Signed-off-by: Rohit Maheshwari Signed-off-by: Ayush Sawal Signed-off-by: Jakub Kicinski --- .../chelsio/inline_crypto/chtls/chtls_cm.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c index 431d1e3844ab6..04a8bd5af3b9d 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c @@ -1109,6 +1109,7 @@ static struct sock *chtls_recv_sock(struct sock *lsk, const struct cpl_pass_accept_req *req, struct chtls_dev *cdev) { + struct adapter *adap = pci_get_drvdata(cdev->pdev); struct neighbour *n = NULL; struct inet_sock *newinet; const struct iphdr *iph; @@ -1118,9 +1119,10 @@ static struct sock *chtls_recv_sock(struct sock *lsk, struct dst_entry *dst; struct tcp_sock *tp; struct sock *newsk; + bool found = false; u16 port_id; int rxq_idx; - int step; + int step, i; iph = (const struct iphdr *)network_hdr; newsk = tcp_create_openreq_child(lsk, oreq, cdev->askb); @@ -1152,7 +1154,7 @@ static struct sock *chtls_recv_sock(struct sock *lsk, n = dst_neigh_lookup(dst, &ip6h->saddr); #endif } - if (!n) + if (!n || !n->dev) goto free_sk; ndev = n->dev; @@ -1161,6 +1163,13 @@ static struct sock *chtls_recv_sock(struct sock *lsk, if (is_vlan_dev(ndev)) ndev = vlan_dev_real_dev(ndev); + for_each_port(adap, i) + if (cdev->ports[i] == ndev) + found = true; + + if (!found) + goto free_dst; + port_id = cxgb4_port_idx(ndev); csk = chtls_sock_create(cdev); @@ -1238,6 +1247,7 @@ static struct sock *chtls_recv_sock(struct sock *lsk, free_csk: chtls_sock_release(&csk->kref); free_dst: + neigh_release(n); dst_release(dst); free_sk: inet_csk_prepare_forced_close(newsk); -- GitLab From f8d15d29d6e6b32704c8fce9229716ca145a0de2 Mon Sep 17 00:00:00 2001 From: Ayush Sawal Date: Wed, 6 Jan 2021 09:59:09 +0530 Subject: [PATCH 1588/1894] chtls: Avoid unnecessary freeing of oreq pointer In chtls_pass_accept_request(), removing the chtls_reqsk_free() call to avoid oreq freeing twice. Here oreq is the pointer to struct request_sock. Fixes: cc35c88ae4db ("crypto : chtls - CPL handler definition") Signed-off-by: Rohit Maheshwari Signed-off-by: Ayush Sawal Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c index 04a8bd5af3b9d..3022c802d09a5 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c @@ -1397,7 +1397,7 @@ static void chtls_pass_accept_request(struct sock *sk, newsk = chtls_recv_sock(sk, oreq, network_hdr, req, cdev); if (!newsk) - goto free_oreq; + goto reject; if (chtls_get_module(newsk)) goto reject; @@ -1413,8 +1413,6 @@ static void chtls_pass_accept_request(struct sock *sk, kfree_skb(skb); return; -free_oreq: - chtls_reqsk_free(oreq); reject: mk_tid_release(reply_skb, 0, tid); cxgb4_ofld_send(cdev->lldi->ports[0], reply_skb); -- GitLab From a84b2c0d5fa23da6d6c8c0d5f5c93184a2744d3e Mon Sep 17 00:00:00 2001 From: Ayush Sawal Date: Wed, 6 Jan 2021 09:59:10 +0530 Subject: [PATCH 1589/1894] chtls: Replace skb_dequeue with skb_peek The skb is unlinked twice, one in __skb_dequeue in function chtls_reset_synq() and another in cleanup_syn_rcv_conn(). So in this patch using skb_peek() instead of __skb_dequeue(), so that unlink will be handled only in cleanup_syn_rcv_conn(). Fixes: cc35c88ae4db ("crypto : chtls - CPL handler definition") Signed-off-by: Vinay Kumar Yadav Signed-off-by: Ayush Sawal Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c index 3022c802d09a5..ff3969a24d749 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c @@ -621,7 +621,7 @@ static void chtls_reset_synq(struct listen_ctx *listen_ctx) while (!skb_queue_empty(&listen_ctx->synq)) { struct chtls_sock *csk = - container_of((struct synq *)__skb_dequeue + container_of((struct synq *)skb_peek (&listen_ctx->synq), struct chtls_sock, synq); struct sock *child = csk->sk; -- GitLab From eade1e0a4fb31d48eeb1589d9bb859ae4dd6181d Mon Sep 17 00:00:00 2001 From: Ayush Sawal Date: Wed, 6 Jan 2021 09:59:11 +0530 Subject: [PATCH 1590/1894] chtls: Added a check to avoid NULL pointer dereference In case of server removal lookup_stid() may return NULL pointer, which is used as listen_ctx. So added a check before accessing this pointer. Fixes: cc35c88ae4db ("crypto : chtls - CPL handler definition") Signed-off-by: Vinay Kumar Yadav Signed-off-by: Ayush Sawal Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c index ff3969a24d749..1c6d3c93a0c82 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c @@ -1597,6 +1597,11 @@ static int chtls_pass_establish(struct chtls_dev *cdev, struct sk_buff *skb) sk_wake_async(sk, 0, POLL_OUT); data = lookup_stid(cdev->tids, stid); + if (!data) { + /* listening server close */ + kfree_skb(skb); + goto unlock; + } lsk = ((struct listen_ctx *)data)->lsk; bh_lock_sock(lsk); -- GitLab From 15ef6b0e30b354253e2c10b3836bc59767eb162b Mon Sep 17 00:00:00 2001 From: Ayush Sawal Date: Wed, 6 Jan 2021 09:59:12 +0530 Subject: [PATCH 1591/1894] chtls: Fix chtls resources release sequence CPL_ABORT_RPL is sent after releasing the resources by calling chtls_release_resources(sk); and chtls_conn_done(sk); eventually causing kernel panic. Fixing it by calling release in appropriate order. Fixes: cc35c88ae4db ("crypto : chtls - CPL handler definition") Signed-off-by: Vinay Kumar Yadav Signed-off-by: Ayush Sawal Signed-off-by: Jakub Kicinski --- .../net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c index 1c6d3c93a0c82..51dd030b3b366 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c @@ -2058,9 +2058,9 @@ static void bl_abort_syn_rcv(struct sock *lsk, struct sk_buff *skb) queue = csk->txq_idx; skb->sk = NULL; - do_abort_syn_rcv(child, lsk); chtls_send_abort_rpl(child, skb, BLOG_SKB_CB(skb)->cdev, CPL_ABORT_NO_RST, queue); + do_abort_syn_rcv(child, lsk); } static int abort_syn_rcv(struct sock *sk, struct sk_buff *skb) @@ -2090,8 +2090,8 @@ static int abort_syn_rcv(struct sock *sk, struct sk_buff *skb) if (!sock_owned_by_user(psk)) { int queue = csk->txq_idx; - do_abort_syn_rcv(sk, psk); chtls_send_abort_rpl(sk, skb, cdev, CPL_ABORT_NO_RST, queue); + do_abort_syn_rcv(sk, psk); } else { skb->sk = sk; BLOG_SKB_CB(skb)->backlog_rcv = bl_abort_syn_rcv; @@ -2135,12 +2135,12 @@ static void chtls_abort_req_rss(struct sock *sk, struct sk_buff *skb) if (sk->sk_state == TCP_SYN_RECV && !abort_syn_rcv(sk, skb)) return; - chtls_release_resources(sk); - chtls_conn_done(sk); } chtls_send_abort_rpl(sk, skb, BLOG_SKB_CB(skb)->cdev, rst_status, queue); + chtls_release_resources(sk); + chtls_conn_done(sk); } static void chtls_abort_rpl_rss(struct sock *sk, struct sk_buff *skb) -- GitLab From cf7b2ae4d70432fa94ebba3fbaab825481ae7189 Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Mon, 21 Dec 2020 23:52:00 +0100 Subject: [PATCH 1592/1894] riscv: return -ENOSYS for syscall -1 Properly return -ENOSYS for syscall -1 instead of leaving the return value uninitialized. This fixes the strace teststuite. Fixes: 5340627e3fe0 ("riscv: add support for SECCOMP and SECCOMP_FILTER") Cc: stable@vger.kernel.org Signed-off-by: Andreas Schwab Reviewed-by: Tycho Andersen Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/entry.S | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 524d918f3601b..d07763001eb0c 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -186,14 +186,7 @@ check_syscall_nr: * Syscall number held in a7. * If syscall number is above allowed value, redirect to ni_syscall. */ - bge a7, t0, 1f - /* - * Check if syscall is rejected by tracer, i.e., a7 == -1. - * If yes, we pretend it was executed. - */ - li t1, -1 - beq a7, t1, ret_from_syscall_rejected - blt a7, t1, 1f + bgeu a7, t0, 1f /* Call syscall */ la s0, sys_call_table slli t0, a7, RISCV_LGPTR -- GitLab From 11f4c2e940e2f317c9d8fb5a79702f2a4a02ff98 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Sun, 13 Dec 2020 22:50:34 +0900 Subject: [PATCH 1593/1894] riscv: Fix kernel time_init() If of_clk_init() is not called in time_init(), clock providers defined in the system device tree are not initialized, resulting in failures for other devices to initialize due to missing clocks. Similarly to other architectures and to the default kernel time_init() implementation, call of_clk_init() before executing timer_probe() in time_init(). Signed-off-by: Damien Le Moal Acked-by: Stephen Boyd Reviewed-by: Palmer Dabbelt Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/time.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/riscv/kernel/time.c b/arch/riscv/kernel/time.c index 4d3a1048ad8b1..8a5cf99c07762 100644 --- a/arch/riscv/kernel/time.c +++ b/arch/riscv/kernel/time.c @@ -4,6 +4,7 @@ * Copyright (C) 2017 SiFive */ +#include #include #include #include @@ -24,6 +25,8 @@ void __init time_init(void) riscv_timebase = prop; lpj_fine = riscv_timebase / HZ; + + of_clk_init(NULL); timer_probe(); } -- GitLab From 1f1496a923b6ba16679074fe77100e1b53cdb880 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Sun, 13 Dec 2020 22:50:35 +0900 Subject: [PATCH 1594/1894] riscv: Fix sifive serial driver Setup the port uartclk in sifive_serial_probe() so that the base baud rate is correctly printed during device probe instead of always showing "0". I.e. the probe message is changed from 38000000.serial: ttySIF0 at MMIO 0x38000000 (irq = 1, base_baud = 0) is a SiFive UART v0 to the correct: 38000000.serial: ttySIF0 at MMIO 0x38000000 (irq = 1, base_baud = 115200) is a SiFive UART v0 Signed-off-by: Damien Le Moal Reviewed-by: Palmer Dabbelt Acked-by: Palmer Dabbelt Signed-off-by: Palmer Dabbelt --- drivers/tty/serial/sifive.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/serial/sifive.c b/drivers/tty/serial/sifive.c index 1066eebe3b28b..328d5a78792fe 100644 --- a/drivers/tty/serial/sifive.c +++ b/drivers/tty/serial/sifive.c @@ -1000,6 +1000,7 @@ static int sifive_serial_probe(struct platform_device *pdev) /* Set up clock divider */ ssp->clkin_rate = clk_get_rate(ssp->clk); ssp->baud_rate = SIFIVE_DEFAULT_BAUD_RATE; + ssp->port.uartclk = ssp->baud_rate * 16; __ssp_update_div(ssp); platform_set_drvdata(pdev, ssp); -- GitLab From 643437b996bac9267785e0bd528332e2d5811067 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Sun, 13 Dec 2020 22:50:36 +0900 Subject: [PATCH 1595/1894] riscv: Enable interrupts during syscalls with M-Mode When running is M-Mode (no MMU config), MPIE does not get set. This results in all syscalls being executed with interrupts disabled as handle_exception never sets SR_IE as it always sees SR_PIE being cleared. Fix this by always force enabling interrupts in handle_syscall when CONFIG_RISCV_M_MODE is enabled. Signed-off-by: Damien Le Moal Reviewed-by: Palmer Dabbelt Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/entry.S | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index d07763001eb0c..48de316c68c18 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -155,6 +155,15 @@ skip_context_tracking: tail do_trap_unknown handle_syscall: +#ifdef CONFIG_RISCV_M_MODE + /* + * When running is M-Mode (no MMU config), MPIE does not get set. + * As a result, we need to force enable interrupts here because + * handle_exception did not do set SR_IE as it always sees SR_PIE + * being cleared. + */ + csrs CSR_STATUS, SR_IE +#endif #if defined(CONFIG_TRACE_IRQFLAGS) || defined(CONFIG_CONTEXT_TRACKING) /* Recover a0 - a7 for system calls */ REG_L a0, PT_A0(sp) -- GitLab From ac7996d680d8b4a51bb99bbdcee3dc838b985498 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 7 Jan 2021 12:39:16 +0000 Subject: [PATCH 1596/1894] octeontx2-af: fix memory leak of lmac and lmac->name Currently the error return paths don't kfree lmac and lmac->name leading to some memory leaks. Fix this by adding two error return paths that kfree these objects Addresses-Coverity: ("Resource leak") Fixes: 1463f382f58d ("octeontx2-af: Add support for CGX link management") Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20210107123916.189748-1-colin.king@canonical.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/af/cgx.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index 7d0f962909437..1a8f5a039d502 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -871,8 +871,10 @@ static int cgx_lmac_init(struct cgx *cgx) if (!lmac) return -ENOMEM; lmac->name = kcalloc(1, sizeof("cgx_fwi_xxx_yyy"), GFP_KERNEL); - if (!lmac->name) - return -ENOMEM; + if (!lmac->name) { + err = -ENOMEM; + goto err_lmac_free; + } sprintf(lmac->name, "cgx_fwi_%d_%d", cgx->cgx_id, i); lmac->lmac_id = i; lmac->cgx = cgx; @@ -883,7 +885,7 @@ static int cgx_lmac_init(struct cgx *cgx) CGX_LMAC_FWI + i * 9), cgx_fwi_event_handler, 0, lmac->name, lmac); if (err) - return err; + goto err_irq; /* Enable interrupt */ cgx_write(cgx, lmac->lmac_id, CGXX_CMRX_INT_ENA_W1S, @@ -895,6 +897,12 @@ static int cgx_lmac_init(struct cgx *cgx) } return cgx_lmac_verify_fwi_version(cgx); + +err_irq: + kfree(lmac->name); +err_lmac_free: + kfree(lmac); + return err; } static int cgx_lmac_exit(struct cgx *cgx) -- GitLab From 07e61a979ca4dddb3661f59328b3cd109f6b0070 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 7 Jan 2021 16:48:21 +0200 Subject: [PATCH 1597/1894] nexthop: Fix off-by-one error in error path A reference was not taken for the current nexthop entry, so do not try to put it in the error path. Fixes: 430a049190de ("nexthop: Add support for nexthop groups") Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 5e1b22d4f939f..f8035cfa9c206 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -1459,7 +1459,7 @@ static struct nexthop *nexthop_create_group(struct net *net, return nh; out_no_nh: - for (; i >= 0; --i) + for (i--; i >= 0; --i) nexthop_put(nhg->nh_entries[i].nh); kfree(nhg->spare); -- GitLab From 7b01e53eee6dce7a8a6736e06b99b68cd0cc7a27 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 7 Jan 2021 16:48:22 +0200 Subject: [PATCH 1598/1894] nexthop: Unlink nexthop group entry in error path In case of error, remove the nexthop group entry from the list to which it was previously added. Fixes: 430a049190de ("nexthop: Add support for nexthop groups") Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index f8035cfa9c206..712cdc061cde9 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -1459,8 +1459,10 @@ static struct nexthop *nexthop_create_group(struct net *net, return nh; out_no_nh: - for (i--; i >= 0; --i) + for (i--; i >= 0; --i) { + list_del(&nhg->nh_entries[i].nh_list); nexthop_put(nhg->nh_entries[i].nh); + } kfree(nhg->spare); kfree(nhg); -- GitLab From b19218b27f3477316d296e8bcf4446aaf017aa69 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 7 Jan 2021 16:48:23 +0200 Subject: [PATCH 1599/1894] nexthop: Bounce NHA_GATEWAY in FDB nexthop groups The function nh_check_attr_group() is called to validate nexthop groups. The intention of that code seems to have been to bounce all attributes above NHA_GROUP_TYPE except for NHA_FDB. However instead it bounces all these attributes except when NHA_FDB attribute is present--then it accepts them. NHA_FDB validation that takes place before, in rtm_to_nh_config(), already bounces NHA_OIF, NHA_BLACKHOLE, NHA_ENCAP and NHA_ENCAP_TYPE. Yet further back, NHA_GROUPS and NHA_MASTER are bounced unconditionally. But that still leaves NHA_GATEWAY as an attribute that would be accepted in FDB nexthop groups (with no meaning), so long as it keeps the address family as unspecified: # ip nexthop add id 1 fdb via 127.0.0.1 # ip nexthop add id 10 fdb via default group 1 The nexthop code is still relatively new and likely not used very broadly, and the FDB bits are newer still. Even though there is a reproducer out there, it relies on an improbable gateway arguments "via default", "via all" or "via any". Given all this, I believe it is OK to reformulate the condition to do the right thing and bounce NHA_GATEWAY. Fixes: 38428d68719c ("nexthop: support for fdb ecmp nexthops") Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 712cdc061cde9..e53e43aef7854 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -627,7 +627,7 @@ static int nh_check_attr_group(struct net *net, struct nlattr *tb[], for (i = NHA_GROUP_TYPE + 1; i < __NHA_MAX; ++i) { if (!tb[i]) continue; - if (tb[NHA_FDB]) + if (i == NHA_FDB) continue; NL_SET_ERR_MSG(extack, "No other attributes can be set in nexthop groups"); -- GitLab From a5c9ca76a1c61fb5e4c35de8eb25aa925b03c9e4 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 7 Jan 2021 16:48:24 +0200 Subject: [PATCH 1600/1894] selftests: fib_nexthops: Fix wrong mausezahn invocation For IPv6 traffic, mausezahn needs to be invoked with '-6'. Otherwise an error is returned: # ip netns exec me mausezahn veth1 -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 -t tcp "dp=1-1023, flags=syn" Failed to set source IPv4 address. Please check if source is set to a valid IPv4 address. Invalid command line parameters! Fixes: 7c741868ceab ("selftests: Add torture tests to nexthop tests") Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fib_nexthops.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index eb693a3b7b4a1..4c7d33618437c 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -869,7 +869,7 @@ ipv6_torture() pid3=$! ip netns exec me ping -f 2001:db8:101::2 >/dev/null 2>&1 & pid4=$! - ip netns exec me mausezahn veth1 -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 & + ip netns exec me mausezahn -6 veth1 -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 & pid5=$! sleep 300 -- GitLab From 0b9902c1fcc59ba75268386c0420a554f8844168 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 7 Jan 2021 18:24:40 +0100 Subject: [PATCH 1601/1894] s390/qeth: fix deadlock during recovery When qeth_dev_layer2_store() - holding the discipline_mutex - waits inside qeth_l*_remove_device() for a qeth_do_reset() thread to complete, we can hit a deadlock if qeth_do_reset() concurrently calls qeth_set_online() and thus tries to aquire the discipline_mutex. Move the discipline_mutex locking outside of qeth_set_online() and qeth_set_offline(), and turn the discipline into a parameter so that callers understand the dependency. To fix the deadlock, we can now relax the locking: As already established, qeth_l*_remove_device() waits for qeth_do_reset() to complete. So qeth_do_reset() itself is under no risk of having card->discipline ripped out while it's running, and thus doesn't need to take the discipline_mutex. Fixes: 9dc48ccc68b9 ("qeth: serialize sysfs-triggered device configurations") Signed-off-by: Julian Wiedmann Reviewed-by: Alexandra Winter Signed-off-by: Jakub Kicinski --- drivers/s390/net/qeth_core.h | 3 ++- drivers/s390/net/qeth_core_main.c | 35 +++++++++++++++++++------------ drivers/s390/net/qeth_l2_main.c | 7 +++++-- drivers/s390/net/qeth_l3_main.c | 7 +++++-- 4 files changed, 34 insertions(+), 18 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 6f5ddc3eab8c5..28f637042d444 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -1079,7 +1079,8 @@ struct qeth_card *qeth_get_card_by_busid(char *bus_id); void qeth_set_allowed_threads(struct qeth_card *card, unsigned long threads, int clear_start_mask); int qeth_threads_running(struct qeth_card *, unsigned long); -int qeth_set_offline(struct qeth_card *card, bool resetting); +int qeth_set_offline(struct qeth_card *card, const struct qeth_discipline *disc, + bool resetting); int qeth_send_ipa_cmd(struct qeth_card *, struct qeth_cmd_buffer *, int (*reply_cb) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index f4b60294a9695..d45e223fc5218 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -5507,12 +5507,12 @@ out: return rc; } -static int qeth_set_online(struct qeth_card *card) +static int qeth_set_online(struct qeth_card *card, + const struct qeth_discipline *disc) { bool carrier_ok; int rc; - mutex_lock(&card->discipline_mutex); mutex_lock(&card->conf_mutex); QETH_CARD_TEXT(card, 2, "setonlin"); @@ -5529,7 +5529,7 @@ static int qeth_set_online(struct qeth_card *card) /* no need for locking / error handling at this early stage: */ qeth_set_real_num_tx_queues(card, qeth_tx_actual_queues(card)); - rc = card->discipline->set_online(card, carrier_ok); + rc = disc->set_online(card, carrier_ok); if (rc) goto err_online; @@ -5537,7 +5537,6 @@ static int qeth_set_online(struct qeth_card *card) kobject_uevent(&card->gdev->dev.kobj, KOBJ_CHANGE); mutex_unlock(&card->conf_mutex); - mutex_unlock(&card->discipline_mutex); return 0; err_online: @@ -5552,15 +5551,14 @@ err_hardsetup: qdio_free(CARD_DDEV(card)); mutex_unlock(&card->conf_mutex); - mutex_unlock(&card->discipline_mutex); return rc; } -int qeth_set_offline(struct qeth_card *card, bool resetting) +int qeth_set_offline(struct qeth_card *card, const struct qeth_discipline *disc, + bool resetting) { int rc, rc2, rc3; - mutex_lock(&card->discipline_mutex); mutex_lock(&card->conf_mutex); QETH_CARD_TEXT(card, 3, "setoffl"); @@ -5581,7 +5579,7 @@ int qeth_set_offline(struct qeth_card *card, bool resetting) cancel_work_sync(&card->rx_mode_work); - card->discipline->set_offline(card); + disc->set_offline(card); qeth_qdio_clear_card(card, 0); qeth_drain_output_queues(card); @@ -5602,16 +5600,19 @@ int qeth_set_offline(struct qeth_card *card, bool resetting) kobject_uevent(&card->gdev->dev.kobj, KOBJ_CHANGE); mutex_unlock(&card->conf_mutex); - mutex_unlock(&card->discipline_mutex); return 0; } EXPORT_SYMBOL_GPL(qeth_set_offline); static int qeth_do_reset(void *data) { + const struct qeth_discipline *disc; struct qeth_card *card = data; int rc; + /* Lock-free, other users will block until we are done. */ + disc = card->discipline; + QETH_CARD_TEXT(card, 2, "recover1"); if (!qeth_do_run_thread(card, QETH_RECOVER_THREAD)) return 0; @@ -5619,8 +5620,8 @@ static int qeth_do_reset(void *data) dev_warn(&card->gdev->dev, "A recovery process has been started for the device\n"); - qeth_set_offline(card, true); - rc = qeth_set_online(card); + qeth_set_offline(card, disc, true); + rc = qeth_set_online(card, disc); if (!rc) { dev_info(&card->gdev->dev, "Device successfully recovered!\n"); @@ -6647,7 +6648,10 @@ static int qeth_core_set_online(struct ccwgroup_device *gdev) } } - rc = qeth_set_online(card); + mutex_lock(&card->discipline_mutex); + rc = qeth_set_online(card, card->discipline); + mutex_unlock(&card->discipline_mutex); + err: return rc; } @@ -6655,8 +6659,13 @@ err: static int qeth_core_set_offline(struct ccwgroup_device *gdev) { struct qeth_card *card = dev_get_drvdata(&gdev->dev); + int rc; - return qeth_set_offline(card, false); + mutex_lock(&card->discipline_mutex); + rc = qeth_set_offline(card, card->discipline, false); + mutex_unlock(&card->discipline_mutex); + + return rc; } static void qeth_core_shutdown(struct ccwgroup_device *gdev) diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 4ed0fb0705a52..37279b1e29f6e 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -2207,8 +2207,11 @@ static void qeth_l2_remove_device(struct ccwgroup_device *gdev) qeth_set_allowed_threads(card, 0, 1); wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0); - if (gdev->state == CCWGROUP_ONLINE) - qeth_set_offline(card, false); + if (gdev->state == CCWGROUP_ONLINE) { + mutex_lock(&card->discipline_mutex); + qeth_set_offline(card, card->discipline, false); + mutex_unlock(&card->discipline_mutex); + } cancel_work_sync(&card->close_dev_work); if (card->dev->reg_state == NETREG_REGISTERED) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index d138ac432d012..8d474179ce982 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -1970,8 +1970,11 @@ static void qeth_l3_remove_device(struct ccwgroup_device *cgdev) qeth_set_allowed_threads(card, 0, 1); wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0); - if (cgdev->state == CCWGROUP_ONLINE) - qeth_set_offline(card, false); + if (cgdev->state == CCWGROUP_ONLINE) { + mutex_lock(&card->discipline_mutex); + qeth_set_offline(card, card->discipline, false); + mutex_unlock(&card->discipline_mutex); + } cancel_work_sync(&card->close_dev_work); if (card->dev->reg_state == NETREG_REGISTERED) -- GitLab From b41b554c1ee75070a14c02a88496b1f231c7eacc Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 7 Jan 2021 18:24:41 +0100 Subject: [PATCH 1602/1894] s390/qeth: fix locking for discipline setup / removal Due to insufficient locking, qeth_core_set_online() and qeth_dev_layer2_store() can run in parallel, both attempting to load & setup the discipline (and stepping on each other toes along the way). A similar race can also occur between qeth_core_remove_device() and qeth_dev_layer2_store(). Access to .discipline is meant to be protected by the discipline_mutex, so add/expand the locking in qeth_core_remove_device() and qeth_core_set_online(). Adjust the locking in qeth_l*_remove_device() accordingly, as it's now handled by the callers in a consistent manner. Based on an initial patch by Ursula Braun. Fixes: 9dc48ccc68b9 ("qeth: serialize sysfs-triggered device configurations") Signed-off-by: Julian Wiedmann Reviewed-by: Alexandra Winter Signed-off-by: Jakub Kicinski --- drivers/s390/net/qeth_core_main.c | 7 +++++-- drivers/s390/net/qeth_l2_main.c | 5 +---- drivers/s390/net/qeth_l3_main.c | 5 +---- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index d45e223fc5218..cf18d87da41e2 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -6585,6 +6585,7 @@ static int qeth_core_probe_device(struct ccwgroup_device *gdev) break; default: card->info.layer_enforced = true; + /* It's so early that we don't need the discipline_mutex yet. */ rc = qeth_core_load_discipline(card, enforced_disc); if (rc) goto err_load; @@ -6617,10 +6618,12 @@ static void qeth_core_remove_device(struct ccwgroup_device *gdev) QETH_CARD_TEXT(card, 2, "removedv"); + mutex_lock(&card->discipline_mutex); if (card->discipline) { card->discipline->remove(gdev); qeth_core_free_discipline(card); } + mutex_unlock(&card->discipline_mutex); qeth_free_qdio_queues(card); @@ -6635,6 +6638,7 @@ static int qeth_core_set_online(struct ccwgroup_device *gdev) int rc = 0; enum qeth_discipline_id def_discipline; + mutex_lock(&card->discipline_mutex); if (!card->discipline) { def_discipline = IS_IQD(card) ? QETH_DISCIPLINE_LAYER3 : QETH_DISCIPLINE_LAYER2; @@ -6648,11 +6652,10 @@ static int qeth_core_set_online(struct ccwgroup_device *gdev) } } - mutex_lock(&card->discipline_mutex); rc = qeth_set_online(card, card->discipline); - mutex_unlock(&card->discipline_mutex); err: + mutex_unlock(&card->discipline_mutex); return rc; } diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 37279b1e29f6e..4254caf1d9b69 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -2207,11 +2207,8 @@ static void qeth_l2_remove_device(struct ccwgroup_device *gdev) qeth_set_allowed_threads(card, 0, 1); wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0); - if (gdev->state == CCWGROUP_ONLINE) { - mutex_lock(&card->discipline_mutex); + if (gdev->state == CCWGROUP_ONLINE) qeth_set_offline(card, card->discipline, false); - mutex_unlock(&card->discipline_mutex); - } cancel_work_sync(&card->close_dev_work); if (card->dev->reg_state == NETREG_REGISTERED) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 8d474179ce982..6970597bc8859 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -1970,11 +1970,8 @@ static void qeth_l3_remove_device(struct ccwgroup_device *cgdev) qeth_set_allowed_threads(card, 0, 1); wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0); - if (cgdev->state == CCWGROUP_ONLINE) { - mutex_lock(&card->discipline_mutex); + if (cgdev->state == CCWGROUP_ONLINE) qeth_set_offline(card, card->discipline, false); - mutex_unlock(&card->discipline_mutex); - } cancel_work_sync(&card->close_dev_work); if (card->dev->reg_state == NETREG_REGISTERED) -- GitLab From f9c4845385c8f6631ebd5dddfb019ea7a285fba4 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 7 Jan 2021 18:24:42 +0100 Subject: [PATCH 1603/1894] s390/qeth: fix L2 header access in qeth_l3_osa_features_check() ip_finish_output_gso() may call .ndo_features_check() even before the skb has a L2 header. This conflicts with qeth_get_ip_version()'s attempt to inspect the L2 header via vlan_eth_hdr(). Switch to vlan_get_protocol(), as already used further down in the common qeth_features_check() path. Fixes: f13ade199391 ("s390/qeth: run non-offload L3 traffic over common xmit path") Signed-off-by: Julian Wiedmann Signed-off-by: Jakub Kicinski --- drivers/s390/net/qeth_l3_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 6970597bc8859..4c2cae7ae9a7f 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -1813,7 +1813,7 @@ static netdev_features_t qeth_l3_osa_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features) { - if (qeth_get_ip_version(skb) != 4) + if (vlan_get_protocol(skb) != htons(ETH_P_IP)) features &= ~NETIF_F_HW_VLAN_CTAG_TX; return qeth_features_check(skb, dev, features); } -- GitLab From 3545454c7801e391b0d966f82c98614d45394770 Mon Sep 17 00:00:00 2001 From: Aleksander Jan Bajkowski Date: Thu, 7 Jan 2021 20:58:18 +0100 Subject: [PATCH 1604/1894] net: dsa: lantiq_gswip: Exclude RMII from modes that report 1 GbE Exclude RMII from modes that report 1 GbE support. Reduced MII supports up to 100 MbE. Fixes: 14fceff4771e ("net: dsa: Add Lantiq / Intel DSA driver for vrx200") Signed-off-by: Aleksander Jan Bajkowski Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20210107195818.3878-1-olek2@wp.pl Signed-off-by: Jakub Kicinski --- drivers/net/dsa/lantiq_gswip.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c index 4b36d89bec061..662e68a0e7e61 100644 --- a/drivers/net/dsa/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq_gswip.c @@ -1436,11 +1436,12 @@ static void gswip_phylink_validate(struct dsa_switch *ds, int port, phylink_set(mask, Pause); phylink_set(mask, Asym_Pause); - /* With the exclusion of MII and Reverse MII, we support Gigabit, - * including Half duplex + /* With the exclusion of MII, Reverse MII and Reduced MII, we + * support Gigabit, including Half duplex */ if (state->interface != PHY_INTERFACE_MODE_MII && - state->interface != PHY_INTERFACE_MODE_REVMII) { + state->interface != PHY_INTERFACE_MODE_REVMII && + state->interface != PHY_INTERFACE_MODE_RMII) { phylink_set(mask, 1000baseT_Full); phylink_set(mask, 1000baseT_Half); } -- GitLab From 2d2f6f1b4799428d160c021dd652bc3e3593945e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 7 Jan 2021 19:36:40 +0100 Subject: [PATCH 1605/1894] block: pre-initialize struct block_device in bdev_alloc_inode bdev_evict_inode and bdev_free_inode are also called for the root inode of bdevfs, for which bdev_alloc is never called. Move the zeroing o f struct block_device and the initialization of the bd_bdi field into bdev_alloc_inode to make sure they are initialized for the root inode as well. Fixes: e6cb53827ed6 ("block: initialize struct block_device in bdev_alloc") Reported-by: Alexey Kardashevskiy Tested-by: Alexey Kardashevskiy Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- fs/block_dev.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index e454c5a810436..3b8963e228a1b 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -776,8 +776,11 @@ static struct kmem_cache * bdev_cachep __read_mostly; static struct inode *bdev_alloc_inode(struct super_block *sb) { struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL); + if (!ei) return NULL; + memset(&ei->bdev, 0, sizeof(ei->bdev)); + ei->bdev.bd_bdi = &noop_backing_dev_info; return &ei->vfs_inode; } @@ -871,14 +874,12 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno) mapping_set_gfp_mask(&inode->i_data, GFP_USER); bdev = I_BDEV(inode); - memset(bdev, 0, sizeof(*bdev)); mutex_init(&bdev->bd_mutex); mutex_init(&bdev->bd_fsfreeze_mutex); spin_lock_init(&bdev->bd_size_lock); bdev->bd_disk = disk; bdev->bd_partno = partno; bdev->bd_inode = inode; - bdev->bd_bdi = &noop_backing_dev_info; #ifdef CONFIG_SYSFS INIT_LIST_HEAD(&bdev->bd_holder_disks); #endif -- GitLab From ae28d1aae48a1258bd09a6f707ebb4231d79a761 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 17 Dec 2020 14:31:18 -0800 Subject: [PATCH 1606/1894] x86/resctrl: Use an IPI instead of task_work_add() to update PQR_ASSOC MSR Currently, when moving a task to a resource group the PQR_ASSOC MSR is updated with the new closid and rmid in an added task callback. If the task is running, the work is run as soon as possible. If the task is not running, the work is executed later in the kernel exit path when the kernel returns to the task again. Updating the PQR_ASSOC MSR as soon as possible on the CPU a moved task is running is the right thing to do. Queueing work for a task that is not running is unnecessary (the PQR_ASSOC MSR is already updated when the task is scheduled in) and causing system resource waste with the way in which it is implemented: Work to update the PQR_ASSOC register is queued every time the user writes a task id to the "tasks" file, even if the task already belongs to the resource group. This could result in multiple pending work items associated with a single task even if they are all identical and even though only a single update with most recent values is needed. Specifically, even if a task is moved between different resource groups while it is sleeping then it is only the last move that is relevant but yet a work item is queued during each move. This unnecessary queueing of work items could result in significant system resource waste, especially on tasks sleeping for a long time. For example, as demonstrated by Shakeel Butt in [1] writing the same task id to the "tasks" file can quickly consume significant memory. The same problem (wasted system resources) occurs when moving a task between different resource groups. As pointed out by Valentin Schneider in [2] there is an additional issue with the way in which the queueing of work is done in that the task_struct update is currently done after the work is queued, resulting in a race with the register update possibly done before the data needed by the update is available. To solve these issues, update the PQR_ASSOC MSR in a synchronous way right after the new closid and rmid are ready during the task movement, only if the task is running. If a moved task is not running nothing is done since the PQR_ASSOC MSR will be updated next time the task is scheduled. This is the same way used to update the register when tasks are moved as part of resource group removal. [1] https://lore.kernel.org/lkml/CALvZod7E9zzHwenzf7objzGKsdBmVwTgEJ0nPgs0LUFU3SN5Pw@mail.gmail.com/ [2] https://lore.kernel.org/lkml/20201123022433.17905-1-valentin.schneider@arm.com [ bp: Massage commit message and drop the two update_task_closid_rmid() variants. ] Fixes: e02737d5b826 ("x86/intel_rdt: Add tasks files") Reported-by: Shakeel Butt Reported-by: Valentin Schneider Signed-off-by: Fenghua Yu Signed-off-by: Reinette Chatre Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: James Morse Reviewed-by: Valentin Schneider Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/17aa2fb38fc12ce7bb710106b3e7c7b45acb9e94.1608243147.git.reinette.chatre@intel.com --- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 112 ++++++++++--------------- 1 file changed, 43 insertions(+), 69 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 29ffb95b25fff..1c6f8a60ac52a 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -525,89 +525,63 @@ static void rdtgroup_remove(struct rdtgroup *rdtgrp) kfree(rdtgrp); } -struct task_move_callback { - struct callback_head work; - struct rdtgroup *rdtgrp; -}; - -static void move_myself(struct callback_head *head) +static void _update_task_closid_rmid(void *task) { - struct task_move_callback *callback; - struct rdtgroup *rdtgrp; - - callback = container_of(head, struct task_move_callback, work); - rdtgrp = callback->rdtgrp; - /* - * If resource group was deleted before this task work callback - * was invoked, then assign the task to root group and free the - * resource group. + * If the task is still current on this CPU, update PQR_ASSOC MSR. + * Otherwise, the MSR is updated when the task is scheduled in. */ - if (atomic_dec_and_test(&rdtgrp->waitcount) && - (rdtgrp->flags & RDT_DELETED)) { - current->closid = 0; - current->rmid = 0; - rdtgroup_remove(rdtgrp); - } - - if (unlikely(current->flags & PF_EXITING)) - goto out; - - preempt_disable(); - /* update PQR_ASSOC MSR to make resource group go into effect */ - resctrl_sched_in(); - preempt_enable(); + if (task == current) + resctrl_sched_in(); +} -out: - kfree(callback); +static void update_task_closid_rmid(struct task_struct *t) +{ + if (IS_ENABLED(CONFIG_SMP) && task_curr(t)) + smp_call_function_single(task_cpu(t), _update_task_closid_rmid, t, 1); + else + _update_task_closid_rmid(t); } static int __rdtgroup_move_task(struct task_struct *tsk, struct rdtgroup *rdtgrp) { - struct task_move_callback *callback; - int ret; - - callback = kzalloc(sizeof(*callback), GFP_KERNEL); - if (!callback) - return -ENOMEM; - callback->work.func = move_myself; - callback->rdtgrp = rdtgrp; - /* - * Take a refcount, so rdtgrp cannot be freed before the - * callback has been invoked. + * Set the task's closid/rmid before the PQR_ASSOC MSR can be + * updated by them. + * + * For ctrl_mon groups, move both closid and rmid. + * For monitor groups, can move the tasks only from + * their parent CTRL group. */ - atomic_inc(&rdtgrp->waitcount); - ret = task_work_add(tsk, &callback->work, TWA_RESUME); - if (ret) { - /* - * Task is exiting. Drop the refcount and free the callback. - * No need to check the refcount as the group cannot be - * deleted before the write function unlocks rdtgroup_mutex. - */ - atomic_dec(&rdtgrp->waitcount); - kfree(callback); - rdt_last_cmd_puts("Task exited\n"); - } else { - /* - * For ctrl_mon groups move both closid and rmid. - * For monitor groups, can move the tasks only from - * their parent CTRL group. - */ - if (rdtgrp->type == RDTCTRL_GROUP) { - tsk->closid = rdtgrp->closid; + + if (rdtgrp->type == RDTCTRL_GROUP) { + tsk->closid = rdtgrp->closid; + tsk->rmid = rdtgrp->mon.rmid; + } else if (rdtgrp->type == RDTMON_GROUP) { + if (rdtgrp->mon.parent->closid == tsk->closid) { tsk->rmid = rdtgrp->mon.rmid; - } else if (rdtgrp->type == RDTMON_GROUP) { - if (rdtgrp->mon.parent->closid == tsk->closid) { - tsk->rmid = rdtgrp->mon.rmid; - } else { - rdt_last_cmd_puts("Can't move task to different control group\n"); - ret = -EINVAL; - } + } else { + rdt_last_cmd_puts("Can't move task to different control group\n"); + return -EINVAL; } } - return ret; + + /* + * Ensure the task's closid and rmid are written before determining if + * the task is current that will decide if it will be interrupted. + */ + barrier(); + + /* + * By now, the task's closid and rmid are set. If the task is current + * on a CPU, the PQR_ASSOC MSR needs to be updated to make the resource + * group go into effect. If the task is not current, the MSR will be + * updated when the task is scheduled in. + */ + update_task_closid_rmid(tsk); + + return 0; } static bool is_closid_match(struct task_struct *t, struct rdtgroup *r) -- GitLab From a0195f314a25582b38993bf30db11c300f4f4611 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 17 Dec 2020 14:31:19 -0800 Subject: [PATCH 1607/1894] x86/resctrl: Don't move a task to the same resource group Shakeel Butt reported in [1] that a user can request a task to be moved to a resource group even if the task is already in the group. It just wastes time to do the move operation which could be costly to send IPI to a different CPU. Add a sanity check to ensure that the move operation only happens when the task is not already in the resource group. [1] https://lore.kernel.org/lkml/CALvZod7E9zzHwenzf7objzGKsdBmVwTgEJ0nPgs0LUFU3SN5Pw@mail.gmail.com/ Fixes: e02737d5b826 ("x86/intel_rdt: Add tasks files") Reported-by: Shakeel Butt Signed-off-by: Fenghua Yu Signed-off-by: Reinette Chatre Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/962ede65d8e95be793cb61102cca37f7bb018e66.1608243147.git.reinette.chatre@intel.com --- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 1c6f8a60ac52a..460f3e0df106c 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -546,6 +546,13 @@ static void update_task_closid_rmid(struct task_struct *t) static int __rdtgroup_move_task(struct task_struct *tsk, struct rdtgroup *rdtgrp) { + /* If the task is already in rdtgrp, no need to move the task. */ + if ((rdtgrp->type == RDTCTRL_GROUP && tsk->closid == rdtgrp->closid && + tsk->rmid == rdtgrp->mon.rmid) || + (rdtgrp->type == RDTMON_GROUP && tsk->rmid == rdtgrp->mon.rmid && + tsk->closid == rdtgrp->mon.parent->closid)) + return 0; + /* * Set the task's closid/rmid before the PQR_ASSOC MSR can be * updated by them. -- GitLab From 872f36eb0b0f4f0e3a81ea1e51a6bdf58ccfdc6e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 8 Jan 2021 05:54:44 -0500 Subject: [PATCH 1608/1894] KVM: x86: __kvm_vcpu_halt can be static Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0287840b93e09..a480804ae27a3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7976,7 +7976,7 @@ void kvm_arch_exit(void) kmem_cache_destroy(x86_fpu_cache); } -int __kvm_vcpu_halt(struct kvm_vcpu *vcpu, int state, int reason) +static int __kvm_vcpu_halt(struct kvm_vcpu *vcpu, int state, int reason) { ++vcpu->stat.halt_exits; if (lapic_in_kernel(vcpu)) { -- GitLab From e400071a805d6229223a98899e9da8c6233704a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filipe=20La=C3=ADns?= Date: Mon, 4 Jan 2021 20:47:17 +0000 Subject: [PATCH 1609/1894] HID: logitech-dj: add the G602 receiver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tested. The device gets correctly exported to userspace and I can see mouse and keyboard events. Signed-off-by: Filipe Laíns Signed-off-by: Jiri Kosina --- drivers/hid/hid-logitech-dj.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c index 1ffcfc9a1e033..45e7e0bdd382b 100644 --- a/drivers/hid/hid-logitech-dj.c +++ b/drivers/hid/hid-logitech-dj.c @@ -1869,6 +1869,10 @@ static const struct hid_device_id logi_dj_receivers[] = { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xc531), .driver_data = recvr_type_gaming_hidpp}, + { /* Logitech G602 receiver (0xc537) */ + HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, + 0xc537), + .driver_data = recvr_type_gaming_hidpp}, { /* Logitech lightspeed receiver (0xc539) */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_LIGHTSPEED_1), -- GitLab From 74acfa996b2aec2a4ea8587104c7e2f8d4c6aec2 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Fri, 8 Jan 2021 15:36:30 +0100 Subject: [PATCH 1610/1894] block/rnbd: Select SG_POOL for RNBD_CLIENT lkp reboot following build error: drivers/block/rnbd/rnbd-clt.c: In function 'rnbd_softirq_done_fn': >> drivers/block/rnbd/rnbd-clt.c:387:2: error: implicit declaration of function 'sg_free_table_chained' [-Werror=implicit-function-declaration] 387 | sg_free_table_chained(&iu->sgt, RNBD_INLINE_SG_CNT); | ^~~~~~~~~~~~~~~~~~~~~ The reason is CONFIG_SG_POOL is not enabled in the config, to avoid such failure, select SG_POOL in Kconfig for RNBD_CLIENT. Fixes: 5a1328d0c3a7 ("block/rnbd-clt: Dynamically allocate sglist for rnbd_iu") Reported-by: kernel test robot Signed-off-by: Jack Wang Signed-off-by: Jens Axboe --- drivers/block/rnbd/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/rnbd/Kconfig b/drivers/block/rnbd/Kconfig index 4b6d3d816d1f5..2ff05a0d26461 100644 --- a/drivers/block/rnbd/Kconfig +++ b/drivers/block/rnbd/Kconfig @@ -7,6 +7,7 @@ config BLK_DEV_RNBD_CLIENT tristate "RDMA Network Block Device driver client" depends on INFINIBAND_RTRS_CLIENT select BLK_DEV_RNBD + select SG_POOL help RNBD client is a network block device driver using rdma transport. -- GitLab From 1a84e7c629f8f288e02236bc799f9b0be1cab4a7 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Fri, 8 Jan 2021 15:36:31 +0100 Subject: [PATCH 1611/1894] block/rnbd-srv: Fix use after free in rnbd_srv_sess_dev_force_close KASAN detect following BUG: [ 778.215311] ================================================================== [ 778.216696] BUG: KASAN: use-after-free in rnbd_srv_sess_dev_force_close+0x38/0x60 [rnbd_server] [ 778.219037] Read of size 8 at addr ffff88b1d6516c28 by task tee/8842 [ 778.220500] CPU: 37 PID: 8842 Comm: tee Kdump: loaded Not tainted 5.10.0-pserver #5.10.0-1+feature+linux+next+20201214.1025+0910d71 [ 778.220529] Hardware name: Supermicro Super Server/X11DDW-L, BIOS 3.3 02/21/2020 [ 778.220555] Call Trace: [ 778.220609] dump_stack+0x99/0xcb [ 778.220667] ? rnbd_srv_sess_dev_force_close+0x38/0x60 [rnbd_server] [ 778.220715] print_address_description.constprop.7+0x1e/0x230 [ 778.220750] ? freeze_kernel_threads+0x73/0x73 [ 778.220896] ? rnbd_srv_sess_dev_force_close+0x38/0x60 [rnbd_server] [ 778.220932] ? rnbd_srv_sess_dev_force_close+0x38/0x60 [rnbd_server] [ 778.220994] kasan_report.cold.9+0x37/0x7c [ 778.221066] ? kobject_put+0x80/0x270 [ 778.221102] ? rnbd_srv_sess_dev_force_close+0x38/0x60 [rnbd_server] [ 778.221184] rnbd_srv_sess_dev_force_close+0x38/0x60 [rnbd_server] [ 778.221240] rnbd_srv_dev_session_force_close_store+0x6a/0xc0 [rnbd_server] [ 778.221304] ? sysfs_file_ops+0x90/0x90 [ 778.221353] kernfs_fop_write+0x141/0x240 [ 778.221451] vfs_write+0x142/0x4d0 [ 778.221553] ksys_write+0xc0/0x160 [ 778.221602] ? __ia32_sys_read+0x50/0x50 [ 778.221684] ? lockdep_hardirqs_on_prepare+0x13d/0x210 [ 778.221718] ? syscall_enter_from_user_mode+0x1c/0x50 [ 778.221821] do_syscall_64+0x33/0x40 [ 778.221862] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 778.221896] RIP: 0033:0x7f4affdd9504 [ 778.221928] Code: 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b3 0f 1f 80 00 00 00 00 48 8d 05 f9 61 0d 00 8b 00 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 41 54 49 89 d4 55 48 89 f5 53 [ 778.221956] RSP: 002b:00007fffebb36b28 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 778.222011] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f4affdd9504 [ 778.222038] RDX: 0000000000000002 RSI: 00007fffebb36c50 RDI: 0000000000000003 [ 778.222066] RBP: 00007fffebb36c50 R08: 0000556a151aa600 R09: 00007f4affeb1540 [ 778.222094] R10: fffffffffffffc19 R11: 0000000000000246 R12: 0000556a151aa520 [ 778.222121] R13: 0000000000000002 R14: 00007f4affea6760 R15: 0000000000000002 [ 778.222764] Allocated by task 3212: [ 778.223285] kasan_save_stack+0x19/0x40 [ 778.223316] __kasan_kmalloc.constprop.7+0xc1/0xd0 [ 778.223347] kmem_cache_alloc_trace+0x186/0x350 [ 778.223382] rnbd_srv_rdma_ev+0xf16/0x1690 [rnbd_server] [ 778.223422] process_io_req+0x4d1/0x670 [rtrs_server] [ 778.223573] __ib_process_cq+0x10a/0x350 [ib_core] [ 778.223709] ib_cq_poll_work+0x31/0xb0 [ib_core] [ 778.223743] process_one_work+0x521/0xa90 [ 778.223773] worker_thread+0x65/0x5b0 [ 778.223802] kthread+0x1f2/0x210 [ 778.223833] ret_from_fork+0x22/0x30 [ 778.224296] Freed by task 8842: [ 778.224800] kasan_save_stack+0x19/0x40 [ 778.224829] kasan_set_track+0x1c/0x30 [ 778.224860] kasan_set_free_info+0x1b/0x30 [ 778.224889] __kasan_slab_free+0x108/0x150 [ 778.224919] slab_free_freelist_hook+0x64/0x190 [ 778.224947] kfree+0xe2/0x650 [ 778.224982] rnbd_destroy_sess_dev+0x2fa/0x3b0 [rnbd_server] [ 778.225011] kobject_put+0xda/0x270 [ 778.225046] rnbd_srv_sess_dev_force_close+0x30/0x60 [rnbd_server] [ 778.225081] rnbd_srv_dev_session_force_close_store+0x6a/0xc0 [rnbd_server] [ 778.225111] kernfs_fop_write+0x141/0x240 [ 778.225140] vfs_write+0x142/0x4d0 [ 778.225169] ksys_write+0xc0/0x160 [ 778.225198] do_syscall_64+0x33/0x40 [ 778.225227] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 778.226506] The buggy address belongs to the object at ffff88b1d6516c00 which belongs to the cache kmalloc-512 of size 512 [ 778.227464] The buggy address is located 40 bytes inside of 512-byte region [ffff88b1d6516c00, ffff88b1d6516e00) The problem is in the sess_dev release function we call rnbd_destroy_sess_dev, and could free the sess_dev already, but we still set the keep_id in rnbd_srv_sess_dev_force_close, which lead to use after free. To fix it, move the keep_id before the sysfs removal, and cache the rnbd_srv_session for lock accessing, Fixes: 786998050cbc ("block/rnbd-srv: close a mapped device from server side.") Signed-off-by: Jack Wang Reviewed-by: Guoqing Jiang Signed-off-by: Jens Axboe --- drivers/block/rnbd/rnbd-srv.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c index b8e44331e4944..a6a68d44f517c 100644 --- a/drivers/block/rnbd/rnbd-srv.c +++ b/drivers/block/rnbd/rnbd-srv.c @@ -338,10 +338,12 @@ static int rnbd_srv_link_ev(struct rtrs_srv *rtrs, void rnbd_srv_sess_dev_force_close(struct rnbd_srv_sess_dev *sess_dev) { - mutex_lock(&sess_dev->sess->lock); - rnbd_srv_destroy_dev_session_sysfs(sess_dev); - mutex_unlock(&sess_dev->sess->lock); + struct rnbd_srv_session *sess = sess_dev->sess; + sess_dev->keep_id = true; + mutex_lock(&sess->lock); + rnbd_srv_destroy_dev_session_sysfs(sess_dev); + mutex_unlock(&sess->lock); } static int process_msg_close(struct rtrs_srv *rtrs, -- GitLab From 80f99093d81370c5cec37fca3b5a6bdf6bddf0f6 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Fri, 8 Jan 2021 15:36:32 +0100 Subject: [PATCH 1612/1894] block/rnbd-clt: Fix sg table use after free Since dynamically allocate sglist is used for rnbd_iu, we can't free sg table after send_usr_msg since the callback function (cqe.done) could still access the sglist. Otherwise KASAN reports UAF issue: [ 4856.600257] BUG: KASAN: use-after-free in dma_direct_unmap_sg+0x53/0x290 [ 4856.600772] Read of size 4 at addr ffff888206af3a98 by task swapper/1/0 [ 4856.601729] CPU: 1 PID: 0 Comm: swapper/1 Kdump: loaded Tainted: G W 5.10.0-pserver #5.10.0-1+feature+linux+next+20201214.1025+0910d71 [ 4856.601748] Hardware name: Supermicro Super Server/X11DDW-L, BIOS 3.3 02/21/2020 [ 4856.601766] Call Trace: [ 4856.601785] [ 4856.601822] dump_stack+0x99/0xcb [ 4856.601856] ? dma_direct_unmap_sg+0x53/0x290 [ 4856.601888] print_address_description.constprop.7+0x1e/0x230 [ 4856.601913] ? freeze_kernel_threads+0x73/0x73 [ 4856.601965] ? mark_held_locks+0x29/0xa0 [ 4856.602019] ? dma_direct_unmap_sg+0x53/0x290 [ 4856.602039] ? dma_direct_unmap_sg+0x53/0x290 [ 4856.602079] kasan_report.cold.9+0x37/0x7c [ 4856.602188] ? mlx5_ib_post_recv+0x430/0x520 [mlx5_ib] [ 4856.602209] ? dma_direct_unmap_sg+0x53/0x290 [ 4856.602256] dma_direct_unmap_sg+0x53/0x290 [ 4856.602366] complete_rdma_req+0x188/0x4b0 [rtrs_client] [ 4856.602451] ? rtrs_clt_close+0x80/0x80 [rtrs_client] [ 4856.602535] ? mlx5_ib_poll_cq+0x48b/0x16e0 [mlx5_ib] [ 4856.602589] ? radix_tree_insert+0x3a0/0x3a0 [ 4856.602610] ? do_raw_spin_lock+0x119/0x1d0 [ 4856.602647] ? rwlock_bug.part.1+0x60/0x60 [ 4856.602740] rtrs_clt_rdma_done+0x3f7/0x670 [rtrs_client] [ 4856.602804] ? rtrs_clt_rdma_cm_handler+0xda0/0xda0 [rtrs_client] [ 4856.602857] ? check_flags.part.31+0x6c/0x1f0 [ 4856.602927] ? rcu_read_lock_sched_held+0xaf/0xe0 [ 4856.602963] ? rcu_read_lock_bh_held+0xc0/0xc0 [ 4856.603137] __ib_process_cq+0x10a/0x350 [ib_core] [ 4856.603309] ib_poll_handler+0x41/0x1c0 [ib_core] [ 4856.603358] irq_poll_softirq+0xe6/0x280 [ 4856.603392] ? lockdep_hardirqs_on_prepare+0x111/0x210 [ 4856.603446] __do_softirq+0x10d/0x646 [ 4856.603540] asm_call_irq_on_stack+0x12/0x20 [ 4856.603563] [ 4856.605096] Allocated by task 8914: [ 4856.605510] kasan_save_stack+0x19/0x40 [ 4856.605532] __kasan_kmalloc.constprop.7+0xc1/0xd0 [ 4856.605552] __kmalloc+0x155/0x320 [ 4856.605574] __sg_alloc_table+0x155/0x1c0 [ 4856.605594] sg_alloc_table+0x1f/0x50 [ 4856.605620] send_msg_sess_info+0x119/0x2e0 [rnbd_client] [ 4856.605646] remap_devs+0x71/0x210 [rnbd_client] [ 4856.605676] init_sess+0xad8/0xe10 [rtrs_client] [ 4856.605706] rtrs_clt_reconnect_work+0xd6/0x170 [rtrs_client] [ 4856.605728] process_one_work+0x521/0xa90 [ 4856.605748] worker_thread+0x65/0x5b0 [ 4856.605769] kthread+0x1f2/0x210 [ 4856.605789] ret_from_fork+0x22/0x30 [ 4856.606159] Freed by task 8914: [ 4856.606559] kasan_save_stack+0x19/0x40 [ 4856.606580] kasan_set_track+0x1c/0x30 [ 4856.606601] kasan_set_free_info+0x1b/0x30 [ 4856.606622] __kasan_slab_free+0x108/0x150 [ 4856.606642] slab_free_freelist_hook+0x64/0x190 [ 4856.606661] kfree+0xe2/0x650 [ 4856.606681] __sg_free_table+0xa4/0x100 [ 4856.606707] send_msg_sess_info+0x1d6/0x2e0 [rnbd_client] [ 4856.606733] remap_devs+0x71/0x210 [rnbd_client] [ 4856.606763] init_sess+0xad8/0xe10 [rtrs_client] [ 4856.606792] rtrs_clt_reconnect_work+0xd6/0x170 [rtrs_client] [ 4856.606813] process_one_work+0x521/0xa90 [ 4856.606833] worker_thread+0x65/0x5b0 [ 4856.606853] kthread+0x1f2/0x210 [ 4856.606872] ret_from_fork+0x22/0x30 The solution is to free iu's sgtable after the iu is not used anymore. And also move sg_alloc_table into rnbd_get_iu accordingly. Fixes: 5a1328d0c3a7 ("block/rnbd-clt: Dynamically allocate sglist for rnbd_iu") Signed-off-by: Guoqing Jiang Signed-off-by: Jack Wang Signed-off-by: Jens Axboe --- drivers/block/rnbd/rnbd-clt.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index 96e3f9fe82418..506e9094487f6 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -375,12 +375,19 @@ static struct rnbd_iu *rnbd_get_iu(struct rnbd_clt_session *sess, init_waitqueue_head(&iu->comp.wait); iu->comp.errno = INT_MAX; + if (sg_alloc_table(&iu->sgt, 1, GFP_KERNEL)) { + rnbd_put_permit(sess, permit); + kfree(iu); + return NULL; + } + return iu; } static void rnbd_put_iu(struct rnbd_clt_session *sess, struct rnbd_iu *iu) { if (atomic_dec_and_test(&iu->refcount)) { + sg_free_table(&iu->sgt); rnbd_put_permit(sess, iu->permit); kfree(iu); } @@ -487,8 +494,6 @@ static int send_msg_close(struct rnbd_clt_dev *dev, u32 device_id, bool wait) iu->buf = NULL; iu->dev = dev; - sg_alloc_table(&iu->sgt, 1, GFP_KERNEL); - msg.hdr.type = cpu_to_le16(RNBD_MSG_CLOSE); msg.device_id = cpu_to_le32(device_id); @@ -502,7 +507,6 @@ static int send_msg_close(struct rnbd_clt_dev *dev, u32 device_id, bool wait) err = errno; } - sg_free_table(&iu->sgt); rnbd_put_iu(sess, iu); return err; } @@ -575,7 +579,6 @@ static int send_msg_open(struct rnbd_clt_dev *dev, bool wait) iu->buf = rsp; iu->dev = dev; - sg_alloc_table(&iu->sgt, 1, GFP_KERNEL); sg_init_one(iu->sgt.sgl, rsp, sizeof(*rsp)); msg.hdr.type = cpu_to_le16(RNBD_MSG_OPEN); @@ -594,7 +597,6 @@ static int send_msg_open(struct rnbd_clt_dev *dev, bool wait) err = errno; } - sg_free_table(&iu->sgt); rnbd_put_iu(sess, iu); return err; } @@ -622,8 +624,6 @@ static int send_msg_sess_info(struct rnbd_clt_session *sess, bool wait) iu->buf = rsp; iu->sess = sess; - - sg_alloc_table(&iu->sgt, 1, GFP_KERNEL); sg_init_one(iu->sgt.sgl, rsp, sizeof(*rsp)); msg.hdr.type = cpu_to_le16(RNBD_MSG_SESS_INFO); @@ -650,7 +650,6 @@ put_iu: } else { err = errno; } - sg_free_table(&iu->sgt); rnbd_put_iu(sess, iu); return err; } -- GitLab From ef8048dd2345d070c41bc7df16763fd4d8fac296 Mon Sep 17 00:00:00 2001 From: Swapnil Ingle Date: Fri, 8 Jan 2021 15:36:33 +0100 Subject: [PATCH 1613/1894] block/rnbd: Adding name to the Contributors List Adding name to the Contributors List Signed-off-by: Swapnil Ingle Acked-by: Jack Wang Acked-by: Danil Kipnis Signed-off-by: Jack Wang Signed-off-by: Jens Axboe --- drivers/block/rnbd/README | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/rnbd/README b/drivers/block/rnbd/README index 1773c0aa0bd43..080f58a5400ad 100644 --- a/drivers/block/rnbd/README +++ b/drivers/block/rnbd/README @@ -90,3 +90,4 @@ Kleber Souza Lutz Pogrell Milind Dumbare Roman Penyaev +Swapnil Ingle -- GitLab From 3a21777c6ee99749bac10727b3c17e5bcfebe5c1 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Fri, 8 Jan 2021 15:36:34 +0100 Subject: [PATCH 1614/1894] block/rnbd-clt: avoid module unload race with close confirmation We had kernel panic, it is caused by unload module and last close confirmation. call trace: [1196029.743127] free_sess+0x15/0x50 [rtrs_client] [1196029.743128] rtrs_clt_close+0x4c/0x70 [rtrs_client] [1196029.743129] ? rnbd_clt_unmap_device+0x1b0/0x1b0 [rnbd_client] [1196029.743130] close_rtrs+0x25/0x50 [rnbd_client] [1196029.743131] rnbd_client_exit+0x93/0xb99 [rnbd_client] [1196029.743132] __x64_sys_delete_module+0x190/0x260 And in the crashdump confirmation kworker is also running. PID: 6943 TASK: ffff9e2ac8098000 CPU: 4 COMMAND: "kworker/4:2" #0 [ffffb206cf337c30] __schedule at ffffffff9f93f891 #1 [ffffb206cf337cc8] schedule at ffffffff9f93fe98 #2 [ffffb206cf337cd0] schedule_timeout at ffffffff9f943938 #3 [ffffb206cf337d50] wait_for_completion at ffffffff9f9410a7 #4 [ffffb206cf337da0] __flush_work at ffffffff9f08ce0e #5 [ffffb206cf337e20] rtrs_clt_close_conns at ffffffffc0d5f668 [rtrs_client] #6 [ffffb206cf337e48] rtrs_clt_close at ffffffffc0d5f801 [rtrs_client] #7 [ffffb206cf337e68] close_rtrs at ffffffffc0d26255 [rnbd_client] #8 [ffffb206cf337e78] free_sess at ffffffffc0d262ad [rnbd_client] #9 [ffffb206cf337e88] rnbd_clt_put_dev at ffffffffc0d266a7 [rnbd_client] The problem is both code path try to close same session, which lead to panic. To fix it, just skip the sess if the refcount already drop to 0. Fixes: f7a7a5c228d4 ("block/rnbd: client: main functionality") Signed-off-by: Jack Wang Reviewed-by: Gioh Kim Signed-off-by: Jens Axboe --- drivers/block/rnbd/rnbd-clt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index 506e9094487f6..45a4700766524 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -1697,7 +1697,8 @@ static void rnbd_destroy_sessions(void) */ list_for_each_entry_safe(sess, sn, &sess_list, list) { - WARN_ON(!rnbd_clt_get_sess(sess)); + if (!rnbd_clt_get_sess(sess)) + continue; close_rtrs(sess); list_for_each_entry_safe(dev, tn, &sess->devs_list, list) { /* -- GitLab From 02f938e9fed1681791605ca8b96c2d9da9355f6a Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 8 Jan 2021 16:55:37 +0800 Subject: [PATCH 1615/1894] blk-mq-debugfs: Add decode for BLK_MQ_F_TAG_HCTX_SHARED Showing the hctx flags for when BLK_MQ_F_TAG_HCTX_SHARED is set gives something like: root@debian:/home/john# more /sys/kernel/debug/block/sda/hctx0/flags alloc_policy=FIFO SHOULD_MERGE|TAG_QUEUE_SHARED|3 Add the decoding for that flag. Fixes: 32bc15afed04b ("blk-mq: Facilitate a shared sbitmap per tagset") Signed-off-by: John Garry Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index e21eed20a1551..712a95f74ccb8 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -246,6 +246,7 @@ static const char *const hctx_flag_name[] = { HCTX_FLAG_NAME(BLOCKING), HCTX_FLAG_NAME(NO_SCHED), HCTX_FLAG_NAME(STACKING), + HCTX_FLAG_NAME(TAG_HCTX_SHARED), }; #undef HCTX_FLAG_NAME -- GitLab From bac717171971176b78c72d15a8b6961764ab197f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 30 Dec 2020 16:20:05 +0100 Subject: [PATCH 1616/1894] ARM: picoxcell: fix missing interrupt-parent properties dtc points out that the interrupts for some devices are not parsable: picoxcell-pc3x2.dtsi:45.19-49.5: Warning (interrupts_property): /paxi/gem@30000: Missing interrupt-parent picoxcell-pc3x2.dtsi:51.21-55.5: Warning (interrupts_property): /paxi/dmac@40000: Missing interrupt-parent picoxcell-pc3x2.dtsi:57.21-61.5: Warning (interrupts_property): /paxi/dmac@50000: Missing interrupt-parent picoxcell-pc3x2.dtsi:233.21-237.5: Warning (interrupts_property): /rwid-axi/axi2pico@c0000000: Missing interrupt-parent There are two VIC instances, so it's not clear which one needs to be used. I found the BSP sources that reference VIC0, so use that: https://github.com/r1mikey/meta-picoxcell/blob/master/recipes-kernel/linux/linux-picochip-3.0/0001-picoxcell-support-for-Picochip-picoXcell-SoC.patch Acked-by: Jamie Iles Link: https://lore.kernel.org/r/20201230152010.3914962-1-arnd@kernel.org' Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/picoxcell-pc3x2.dtsi | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/boot/dts/picoxcell-pc3x2.dtsi b/arch/arm/boot/dts/picoxcell-pc3x2.dtsi index c4c6c7e9e37b6..5898879a3038e 100644 --- a/arch/arm/boot/dts/picoxcell-pc3x2.dtsi +++ b/arch/arm/boot/dts/picoxcell-pc3x2.dtsi @@ -45,18 +45,21 @@ emac: gem@30000 { compatible = "cadence,gem"; reg = <0x30000 0x10000>; + interrupt-parent = <&vic0>; interrupts = <31>; }; dmac1: dmac@40000 { compatible = "snps,dw-dmac"; reg = <0x40000 0x10000>; + interrupt-parent = <&vic0>; interrupts = <25>; }; dmac2: dmac@50000 { compatible = "snps,dw-dmac"; reg = <0x50000 0x10000>; + interrupt-parent = <&vic0>; interrupts = <26>; }; @@ -233,6 +236,7 @@ axi2pico@c0000000 { compatible = "picochip,axi2pico-pc3x2"; reg = <0xc0000000 0x10000>; + interrupt-parent = <&vic0>; interrupts = <13 14 15 16 17 18 19 20 21>; }; }; -- GitLab From 84e261553e6f919bf0b4d65244599ab2b41f1da5 Mon Sep 17 00:00:00 2001 From: David Arcari Date: Thu, 7 Jan 2021 09:47:07 -0500 Subject: [PATCH 1617/1894] hwmon: (amd_energy) fix allocation of hwmon_channel_info config hwmon, specifically hwmon_num_channel_attrs, expects the config array in the hwmon_channel_info structure to be terminated by a zero entry. amd_energy does not honor this convention. As result, a KASAN warning is possible. Fix this by adding an additional entry and setting it to zero. Fixes: 8abee9566b7e ("hwmon: Add amd_energy driver to report energy counters") Signed-off-by: David Arcari Cc: Naveen Krishna Chatradhi Cc: Jean Delvare Cc: Guenter Roeck Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org Signed-off-by: David Arcari Acked-by: Naveen Krishna Chatradhi Link: https://lore.kernel.org/r/20210107144707.6927-1-darcari@redhat.com Signed-off-by: Guenter Roeck --- drivers/hwmon/amd_energy.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/amd_energy.c b/drivers/hwmon/amd_energy.c index 9b306448b7a0f..822c2e74b98d4 100644 --- a/drivers/hwmon/amd_energy.c +++ b/drivers/hwmon/amd_energy.c @@ -222,7 +222,7 @@ static int amd_create_sensor(struct device *dev, */ cpus = num_present_cpus() / num_siblings; - s_config = devm_kcalloc(dev, cpus + sockets, + s_config = devm_kcalloc(dev, cpus + sockets + 1, sizeof(u32), GFP_KERNEL); if (!s_config) return -ENOMEM; @@ -254,6 +254,7 @@ static int amd_create_sensor(struct device *dev, scnprintf(label_l[i], 10, "Esocket%u", (i - cpus)); } + s_config[i] = 0; return 0; } -- GitLab From e076ab2a2ca70a0270232067cd49f76cd92efe64 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 7 Jan 2021 17:08:30 -0500 Subject: [PATCH 1618/1894] btrfs: shrink delalloc pages instead of full inodes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 38d715f494f2 ("btrfs: use btrfs_start_delalloc_roots in shrink_delalloc") cleaned up how we do delalloc shrinking by utilizing some infrastructure we have in place to flush inodes that we use for device replace and snapshot. However this introduced a pretty serious performance regression. To reproduce the user untarred the source tarball of Firefox (360MiB xz compressed/1.5GiB uncompressed), and would see it take anywhere from 5 to 20 times as long to untar in 5.10 compared to 5.9. This was observed on fast devices (SSD and better) and not on HDD. The root cause is because before we would generally use the normal writeback path to reclaim delalloc space, and for this we would provide it with the number of pages we wanted to flush. The referenced commit changed this to flush that many inodes, which drastically increased the amount of space we were flushing in certain cases, which severely affected performance. We cannot revert this patch unfortunately because of 3d45f221ce62 ("btrfs: fix deadlock when cloning inline extent and low on free metadata space") which requires the ability to skip flushing inodes that are being cloned in certain scenarios, which means we need to keep using our flushing infrastructure or risk re-introducing the deadlock. Instead to fix this problem we can go back to providing btrfs_start_delalloc_roots with a number of pages to flush, and then set up a writeback_control and utilize sync_inode() to handle the flushing for us. This gives us the same behavior we had prior to the fix, while still allowing us to avoid the deadlock that was fixed by Filipe. I redid the users original test and got the following results on one of our test machines (256GiB of ram, 56 cores, 2TiB Intel NVMe drive) 5.9 0m54.258s 5.10 1m26.212s 5.10+patch 0m38.800s 5.10+patch is significantly faster than plain 5.9 because of my patch series "Change data reservations to use the ticketing infra" which contained the patch that introduced the regression, but generally improved the overall ENOSPC flushing mechanisms. Additional testing on consumer-grade SSD (8GiB ram, 8 CPU) confirm the results: 5.10.5 4m00s 5.10.5+patch 1m08s 5.11-rc2 5m14s 5.11-rc2+patch 1m30s Reported-by: René Rebe Fixes: 38d715f494f2 ("btrfs: use btrfs_start_delalloc_roots in shrink_delalloc") CC: stable@vger.kernel.org # 5.10 Signed-off-by: Josef Bacik Tested-by: David Sterba Reviewed-by: David Sterba [ add my test results ] Signed-off-by: David Sterba --- fs/btrfs/inode.c | 60 +++++++++++++++++++++++++++++++------------ fs/btrfs/space-info.c | 4 ++- 2 files changed, 46 insertions(+), 18 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 070716650df87..a8e0a6b038d3e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9390,7 +9390,8 @@ static struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode * some fairly slow code that needs optimization. This walks the list * of all the inodes with pending delalloc and forces them to disk. */ -static int start_delalloc_inodes(struct btrfs_root *root, u64 *nr, bool snapshot, +static int start_delalloc_inodes(struct btrfs_root *root, + struct writeback_control *wbc, bool snapshot, bool in_reclaim_context) { struct btrfs_inode *binode; @@ -9399,6 +9400,7 @@ static int start_delalloc_inodes(struct btrfs_root *root, u64 *nr, bool snapshot struct list_head works; struct list_head splice; int ret = 0; + bool full_flush = wbc->nr_to_write == LONG_MAX; INIT_LIST_HEAD(&works); INIT_LIST_HEAD(&splice); @@ -9427,18 +9429,24 @@ static int start_delalloc_inodes(struct btrfs_root *root, u64 *nr, bool snapshot if (snapshot) set_bit(BTRFS_INODE_SNAPSHOT_FLUSH, &binode->runtime_flags); - work = btrfs_alloc_delalloc_work(inode); - if (!work) { - iput(inode); - ret = -ENOMEM; - goto out; - } - list_add_tail(&work->list, &works); - btrfs_queue_work(root->fs_info->flush_workers, - &work->work); - if (*nr != U64_MAX) { - (*nr)--; - if (*nr == 0) + if (full_flush) { + work = btrfs_alloc_delalloc_work(inode); + if (!work) { + iput(inode); + ret = -ENOMEM; + goto out; + } + list_add_tail(&work->list, &works); + btrfs_queue_work(root->fs_info->flush_workers, + &work->work); + } else { + ret = sync_inode(inode, wbc); + if (!ret && + test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, + &BTRFS_I(inode)->runtime_flags)) + ret = sync_inode(inode, wbc); + btrfs_add_delayed_iput(inode); + if (ret || wbc->nr_to_write <= 0) goto out; } cond_resched(); @@ -9464,18 +9472,29 @@ out: int btrfs_start_delalloc_snapshot(struct btrfs_root *root) { + struct writeback_control wbc = { + .nr_to_write = LONG_MAX, + .sync_mode = WB_SYNC_NONE, + .range_start = 0, + .range_end = LLONG_MAX, + }; struct btrfs_fs_info *fs_info = root->fs_info; - u64 nr = U64_MAX; if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) return -EROFS; - return start_delalloc_inodes(root, &nr, true, false); + return start_delalloc_inodes(root, &wbc, true, false); } int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr, bool in_reclaim_context) { + struct writeback_control wbc = { + .nr_to_write = (nr == U64_MAX) ? LONG_MAX : (unsigned long)nr, + .sync_mode = WB_SYNC_NONE, + .range_start = 0, + .range_end = LLONG_MAX, + }; struct btrfs_root *root; struct list_head splice; int ret; @@ -9489,6 +9508,13 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr, spin_lock(&fs_info->delalloc_root_lock); list_splice_init(&fs_info->delalloc_roots, &splice); while (!list_empty(&splice) && nr) { + /* + * Reset nr_to_write here so we know that we're doing a full + * flush. + */ + if (nr == U64_MAX) + wbc.nr_to_write = LONG_MAX; + root = list_first_entry(&splice, struct btrfs_root, delalloc_root); root = btrfs_grab_root(root); @@ -9497,9 +9523,9 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr, &fs_info->delalloc_roots); spin_unlock(&fs_info->delalloc_root_lock); - ret = start_delalloc_inodes(root, &nr, false, in_reclaim_context); + ret = start_delalloc_inodes(root, &wbc, false, in_reclaim_context); btrfs_put_root(root); - if (ret < 0) + if (ret < 0 || wbc.nr_to_write <= 0) goto out; spin_lock(&fs_info->delalloc_root_lock); } diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 67e55c5479b8e..e8347461c8ddd 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -532,7 +532,9 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, loops = 0; while ((delalloc_bytes || dio_bytes) && loops < 3) { - btrfs_start_delalloc_roots(fs_info, items, true); + u64 nr_pages = min(delalloc_bytes, to_reclaim) >> PAGE_SHIFT; + + btrfs_start_delalloc_roots(fs_info, nr_pages, true); loops++; if (wait_ordered && !trans) { -- GitLab From 7c38e769d5c508939ce5dc26df72602f3c902342 Mon Sep 17 00:00:00 2001 From: Seth Miller Date: Mon, 4 Jan 2021 22:58:12 -0600 Subject: [PATCH 1619/1894] HID: Ignore battery for Elan touchscreen on ASUS UX550 Battery status is being reported for the Elan touchscreen on ASUS UX550 laptops despite not having a batter. It always shows either 0 or 1%. Signed-off-by: Seth Miller Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-input.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 4c5f23640f9c7..5ba0aa1d23353 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -389,6 +389,7 @@ #define USB_DEVICE_ID_TOSHIBA_CLICK_L9W 0x0401 #define USB_DEVICE_ID_HP_X2 0x074d #define USB_DEVICE_ID_HP_X2_10_COVER 0x0755 +#define USB_DEVICE_ID_ASUS_UX550_TOUCHSCREEN 0x2706 #define USB_VENDOR_ID_ELECOM 0x056e #define USB_DEVICE_ID_ELECOM_BM084 0x0061 diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index dc7f6b4a775c9..f23027d2795ba 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -322,6 +322,8 @@ static const struct hid_device_id hid_battery_quirks[] = { { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_DINOVO_EDGE_KBD), HID_BATTERY_QUIRK_IGNORE }, + { HID_USB_DEVICE(USB_VENDOR_ID_ELAN, USB_DEVICE_ID_ASUS_UX550_TOUCHSCREEN), + HID_BATTERY_QUIRK_IGNORE }, {} }; -- GitLab From a91bd6223ecd46addc71ee6fcd432206d39365d2 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Fri, 8 Jan 2021 12:48:47 +0100 Subject: [PATCH 1620/1894] Revert "init/console: Use ttynull as a fallback when there is no console" This reverts commit 757055ae8dedf5333af17b3b5b4b70ba9bc9da4e. The commit caused that ttynull was used as the default console on several systems[1][2][3]. As a result, the console was blank even when a better alternative existed. It happened when there was no console configured on the command line and ttynull_init() was the first initcall calling register_console(). Or it happened when /dev/ did not exist when console_on_rootfs() was called. It was not able to open /dev/console even though a console driver was registered. It tried to add ttynull console but it obviously did not help. But ttynull became the preferred console and was used by /dev/console when it was available later. The commit tried to fix a historical problem that have been there for ages. The primary motivation was the commit 3cffa06aeef7ece30f6 ("printk/console: Allow to disable console output by using console="" or console=null"). It provided a clean solution for a workaround that was widely used and worked only by chance. This revert causes that the console="" or console=null command line options will again work only by chance. These options will cause that a particular console will be preferred and the default (tty) ones will not get enabled. There will be no console registered at all. As a result there won't be stdin, stdout, and stderr for the init process. But it worked exactly this way even before. The proper solution has to fulfill many conditions: + Register ttynull only when explicitly required or as the ultimate fallback. + ttynull should get associated with /dev/console but it must not become preferred console when used as a fallback. Especially, it must still be possible to replace it by a better console later. Such a change requires clean up of the register_console() code. Otherwise, it would be even harder to follow. Especially, the use of has_preferred_console and CON_CONSDEV flag is tricky. The clean up is risky. The ordering of consoles is not well defined. And any changes tend to break existing user settings. Do the revert at the least risky solution for now. [1] https://lore.kernel.org/linux-kselftest/20201221144302.GR4077@smile.fi.intel.com/ [2] https://lore.kernel.org/lkml/d2a3b3c0-e548-7dd1-730f-59bc5c04e191@synopsys.com/ [3] https://patchwork.ozlabs.org/project/linux-um/patch/20210105120128.10854-1-thomas@m3y3r.de/ Reported-by: Andy Shevchenko Reported-by: Vineet Gupta Reported-by: Thomas Meyer Signed-off-by: Petr Mladek Acked-by: Greg Kroah-Hartman Acked-by: Sergey Senozhatsky Signed-off-by: Linus Torvalds --- drivers/tty/Kconfig | 14 ++++++++++++++ drivers/tty/Makefile | 3 ++- drivers/tty/ttynull.c | 18 ------------------ include/linux/console.h | 3 --- init/main.c | 10 ++-------- 5 files changed, 18 insertions(+), 30 deletions(-) diff --git a/drivers/tty/Kconfig b/drivers/tty/Kconfig index 47a6e42f0d04f..e15cd6b5bb99a 100644 --- a/drivers/tty/Kconfig +++ b/drivers/tty/Kconfig @@ -401,6 +401,20 @@ config MIPS_EJTAG_FDC_KGDB_CHAN help FDC channel number to use for KGDB. +config NULL_TTY + tristate "NULL TTY driver" + help + Say Y here if you want a NULL TTY which simply discards messages. + + This is useful to allow userspace applications which expect a console + device to work without modifications even when no console is + available or desired. + + In order to use this driver, you should redirect the console to this + TTY, or boot the kernel with console=ttynull. + + If unsure, say N. + config TRACE_ROUTER tristate "Trace data router for MIPI P1149.7 cJTAG standard" depends on TRACE_SINK diff --git a/drivers/tty/Makefile b/drivers/tty/Makefile index 3c1c5a9240a70..b3ccae9326601 100644 --- a/drivers/tty/Makefile +++ b/drivers/tty/Makefile @@ -2,7 +2,7 @@ obj-$(CONFIG_TTY) += tty_io.o n_tty.o tty_ioctl.o tty_ldisc.o \ tty_buffer.o tty_port.o tty_mutex.o \ tty_ldsem.o tty_baudrate.o tty_jobctrl.o \ - n_null.o ttynull.o + n_null.o obj-$(CONFIG_LEGACY_PTYS) += pty.o obj-$(CONFIG_UNIX98_PTYS) += pty.o obj-$(CONFIG_AUDIT) += tty_audit.o @@ -25,6 +25,7 @@ obj-$(CONFIG_ISI) += isicom.o obj-$(CONFIG_MOXA_INTELLIO) += moxa.o obj-$(CONFIG_MOXA_SMARTIO) += mxser.o obj-$(CONFIG_NOZOMI) += nozomi.o +obj-$(CONFIG_NULL_TTY) += ttynull.o obj-$(CONFIG_ROCKETPORT) += rocket.o obj-$(CONFIG_SYNCLINK_GT) += synclink_gt.o obj-$(CONFIG_PPC_EPAPR_HV_BYTECHAN) += ehv_bytechan.o diff --git a/drivers/tty/ttynull.c b/drivers/tty/ttynull.c index eced70ec54e17..17f05b7eb6d3e 100644 --- a/drivers/tty/ttynull.c +++ b/drivers/tty/ttynull.c @@ -2,13 +2,6 @@ /* * Copyright (C) 2019 Axis Communications AB * - * The console is useful for userspace applications which expect a console - * device to work without modifications even when no console is available - * or desired. - * - * In order to use this driver, you should redirect the console to this - * TTY, or boot the kernel with console=ttynull. - * * Based on ttyprintk.c: * Copyright (C) 2010 Samo Pogacnik */ @@ -66,17 +59,6 @@ static struct console ttynull_console = { .device = ttynull_device, }; -void __init register_ttynull_console(void) -{ - if (!ttynull_driver) - return; - - if (add_preferred_console(ttynull_console.name, 0, NULL)) - return; - - register_console(&ttynull_console); -} - static int __init ttynull_init(void) { struct tty_driver *driver; diff --git a/include/linux/console.h b/include/linux/console.h index dbe78e8e26029..20874db50bc8a 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -186,12 +186,9 @@ extern int braille_register_console(struct console *, int index, extern int braille_unregister_console(struct console *); #ifdef CONFIG_TTY extern void console_sysfs_notify(void); -extern void register_ttynull_console(void); #else static inline void console_sysfs_notify(void) { } -static inline void register_ttynull_console(void) -{ } #endif extern bool console_suspend_enabled; diff --git a/init/main.c b/init/main.c index 421640fca3759..c68d784376ca1 100644 --- a/init/main.c +++ b/init/main.c @@ -1480,14 +1480,8 @@ void __init console_on_rootfs(void) struct file *file = filp_open("/dev/console", O_RDWR, 0); if (IS_ERR(file)) { - pr_err("Warning: unable to open an initial console. Fallback to ttynull.\n"); - register_ttynull_console(); - - file = filp_open("/dev/console", O_RDWR, 0); - if (IS_ERR(file)) { - pr_err("Warning: Failed to add ttynull console. No stdin, stdout, and stderr for the init process!\n"); - return; - } + pr_err("Warning: unable to open an initial console.\n"); + return; } init_dup(file); init_dup(file); -- GitLab From ef0ba05538299f1391cbe097de36895bb36ecfe6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 7 Jan 2021 09:43:54 -0800 Subject: [PATCH 1621/1894] poll: fix performance regression due to out-of-line __put_user() The kernel test robot reported a -5.8% performance regression on the "poll2" test of will-it-scale, and bisected it to commit d55564cfc222 ("x86: Make __put_user() generate an out-of-line call"). I didn't expect an out-of-line __put_user() to matter, because no normal core code should use that non-checking legacy version of user access any more. But I had overlooked the very odd poll() usage, which does a __put_user() to update the 'revents' values of the poll array. Now, Al Viro correctly points out that instead of updating just the 'revents' field, it would be much simpler to just copy the _whole_ pollfd entry, and then we could just use "copy_to_user()" on the whole array of entries, the same way we use "copy_from_user()" a few lines earlier to get the original values. But that is not what we've traditionally done, and I worry that threaded applications might be concurrently modifying the other fields of the pollfd array. So while Al's suggestion is simpler - and perhaps worth trying in the future - this instead keeps the "just update revents" model. To fix the performance regression, use the modern "unsafe_put_user()" instead of __put_user(), with the proper "user_write_access_begin()" guarding in place. This improves code generation enormously. Link: https://lore.kernel.org/lkml/20210107134723.GA28532@xsang-OptiPlex-9020/ Reported-by: kernel test robot Tested-by: Oliver Sang Cc: Al Viro Cc: David Laight Cc: Peter Zijlstra Signed-off-by: Linus Torvalds --- fs/select.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/fs/select.c b/fs/select.c index ebfebdfe5c69a..37aaa8317f3ae 100644 --- a/fs/select.c +++ b/fs/select.c @@ -1011,14 +1011,17 @@ static int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, fdcount = do_poll(head, &table, end_time); poll_freewait(&table); + if (!user_write_access_begin(ufds, nfds * sizeof(*ufds))) + goto out_fds; + for (walk = head; walk; walk = walk->next) { struct pollfd *fds = walk->entries; int j; - for (j = 0; j < walk->len; j++, ufds++) - if (__put_user(fds[j].revents, &ufds->revents)) - goto out_fds; + for (j = walk->len; j; fds++, ufds++, j--) + unsafe_put_user(fds->revents, &ufds->revents, Efault); } + user_write_access_end(); err = fdcount; out_fds: @@ -1030,6 +1033,11 @@ out_fds: } return err; + +Efault: + user_write_access_end(); + err = -EFAULT; + goto out_fds; } static long do_restart_poll(struct restart_block *restart_block) -- GitLab From 0378c625afe80eb3f212adae42cc33c9f6f31abf Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 6 Jan 2021 18:19:05 -0500 Subject: [PATCH 1622/1894] dm: eliminate potential source of excessive kernel log noise There wasn't ever a real need to log an error in the kernel log for ioctls issued with insufficient permissions. Simply return an error and if an admin/user is sufficiently motivated they can enable DM's dynamic debugging to see an explanation for why the ioctls were disallowed. Reported-by: Nir Soffer Fixes: e980f62353c6 ("dm: don't allow ioctls to targets that don't map to whole devices") Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index b3c3c8b4cb428..7bac564f3faa6 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -562,7 +562,7 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode, * subset of the parent bdev; require extra privileges. */ if (!capable(CAP_SYS_RAWIO)) { - DMWARN_LIMIT( + DMDEBUG_LIMIT( "%s: sending ioctl %x to DM device without required privilege.", current->comm, cmd); r = -ENOIOCTLCMD; -- GitLab From 9b5948267adc9e689da609eb61cf7ed49cae5fa8 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 8 Jan 2021 11:15:56 -0500 Subject: [PATCH 1623/1894] dm integrity: fix flush with external metadata device With external metadata device, flush requests are not passed down to the data device. Fix this by submitting the flush request in dm_integrity_flush_buffers. In order to not degrade performance, we overlap the data device flush with the metadata device flush. Reported-by: Lukas Straub Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer --- drivers/md/dm-bufio.c | 6 ++++ drivers/md/dm-integrity.c | 60 ++++++++++++++++++++++++++++++++------- include/linux/dm-bufio.h | 1 + 3 files changed, 56 insertions(+), 11 deletions(-) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 9c1a86bde658e..fce4cbf9529d6 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -1534,6 +1534,12 @@ sector_t dm_bufio_get_device_size(struct dm_bufio_client *c) } EXPORT_SYMBOL_GPL(dm_bufio_get_device_size); +struct dm_io_client *dm_bufio_get_dm_io_client(struct dm_bufio_client *c) +{ + return c->dm_io; +} +EXPORT_SYMBOL_GPL(dm_bufio_get_dm_io_client); + sector_t dm_bufio_get_block_number(struct dm_buffer *b) { return b->block; diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 5a7a1b90e671c..11c7c538f7a98 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -1379,12 +1379,52 @@ thorough_test: #undef MAY_BE_HASH } -static void dm_integrity_flush_buffers(struct dm_integrity_c *ic) +struct flush_request { + struct dm_io_request io_req; + struct dm_io_region io_reg; + struct dm_integrity_c *ic; + struct completion comp; +}; + +static void flush_notify(unsigned long error, void *fr_) +{ + struct flush_request *fr = fr_; + if (unlikely(error != 0)) + dm_integrity_io_error(fr->ic, "flusing disk cache", -EIO); + complete(&fr->comp); +} + +static void dm_integrity_flush_buffers(struct dm_integrity_c *ic, bool flush_data) { int r; + + struct flush_request fr; + + if (!ic->meta_dev) + flush_data = false; + if (flush_data) { + fr.io_req.bi_op = REQ_OP_WRITE, + fr.io_req.bi_op_flags = REQ_PREFLUSH | REQ_SYNC, + fr.io_req.mem.type = DM_IO_KMEM, + fr.io_req.mem.ptr.addr = NULL, + fr.io_req.notify.fn = flush_notify, + fr.io_req.notify.context = &fr; + fr.io_req.client = dm_bufio_get_dm_io_client(ic->bufio), + fr.io_reg.bdev = ic->dev->bdev, + fr.io_reg.sector = 0, + fr.io_reg.count = 0, + fr.ic = ic; + init_completion(&fr.comp); + r = dm_io(&fr.io_req, 1, &fr.io_reg, NULL); + BUG_ON(r); + } + r = dm_bufio_write_dirty_buffers(ic->bufio); if (unlikely(r)) dm_integrity_io_error(ic, "writing tags", r); + + if (flush_data) + wait_for_completion(&fr.comp); } static void sleep_on_endio_wait(struct dm_integrity_c *ic) @@ -2110,7 +2150,7 @@ offload_to_thread: if (unlikely(dio->op == REQ_OP_DISCARD) && likely(ic->mode != 'D')) { integrity_metadata(&dio->work); - dm_integrity_flush_buffers(ic); + dm_integrity_flush_buffers(ic, false); dio->in_flight = (atomic_t)ATOMIC_INIT(1); dio->completion = NULL; @@ -2195,7 +2235,7 @@ static void integrity_commit(struct work_struct *w) flushes = bio_list_get(&ic->flush_bio_list); if (unlikely(ic->mode != 'J')) { spin_unlock_irq(&ic->endio_wait.lock); - dm_integrity_flush_buffers(ic); + dm_integrity_flush_buffers(ic, true); goto release_flush_bios; } @@ -2409,7 +2449,7 @@ skip_io: complete_journal_op(&comp); wait_for_completion_io(&comp.comp); - dm_integrity_flush_buffers(ic); + dm_integrity_flush_buffers(ic, true); } static void integrity_writer(struct work_struct *w) @@ -2451,7 +2491,7 @@ static void recalc_write_super(struct dm_integrity_c *ic) { int r; - dm_integrity_flush_buffers(ic); + dm_integrity_flush_buffers(ic, false); if (dm_integrity_failed(ic)) return; @@ -2654,7 +2694,7 @@ static void bitmap_flush_work(struct work_struct *work) unsigned long limit; struct bio *bio; - dm_integrity_flush_buffers(ic); + dm_integrity_flush_buffers(ic, false); range.logical_sector = 0; range.n_sectors = ic->provided_data_sectors; @@ -2663,9 +2703,7 @@ static void bitmap_flush_work(struct work_struct *work) add_new_range_and_wait(ic, &range); spin_unlock_irq(&ic->endio_wait.lock); - dm_integrity_flush_buffers(ic); - if (ic->meta_dev) - blkdev_issue_flush(ic->dev->bdev, GFP_NOIO); + dm_integrity_flush_buffers(ic, true); limit = ic->provided_data_sectors; if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) { @@ -2934,11 +2972,11 @@ static void dm_integrity_postsuspend(struct dm_target *ti) if (ic->meta_dev) queue_work(ic->writer_wq, &ic->writer_work); drain_workqueue(ic->writer_wq); - dm_integrity_flush_buffers(ic); + dm_integrity_flush_buffers(ic, true); } if (ic->mode == 'B') { - dm_integrity_flush_buffers(ic); + dm_integrity_flush_buffers(ic, true); #if 1 /* set to 0 to test bitmap replay code */ init_journal(ic, 0, ic->journal_sections, 0); diff --git a/include/linux/dm-bufio.h b/include/linux/dm-bufio.h index 29d255fdd5d64..90bd558a17f51 100644 --- a/include/linux/dm-bufio.h +++ b/include/linux/dm-bufio.h @@ -150,6 +150,7 @@ void dm_bufio_set_minimum_buffers(struct dm_bufio_client *c, unsigned n); unsigned dm_bufio_get_block_size(struct dm_bufio_client *c); sector_t dm_bufio_get_device_size(struct dm_bufio_client *c); +struct dm_io_client *dm_bufio_get_dm_io_client(struct dm_bufio_client *c); sector_t dm_bufio_get_block_number(struct dm_buffer *b); void *dm_bufio_get_block_data(struct dm_buffer *b); void *dm_bufio_get_aux_data(struct dm_buffer *b); -- GitLab From e8deee4f1543eda9b75278f63322f412cad52f6a Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 8 Jan 2021 13:46:55 -0800 Subject: [PATCH 1624/1894] ARC: [hsdk]: Enable FPU_SAVE_RESTORE HSDK has hardware floating point and the common use case is with glibc+hf so enable that as default. Signed-off-by: Vineet Gupta --- arch/arc/plat-hsdk/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arc/plat-hsdk/Kconfig b/arch/arc/plat-hsdk/Kconfig index 6b5c54576f54d..a2d10c29fbcc0 100644 --- a/arch/arc/plat-hsdk/Kconfig +++ b/arch/arc/plat-hsdk/Kconfig @@ -7,6 +7,7 @@ menuconfig ARC_SOC_HSDK depends on ISA_ARCV2 select ARC_HAS_ACCL_REGS select ARC_IRQ_NO_AUTOSAVE + select ARC_FPU_SAVE_RESTORE select CLK_HSDK select RESET_CONTROLLER select RESET_HSDK -- GitLab From b2345a8a4342cf83316a2198fa915c7c99b7d6c7 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 8 Jan 2021 08:52:15 +0100 Subject: [PATCH 1625/1894] ALSA: usb-audio: Fix the missing endpoints creations for quirks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The recent change in the endpoint management moved the endpoint object creation from the stream open time to the parser of the audio descriptor. It works fine for the standard audio, but it overlooked the other places that create audio streams via quirks (QUIRK_AUDIO_FIXED_ENDPOINT) like the reported a few Pioneer devices; those call snd_usb_add_audio_stream() manually, hence they miss the endpoints, eventually resulting in the error at opening streams. Moreover, now the sync EP setup was moved to the explicit call of snd_usb_audioformat_set_sync_ep(), and this needs to be added for those places, too. This patch addresses those regressions for quirks. It adds a local helper function add_audio_stream_from_fixed_fmt(), which does the all needed tasks, and replaces the calls of snd_usb_add_audio_stream() with this new function. Fixes: 54cb31901b83 ("ALSA: usb-audio: Create endpoint objects at parsing phase") Reported-by: František Kučera Link: https://lore.kernel.org/r/20210108075219.21463-2-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 54 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 44 insertions(+), 10 deletions(-) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index e4a690bb4c996..b70e2ebc3e29e 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -120,6 +120,40 @@ static int create_standard_audio_quirk(struct snd_usb_audio *chip, return 0; } +/* create the audio stream and the corresponding endpoints from the fixed + * audioformat object; this is used for quirks with the fixed EPs + */ +static int add_audio_stream_from_fixed_fmt(struct snd_usb_audio *chip, + struct audioformat *fp) +{ + int stream, err; + + stream = (fp->endpoint & USB_DIR_IN) ? + SNDRV_PCM_STREAM_CAPTURE : SNDRV_PCM_STREAM_PLAYBACK; + + snd_usb_audioformat_set_sync_ep(chip, fp); + + err = snd_usb_add_audio_stream(chip, stream, fp); + if (err < 0) + return err; + + err = snd_usb_add_endpoint(chip, fp->endpoint, + SND_USB_ENDPOINT_TYPE_DATA); + if (err < 0) + return err; + + if (fp->sync_ep) { + err = snd_usb_add_endpoint(chip, fp->sync_ep, + fp->implicit_fb ? + SND_USB_ENDPOINT_TYPE_DATA : + SND_USB_ENDPOINT_TYPE_SYNC); + if (err < 0) + return err; + } + + return 0; +} + /* * create a stream for an endpoint/altsetting without proper descriptors */ @@ -131,8 +165,8 @@ static int create_fixed_stream_quirk(struct snd_usb_audio *chip, struct audioformat *fp; struct usb_host_interface *alts; struct usb_interface_descriptor *altsd; - int stream, err; unsigned *rate_table = NULL; + int err; fp = kmemdup(quirk->data, sizeof(*fp), GFP_KERNEL); if (!fp) @@ -153,11 +187,6 @@ static int create_fixed_stream_quirk(struct snd_usb_audio *chip, fp->rate_table = rate_table; } - stream = (fp->endpoint & USB_DIR_IN) - ? SNDRV_PCM_STREAM_CAPTURE : SNDRV_PCM_STREAM_PLAYBACK; - err = snd_usb_add_audio_stream(chip, stream, fp); - if (err < 0) - goto error; if (fp->iface != get_iface_desc(&iface->altsetting[0])->bInterfaceNumber || fp->altset_idx >= iface->num_altsetting) { err = -EINVAL; @@ -176,6 +205,13 @@ static int create_fixed_stream_quirk(struct snd_usb_audio *chip, fp->datainterval = snd_usb_parse_datainterval(chip, alts); if (fp->maxpacksize == 0) fp->maxpacksize = le16_to_cpu(get_endpoint(alts, 0)->wMaxPacketSize); + if (!fp->fmt_type) + fp->fmt_type = UAC_FORMAT_TYPE_I; + + err = add_audio_stream_from_fixed_fmt(chip, fp); + if (err < 0) + goto error; + usb_set_interface(chip->dev, fp->iface, 0); snd_usb_init_pitch(chip, fp); snd_usb_init_sample_rate(chip, fp, fp->rate_max); @@ -417,7 +453,7 @@ static int create_uaxx_quirk(struct snd_usb_audio *chip, struct usb_host_interface *alts; struct usb_interface_descriptor *altsd; struct audioformat *fp; - int stream, err; + int err; /* both PCM and MIDI interfaces have 2 or more altsettings */ if (iface->num_altsetting < 2) @@ -482,9 +518,7 @@ static int create_uaxx_quirk(struct snd_usb_audio *chip, return -ENXIO; } - stream = (fp->endpoint & USB_DIR_IN) - ? SNDRV_PCM_STREAM_CAPTURE : SNDRV_PCM_STREAM_PLAYBACK; - err = snd_usb_add_audio_stream(chip, stream, fp); + err = add_audio_stream_from_fixed_fmt(chip, fp); if (err < 0) { list_del(&fp->list); /* unlink for avoiding double-free */ kfree(fp); -- GitLab From 5d15f1eb456025cf47078fdbc230d7a9f1ee4cef Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 8 Jan 2021 08:52:16 +0100 Subject: [PATCH 1626/1894] ALSA: usb-audio: Choose audioformat of a counter-part substream The implicit feedback mode needs to handle two endpoints and the choice of the audioformat object for the sync EP is important since this determines the compatibility of the hw_params. The current code uses the same audioformat object if both the main EP and the sync EP point to the same iface/altsetting. This was done in consideration of the non-implicit-fb sync EP handling, and it doesn't match well with the cases where actually to endpoints are defined in the sameiface / altsetting like a few Pioneer devices. Modify snd_usb_find_implicit_fb_sync_format() to pick up the audioformat that is assigned in the counter-part substreams primarily, so that the actual capture stream can be opened properly. We keep the same audioformat object only as a fallback in case nothing found, though. Fixes: 9fddc15e8039 ("ALSA: usb-audio: Factor out the implicit feedback quirk code") Link: https://lore.kernel.org/r/20210108075219.21463-3-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/implicit.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/sound/usb/implicit.c b/sound/usb/implicit.c index 931042a6a051e..9724efe1cdce8 100644 --- a/sound/usb/implicit.c +++ b/sound/usb/implicit.c @@ -378,20 +378,19 @@ snd_usb_find_implicit_fb_sync_format(struct snd_usb_audio *chip, int stream) { struct snd_usb_substream *subs; - const struct audioformat *fp, *sync_fmt; + const struct audioformat *fp, *sync_fmt = NULL; int score, high_score; - /* When sharing the same altset, use the original audioformat */ + /* Use the original audioformat as fallback for the shared altset */ if (target->iface == target->sync_iface && target->altsetting == target->sync_altsetting) - return target; + sync_fmt = target; subs = find_matching_substream(chip, stream, target->sync_ep, target->fmt_type); if (!subs) - return NULL; + return sync_fmt; - sync_fmt = NULL; high_score = 0; list_for_each_entry(fp, &subs->fmt_list, list) { score = match_endpoint_audioformats(subs, fp, -- GitLab From 00272c61827e37bb64c47499843d8c0d8ee136a5 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 8 Jan 2021 08:52:17 +0100 Subject: [PATCH 1627/1894] ALSA: usb-audio: Avoid unnecessary interface re-setup The current endpoint handling assumed (more or less) a unique 1:1 relation between the endpoint and the iface/altset. The exception was the sync EP without the implicit feedback which has usually the secondary EP of the same altset. This works fine for most devices, but it turned out that some unusual devices like Pinoeer's ones have both playback and capture endpoints in the same iface/altsetting and use both for the implicit feedback mode. For handling such a case, we need to extend the endpoint management to take the shared interface into account. This patch does that: it adds a new object snd_usb_iface_ref for managing the reference counts of the each USB interface that is used by each endpoint. The interface setup is performed only once for the (sharing) endpoints, and the doubly initialization is avoided. Along with this, the resource release of endpoints and interface refcounts are put into a single function, snd_usb_endpoint_free_all() instead of looping in the caller side. Fixes: bf6313a0ff76 ("ALSA: usb-audio: Refactor endpoint management") Link: https://lore.kernel.org/r/20210108075219.21463-4-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/card.c | 5 ++- sound/usb/card.h | 2 ++ sound/usb/endpoint.c | 82 ++++++++++++++++++++++++++++++++++++++------ sound/usb/endpoint.h | 2 +- sound/usb/usbaudio.h | 1 + 5 files changed, 77 insertions(+), 15 deletions(-) diff --git a/sound/usb/card.c b/sound/usb/card.c index d731ca62d5994..e08fbf8e3ee0f 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -450,10 +450,8 @@ lookup_device_name(u32 id) static void snd_usb_audio_free(struct snd_card *card) { struct snd_usb_audio *chip = card->private_data; - struct snd_usb_endpoint *ep, *n; - list_for_each_entry_safe(ep, n, &chip->ep_list, list) - snd_usb_endpoint_free(ep); + snd_usb_endpoint_free_all(chip); mutex_destroy(&chip->mutex); if (!atomic_read(&chip->shutdown)) @@ -611,6 +609,7 @@ static int snd_usb_audio_create(struct usb_interface *intf, chip->usb_id = usb_id; INIT_LIST_HEAD(&chip->pcm_list); INIT_LIST_HEAD(&chip->ep_list); + INIT_LIST_HEAD(&chip->iface_ref_list); INIT_LIST_HEAD(&chip->midi_list); INIT_LIST_HEAD(&chip->mixer_list); diff --git a/sound/usb/card.h b/sound/usb/card.h index 6a027c349194a..de0d2aa883fa2 100644 --- a/sound/usb/card.h +++ b/sound/usb/card.h @@ -42,6 +42,7 @@ struct audioformat { }; struct snd_usb_substream; +struct snd_usb_iface_ref; struct snd_usb_endpoint; struct snd_usb_power_domain; @@ -58,6 +59,7 @@ struct snd_urb_ctx { struct snd_usb_endpoint { struct snd_usb_audio *chip; + struct snd_usb_iface_ref *iface_ref; int opened; /* open refcount; protect with chip->mutex */ atomic_t running; /* running status */ diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c index 162da7a500463..ae6276aded91f 100644 --- a/sound/usb/endpoint.c +++ b/sound/usb/endpoint.c @@ -24,6 +24,14 @@ #define EP_FLAG_RUNNING 1 #define EP_FLAG_STOPPING 2 +/* interface refcounting */ +struct snd_usb_iface_ref { + unsigned char iface; + bool need_setup; + int opened; + struct list_head list; +}; + /* * snd_usb_endpoint is a model that abstracts everything related to an * USB endpoint and its streaming. @@ -488,6 +496,28 @@ exit_clear: clear_bit(ctx->index, &ep->active_mask); } +/* + * Find or create a refcount object for the given interface + * + * The objects are released altogether in snd_usb_endpoint_free_all() + */ +static struct snd_usb_iface_ref * +iface_ref_find(struct snd_usb_audio *chip, int iface) +{ + struct snd_usb_iface_ref *ip; + + list_for_each_entry(ip, &chip->iface_ref_list, list) + if (ip->iface == iface) + return ip; + + ip = kzalloc(sizeof(*ip), GFP_KERNEL); + if (!ip) + return NULL; + ip->iface = iface; + list_add_tail(&ip->list, &chip->iface_ref_list); + return ip; +} + /* * Get the existing endpoint object corresponding EP * Returns NULL if not present. @@ -520,8 +550,8 @@ snd_usb_get_endpoint(struct snd_usb_audio *chip, int ep_num) * * Returns zero on success or a negative error code. * - * New endpoints will be added to chip->ep_list and must be freed by - * calling snd_usb_endpoint_free(). + * New endpoints will be added to chip->ep_list and freed by + * calling snd_usb_endpoint_free_all(). * * For SND_USB_ENDPOINT_TYPE_SYNC, the caller needs to guarantee that * bNumEndpoints > 1 beforehand. @@ -658,6 +688,12 @@ snd_usb_endpoint_open(struct snd_usb_audio *chip, usb_audio_dbg(chip, "Open EP 0x%x, iface=%d:%d, idx=%d\n", ep_num, ep->iface, ep->altsetting, ep->ep_idx); + ep->iface_ref = iface_ref_find(chip, ep->iface); + if (!ep->iface_ref) { + ep = NULL; + goto unlock; + } + ep->cur_audiofmt = fp; ep->cur_channels = fp->channels; ep->cur_rate = params_rate(params); @@ -681,6 +717,11 @@ snd_usb_endpoint_open(struct snd_usb_audio *chip, ep->implicit_fb_sync); } else { + if (WARN_ON(!ep->iface_ref)) { + ep = NULL; + goto unlock; + } + if (!endpoint_compatible(ep, fp, params)) { usb_audio_err(chip, "Incompatible EP setup for 0x%x\n", ep_num); @@ -692,6 +733,9 @@ snd_usb_endpoint_open(struct snd_usb_audio *chip, ep_num, ep->opened); } + if (!ep->iface_ref->opened++) + ep->iface_ref->need_setup = true; + ep->opened++; unlock: @@ -760,12 +804,16 @@ void snd_usb_endpoint_close(struct snd_usb_audio *chip, mutex_lock(&chip->mutex); usb_audio_dbg(chip, "Closing EP 0x%x (count %d)\n", ep->ep_num, ep->opened); - if (!--ep->opened) { + + if (!--ep->iface_ref->opened) endpoint_set_interface(chip, ep, false); + + if (!--ep->opened) { ep->iface = 0; ep->altsetting = 0; ep->cur_audiofmt = NULL; ep->cur_rate = 0; + ep->iface_ref = NULL; usb_audio_dbg(chip, "EP 0x%x closed\n", ep->ep_num); } mutex_unlock(&chip->mutex); @@ -775,6 +823,8 @@ void snd_usb_endpoint_close(struct snd_usb_audio *chip, void snd_usb_endpoint_suspend(struct snd_usb_endpoint *ep) { ep->need_setup = true; + if (ep->iface_ref) + ep->iface_ref->need_setup = true; } /* @@ -1195,11 +1245,13 @@ int snd_usb_endpoint_configure(struct snd_usb_audio *chip, int err = 0; mutex_lock(&chip->mutex); + if (WARN_ON(!ep->iface_ref)) + goto unlock; if (!ep->need_setup) goto unlock; - /* No need to (re-)configure the sync EP belonging to the same altset */ - if (ep->ep_idx) { + /* If the interface has been already set up, just set EP parameters */ + if (!ep->iface_ref->need_setup) { err = snd_usb_endpoint_set_params(chip, ep); if (err < 0) goto unlock; @@ -1242,6 +1294,8 @@ int snd_usb_endpoint_configure(struct snd_usb_audio *chip, goto unlock; } + ep->iface_ref->need_setup = false; + done: ep->need_setup = false; err = 1; @@ -1387,15 +1441,21 @@ void snd_usb_endpoint_release(struct snd_usb_endpoint *ep) } /** - * snd_usb_endpoint_free: Free the resources of an snd_usb_endpoint + * snd_usb_endpoint_free_all: Free the resources of an snd_usb_endpoint + * @card: The chip * - * @ep: the endpoint to free - * - * This free all resources of the given ep. + * This free all endpoints and those resources */ -void snd_usb_endpoint_free(struct snd_usb_endpoint *ep) +void snd_usb_endpoint_free_all(struct snd_usb_audio *chip) { - kfree(ep); + struct snd_usb_endpoint *ep, *en; + struct snd_usb_iface_ref *ip, *in; + + list_for_each_entry_safe(ep, en, &chip->ep_list, list) + kfree(ep); + + list_for_each_entry_safe(ip, in, &chip->iface_ref_list, list) + kfree(ip); } /* diff --git a/sound/usb/endpoint.h b/sound/usb/endpoint.h index 11e3bb839fd7e..eea4ca49876d6 100644 --- a/sound/usb/endpoint.h +++ b/sound/usb/endpoint.h @@ -42,7 +42,7 @@ void snd_usb_endpoint_sync_pending_stop(struct snd_usb_endpoint *ep); void snd_usb_endpoint_suspend(struct snd_usb_endpoint *ep); int snd_usb_endpoint_activate(struct snd_usb_endpoint *ep); void snd_usb_endpoint_release(struct snd_usb_endpoint *ep); -void snd_usb_endpoint_free(struct snd_usb_endpoint *ep); +void snd_usb_endpoint_free_all(struct snd_usb_audio *chip); int snd_usb_endpoint_implicit_feedback_sink(struct snd_usb_endpoint *ep); int snd_usb_endpoint_next_packet_size(struct snd_usb_endpoint *ep, diff --git a/sound/usb/usbaudio.h b/sound/usb/usbaudio.h index 980287aadd361..215c1771dd570 100644 --- a/sound/usb/usbaudio.h +++ b/sound/usb/usbaudio.h @@ -44,6 +44,7 @@ struct snd_usb_audio { struct list_head pcm_list; /* list of pcm streams */ struct list_head ep_list; /* list of audio-related endpoints */ + struct list_head iface_ref_list; /* list of interface refcounts */ int pcm_devs; struct list_head midi_list; /* list of midi interfaces */ -- GitLab From eae4d054f909d9e9589d0940f9b5b0cd68de1e2e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 8 Jan 2021 08:52:18 +0100 Subject: [PATCH 1628/1894] ALSA: usb-audio: Annotate the endpoint index in audioformat There are devices that have multiple endpoints sharing the same iface/altset not only for sync but also for the actual streams, and the audioformat for such an endpoint needs to be handled with the proper endpoint index; otherwise it confuses the endpoint management. This patch extends the audioformat to annotate the endpoint index, and put the proper ep_idx=1 to Pioneer device quirk entries accordingly. Fixes: bf6313a0ff76 ("ALSA: usb-audio: Refactor endpoint management") Link: https://lore.kernel.org/r/20210108075219.21463-5-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/card.h | 1 + sound/usb/endpoint.c | 2 +- sound/usb/quirks-table.h | 6 ++++++ sound/usb/quirks.c | 4 ++-- 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/sound/usb/card.h b/sound/usb/card.h index de0d2aa883fa2..37091b1176143 100644 --- a/sound/usb/card.h +++ b/sound/usb/card.h @@ -18,6 +18,7 @@ struct audioformat { unsigned int frame_size; /* samples per frame for non-audio */ unsigned char iface; /* interface number */ unsigned char altsetting; /* corresponding alternate setting */ + unsigned char ep_idx; /* endpoint array index */ unsigned char altset_idx; /* array index of altenate setting */ unsigned char attributes; /* corresponding attributes of cs endpoint */ unsigned char endpoint; /* endpoint */ diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c index ae6276aded91f..fe73fe3ff2bca 100644 --- a/sound/usb/endpoint.c +++ b/sound/usb/endpoint.c @@ -683,7 +683,7 @@ snd_usb_endpoint_open(struct snd_usb_audio *chip, } else { ep->iface = fp->iface; ep->altsetting = fp->altsetting; - ep->ep_idx = 0; + ep->ep_idx = fp->ep_idx; } usb_audio_dbg(chip, "Open EP 0x%x, iface=%d:%d, idx=%d\n", ep_num, ep->iface, ep->altsetting, ep->ep_idx); diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index 0e11cb96fa8cf..c8a4bdf18207c 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -3362,6 +3362,7 @@ AU0828_DEVICE(0x2040, 0x7270, "Hauppauge", "HVR-950Q"), .altsetting = 1, .altset_idx = 1, .endpoint = 0x86, + .ep_idx = 1, .ep_attr = USB_ENDPOINT_XFER_ISOC| USB_ENDPOINT_SYNC_ASYNC| USB_ENDPOINT_USAGE_IMPLICIT_FB, @@ -3450,6 +3451,7 @@ AU0828_DEVICE(0x2040, 0x7270, "Hauppauge", "HVR-950Q"), .altsetting = 1, .altset_idx = 1, .endpoint = 0x82, + .ep_idx = 1, .ep_attr = USB_ENDPOINT_XFER_ISOC| USB_ENDPOINT_SYNC_ASYNC| USB_ENDPOINT_USAGE_IMPLICIT_FB, @@ -3506,6 +3508,7 @@ AU0828_DEVICE(0x2040, 0x7270, "Hauppauge", "HVR-950Q"), .altsetting = 1, .altset_idx = 1, .endpoint = 0x82, + .ep_idx = 1, .ep_attr = USB_ENDPOINT_XFER_ISOC| USB_ENDPOINT_SYNC_ASYNC| USB_ENDPOINT_USAGE_IMPLICIT_FB, @@ -3562,6 +3565,7 @@ AU0828_DEVICE(0x2040, 0x7270, "Hauppauge", "HVR-950Q"), .altsetting = 1, .altset_idx = 1, .endpoint = 0x82, + .ep_idx = 1, .ep_attr = USB_ENDPOINT_XFER_ISOC| USB_ENDPOINT_SYNC_ASYNC| USB_ENDPOINT_USAGE_IMPLICIT_FB, @@ -3619,6 +3623,7 @@ AU0828_DEVICE(0x2040, 0x7270, "Hauppauge", "HVR-950Q"), .altsetting = 1, .altset_idx = 1, .endpoint = 0x82, + .ep_idx = 1, .ep_attr = USB_ENDPOINT_XFER_ISOC| USB_ENDPOINT_SYNC_ASYNC| USB_ENDPOINT_USAGE_IMPLICIT_FB, @@ -3679,6 +3684,7 @@ AU0828_DEVICE(0x2040, 0x7270, "Hauppauge", "HVR-950Q"), .altsetting = 1, .altset_idx = 1, .endpoint = 0x82, + .ep_idx = 1, .ep_attr = USB_ENDPOINT_XFER_ISOC| USB_ENDPOINT_SYNC_ASYNC| USB_ENDPOINT_USAGE_IMPLICIT_FB, diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index b70e2ebc3e29e..89e172642d98b 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -194,7 +194,7 @@ static int create_fixed_stream_quirk(struct snd_usb_audio *chip, } alts = &iface->altsetting[fp->altset_idx]; altsd = get_iface_desc(alts); - if (altsd->bNumEndpoints < 1) { + if (altsd->bNumEndpoints <= fp->ep_idx) { err = -EINVAL; goto error; } @@ -204,7 +204,7 @@ static int create_fixed_stream_quirk(struct snd_usb_audio *chip, if (fp->datainterval == 0) fp->datainterval = snd_usb_parse_datainterval(chip, alts); if (fp->maxpacksize == 0) - fp->maxpacksize = le16_to_cpu(get_endpoint(alts, 0)->wMaxPacketSize); + fp->maxpacksize = le16_to_cpu(get_endpoint(alts, fp->ep_idx)->wMaxPacketSize); if (!fp->fmt_type) fp->fmt_type = UAC_FORMAT_TYPE_I; -- GitLab From 167c9dc84ec384c0940359e067301883ad2b42a8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 8 Jan 2021 08:52:19 +0100 Subject: [PATCH 1629/1894] ALSA: usb-audio: Fix implicit feedback sync setup for Pioneer devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pioneer devices have both playback and capture streams sharing the same iface/altsetting, and those need to be paired as implicit feedback. Instead of a half-baked (and broken) static quirk entry, set up more generically for those devices by checking the number of endpoints and the attribute of the secondary EP. Fixes: bf6313a0ff76 ("ALSA: usb-audio: Refactor endpoint management") Reported-by: František Kučera Link: https://lore.kernel.org/r/20210108075219.21463-6-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/implicit.c | 48 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 38 insertions(+), 10 deletions(-) diff --git a/sound/usb/implicit.c b/sound/usb/implicit.c index 9724efe1cdce8..1ac2cc6c33fb1 100644 --- a/sound/usb/implicit.c +++ b/sound/usb/implicit.c @@ -58,8 +58,6 @@ static const struct snd_usb_implicit_fb_match playback_implicit_fb_quirks[] = { IMPLICIT_FB_FIXED_DEV(0x0499, 0x172f, 0x81, 2), /* Steinberg UR22C */ IMPLICIT_FB_FIXED_DEV(0x0d9a, 0x00df, 0x81, 2), /* RTX6001 */ IMPLICIT_FB_FIXED_DEV(0x22f0, 0x0006, 0x81, 3), /* Allen&Heath Qu-16 */ - IMPLICIT_FB_FIXED_DEV(0x2b73, 0x000a, 0x82, 0), /* Pioneer DJ DJM-900NXS2 */ - IMPLICIT_FB_FIXED_DEV(0x2b73, 0x0017, 0x82, 0), /* Pioneer DJ DJM-250MK2 */ IMPLICIT_FB_FIXED_DEV(0x1686, 0xf029, 0x82, 2), /* Zoom UAC-2 */ IMPLICIT_FB_FIXED_DEV(0x2466, 0x8003, 0x86, 2), /* Fractal Audio Axe-Fx II */ IMPLICIT_FB_FIXED_DEV(0x0499, 0x172a, 0x86, 2), /* Yamaha MODX */ @@ -100,7 +98,7 @@ static const struct snd_usb_implicit_fb_match capture_implicit_fb_quirks[] = { /* set up sync EP information on the audioformat */ static int add_implicit_fb_sync_ep(struct snd_usb_audio *chip, struct audioformat *fmt, - int ep, int ifnum, + int ep, int ep_idx, int ifnum, const struct usb_host_interface *alts) { struct usb_interface *iface; @@ -115,7 +113,7 @@ static int add_implicit_fb_sync_ep(struct snd_usb_audio *chip, fmt->sync_ep = ep; fmt->sync_iface = ifnum; fmt->sync_altsetting = alts->desc.bAlternateSetting; - fmt->sync_ep_idx = 0; + fmt->sync_ep_idx = ep_idx; fmt->implicit_fb = 1; usb_audio_dbg(chip, "%d:%d: added %s implicit_fb sync_ep %x, iface %d:%d\n", @@ -147,7 +145,7 @@ static int add_generic_uac2_implicit_fb(struct snd_usb_audio *chip, (epd->bmAttributes & USB_ENDPOINT_USAGE_MASK) != USB_ENDPOINT_USAGE_IMPLICIT_FB) return 0; - return add_implicit_fb_sync_ep(chip, fmt, epd->bEndpointAddress, + return add_implicit_fb_sync_ep(chip, fmt, epd->bEndpointAddress, 0, ifnum, alts); } @@ -173,10 +171,32 @@ static int add_roland_implicit_fb(struct snd_usb_audio *chip, (epd->bmAttributes & USB_ENDPOINT_USAGE_MASK) != USB_ENDPOINT_USAGE_IMPLICIT_FB) return 0; - return add_implicit_fb_sync_ep(chip, fmt, epd->bEndpointAddress, + return add_implicit_fb_sync_ep(chip, fmt, epd->bEndpointAddress, 0, ifnum, alts); } +/* Pioneer devices: playback and capture streams sharing the same iface/altset + */ +static int add_pioneer_implicit_fb(struct snd_usb_audio *chip, + struct audioformat *fmt, + struct usb_host_interface *alts) +{ + struct usb_endpoint_descriptor *epd; + + if (alts->desc.bNumEndpoints != 2) + return 0; + + epd = get_endpoint(alts, 1); + if (!usb_endpoint_is_isoc_in(epd) || + (epd->bmAttributes & USB_ENDPOINT_SYNCTYPE) != USB_ENDPOINT_SYNC_ASYNC || + ((epd->bmAttributes & USB_ENDPOINT_USAGE_MASK) != + USB_ENDPOINT_USAGE_DATA && + (epd->bmAttributes & USB_ENDPOINT_USAGE_MASK) != + USB_ENDPOINT_USAGE_IMPLICIT_FB)) + return 0; + return add_implicit_fb_sync_ep(chip, fmt, epd->bEndpointAddress, 1, + alts->desc.bInterfaceNumber, alts); +} static int __add_generic_implicit_fb(struct snd_usb_audio *chip, struct audioformat *fmt, @@ -197,7 +217,7 @@ static int __add_generic_implicit_fb(struct snd_usb_audio *chip, if (!usb_endpoint_is_isoc_in(epd) || (epd->bmAttributes & USB_ENDPOINT_SYNCTYPE) != USB_ENDPOINT_SYNC_ASYNC) return 0; - return add_implicit_fb_sync_ep(chip, fmt, epd->bEndpointAddress, + return add_implicit_fb_sync_ep(chip, fmt, epd->bEndpointAddress, 0, iface, alts); } @@ -250,7 +270,7 @@ static int audioformat_implicit_fb_quirk(struct snd_usb_audio *chip, case IMPLICIT_FB_NONE: return 0; /* No quirk */ case IMPLICIT_FB_FIXED: - return add_implicit_fb_sync_ep(chip, fmt, p->ep_num, + return add_implicit_fb_sync_ep(chip, fmt, p->ep_num, 0, p->iface, NULL); } } @@ -278,6 +298,14 @@ static int audioformat_implicit_fb_quirk(struct snd_usb_audio *chip, return 1; } + /* Pioneer devices implicit feedback with vendor spec class */ + if (attr == USB_ENDPOINT_SYNC_ASYNC && + alts->desc.bInterfaceClass == USB_CLASS_VENDOR_SPEC && + USB_ID_VENDOR(chip->usb_id) == 0x2b73 /* Pioneer */) { + if (add_pioneer_implicit_fb(chip, fmt, alts)) + return 1; + } + /* Try the generic implicit fb if available */ if (chip->generic_implicit_fb) return add_generic_implicit_fb(chip, fmt, alts); @@ -295,8 +323,8 @@ static int audioformat_capture_quirk(struct snd_usb_audio *chip, p = find_implicit_fb_entry(chip, capture_implicit_fb_quirks, alts); if (p && p->type == IMPLICIT_FB_FIXED) - return add_implicit_fb_sync_ep(chip, fmt, p->ep_num, p->iface, - NULL); + return add_implicit_fb_sync_ep(chip, fmt, p->ep_num, 0, + p->iface, NULL); return 0; } -- GitLab From afba9dc1f3a5390475006061c0bdc5ad4915878e Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Wed, 6 Jan 2021 11:07:55 +0100 Subject: [PATCH 1630/1894] net: ipa: modem: add missing SET_NETDEV_DEV() for proper sysfs links At the moment it is quite hard to identify the network interface provided by IPA in userspace components: The network interface is created as virtual device, without any link to the IPA device. The interface name ("rmnet_ipa%d") is the only indication that the network interface belongs to IPA, but this is not very reliable. Add SET_NETDEV_DEV() to associate the network interface with the IPA parent device. This allows userspace services like ModemManager to properly identify that this network interface is provided by IPA and belongs to the modem. Cc: Alex Elder Fixes: a646d6ec9098 ("soc: qcom: ipa: modem and microcontroller") Signed-off-by: Stephan Gerhold Link: https://lore.kernel.org/r/20210106100755.56800-1-stephan@gerhold.net Signed-off-by: Jakub Kicinski --- drivers/net/ipa/ipa_modem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ipa/ipa_modem.c b/drivers/net/ipa/ipa_modem.c index e34fe2d77324e..9b08eb8239846 100644 --- a/drivers/net/ipa/ipa_modem.c +++ b/drivers/net/ipa/ipa_modem.c @@ -216,6 +216,7 @@ int ipa_modem_start(struct ipa *ipa) ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = netdev; ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = netdev; + SET_NETDEV_DEV(netdev, &ipa->pdev->dev); priv = netdev_priv(netdev); priv->ipa = ipa; -- GitLab From 53475c5dd856212e91538a9501162e821cc1f791 Mon Sep 17 00:00:00 2001 From: Dongseok Yi Date: Fri, 8 Jan 2021 11:28:38 +0900 Subject: [PATCH 1631/1894] net: fix use-after-free when UDP GRO with shared fraglist skbs in fraglist could be shared by a BPF filter loaded at TC. If TC writes, it will call skb_ensure_writable -> pskb_expand_head to create a private linear section for the head_skb. And then call skb_clone_fraglist -> skb_get on each skb in the fraglist. skb_segment_list overwrites part of the skb linear section of each fragment itself. Even after skb_clone, the frag_skbs share their linear section with their clone in PF_PACKET. Both sk_receive_queue of PF_PACKET and PF_INET (or PF_INET6) can have a link for the same frag_skbs chain. If a new skb (not frags) is queued to one of the sk_receive_queue, multiple ptypes can see and release this. It causes use-after-free. [ 4443.426215] ------------[ cut here ]------------ [ 4443.426222] refcount_t: underflow; use-after-free. [ 4443.426291] WARNING: CPU: 7 PID: 28161 at lib/refcount.c:190 refcount_dec_and_test_checked+0xa4/0xc8 [ 4443.426726] pstate: 60400005 (nZCv daif +PAN -UAO) [ 4443.426732] pc : refcount_dec_and_test_checked+0xa4/0xc8 [ 4443.426737] lr : refcount_dec_and_test_checked+0xa0/0xc8 [ 4443.426808] Call trace: [ 4443.426813] refcount_dec_and_test_checked+0xa4/0xc8 [ 4443.426823] skb_release_data+0x144/0x264 [ 4443.426828] kfree_skb+0x58/0xc4 [ 4443.426832] skb_queue_purge+0x64/0x9c [ 4443.426844] packet_set_ring+0x5f0/0x820 [ 4443.426849] packet_setsockopt+0x5a4/0xcd0 [ 4443.426853] __sys_setsockopt+0x188/0x278 [ 4443.426858] __arm64_sys_setsockopt+0x28/0x38 [ 4443.426869] el0_svc_common+0xf0/0x1d0 [ 4443.426873] el0_svc_handler+0x74/0x98 [ 4443.426880] el0_svc+0x8/0xc Fixes: 3a1296a38d0c (net: Support GRO/GSO fraglist chaining.) Signed-off-by: Dongseok Yi Acked-by: Willem de Bruijn Acked-by: Daniel Borkmann Link: https://lore.kernel.org/r/1610072918-174177-1-git-send-email-dseok.yi@samsung.com Signed-off-by: Jakub Kicinski --- net/core/skbuff.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f62cae3f75d87..b6f2b520a9b74 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3655,7 +3655,8 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, unsigned int delta_truesize = 0; unsigned int delta_len = 0; struct sk_buff *tail = NULL; - struct sk_buff *nskb; + struct sk_buff *nskb, *tmp; + int err; skb_push(skb, -skb_network_offset(skb) + offset); @@ -3665,11 +3666,28 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, nskb = list_skb; list_skb = list_skb->next; + err = 0; + if (skb_shared(nskb)) { + tmp = skb_clone(nskb, GFP_ATOMIC); + if (tmp) { + consume_skb(nskb); + nskb = tmp; + err = skb_unclone(nskb, GFP_ATOMIC); + } else { + err = -ENOMEM; + } + } + if (!tail) skb->next = nskb; else tail->next = nskb; + if (unlikely(err)) { + nskb->next = list_skb; + goto err_linearize; + } + tail = nskb; delta_len += nskb->len; -- GitLab From fd2ddef043592e7de80af53f47fa46fd3573086e Mon Sep 17 00:00:00 2001 From: Baptiste Lepers Date: Thu, 7 Jan 2021 16:11:10 +1100 Subject: [PATCH 1632/1894] udp: Prevent reuseport_select_sock from reading uninitialized socks reuse->socks[] is modified concurrently by reuseport_add_sock. To prevent reading values that have not been fully initialized, only read the array up until the last known safe index instead of incorrectly re-reading the last index of the array. Fixes: acdcecc61285f ("udp: correct reuseport selection with connected sockets") Signed-off-by: Baptiste Lepers Acked-by: Willem de Bruijn Link: https://lore.kernel.org/r/20210107051110.12247-1-baptiste.lepers@gmail.com Signed-off-by: Jakub Kicinski --- net/core/sock_reuseport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index bbdd3c7b6cb5b..b065f0a103ed0 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -293,7 +293,7 @@ select_by_hash: i = j = reciprocal_scale(hash, socks); while (reuse->socks[i]->sk_state == TCP_ESTABLISHED) { i++; - if (i >= reuse->num_socks) + if (i >= socks) i = 0; if (i == j) goto out; -- GitLab From c1787ffd0d24eb93eefac2dbba0eac5700da9ff1 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Thu, 7 Jan 2021 18:13:15 +0000 Subject: [PATCH 1633/1894] ppp: fix refcount underflow on channel unbridge When setting up a channel bridge, ppp_bridge_channels sets the pch->bridge field before taking the associated reference on the bridge file instance. This opens up a refcount underflow bug if ppp_bridge_channels called via. iotcl runs concurrently with ppp_unbridge_channels executing via. file release. The bug is triggered by ppp_bridge_channels taking the error path through the 'err_unset' label. In this scenario, pch->bridge is set, but the reference on the bridged channel will not be taken because the function errors out. If ppp_unbridge_channels observes pch->bridge before it is unset by the error path, it will erroneously drop the reference on the bridged channel and cause a refcount underflow. To avoid this, ensure that ppp_bridge_channels holds a reference on each channel in advance of setting the bridge pointers. Signed-off-by: Tom Parkin Fixes: 4cf476ced45d ("ppp: add PPPIOCBRIDGECHAN and PPPIOCUNBRIDGECHAN ioctls") Acked-by: Guillaume Nault Link: https://lore.kernel.org/r/20210107181315.3128-1-tparkin@katalix.com Signed-off-by: Jakub Kicinski --- drivers/net/ppp/ppp_generic.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 09c27f7773f95..d445ecb1d0c75 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -623,6 +623,7 @@ static int ppp_bridge_channels(struct channel *pch, struct channel *pchb) write_unlock_bh(&pch->upl); return -EALREADY; } + refcount_inc(&pchb->file.refcnt); rcu_assign_pointer(pch->bridge, pchb); write_unlock_bh(&pch->upl); @@ -632,19 +633,24 @@ static int ppp_bridge_channels(struct channel *pch, struct channel *pchb) write_unlock_bh(&pchb->upl); goto err_unset; } + refcount_inc(&pch->file.refcnt); rcu_assign_pointer(pchb->bridge, pch); write_unlock_bh(&pchb->upl); - refcount_inc(&pch->file.refcnt); - refcount_inc(&pchb->file.refcnt); - return 0; err_unset: write_lock_bh(&pch->upl); + /* Re-read pch->bridge with upl held in case it was modified concurrently */ + pchb = rcu_dereference_protected(pch->bridge, lockdep_is_held(&pch->upl)); RCU_INIT_POINTER(pch->bridge, NULL); write_unlock_bh(&pch->upl); synchronize_rcu(); + + if (pchb) + if (refcount_dec_and_test(&pchb->file.refcnt)) + ppp_destroy_channel(pchb); + return -EALREADY; } -- GitLab From 2b446e650b418f9a9e75f99852e2f2560cabfa17 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 6 Jan 2021 10:40:05 -0800 Subject: [PATCH 1634/1894] docs: net: explain struct net_device lifetime Explain the two basic flows of struct net_device's operation. Signed-off-by: Jakub Kicinski --- Documentation/networking/netdevices.rst | 171 +++++++++++++++++++++++- net/core/rtnetlink.c | 2 +- 2 files changed, 166 insertions(+), 7 deletions(-) diff --git a/Documentation/networking/netdevices.rst b/Documentation/networking/netdevices.rst index e65665c5ab501..17bdcb746dcf5 100644 --- a/Documentation/networking/netdevices.rst +++ b/Documentation/networking/netdevices.rst @@ -10,18 +10,177 @@ Introduction The following is a random collection of documentation regarding network devices. -struct net_device allocation rules -================================== +struct net_device lifetime rules +================================ Network device structures need to persist even after module is unloaded and must be allocated with alloc_netdev_mqs() and friends. If device has registered successfully, it will be freed on last use -by free_netdev(). This is required to handle the pathologic case cleanly -(example: rmmod mydriver needs_free_netdev = true; + } + + static void my_destructor(struct net_device *dev) + { + some_obj_destroy(priv->obj); + some_uninit(priv); + } + + int create_link() + { + struct my_device_priv *priv; + int err; + + ASSERT_RTNL(); + + dev = alloc_netdev(sizeof(*priv), "net%d", NET_NAME_UNKNOWN, my_setup); + if (!dev) + return -ENOMEM; + priv = netdev_priv(dev); + + /* Implicit constructor */ + err = some_init(priv); + if (err) + goto err_free_dev; + + priv->obj = some_obj_create(); + if (!priv->obj) { + err = -ENOMEM; + goto err_some_uninit; + } + /* End of constructor, set the destructor: */ + dev->priv_destructor = my_destructor; + + err = register_netdevice(dev); + if (err) + /* register_netdevice() calls destructor on failure */ + goto err_free_dev; + + /* If anything fails now unregister_netdevice() (or unregister_netdev()) + * will take care of calling my_destructor and free_netdev(). + */ + + return 0; + + err_some_uninit: + some_uninit(priv); + err_free_dev: + free_netdev(dev); + return err; + } + +If struct net_device.priv_destructor is set it will be called by the core +some time after unregister_netdevice(), it will also be called if +register_netdevice() fails. The callback may be invoked with or without +``rtnl_lock`` held. + +There is no explicit constructor callback, driver "constructs" the private +netdev state after allocating it and before registration. + +Setting struct net_device.needs_free_netdev makes core call free_netdevice() +automatically after unregister_netdevice() when all references to the device +are gone. It only takes effect after a successful call to register_netdevice() +so if register_netdevice() fails driver is responsible for calling +free_netdev(). + +free_netdev() is safe to call on error paths right after unregister_netdevice() +or when register_netdevice() fails. Parts of netdev (de)registration process +happen after ``rtnl_lock`` is released, therefore in those cases free_netdev() +will defer some of the processing until ``rtnl_lock`` is released. + +Devices spawned from struct rtnl_link_ops should never free the +struct net_device directly. + +.ndo_init and .ndo_uninit +~~~~~~~~~~~~~~~~~~~~~~~~~ + +``.ndo_init`` and ``.ndo_uninit`` callbacks are called during net_device +registration and de-registration, under ``rtnl_lock``. Drivers can use +those e.g. when parts of their init process need to run under ``rtnl_lock``. + +``.ndo_init`` runs before device is visible in the system, ``.ndo_uninit`` +runs during de-registering after device is closed but other subsystems +may still have outstanding references to the netdevice. MTU === diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index bb0596c41b3ef..79f514afb17d0 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3441,7 +3441,7 @@ replay: if (ops->newlink) { err = ops->newlink(link_net ? : net, dev, tb, data, extack); - /* Drivers should call free_netdev() in ->destructor + /* Drivers should set dev->needs_free_netdev * and unregister it on failure after registration * so that device could be finally freed in rtnl_unlock. */ -- GitLab From c269a24ce057abfc31130960e96ab197ef6ab196 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 6 Jan 2021 10:40:06 -0800 Subject: [PATCH 1635/1894] net: make free_netdev() more lenient with unregistering devices There are two flavors of handling netdev registration: - ones called without holding rtnl_lock: register_netdev() and unregister_netdev(); and - those called with rtnl_lock held: register_netdevice() and unregister_netdevice(). While the semantics of the former are pretty clear, the same can't be said about the latter. The netdev_todo mechanism is utilized to perform some of the device unregistering tasks and it hooks into rtnl_unlock() so the locked variants can't actually finish the work. In general free_netdev() does not mix well with locked calls. Most drivers operating under rtnl_lock set dev->needs_free_netdev to true and expect core to make the free_netdev() call some time later. The part where this becomes most problematic is error paths. There is no way to unwind the state cleanly after a call to register_netdevice(), since unreg can't be performed fully without dropping locks. Make free_netdev() more lenient, and defer the freeing if device is being unregistered. This allows error paths to simply call free_netdev() both after register_netdevice() failed, and after a call to unregister_netdevice() but before dropping rtnl_lock. Simplify the error paths which are currently doing gymnastics around free_netdev() handling. Signed-off-by: Jakub Kicinski --- net/8021q/vlan.c | 4 +--- net/core/dev.c | 11 +++++++++++ net/core/rtnetlink.c | 23 ++++++----------------- 3 files changed, 18 insertions(+), 20 deletions(-) diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 15bbfaf943fd1..8b644113715e9 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -284,9 +284,7 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id) return 0; out_free_newdev: - if (new_dev->reg_state == NETREG_UNINITIALIZED || - new_dev->reg_state == NETREG_UNREGISTERED) - free_netdev(new_dev); + free_netdev(new_dev); return err; } diff --git a/net/core/dev.c b/net/core/dev.c index 8fa739259041a..adde93cbca9f0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10631,6 +10631,17 @@ void free_netdev(struct net_device *dev) struct napi_struct *p, *n; might_sleep(); + + /* When called immediately after register_netdevice() failed the unwind + * handling may still be dismantling the device. Handle that case by + * deferring the free. + */ + if (dev->reg_state == NETREG_UNREGISTERING) { + ASSERT_RTNL(); + dev->needs_free_netdev = true; + return; + } + netif_free_tx_queues(dev); netif_free_rx_queues(dev); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 79f514afb17d0..3d6ab194d0f58 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3439,26 +3439,15 @@ replay: dev->ifindex = ifm->ifi_index; - if (ops->newlink) { + if (ops->newlink) err = ops->newlink(link_net ? : net, dev, tb, data, extack); - /* Drivers should set dev->needs_free_netdev - * and unregister it on failure after registration - * so that device could be finally freed in rtnl_unlock. - */ - if (err < 0) { - /* If device is not registered at all, free it now */ - if (dev->reg_state == NETREG_UNINITIALIZED || - dev->reg_state == NETREG_UNREGISTERED) - free_netdev(dev); - goto out; - } - } else { + else err = register_netdevice(dev); - if (err < 0) { - free_netdev(dev); - goto out; - } + if (err < 0) { + free_netdev(dev); + goto out; } + err = rtnl_configure_link(dev, ifm); if (err < 0) goto out_unregister; -- GitLab From 766b0515d5bec4b780750773ed3009b148df8c0a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 6 Jan 2021 10:40:07 -0800 Subject: [PATCH 1636/1894] net: make sure devices go through netdev_wait_all_refs If register_netdevice() fails at the very last stage - the notifier call - some subsystems may have already seen it and grabbed a reference. struct net_device can't be freed right away without calling netdev_wait_all_refs(). Now that we have a clean interface in form of dev->needs_free_netdev and lenient free_netdev() we can undo what commit 93ee31f14f6f ("[NET]: Fix free_netdev on register_netdev failure.") has done and complete the unregistration path by bringing the net_set_todo() call back. After registration fails user is still expected to explicitly free the net_device, so make sure ->needs_free_netdev is cleared, otherwise rolling back the registration will cause the old double free for callers who release rtnl_lock before the free. This also solves the problem of priv_destructor not being called on notifier error. net_set_todo() will be moved back into unregister_netdevice_queue() in a follow up. Reported-by: Hulk Robot Reported-by: Yang Yingliang Signed-off-by: Jakub Kicinski --- net/core/dev.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index adde93cbca9f0..0071a11a6dc3c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10077,17 +10077,11 @@ int register_netdevice(struct net_device *dev) ret = call_netdevice_notifiers(NETDEV_REGISTER, dev); ret = notifier_to_errno(ret); if (ret) { + /* Expect explicit free_netdev() on failure */ + dev->needs_free_netdev = false; rollback_registered(dev); - rcu_barrier(); - - dev->reg_state = NETREG_UNREGISTERED; - /* We should put the kobject that hold in - * netdev_unregister_kobject(), otherwise - * the net device cannot be freed when - * driver calls free_netdev(), because the - * kobject is being hold. - */ - kobject_put(&dev->dev.kobj); + net_set_todo(dev); + goto out; } /* * Prevent userspace races by waiting until the network -- GitLab From e80927079fd97b4d5457e3af2400a0087b561564 Mon Sep 17 00:00:00 2001 From: Yi Li Date: Mon, 4 Jan 2021 15:41:18 +0800 Subject: [PATCH 1637/1894] bcache: set pdev_set_uuid before scond loop iteration There is no need to reassign pdev_set_uuid in the second loop iteration, so move it to the place before second loop. Signed-off-by: Yi Li Signed-off-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index a4752ac410dc4..6aa23a6fb394d 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -2644,8 +2644,8 @@ static ssize_t bch_pending_bdevs_cleanup(struct kobject *k, } list_for_each_entry_safe(pdev, tpdev, &pending_devs, list) { + char *pdev_set_uuid = pdev->dc->sb.set_uuid; list_for_each_entry_safe(c, tc, &bch_cache_sets, list) { - char *pdev_set_uuid = pdev->dc->sb.set_uuid; char *set_uuid = c->set_uuid; if (!memcmp(pdev_set_uuid, set_uuid, 16)) { -- GitLab From f7b4943dea48a572ad751ce1f18a245d43debe7e Mon Sep 17 00:00:00 2001 From: Coly Li Date: Mon, 4 Jan 2021 15:41:19 +0800 Subject: [PATCH 1638/1894] bcache: fix typo from SUUP to SUPP in features.h This patch fixes the following typos, from BCH_FEATURE_COMPAT_SUUP to BCH_FEATURE_COMPAT_SUPP from BCH_FEATURE_INCOMPAT_SUUP to BCH_FEATURE_INCOMPAT_SUPP from BCH_FEATURE_INCOMPAT_SUUP to BCH_FEATURE_RO_COMPAT_SUPP Fixes: d721a43ff69c ("bcache: increase super block version for cache device and backing device") Fixes: ffa470327572 ("bcache: add bucket_size_hi into struct cache_sb_disk for large bucket") Signed-off-by: Coly Li Cc: stable@vger.kernel.org # 5.9+ Signed-off-by: Jens Axboe --- drivers/md/bcache/features.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/md/bcache/features.h b/drivers/md/bcache/features.h index a1653c4780416..32c5bbda2f0dc 100644 --- a/drivers/md/bcache/features.h +++ b/drivers/md/bcache/features.h @@ -15,9 +15,9 @@ /* Incompat feature set */ #define BCH_FEATURE_INCOMPAT_LARGE_BUCKET 0x0001 /* 32bit bucket size */ -#define BCH_FEATURE_COMPAT_SUUP 0 -#define BCH_FEATURE_RO_COMPAT_SUUP 0 -#define BCH_FEATURE_INCOMPAT_SUUP BCH_FEATURE_INCOMPAT_LARGE_BUCKET +#define BCH_FEATURE_COMPAT_SUPP 0 +#define BCH_FEATURE_RO_COMPAT_SUPP 0 +#define BCH_FEATURE_INCOMPAT_SUPP BCH_FEATURE_INCOMPAT_LARGE_BUCKET #define BCH_HAS_COMPAT_FEATURE(sb, mask) \ ((sb)->feature_compat & (mask)) -- GitLab From 1dfc0686c29a9bbd3a446a29f9ccde3dec3bc75a Mon Sep 17 00:00:00 2001 From: Coly Li Date: Mon, 4 Jan 2021 15:41:20 +0800 Subject: [PATCH 1639/1894] bcache: check unsupported feature sets for bcache register This patch adds the check for features which is incompatible for current supported feature sets. Now if the bcache device created by bcache-tools has features that current kernel doesn't support, read_super() will fail with error messoage. E.g. if an unsupported incompatible feature detected, bcache register will fail with dmesg "bcache: register_bcache() error : Unsupported incompatible feature found". Fixes: d721a43ff69c ("bcache: increase super block version for cache device and backing device") Fixes: ffa470327572 ("bcache: add bucket_size_hi into struct cache_sb_disk for large bucket") Signed-off-by: Coly Li Cc: stable@vger.kernel.org # 5.9+ Signed-off-by: Jens Axboe --- drivers/md/bcache/features.h | 15 +++++++++++++++ drivers/md/bcache/super.c | 14 ++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/drivers/md/bcache/features.h b/drivers/md/bcache/features.h index 32c5bbda2f0dc..e73724c2b49b8 100644 --- a/drivers/md/bcache/features.h +++ b/drivers/md/bcache/features.h @@ -79,6 +79,21 @@ static inline void bch_clear_feature_##name(struct cache_sb *sb) \ BCH_FEATURE_INCOMPAT_FUNCS(large_bucket, LARGE_BUCKET); +static inline bool bch_has_unknown_compat_features(struct cache_sb *sb) +{ + return ((sb->feature_compat & ~BCH_FEATURE_COMPAT_SUPP) != 0); +} + +static inline bool bch_has_unknown_ro_compat_features(struct cache_sb *sb) +{ + return ((sb->feature_ro_compat & ~BCH_FEATURE_RO_COMPAT_SUPP) != 0); +} + +static inline bool bch_has_unknown_incompat_features(struct cache_sb *sb) +{ + return ((sb->feature_incompat & ~BCH_FEATURE_INCOMPAT_SUPP) != 0); +} + int bch_print_cache_set_feature_compat(struct cache_set *c, char *buf, int size); int bch_print_cache_set_feature_ro_compat(struct cache_set *c, char *buf, int size); int bch_print_cache_set_feature_incompat(struct cache_set *c, char *buf, int size); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 6aa23a6fb394d..f4674a3298af5 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -228,6 +228,20 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev, sb->feature_compat = le64_to_cpu(s->feature_compat); sb->feature_incompat = le64_to_cpu(s->feature_incompat); sb->feature_ro_compat = le64_to_cpu(s->feature_ro_compat); + + /* Check incompatible features */ + err = "Unsupported compatible feature found"; + if (bch_has_unknown_compat_features(sb)) + goto err; + + err = "Unsupported read-only compatible feature found"; + if (bch_has_unknown_ro_compat_features(sb)) + goto err; + + err = "Unsupported incompatible feature found"; + if (bch_has_unknown_incompat_features(sb)) + goto err; + err = read_super_common(sb, bdev, s); if (err) goto err; -- GitLab From b16671e8f493e3df40b1fb0dff4078f391c5099a Mon Sep 17 00:00:00 2001 From: Coly Li Date: Mon, 4 Jan 2021 15:41:21 +0800 Subject: [PATCH 1640/1894] bcache: introduce BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE for large bucket When large bucket feature was added, BCH_FEATURE_INCOMPAT_LARGE_BUCKET was introduced into the incompat feature set. It used bucket_size_hi (which was added at the tail of struct cache_sb_disk) to extend current 16bit bucket size to 32bit with existing bucket_size in struct cache_sb_disk. This is not a good idea, there are two obvious problems, - Bucket size is always value power of 2, if store log2(bucket size) in existing bucket_size of struct cache_sb_disk, it is unnecessary to add bucket_size_hi. - Macro csum_set() assumes d[SB_JOURNAL_BUCKETS] is the last member in struct cache_sb_disk, bucket_size_hi was added after d[] which makes csum_set calculate an unexpected super block checksum. To fix the above problems, this patch introduces a new incompat feature bit BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE, when this bit is set, it means bucket_size in struct cache_sb_disk stores the order of power-of-2 bucket size value. When user specifies a bucket size larger than 32768 sectors, BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE will be set to incompat feature set, and bucket_size stores log2(bucket size) more than store the real bucket size value. The obsoleted BCH_FEATURE_INCOMPAT_LARGE_BUCKET won't be used anymore, it is renamed to BCH_FEATURE_INCOMPAT_OBSO_LARGE_BUCKET and still only recognized by kernel driver for legacy compatible purpose. The previous bucket_size_hi is renmaed to obso_bucket_size_hi in struct cache_sb_disk and not used in bcache-tools anymore. For cache device created with BCH_FEATURE_INCOMPAT_LARGE_BUCKET feature, bcache-tools and kernel driver still recognize the feature string and display it as "obso_large_bucket". With this change, the unnecessary extra space extend of bcache on-disk super block can be avoided, and csum_set() may generate expected check sum as well. Fixes: ffa470327572 ("bcache: add bucket_size_hi into struct cache_sb_disk for large bucket") Signed-off-by: Coly Li Cc: stable@vger.kernel.org # 5.9+ Signed-off-by: Jens Axboe --- drivers/md/bcache/features.c | 2 +- drivers/md/bcache/features.h | 11 ++++++++--- drivers/md/bcache/super.c | 22 +++++++++++++++++++--- include/uapi/linux/bcache.h | 2 +- 4 files changed, 29 insertions(+), 8 deletions(-) diff --git a/drivers/md/bcache/features.c b/drivers/md/bcache/features.c index 6469223f0b777..d636b7b2d070c 100644 --- a/drivers/md/bcache/features.c +++ b/drivers/md/bcache/features.c @@ -17,7 +17,7 @@ struct feature { }; static struct feature feature_list[] = { - {BCH_FEATURE_INCOMPAT, BCH_FEATURE_INCOMPAT_LARGE_BUCKET, + {BCH_FEATURE_INCOMPAT, BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE, "large_bucket"}, {0, 0, 0 }, }; diff --git a/drivers/md/bcache/features.h b/drivers/md/bcache/features.h index e73724c2b49b8..84fc2c0f01015 100644 --- a/drivers/md/bcache/features.h +++ b/drivers/md/bcache/features.h @@ -13,11 +13,15 @@ /* Feature set definition */ /* Incompat feature set */ -#define BCH_FEATURE_INCOMPAT_LARGE_BUCKET 0x0001 /* 32bit bucket size */ +/* 32bit bucket size, obsoleted */ +#define BCH_FEATURE_INCOMPAT_OBSO_LARGE_BUCKET 0x0001 +/* real bucket size is (1 << bucket_size) */ +#define BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE 0x0002 #define BCH_FEATURE_COMPAT_SUPP 0 #define BCH_FEATURE_RO_COMPAT_SUPP 0 -#define BCH_FEATURE_INCOMPAT_SUPP BCH_FEATURE_INCOMPAT_LARGE_BUCKET +#define BCH_FEATURE_INCOMPAT_SUPP (BCH_FEATURE_INCOMPAT_OBSO_LARGE_BUCKET| \ + BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE) #define BCH_HAS_COMPAT_FEATURE(sb, mask) \ ((sb)->feature_compat & (mask)) @@ -77,7 +81,8 @@ static inline void bch_clear_feature_##name(struct cache_sb *sb) \ ~BCH##_FEATURE_INCOMPAT_##flagname; \ } -BCH_FEATURE_INCOMPAT_FUNCS(large_bucket, LARGE_BUCKET); +BCH_FEATURE_INCOMPAT_FUNCS(obso_large_bucket, OBSO_LARGE_BUCKET); +BCH_FEATURE_INCOMPAT_FUNCS(large_bucket, LOG_LARGE_BUCKET_SIZE); static inline bool bch_has_unknown_compat_features(struct cache_sb *sb) { diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index f4674a3298af5..3999641f17753 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -64,9 +64,25 @@ static unsigned int get_bucket_size(struct cache_sb *sb, struct cache_sb_disk *s { unsigned int bucket_size = le16_to_cpu(s->bucket_size); - if (sb->version >= BCACHE_SB_VERSION_CDEV_WITH_FEATURES && - bch_has_feature_large_bucket(sb)) - bucket_size |= le16_to_cpu(s->bucket_size_hi) << 16; + if (sb->version >= BCACHE_SB_VERSION_CDEV_WITH_FEATURES) { + if (bch_has_feature_large_bucket(sb)) { + unsigned int max, order; + + max = sizeof(unsigned int) * BITS_PER_BYTE - 1; + order = le16_to_cpu(s->bucket_size); + /* + * bcache tool will make sure the overflow won't + * happen, an error message here is enough. + */ + if (order > max) + pr_err("Bucket size (1 << %u) overflows\n", + order); + bucket_size = 1 << order; + } else if (bch_has_feature_obso_large_bucket(sb)) { + bucket_size += + le16_to_cpu(s->obso_bucket_size_hi) << 16; + } + } return bucket_size; } diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h index 52e8bcb339811..cf7399f03b712 100644 --- a/include/uapi/linux/bcache.h +++ b/include/uapi/linux/bcache.h @@ -213,7 +213,7 @@ struct cache_sb_disk { __le16 keys; }; __le64 d[SB_JOURNAL_BUCKETS]; /* journal buckets */ - __le16 bucket_size_hi; + __le16 obso_bucket_size_hi; /* obsoleted */ }; /* -- GitLab From 5342fd4255021ef0c4ce7be52eea1c4ebda11c63 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Mon, 4 Jan 2021 15:41:22 +0800 Subject: [PATCH 1641/1894] bcache: set bcache device into read-only mode for BCH_FEATURE_INCOMPAT_OBSO_LARGE_BUCKET If BCH_FEATURE_INCOMPAT_OBSO_LARGE_BUCKET is set in incompat feature set, it means the cache device is created with obsoleted layout with obso_bucket_site_hi. Now bcache does not support this feature bit, a new BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE incompat feature bit is added for a better layout to support large bucket size. For the legacy compatibility purpose, if a cache device created with obsoleted BCH_FEATURE_INCOMPAT_OBSO_LARGE_BUCKET feature bit, all bcache devices attached to this cache set should be set to read-only. Then the dirty data can be written back to backing device before re-create the cache device with BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE feature bit by the latest bcache-tools. This patch checks BCH_FEATURE_INCOMPAT_OBSO_LARGE_BUCKET feature bit when running a cache set and attach a bcache device to the cache set. If this bit is set, - When run a cache set, print an error kernel message to indicate all following attached bcache device will be read-only. - When attach a bcache device, print an error kernel message to indicate the attached bcache device will be read-only, and ask users to update to latest bcache-tools. Such change is only for cache device whose bucket size >= 32MB, this is for the zoned SSD and almost nobody uses such large bucket size at this moment. If you don't explicit set a large bucket size for a zoned SSD, such change is totally transparent to your bcache device. Fixes: ffa470327572 ("bcache: add bucket_size_hi into struct cache_sb_disk for large bucket") Signed-off-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/super.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 3999641f17753..2047a9cccdb5d 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1332,6 +1332,12 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, bcache_device_link(&dc->disk, c, "bdev"); atomic_inc(&c->attached_dev_nr); + if (bch_has_feature_obso_large_bucket(&(c->cache->sb))) { + pr_err("The obsoleted large bucket layout is unsupported, set the bcache device into read-only\n"); + pr_err("Please update to the latest bcache-tools to create the cache device\n"); + set_disk_ro(dc->disk.disk, 1); + } + /* Allow the writeback thread to proceed */ up_write(&dc->writeback_lock); @@ -1554,6 +1560,12 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u) bcache_device_link(d, c, "volume"); + if (bch_has_feature_obso_large_bucket(&c->cache->sb)) { + pr_err("The obsoleted large bucket layout is unsupported, set the bcache device into read-only\n"); + pr_err("Please update to the latest bcache-tools to create the cache device\n"); + set_disk_ro(d->disk, 1); + } + return 0; err: kobject_put(&d->kobj); @@ -2113,6 +2125,9 @@ static int run_cache_set(struct cache_set *c) c->cache->sb.last_mount = (u32)ktime_get_real_seconds(); bcache_write_super(c); + if (bch_has_feature_obso_large_bucket(&c->cache->sb)) + pr_err("Detect obsoleted large bucket layout, all attached bcache device will be read-only\n"); + list_for_each_entry_safe(dc, t, &uncached_devices, list) bch_cached_dev_attach(dc, c, NULL); -- GitLab From 55e6ac1e1f31c7f678d9f3c8d54c6f102e5f1550 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 8 Jan 2021 20:57:22 +0000 Subject: [PATCH 1642/1894] io_uring: io_rw_reissue lockdep annotations We expect io_rw_reissue() to take place only during submission with uring_lock held. Add a lockdep annotation to check that invariant. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index cb57e0360fcbb..55ba1922a3497 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2692,6 +2692,8 @@ static bool io_rw_reissue(struct io_kiocb *req, long res) if ((res != -EAGAIN && res != -EOPNOTSUPP) || io_wq_current_is_worker()) return false; + lockdep_assert_held(&req->ctx->uring_lock); + ret = io_sq_thread_acquire_mm_files(req->ctx, req); if (io_resubmit_prep(req, ret)) { -- GitLab From 4f793dc40bc605b97624fd36baf085b3c35e8bfd Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 8 Jan 2021 20:57:23 +0000 Subject: [PATCH 1643/1894] io_uring: inline io_uring_attempt_task_drop() A simple preparation change inlining io_uring_attempt_task_drop() into io_uring_flush(). Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 55ba1922a3497..1c931e7a3948f 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8964,23 +8964,6 @@ static void io_uring_del_task_file(struct file *file) fput(file); } -/* - * Drop task note for this file if we're the only ones that hold it after - * pending fput() - */ -static void io_uring_attempt_task_drop(struct file *file) -{ - if (!current->io_uring) - return; - /* - * fput() is pending, will be 2 if the only other ref is our potential - * task file note. If the task is exiting, drop regardless of count. - */ - if (fatal_signal_pending(current) || (current->flags & PF_EXITING) || - atomic_long_read(&file->f_count) == 2) - io_uring_del_task_file(file); -} - static void io_uring_remove_task_files(struct io_uring_task *tctx) { struct file *file; @@ -9072,7 +9055,17 @@ void __io_uring_task_cancel(void) static int io_uring_flush(struct file *file, void *data) { - io_uring_attempt_task_drop(file); + if (!current->io_uring) + return 0; + + /* + * fput() is pending, will be 2 if the only other ref is our potential + * task file note. If the task is exiting, drop regardless of count. + */ + if (fatal_signal_pending(current) || (current->flags & PF_EXITING) || + atomic_long_read(&file->f_count) == 2) + io_uring_del_task_file(file); + return 0; } -- GitLab From 6b5733eb638b7068ab7cb34e663b55a1d1892d85 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 8 Jan 2021 20:57:24 +0000 Subject: [PATCH 1644/1894] io_uring: add warn_once for io_uring_flush() files_cancel() should cancel all relevant requests and drop file notes, so we should never have file notes after that, including on-exit fput and flush. Add a WARN_ONCE to be sure. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 1c931e7a3948f..f39671a0d84f6 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -9055,17 +9055,23 @@ void __io_uring_task_cancel(void) static int io_uring_flush(struct file *file, void *data) { - if (!current->io_uring) + struct io_uring_task *tctx = current->io_uring; + + if (!tctx) return 0; + /* we should have cancelled and erased it before PF_EXITING */ + WARN_ON_ONCE((current->flags & PF_EXITING) && + xa_load(&tctx->xa, (unsigned long)file)); + /* * fput() is pending, will be 2 if the only other ref is our potential * task file note. If the task is exiting, drop regardless of count. */ - if (fatal_signal_pending(current) || (current->flags & PF_EXITING) || - atomic_long_read(&file->f_count) == 2) - io_uring_del_task_file(file); + if (atomic_long_read(&file->f_count) != 2) + return 0; + io_uring_del_task_file(file); return 0; } -- GitLab From d9d05217cb6990b9a56e13b56e7a1b71e2551f6c Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 8 Jan 2021 20:57:25 +0000 Subject: [PATCH 1645/1894] io_uring: stop SQPOLL submit on creator's death When the creator of SQPOLL io_uring dies (i.e. sqo_task), we don't want its internals like ->files and ->mm to be poked by the SQPOLL task, it have never been nice and recently got racy. That can happen when the owner undergoes destruction and SQPOLL tasks tries to submit new requests in parallel, and so calls io_sq_thread_acquire*(). That patch halts SQPOLL submissions when sqo_task dies by introducing sqo_dead flag. Once set, the SQPOLL task must not do any submission, which is synchronised by uring_lock as well as the new flag. The tricky part is to make sure that disabling always happens, that means either the ring is discovered by creator's do_exit() -> cancel, or if the final close() happens before it's done by the creator. The last is guaranteed by the fact that for SQPOLL the creator task and only it holds exactly one file note, so either it pins up to do_exit() or removed by the creator on the final put in flush. (see comments in uring_flush() around file->f_count == 2). One more place that can trigger io_sq_thread_acquire_*() is __io_req_task_submit(). Shoot off requests on sqo_dead there, even though actually we don't need to. That's because cancellation of sqo_task should wait for the request before going any further. note 1: io_disable_sqo_submit() does io_ring_set_wakeup_flag() so the caller would enter the ring to get an error, but it still doesn't guarantee that the flag won't be cleared. note 2: if final __userspace__ close happens not from the creator task, the file note will pin the ring until the task dies. Fixed: b1b6b5a30dce8 ("kernel/io_uring: cancel io_uring before task works") Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 62 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 53 insertions(+), 9 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index f39671a0d84f6..2f305c097bd5b 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -262,6 +262,7 @@ struct io_ring_ctx { unsigned int drain_next: 1; unsigned int eventfd_async: 1; unsigned int restricted: 1; + unsigned int sqo_dead: 1; /* * Ring buffer of indices into array of io_uring_sqe, which is @@ -2160,12 +2161,11 @@ static void io_req_task_cancel(struct callback_head *cb) static void __io_req_task_submit(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; - bool fail; - fail = __io_sq_thread_acquire_mm(ctx) || - __io_sq_thread_acquire_files(ctx); mutex_lock(&ctx->uring_lock); - if (!fail) + if (!ctx->sqo_dead && + !__io_sq_thread_acquire_mm(ctx) && + !__io_sq_thread_acquire_files(ctx)) __io_queue_sqe(req, NULL); else __io_req_task_cancel(req, -EFAULT); @@ -6954,7 +6954,8 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries) if (!list_empty(&ctx->iopoll_list)) io_do_iopoll(ctx, &nr_events, 0); - if (to_submit && likely(!percpu_ref_is_dying(&ctx->refs))) + if (to_submit && !ctx->sqo_dead && + likely(!percpu_ref_is_dying(&ctx->refs))) ret = io_submit_sqes(ctx, to_submit); mutex_unlock(&ctx->uring_lock); } @@ -8712,6 +8713,10 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) { mutex_lock(&ctx->uring_lock); percpu_ref_kill(&ctx->refs); + + if (WARN_ON_ONCE((ctx->flags & IORING_SETUP_SQPOLL) && !ctx->sqo_dead)) + ctx->sqo_dead = 1; + /* if force is set, the ring is going away. always drop after that */ ctx->cq_overflow_flushed = 1; if (ctx->rings) @@ -8874,6 +8879,18 @@ static void __io_uring_cancel_task_requests(struct io_ring_ctx *ctx, } } +static void io_disable_sqo_submit(struct io_ring_ctx *ctx) +{ + WARN_ON_ONCE(ctx->sqo_task != current); + + mutex_lock(&ctx->uring_lock); + ctx->sqo_dead = 1; + mutex_unlock(&ctx->uring_lock); + + /* make sure callers enter the ring to get error */ + io_ring_set_wakeup_flag(ctx); +} + /* * We need to iteratively cancel requests, in case a request has dependent * hard links. These persist even for failure of cancelations, hence keep @@ -8885,6 +8902,8 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx, struct task_struct *task = current; if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) { + /* for SQPOLL only sqo_task has task notes */ + io_disable_sqo_submit(ctx); task = ctx->sq_data->thread; atomic_inc(&task->io_uring->in_idle); io_sq_thread_park(ctx->sq_data); @@ -9056,6 +9075,7 @@ void __io_uring_task_cancel(void) static int io_uring_flush(struct file *file, void *data) { struct io_uring_task *tctx = current->io_uring; + struct io_ring_ctx *ctx = file->private_data; if (!tctx) return 0; @@ -9071,7 +9091,16 @@ static int io_uring_flush(struct file *file, void *data) if (atomic_long_read(&file->f_count) != 2) return 0; - io_uring_del_task_file(file); + if (ctx->flags & IORING_SETUP_SQPOLL) { + /* there is only one file note, which is owned by sqo_task */ + WARN_ON_ONCE((ctx->sqo_task == current) == + !xa_load(&tctx->xa, (unsigned long)file)); + + io_disable_sqo_submit(ctx); + } + + if (!(ctx->flags & IORING_SETUP_SQPOLL) || ctx->sqo_task == current) + io_uring_del_task_file(file); return 0; } @@ -9145,8 +9174,9 @@ static unsigned long io_uring_nommu_get_unmapped_area(struct file *file, #endif /* !CONFIG_MMU */ -static void io_sqpoll_wait_sq(struct io_ring_ctx *ctx) +static int io_sqpoll_wait_sq(struct io_ring_ctx *ctx) { + int ret = 0; DEFINE_WAIT(wait); do { @@ -9155,6 +9185,11 @@ static void io_sqpoll_wait_sq(struct io_ring_ctx *ctx) prepare_to_wait(&ctx->sqo_sq_wait, &wait, TASK_INTERRUPTIBLE); + if (unlikely(ctx->sqo_dead)) { + ret = -EOWNERDEAD; + goto out; + } + if (!io_sqring_full(ctx)) break; @@ -9162,6 +9197,8 @@ static void io_sqpoll_wait_sq(struct io_ring_ctx *ctx) } while (!signal_pending(current)); finish_wait(&ctx->sqo_sq_wait, &wait); +out: + return ret; } static int io_get_ext_arg(unsigned flags, const void __user *argp, size_t *argsz, @@ -9235,10 +9272,16 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, if (ctx->flags & IORING_SETUP_SQPOLL) { io_cqring_overflow_flush(ctx, false, NULL, NULL); + ret = -EOWNERDEAD; + if (unlikely(ctx->sqo_dead)) + goto out; if (flags & IORING_ENTER_SQ_WAKEUP) wake_up(&ctx->sq_data->wait); - if (flags & IORING_ENTER_SQ_WAIT) - io_sqpoll_wait_sq(ctx); + if (flags & IORING_ENTER_SQ_WAIT) { + ret = io_sqpoll_wait_sq(ctx); + if (ret) + goto out; + } submitted = to_submit; } else if (to_submit) { ret = io_uring_add_task_file(ctx, f.file); @@ -9665,6 +9708,7 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p, trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags); return ret; err: + io_disable_sqo_submit(ctx); io_ring_ctx_wait_and_kill(ctx); return ret; } -- GitLab From 6bae85bd70d063b63fbe262d943cc321eab31b17 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 8 Jan 2021 22:46:02 -0800 Subject: [PATCH 1646/1894] maintainers: update my email address Change my email contact ahead of a likely painful eleven-month migration to a certain cobalt enteprisey groupware cloud product that will totally break my workflow. Some day I may get used to having to email being sequestered behind both claret and cerulean oath2+sms 2fa layers, but for now I'll stick with keying in one password to receive an email vs. the required four. Signed-off-by: Darrick J. Wong Signed-off-by: Linus Torvalds --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 3d1b8fe972613..292394098e39f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9273,7 +9273,7 @@ F: drivers/net/ethernet/sgi/ioc3-eth.c IOMAP FILESYSTEM LIBRARY M: Christoph Hellwig -M: Darrick J. Wong +M: Darrick J. Wong M: linux-xfs@vger.kernel.org M: linux-fsdevel@vger.kernel.org L: linux-xfs@vger.kernel.org @@ -19504,7 +19504,7 @@ F: arch/x86/xen/*swiotlb* F: drivers/xen/*swiotlb* XFS FILESYSTEM -M: Darrick J. Wong +M: Darrick J. Wong M: linux-xfs@vger.kernel.org L: linux-xfs@vger.kernel.org S: Supported -- GitLab From a2bc221b972db91e4be1970e776e98f16aa87904 Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Thu, 7 Jan 2021 02:15:20 -0800 Subject: [PATCH 1647/1894] netxen_nic: fix MSI/MSI-x interrupts For all PCI functions on the netxen_nic adapter, interrupt mode (INTx or MSI) configuration is dependent on what has been configured by the PCI function zero in the shared interrupt register, as these adapters do not support mixed mode interrupts among the functions of a given adapter. Logic for setting MSI/MSI-x interrupt mode in the shared interrupt register based on PCI function id zero check is not appropriate for all family of netxen adapters, as for some of the netxen family adapters PCI function zero is not really meant to be probed/loaded in the host but rather just act as a management function on the device, which caused all the other PCI functions on the adapter to always use legacy interrupt (INTx) mode instead of choosing MSI/MSI-x interrupt mode. This patch replaces that check with port number so that for all type of adapters driver attempts for MSI/MSI-x interrupt modes. Fixes: b37eb210c076 ("netxen_nic: Avoid mixed mode interrupts") Signed-off-by: Manish Chopra Signed-off-by: Igor Russkikh Link: https://lore.kernel.org/r/20210107101520.6735-1-manishc@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c index f21847739ef1f..d258e0ccf9465 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c @@ -564,11 +564,6 @@ static const struct net_device_ops netxen_netdev_ops = { .ndo_set_features = netxen_set_features, }; -static inline bool netxen_function_zero(struct pci_dev *pdev) -{ - return (PCI_FUNC(pdev->devfn) == 0) ? true : false; -} - static inline void netxen_set_interrupt_mode(struct netxen_adapter *adapter, u32 mode) { @@ -664,7 +659,7 @@ static int netxen_setup_intr(struct netxen_adapter *adapter) netxen_initialize_interrupt_registers(adapter); netxen_set_msix_bit(pdev, 0); - if (netxen_function_zero(pdev)) { + if (adapter->portnum == 0) { if (!netxen_setup_msi_interrupts(adapter, num_msix)) netxen_set_interrupt_mode(adapter, NETXEN_MSI_MODE); else -- GitLab From b210de4f8c97d57de051e805686248ec4c6cfc52 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Thu, 7 Jan 2021 15:50:18 +0200 Subject: [PATCH 1648/1894] net: ipv6: Validate GSO SKB before finish IPv6 processing There are cases where GSO segment's length exceeds the egress MTU: - Forwarding of a TCP GRO skb, when DF flag is not set. - Forwarding of an skb that arrived on a virtualisation interface (virtio-net/vhost/tap) with TSO/GSO size set by other network stack. - Local GSO skb transmitted on an NETIF_F_TSO tunnel stacked over an interface with a smaller MTU. - Arriving GRO skb (or GSO skb in a virtualised environment) that is bridged to a NETIF_F_TSO tunnel stacked over an interface with an insufficient MTU. If so: - Consume the SKB and its segments. - Issue an ICMP packet with 'Packet Too Big' message containing the MTU, allowing the source host to reduce its Path MTU appropriately. Note: These cases are handled in the same manner in IPv4 output finish. This patch aligns the behavior of IPv6 and the one of IPv4. Fixes: 9e50849054a4 ("netfilter: ipv6: move POSTROUTING invocation before fragmentation") Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Link: https://lore.kernel.org/r/1610027418-30438-1-git-send-email-ayal@nvidia.com Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_output.c | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 749ad72386b23..077d43af8226b 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -125,8 +125,43 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * return -EINVAL; } +static int +ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk, + struct sk_buff *skb, unsigned int mtu) +{ + struct sk_buff *segs, *nskb; + netdev_features_t features; + int ret = 0; + + /* Please see corresponding comment in ip_finish_output_gso + * describing the cases where GSO segment length exceeds the + * egress MTU. + */ + features = netif_skb_features(skb); + segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); + if (IS_ERR_OR_NULL(segs)) { + kfree_skb(skb); + return -ENOMEM; + } + + consume_skb(skb); + + skb_list_walk_safe(segs, segs, nskb) { + int err; + + skb_mark_not_on_list(segs); + err = ip6_fragment(net, sk, segs, ip6_finish_output2); + if (err && ret == 0) + ret = err; + } + + return ret; +} + static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) { + unsigned int mtu; + #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) /* Policy lookup after SNAT yielded a new policy */ if (skb_dst(skb)->xfrm) { @@ -135,7 +170,11 @@ static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff } #endif - if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || + mtu = ip6_skb_dst_mtu(skb); + if (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu)) + return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu); + + if ((skb->len > mtu && !skb_is_gso(skb)) || dst_allfrag(skb_dst(skb)) || (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size)) return ip6_fragment(net, sk, skb, ip6_finish_output2); -- GitLab From 0ea02c73775277001c651ad4a0e83781a9acf406 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Wed, 11 Nov 2020 19:52:16 +0800 Subject: [PATCH 1649/1894] riscv: Drop a duplicated PAGE_KERNEL_EXEC commit b91540d52a08 ("RISC-V: Add EFI runtime services") add a duplicated PAGE_KERNEL_EXEC, kill it. Signed-off-by: Kefeng Wang Reviewed-by: Pekka Enberg Reviewed-by: Atish Patra Fixes: b91540d52a08 ("RISC-V: Add EFI runtime services") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/pgtable.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 41a72861987cc..251e1db088fa2 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -99,7 +99,6 @@ | _PAGE_DIRTY) #define PAGE_KERNEL __pgprot(_PAGE_KERNEL) -#define PAGE_KERNEL_EXEC __pgprot(_PAGE_KERNEL | _PAGE_EXEC) #define PAGE_KERNEL_READ __pgprot(_PAGE_KERNEL & ~_PAGE_WRITE) #define PAGE_KERNEL_EXEC __pgprot(_PAGE_KERNEL | _PAGE_EXEC) #define PAGE_KERNEL_READ_EXEC __pgprot((_PAGE_KERNEL & ~_PAGE_WRITE) \ -- GitLab From 3502bd9b5762154ff11665f3f18f6d7dcc6f781c Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Sat, 9 Jan 2021 00:37:45 +0300 Subject: [PATCH 1650/1894] selftests/tls: fix selftests after adding ChaCha20-Poly1305 TLS selftests where broken because of wrong variable types used. Fix it by changing u16 -> uint16_t Fixes: 4f336e88a870 ("selftests/tls: add CHACHA20-POLY1305 to tls selftests") Reported-by: kernel test robot Signed-off-by: Vadim Fedorenko Link: https://lore.kernel.org/r/1610141865-7142-1-git-send-email-vfedorenko@novek.ru Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tls.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index cb0d1890a860f..e0088c2d38a5d 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -103,8 +103,8 @@ FIXTURE(tls) FIXTURE_VARIANT(tls) { - u16 tls_version; - u16 cipher_type; + uint16_t tls_version; + uint16_t cipher_type; }; FIXTURE_VARIANT_ADD(tls, 12_gcm) -- GitLab From b77413446408fdd256599daf00d5be72b5f3e7c6 Mon Sep 17 00:00:00 2001 From: Hoang Le Date: Fri, 8 Jan 2021 14:13:37 +0700 Subject: [PATCH 1651/1894] tipc: fix NULL deref in tipc_link_xmit() The buffer list can have zero skb as following path: tipc_named_node_up()->tipc_node_xmit()->tipc_link_xmit(), so we need to check the list before casting an &sk_buff. Fault report: [] tipc: Bulk publication failure [] general protection fault, probably for non-canonical [#1] PREEMPT [...] [] KASAN: null-ptr-deref in range [0x00000000000000c8-0x00000000000000cf] [] CPU: 0 PID: 0 Comm: swapper/0 Kdump: loaded Not tainted 5.10.0-rc4+ #2 [] Hardware name: Bochs ..., BIOS Bochs 01/01/2011 [] RIP: 0010:tipc_link_xmit+0xc1/0x2180 [] Code: 24 b8 00 00 00 00 4d 39 ec 4c 0f 44 e8 e8 d7 0a 10 f9 48 [...] [] RSP: 0018:ffffc90000006ea0 EFLAGS: 00010202 [] RAX: dffffc0000000000 RBX: ffff8880224da000 RCX: 1ffff11003d3cc0d [] RDX: 0000000000000019 RSI: ffffffff886007b9 RDI: 00000000000000c8 [] RBP: ffffc90000007018 R08: 0000000000000001 R09: fffff52000000ded [] R10: 0000000000000003 R11: fffff52000000dec R12: ffffc90000007148 [] R13: 0000000000000000 R14: 0000000000000000 R15: ffffc90000007018 [] FS: 0000000000000000(0000) GS:ffff888037400000(0000) knlGS:000[...] [] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [] CR2: 00007fffd2db5000 CR3: 000000002b08f000 CR4: 00000000000006f0 Fixes: af9b028e270fd ("tipc: make media xmit call outside node spinlock context") Acked-by: Jon Maloy Signed-off-by: Hoang Le Link: https://lore.kernel.org/r/20210108071337.3598-1-hoang.h.le@dektech.com.au Signed-off-by: Jakub Kicinski --- net/tipc/link.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index 6ae2140eb4f74..a6a694b789274 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1030,7 +1030,6 @@ void tipc_link_reset(struct tipc_link *l) int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, struct sk_buff_head *xmitq) { - struct tipc_msg *hdr = buf_msg(skb_peek(list)); struct sk_buff_head *backlogq = &l->backlogq; struct sk_buff_head *transmq = &l->transmq; struct sk_buff *skb, *_skb; @@ -1038,13 +1037,18 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, u16 ack = l->rcv_nxt - 1; u16 seqno = l->snd_nxt; int pkt_cnt = skb_queue_len(list); - int imp = msg_importance(hdr); unsigned int mss = tipc_link_mss(l); unsigned int cwin = l->window; unsigned int mtu = l->mtu; + struct tipc_msg *hdr; bool new_bundle; int rc = 0; + int imp; + + if (pkt_cnt <= 0) + return 0; + hdr = buf_msg(skb_peek(list)); if (unlikely(msg_size(hdr) > mtu)) { pr_warn("Too large msg, purging xmit list %d %d %d %d %d!\n", skb_queue_len(list), msg_user(hdr), @@ -1053,6 +1057,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, return -EMSGSIZE; } + imp = msg_importance(hdr); /* Allow oversubscription of one data msg per source at congestion */ if (unlikely(l->backlog[imp].len >= l->backlog[imp].limit)) { if (imp == TIPC_SYSTEM_IMPORTANCE) { -- GitLab From 57726ebe2733891c9f59105eff028735f73d05fb Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Fri, 8 Jan 2021 16:52:09 +0200 Subject: [PATCH 1652/1894] mlxsw: core: Add validation of transceiver temperature thresholds Validate thresholds to avoid a single failure due to some transceiver unreliability. Ignore the last readouts in case warning temperature is above alarm temperature, since it can cause unexpected thermal shutdown. Stay with the previous values and refresh threshold within the next iteration. This is a rare scenario, but it was observed at a customer site. Fixes: 6a79507cfe94 ("mlxsw: core: Extend thermal module with per QSFP module thermal zones") Signed-off-by: Vadim Pasternak Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/core_thermal.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index 8fa286ccdd6bb..250a850496977 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -176,6 +176,12 @@ mlxsw_thermal_module_trips_update(struct device *dev, struct mlxsw_core *core, if (err) return err; + if (crit_temp > emerg_temp) { + dev_warn(dev, "%s : Critical threshold %d is above emergency threshold %d\n", + tz->tzdev->type, crit_temp, emerg_temp); + return 0; + } + /* According to the system thermal requirements, the thermal zones are * defined with four trip points. The critical and emergency * temperature thresholds, provided by QSFP module are set as "active" @@ -190,11 +196,8 @@ mlxsw_thermal_module_trips_update(struct device *dev, struct mlxsw_core *core, tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = crit_temp; tz->trips[MLXSW_THERMAL_TEMP_TRIP_HIGH].temp = crit_temp; tz->trips[MLXSW_THERMAL_TEMP_TRIP_HOT].temp = emerg_temp; - if (emerg_temp > crit_temp) - tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp = emerg_temp + + tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp = emerg_temp + MLXSW_THERMAL_MODULE_TEMP_SHIFT; - else - tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp = emerg_temp; return 0; } -- GitLab From b06ca3d5a43ca2dd806f7688a17e8e7e0619a80a Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Fri, 8 Jan 2021 16:52:10 +0200 Subject: [PATCH 1653/1894] mlxsw: core: Increase critical threshold for ASIC thermal zone Increase critical threshold for ASIC thermal zone from 110C to 140C according to the system hardware requirements. All the supported ASICs (Spectrum-1, Spectrum-2, Spectrum-3) could be still operational with ASIC temperature below 140C. With the old critical threshold value system can perform unjustified shutdown. All the systems equipped with the above ASICs implement thermal protection mechanism at firmware level and firmware could decide to perform system thermal shutdown in case the temperature is below 140C. So with the new threshold system will not meltdown, while thermal operating range will be aligned with hardware abilities. Fixes: 41e760841d26 ("mlxsw: core: Replace thermal temperature trips with defines") Fixes: a50c1e35650b ("mlxsw: core: Implement thermal zone") Signed-off-by: Vadim Pasternak Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/core_thermal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index 250a850496977..bf85ce9835d7f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -19,7 +19,7 @@ #define MLXSW_THERMAL_ASIC_TEMP_NORM 75000 /* 75C */ #define MLXSW_THERMAL_ASIC_TEMP_HIGH 85000 /* 85C */ #define MLXSW_THERMAL_ASIC_TEMP_HOT 105000 /* 105C */ -#define MLXSW_THERMAL_ASIC_TEMP_CRIT 110000 /* 110C */ +#define MLXSW_THERMAL_ASIC_TEMP_CRIT 140000 /* 140C */ #define MLXSW_THERMAL_HYSTERESIS_TEMP 5000 /* 5C */ #define MLXSW_THERMAL_MODULE_TEMP_SHIFT (MLXSW_THERMAL_HYSTERESIS_TEMP * 2) #define MLXSW_THERMAL_ZONE_MAX_NAME 16 -- GitLab From f97844f9c518172f813b7ece18a9956b1f70c1bb Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 5 Jan 2021 16:15:16 +0100 Subject: [PATCH 1654/1894] dt-bindings: net: renesas,etheravb: RZ/G2H needs tx-internal-delay-ps The merge resolution of the interaction of commits 307eea32b202864c ("dt-bindings: net: renesas,ravb: Add support for r8a774e1 SoC") and d7adf6331189cbe9 ("dt-bindings: net: renesas,etheravb: Convert to json-schema") missed that "tx-internal-delay-ps" should be a required property on RZ/G2H. Fixes: 8b0308fe319b8002 ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20210105151516.1540653-1-geert+renesas@glider.be Signed-off-by: Jakub Kicinski --- Documentation/devicetree/bindings/net/renesas,etheravb.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/net/renesas,etheravb.yaml b/Documentation/devicetree/bindings/net/renesas,etheravb.yaml index 244befb6402aa..de9dd574a2f95 100644 --- a/Documentation/devicetree/bindings/net/renesas,etheravb.yaml +++ b/Documentation/devicetree/bindings/net/renesas,etheravb.yaml @@ -163,6 +163,7 @@ allOf: enum: - renesas,etheravb-r8a774a1 - renesas,etheravb-r8a774b1 + - renesas,etheravb-r8a774e1 - renesas,etheravb-r8a7795 - renesas,etheravb-r8a7796 - renesas,etheravb-r8a77961 -- GitLab From fab336b42441e0b2eb1d81becedb45fbdf99606e Mon Sep 17 00:00:00 2001 From: Chen Yi Date: Tue, 5 Jan 2021 23:31:20 +0800 Subject: [PATCH 1655/1894] selftests: netfilter: Pass family parameter "-f" to conntrack tool Fix nft_conntrack_helper.sh false fail report: 1) Conntrack tool need "-f ipv6" parameter to show out ipv6 traffic items. 2) Sleep 1 second after background nc send packet, to make sure check is after this statement executed. False report: FAIL: ns1-lkjUemYw did not show attached helper ip set via ruleset PASS: ns1-lkjUemYw connection on port 2121 has ftp helper attached ... After fix: PASS: ns1-2hUniwU2 connection on port 2121 has ftp helper attached PASS: ns2-2hUniwU2 connection on port 2121 has ftp helper attached ... Fixes: 619ae8e0697a6 ("selftests: netfilter: add test case for conntrack helper assignment") Signed-off-by: Chen Yi Signed-off-by: Pablo Neira Ayuso --- .../selftests/netfilter/nft_conntrack_helper.sh | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/netfilter/nft_conntrack_helper.sh b/tools/testing/selftests/netfilter/nft_conntrack_helper.sh index edf0a48da6bf8..bf6b9626c7dd2 100755 --- a/tools/testing/selftests/netfilter/nft_conntrack_helper.sh +++ b/tools/testing/selftests/netfilter/nft_conntrack_helper.sh @@ -94,7 +94,13 @@ check_for_helper() local message=$2 local port=$3 - ip netns exec ${netns} conntrack -L -p tcp --dport $port 2> /dev/null |grep -q 'helper=ftp' + if echo $message |grep -q 'ipv6';then + local family="ipv6" + else + local family="ipv4" + fi + + ip netns exec ${netns} conntrack -L -f $family -p tcp --dport $port 2> /dev/null |grep -q 'helper=ftp' if [ $? -ne 0 ] ; then echo "FAIL: ${netns} did not show attached helper $message" 1>&2 ret=1 @@ -111,8 +117,8 @@ test_helper() sleep 3 | ip netns exec ${ns2} nc -w 2 -l -p $port > /dev/null & - sleep 1 sleep 1 | ip netns exec ${ns1} nc -w 2 10.0.1.2 $port > /dev/null & + sleep 1 check_for_helper "$ns1" "ip $msg" $port check_for_helper "$ns2" "ip $msg" $port @@ -128,8 +134,8 @@ test_helper() sleep 3 | ip netns exec ${ns2} nc -w 2 -6 -l -p $port > /dev/null & - sleep 1 sleep 1 | ip netns exec ${ns1} nc -w 2 -6 dead:1::2 $port > /dev/null & + sleep 1 check_for_helper "$ns1" "ipv6 $msg" $port check_for_helper "$ns2" "ipv6 $msg" $port -- GitLab From f6351c3f1c27c80535d76cac2299aec44c36291e Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 8 Jan 2021 12:44:33 +0100 Subject: [PATCH 1656/1894] netfilter: conntrack: fix reading nf_conntrack_buckets The old way of changing the conntrack hashsize runtime was through changing the module param via file /sys/module/nf_conntrack/parameters/hashsize. This was extended to sysctl change in commit 3183ab8997a4 ("netfilter: conntrack: allow increasing bucket size via sysctl too"). The commit introduced second "user" variable nf_conntrack_htable_size_user which shadow actual variable nf_conntrack_htable_size. When hashsize is changed via module param this "user" variable isn't updated. This results in sysctl net/netfilter/nf_conntrack_buckets shows the wrong value when users update via the old way. This patch fix the issue by always updating "user" variable when reading the proc file. This will take care of changes to the actual variable without sysctl need to be aware. Fixes: 3183ab8997a4 ("netfilter: conntrack: allow increasing bucket size via sysctl too") Reported-by: Yoel Caspersen Signed-off-by: Jesper Dangaard Brouer Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_standalone.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 46c5557c1fecf..0ee702d374b02 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -523,6 +523,9 @@ nf_conntrack_hash_sysctl(struct ctl_table *table, int write, { int ret; + /* module_param hashsize could have changed value */ + nf_conntrack_htable_size_user = nf_conntrack_htable_size; + ret = proc_dointvec(table, write, buffer, lenp, ppos); if (ret < 0 || !write) return ret; -- GitLab From a0adc8eabb402cfb9f32d15edd9f65f65e35cdce Mon Sep 17 00:00:00 2001 From: John Stultz Date: Thu, 7 Jan 2021 20:26:16 +0000 Subject: [PATCH 1657/1894] dma-buf: cma_heap: Fix memory leak in CMA heap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bing Song noticed the CMA heap was leaking memory due to a flub I made in commit a5d2d29e24be ("dma-buf: heaps: Move heap-helper logic into the cma_heap implementation"), and provided this fix which ensures the pagelist is also freed on release. Cc: Bing Song Cc: Sumit Semwal Cc: Liam Mark Cc: Laura Abbott Cc: Brian Starkey Cc: Hridya Valsaraju Cc: Suren Baghdasaryan Cc: Sandeep Patil Cc: Daniel Mentz Cc: Chris Goldsworthy Cc: Ørjan Eide Cc: Robin Murphy Cc: Ezequiel Garcia Cc: Simon Ser Cc: James Jones Cc: linux-media@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Reported-by: Bing Song Fixes: a5d2d29e24be ("dma-buf: heaps: Move heap-helper logic into the cma_heap implementation") Signed-off-by: John Stultz Signed-off-by: Sumit Semwal Link: https://patchwork.freedesktop.org/patch/msgid/20210107202616.75170-1-john.stultz@linaro.org --- drivers/dma-buf/heaps/cma_heap.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/dma-buf/heaps/cma_heap.c b/drivers/dma-buf/heaps/cma_heap.c index 3c4e343011721..364fc2f3e4995 100644 --- a/drivers/dma-buf/heaps/cma_heap.c +++ b/drivers/dma-buf/heaps/cma_heap.c @@ -251,6 +251,9 @@ static void cma_heap_dma_buf_release(struct dma_buf *dmabuf) buffer->vaddr = NULL; } + /* free page list */ + kfree(buffer->pages); + /* release memory */ cma_release(cma_heap->cma, buffer->cma_pages, buffer->pagecount); kfree(buffer); } -- GitLab From c98e9daa59a611ff4e163689815f40380c912415 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Tue, 5 Jan 2021 08:54:32 -0500 Subject: [PATCH 1658/1894] NFS: Adjust fs_context error logging Several existing dprink()/dfprintk() calls were converted to use the new mount API logging macros by commit ce8866f0913f ("NFS: Attach supplementary error information to fs_context"). If the fs_context was not created using fsopen() then it will not have had a log buffer allocated for it, and the new mount API logging macros will wind up calling printk(). This can result in syslog messages being logged where previously there were none... most notably "NFS4: Couldn't follow remote path", which can happen if the client is auto-negotiating a protocol version with an NFS server that doesn't support the higher v4.x versions. Convert the nfs_errorf(), nfs_invalf(), and nfs_warnf() macros to check for the existence of the fs_context's log buffer and call dprintk() if it doesn't exist. Add nfs_ferrorf(), nfs_finvalf(), and nfs_warnf(), which do the same thing but take an NFS debug flag as an argument and call dfprintk(). Finally, modify the "NFS4: Couldn't follow remote path" message to use nfs_ferrorf(). Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=207385 Signed-off-by: Scott Mayhew Reviewed-by: Benjamin Coddington Fixes: ce8866f0913f ("NFS: Attach supplementary error information to fs_context.") Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 26 +++++++++++++++++++++++--- fs/nfs/nfs4super.c | 4 ++-- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index b840d0a91c9d8..6bdee7ab3a6c2 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -136,9 +136,29 @@ struct nfs_fs_context { } clone_data; }; -#define nfs_errorf(fc, fmt, ...) errorf(fc, fmt, ## __VA_ARGS__) -#define nfs_invalf(fc, fmt, ...) invalf(fc, fmt, ## __VA_ARGS__) -#define nfs_warnf(fc, fmt, ...) warnf(fc, fmt, ## __VA_ARGS__) +#define nfs_errorf(fc, fmt, ...) ((fc)->log.log ? \ + errorf(fc, fmt, ## __VA_ARGS__) : \ + ({ dprintk(fmt "\n", ## __VA_ARGS__); })) + +#define nfs_ferrorf(fc, fac, fmt, ...) ((fc)->log.log ? \ + errorf(fc, fmt, ## __VA_ARGS__) : \ + ({ dfprintk(fac, fmt "\n", ## __VA_ARGS__); })) + +#define nfs_invalf(fc, fmt, ...) ((fc)->log.log ? \ + invalf(fc, fmt, ## __VA_ARGS__) : \ + ({ dprintk(fmt "\n", ## __VA_ARGS__); -EINVAL; })) + +#define nfs_finvalf(fc, fac, fmt, ...) ((fc)->log.log ? \ + invalf(fc, fmt, ## __VA_ARGS__) : \ + ({ dfprintk(fac, fmt "\n", ## __VA_ARGS__); -EINVAL; })) + +#define nfs_warnf(fc, fmt, ...) ((fc)->log.log ? \ + warnf(fc, fmt, ## __VA_ARGS__) : \ + ({ dprintk(fmt "\n", ## __VA_ARGS__); })) + +#define nfs_fwarnf(fc, fac, fmt, ...) ((fc)->log.log ? \ + warnf(fc, fmt, ## __VA_ARGS__) : \ + ({ dfprintk(fac, fmt "\n", ## __VA_ARGS__); })) static inline struct nfs_fs_context *nfs_fc2context(const struct fs_context *fc) { diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 984cc42ee54d8..d09bcfd7db894 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -227,7 +227,7 @@ int nfs4_try_get_tree(struct fs_context *fc) fc, ctx->nfs_server.hostname, ctx->nfs_server.export_path); if (err) { - nfs_errorf(fc, "NFS4: Couldn't follow remote path"); + nfs_ferrorf(fc, MOUNT, "NFS4: Couldn't follow remote path"); dfprintk(MOUNT, "<-- nfs4_try_get_tree() = %d [error]\n", err); } else { dfprintk(MOUNT, "<-- nfs4_try_get_tree() = 0\n"); @@ -250,7 +250,7 @@ int nfs4_get_referral_tree(struct fs_context *fc) fc, ctx->nfs_server.hostname, ctx->nfs_server.export_path); if (err) { - nfs_errorf(fc, "NFS4: Couldn't follow remote path"); + nfs_ferrorf(fc, MOUNT, "NFS4: Couldn't follow remote path"); dfprintk(MOUNT, "<-- nfs4_get_referral_tree() = %d [error]\n", err); } else { dfprintk(MOUNT, "<-- nfs4_get_referral_tree() = 0\n"); -- GitLab From 86b53fbf08f48d353a86a06aef537e78e82ba721 Mon Sep 17 00:00:00 2001 From: "j.nixdorf@avm.de" Date: Tue, 5 Jan 2021 15:17:01 +0100 Subject: [PATCH 1659/1894] net: sunrpc: interpret the return value of kstrtou32 correctly A return value of 0 means success. This is documented in lib/kstrtox.c. This was found by trying to mount an NFS share from a link-local IPv6 address with the interface specified by its index: mount("[fe80::1%1]:/srv/nfs", "/mnt", "nfs", 0, "nolock,addr=fe80::1%1") Before this commit this failed with EINVAL and also caused the following message in dmesg: [...] NFS: bad IP address specified: addr=fe80::1%1 The syscall using the same address based on the interface name instead of its index succeeds. Credits for this patch go to my colleague Christian Speich, who traced the origin of this bug to this line of code. Signed-off-by: Johannes Nixdorf Fixes: 00cfaa943ec3 ("replace strict_strto calls") Signed-off-by: Trond Myklebust --- net/sunrpc/addr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c index 010dcb876f9d7..6e4dbd577a39f 100644 --- a/net/sunrpc/addr.c +++ b/net/sunrpc/addr.c @@ -185,7 +185,7 @@ static int rpc_parse_scope_id(struct net *net, const char *buf, scope_id = dev->ifindex; dev_put(dev); } else { - if (kstrtou32(p, 10, &scope_id) == 0) { + if (kstrtou32(p, 10, &scope_id) != 0) { kfree(p); return 0; } -- GitLab From 67bbceedc9bb8ad48993a8bd6486054756d711f4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 4 Jan 2021 13:35:46 -0500 Subject: [PATCH 1660/1894] pNFS: Mark layout for return if return-on-close was not sent If the layout return-on-close failed because the layoutreturn was never sent, then we should mark the layout for return again. Fixes: 9c47b18cf722 ("pNFS: Ensure we do clear the return-on-close layout stateid on fatal errors") Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 07f59dc8cb2e7..ccc89fab18020 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1560,12 +1560,18 @@ void pnfs_roc_release(struct nfs4_layoutreturn_args *args, int ret) { struct pnfs_layout_hdr *lo = args->layout; + struct inode *inode = args->inode; const nfs4_stateid *arg_stateid = NULL; const nfs4_stateid *res_stateid = NULL; struct nfs4_xdr_opaque_data *ld_private = args->ld_private; switch (ret) { case -NFS4ERR_NOMATCHING_LAYOUT: + spin_lock(&inode->i_lock); + if (pnfs_layout_is_valid(lo) && + nfs4_stateid_match_other(&args->stateid, &lo->plh_stateid)) + pnfs_set_plh_return_info(lo, args->range.iomode, 0); + spin_unlock(&inode->i_lock); break; case 0: if (res->lrs_present) -- GitLab From 078000d02d57f02dde61de4901f289672e98c8bc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 4 Jan 2021 13:18:03 -0500 Subject: [PATCH 1661/1894] pNFS: We want return-on-close to complete when evicting the inode If the inode is being evicted, it should be safe to run return-on-close, so we should do it to ensure we don't inadvertently leak layout segments. Fixes: 1c5bd76d17cc ("pNFS: Enable layoutreturn operation for return-on-close") Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 26 ++++++++++---------------- fs/nfs/pnfs.c | 8 +++----- fs/nfs/pnfs.h | 8 +++----- 3 files changed, 16 insertions(+), 26 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 14acd2f791072..2f4679a62712a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3536,10 +3536,8 @@ static void nfs4_close_done(struct rpc_task *task, void *data) trace_nfs4_close(state, &calldata->arg, &calldata->res, task->tk_status); /* Handle Layoutreturn errors */ - if (pnfs_roc_done(task, calldata->inode, - &calldata->arg.lr_args, - &calldata->res.lr_res, - &calldata->res.lr_ret) == -EAGAIN) + if (pnfs_roc_done(task, &calldata->arg.lr_args, &calldata->res.lr_res, + &calldata->res.lr_ret) == -EAGAIN) goto out_restart; /* hmm. we are done with the inode, and in the process of freeing @@ -6384,10 +6382,8 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) trace_nfs4_delegreturn_exit(&data->args, &data->res, task->tk_status); /* Handle Layoutreturn errors */ - if (pnfs_roc_done(task, data->inode, - &data->args.lr_args, - &data->res.lr_res, - &data->res.lr_ret) == -EAGAIN) + if (pnfs_roc_done(task, &data->args.lr_args, &data->res.lr_res, + &data->res.lr_ret) == -EAGAIN) goto out_restart; switch (task->tk_status) { @@ -6441,10 +6437,10 @@ static void nfs4_delegreturn_release(void *calldata) struct nfs4_delegreturndata *data = calldata; struct inode *inode = data->inode; + if (data->lr.roc) + pnfs_roc_release(&data->lr.arg, &data->lr.res, + data->res.lr_ret); if (inode) { - if (data->lr.roc) - pnfs_roc_release(&data->lr.arg, &data->lr.res, - data->res.lr_ret); nfs_post_op_update_inode_force_wcc(inode, &data->fattr); nfs_iput_and_deactive(inode); } @@ -6520,16 +6516,14 @@ static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, nfs_fattr_init(data->res.fattr); data->timestamp = jiffies; data->rpc_status = 0; - data->lr.roc = pnfs_roc(inode, &data->lr.arg, &data->lr.res, cred); data->inode = nfs_igrab_and_active(inode); - if (data->inode) { + if (data->inode || issync) { + data->lr.roc = pnfs_roc(inode, &data->lr.arg, &data->lr.res, + cred); if (data->lr.roc) { data->args.lr_args = &data->lr.arg; data->res.lr_res = &data->lr.res; } - } else if (data->lr.roc) { - pnfs_roc_release(&data->lr.arg, &data->lr.res, 0); - data->lr.roc = false; } task_setup_data.callback_data = data; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index ccc89fab18020..a18b1992b2fb3 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1509,10 +1509,8 @@ out_noroc: return false; } -int pnfs_roc_done(struct rpc_task *task, struct inode *inode, - struct nfs4_layoutreturn_args **argpp, - struct nfs4_layoutreturn_res **respp, - int *ret) +int pnfs_roc_done(struct rpc_task *task, struct nfs4_layoutreturn_args **argpp, + struct nfs4_layoutreturn_res **respp, int *ret) { struct nfs4_layoutreturn_args *arg = *argpp; int retval = -EAGAIN; @@ -1545,7 +1543,7 @@ int pnfs_roc_done(struct rpc_task *task, struct inode *inode, return 0; case -NFS4ERR_OLD_STATEID: if (!nfs4_layout_refresh_old_stateid(&arg->stateid, - &arg->range, inode)) + &arg->range, arg->inode)) break; *ret = -NFS4ERR_NOMATCHING_LAYOUT; return -EAGAIN; diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index bbd3de1025f23..d810ae674f4e8 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -297,10 +297,8 @@ bool pnfs_roc(struct inode *ino, struct nfs4_layoutreturn_args *args, struct nfs4_layoutreturn_res *res, const struct cred *cred); -int pnfs_roc_done(struct rpc_task *task, struct inode *inode, - struct nfs4_layoutreturn_args **argpp, - struct nfs4_layoutreturn_res **respp, - int *ret); +int pnfs_roc_done(struct rpc_task *task, struct nfs4_layoutreturn_args **argpp, + struct nfs4_layoutreturn_res **respp, int *ret); void pnfs_roc_release(struct nfs4_layoutreturn_args *args, struct nfs4_layoutreturn_res *res, int ret); @@ -772,7 +770,7 @@ pnfs_roc(struct inode *ino, } static inline int -pnfs_roc_done(struct rpc_task *task, struct inode *inode, +pnfs_roc_done(struct rpc_task *task, struct nfs4_layoutreturn_args **argpp, struct nfs4_layoutreturn_res **respp, int *ret) -- GitLab From c18d1e17ba2f6a1c9257b0b5d2882a6e3f772673 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 4 Jan 2021 15:01:18 -0500 Subject: [PATCH 1662/1894] pNFS: Clean up pnfs_layoutreturn_free_lsegs() Remove the check for whether or not the stateid is NULL, and fix up the callers. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index a18b1992b2fb3..16a37214aba99 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1152,7 +1152,7 @@ void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo, LIST_HEAD(freeme); spin_lock(&inode->i_lock); - if (!pnfs_layout_is_valid(lo) || !arg_stateid || + if (!pnfs_layout_is_valid(lo) || !nfs4_stateid_match_other(&lo->plh_stateid, arg_stateid)) goto out_unlock; if (stateid) { @@ -1559,7 +1559,6 @@ void pnfs_roc_release(struct nfs4_layoutreturn_args *args, { struct pnfs_layout_hdr *lo = args->layout; struct inode *inode = args->inode; - const nfs4_stateid *arg_stateid = NULL; const nfs4_stateid *res_stateid = NULL; struct nfs4_xdr_opaque_data *ld_private = args->ld_private; @@ -1569,6 +1568,7 @@ void pnfs_roc_release(struct nfs4_layoutreturn_args *args, if (pnfs_layout_is_valid(lo) && nfs4_stateid_match_other(&args->stateid, &lo->plh_stateid)) pnfs_set_plh_return_info(lo, args->range.iomode, 0); + pnfs_clear_layoutreturn_waitbit(lo); spin_unlock(&inode->i_lock); break; case 0: @@ -1576,11 +1576,10 @@ void pnfs_roc_release(struct nfs4_layoutreturn_args *args, res_stateid = &res->stateid; fallthrough; default: - arg_stateid = &args->stateid; + pnfs_layoutreturn_free_lsegs(lo, &args->stateid, &args->range, + res_stateid); } trace_nfs4_layoutreturn_on_close(args->inode, &args->stateid, ret); - pnfs_layoutreturn_free_lsegs(lo, arg_stateid, &args->range, - res_stateid); if (ld_private && ld_private->ops && ld_private->ops->free) ld_private->ops->free(ld_private); pnfs_put_layout_hdr(lo); -- GitLab From 2c8d5fc37fe2384a9bdb6965443ab9224d46f704 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Jan 2021 06:43:45 -0500 Subject: [PATCH 1663/1894] pNFS: Stricter ordering of layoutget and layoutreturn If a layout return is in progress, we should wait for it to complete, in case the layout segment we are picking up gets returned too. Fixes: 30cb3ee299cb ("pNFS: Handle NFS4ERR_OLD_STATEID on layoutreturn by bumping the state seqid") Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 43 +++++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 16a37214aba99..fc13a3c8bc483 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2018,6 +2018,27 @@ lookup_again: goto lookup_again; } + /* + * Because we free lsegs when sending LAYOUTRETURN, we need to wait + * for LAYOUTRETURN. + */ + if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) { + spin_unlock(&ino->i_lock); + dprintk("%s wait for layoutreturn\n", __func__); + lseg = ERR_PTR(pnfs_prepare_to_retry_layoutget(lo)); + if (!IS_ERR(lseg)) { + pnfs_put_layout_hdr(lo); + dprintk("%s retrying\n", __func__); + trace_pnfs_update_layout(ino, pos, count, iomode, lo, + lseg, + PNFS_UPDATE_LAYOUT_RETRY); + goto lookup_again; + } + trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, + PNFS_UPDATE_LAYOUT_RETURN); + goto out_put_layout_hdr; + } + lseg = pnfs_find_lseg(lo, &arg, strict_iomode); if (lseg) { trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, @@ -2070,28 +2091,6 @@ lookup_again: nfs4_stateid_copy(&stateid, &lo->plh_stateid); } - /* - * Because we free lsegs before sending LAYOUTRETURN, we need to wait - * for LAYOUTRETURN even if first is true. - */ - if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) { - spin_unlock(&ino->i_lock); - dprintk("%s wait for layoutreturn\n", __func__); - lseg = ERR_PTR(pnfs_prepare_to_retry_layoutget(lo)); - if (!IS_ERR(lseg)) { - if (first) - pnfs_clear_first_layoutget(lo); - pnfs_put_layout_hdr(lo); - dprintk("%s retrying\n", __func__); - trace_pnfs_update_layout(ino, pos, count, iomode, lo, - lseg, PNFS_UPDATE_LAYOUT_RETRY); - goto lookup_again; - } - trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, - PNFS_UPDATE_LAYOUT_RETURN); - goto out_put_layout_hdr; - } - if (pnfs_layoutgets_blocked(lo)) { trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, PNFS_UPDATE_LAYOUT_BLOCKED); -- GitLab From 1757655d780d9d29bc4b60e708342e94924f7ef3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 6 Jan 2021 11:28:30 -0500 Subject: [PATCH 1664/1894] NFS/pNFS: Don't call pnfs_free_bucket_lseg() before removing the request In pnfs_generic_clear_request_commit(), we try calling pnfs_free_bucket_lseg() before we remove the request from the DS bucket. That will always fail, since the point is to test for whether or not that bucket is empty. Fixes: c84bea59449a ("NFS/pNFS: Simplify bucket layout segment reference counting") Signed-off-by: Trond Myklebust --- fs/nfs/pnfs_nfs.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 2efcfdd348a11..df20bbe8d15e3 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -78,22 +78,18 @@ void pnfs_generic_clear_request_commit(struct nfs_page *req, struct nfs_commit_info *cinfo) { - struct pnfs_layout_segment *freeme = NULL; + struct pnfs_commit_bucket *bucket = NULL; if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags)) goto out; cinfo->ds->nwritten--; - if (list_is_singular(&req->wb_list)) { - struct pnfs_commit_bucket *bucket; - + if (list_is_singular(&req->wb_list)) bucket = list_first_entry(&req->wb_list, - struct pnfs_commit_bucket, - written); - freeme = pnfs_free_bucket_lseg(bucket); - } + struct pnfs_commit_bucket, written); out: nfs_request_remove_commit_list(req, cinfo); - pnfs_put_lseg(freeme); + if (bucket) + pnfs_put_lseg(pnfs_free_bucket_lseg(bucket)); } EXPORT_SYMBOL_GPL(pnfs_generic_clear_request_commit); -- GitLab From 46c9ea1d4fee4cf1f8cc6001b9c14aae61b3d502 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 6 Jan 2021 11:54:57 -0500 Subject: [PATCH 1665/1894] NFS/pNFS: Don't leak DS commits in pnfs_generic_retry_commit() We must ensure that we pass a layout segment to nfs_retry_commit() when we're cleaning up after pnfs_bucket_alloc_ds_commits(). Otherwise, requests that should be committed to the DS will get committed to the MDS. Do so by ensuring that pnfs_bucket_get_committing() always tries to return a layout segment when it returns a non-empty page list. Fixes: c84bea59449a ("NFS/pNFS: Simplify bucket layout segment reference counting") Signed-off-by: Trond Myklebust --- fs/nfs/pnfs_nfs.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index df20bbe8d15e3..49d3389bd8130 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -403,12 +403,16 @@ pnfs_bucket_get_committing(struct list_head *head, struct pnfs_commit_bucket *bucket, struct nfs_commit_info *cinfo) { + struct pnfs_layout_segment *lseg; struct list_head *pos; list_for_each(pos, &bucket->committing) cinfo->ds->ncommitting--; list_splice_init(&bucket->committing, head); - return pnfs_free_bucket_lseg(bucket); + lseg = pnfs_free_bucket_lseg(bucket); + if (!lseg) + lseg = pnfs_get_lseg(bucket->lseg); + return lseg; } static struct nfs_commit_data * @@ -420,8 +424,6 @@ pnfs_bucket_fetch_commitdata(struct pnfs_commit_bucket *bucket, if (!data) return NULL; data->lseg = pnfs_bucket_get_committing(&data->pages, bucket, cinfo); - if (!data->lseg) - data->lseg = pnfs_get_lseg(bucket->lseg); return data; } -- GitLab From cb2856c5971723910a86b7d1d0cf623d6919cbc4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 6 Jan 2021 14:13:22 -0500 Subject: [PATCH 1666/1894] NFS/pNFS: Fix a leak of the layout 'plh_outstanding' counter If we exit _lgopen_prepare_attached() without setting a layout, we will currently leak the plh_outstanding counter. Fixes: 411ae722d10a ("pNFS: Wait for stale layoutget calls to complete in pnfs_update_layout()") Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index fc13a3c8bc483..4f274f21c4abc 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2244,6 +2244,7 @@ static void _lgopen_prepare_attached(struct nfs4_opendata *data, &rng, GFP_KERNEL); if (!lgp) { pnfs_clear_first_layoutget(lo); + nfs_layoutget_end(lo); pnfs_put_layout_hdr(lo); return; } -- GitLab From 5625dcfbbcf892e40e8d60abbb5f56701a1d031c Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 7 Jan 2021 17:12:08 +0530 Subject: [PATCH 1667/1894] Documentation: kbuild: Fix section reference Section 3.11 was incorrectly called 3.9, fix it. Signed-off-by: Viresh Kumar Signed-off-by: Masahiro Yamada --- Documentation/kbuild/makefiles.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/kbuild/makefiles.rst b/Documentation/kbuild/makefiles.rst index d36768cf12506..9f6a118819513 100644 --- a/Documentation/kbuild/makefiles.rst +++ b/Documentation/kbuild/makefiles.rst @@ -598,7 +598,7 @@ more details, with real examples. explicitly added to $(targets). Assignments to $(targets) are without $(obj)/ prefix. if_changed may be - used in conjunction with custom rules as defined in "3.9 Custom Rules". + used in conjunction with custom rules as defined in "3.11 Custom Rules". Note: It is a typical mistake to forget the FORCE prerequisite. Another common pitfall is that whitespace is sometimes significant; for -- GitLab From 113aac6d567bda783af36d08f73bfda47d8e9a40 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 10 Jan 2021 15:46:06 -0500 Subject: [PATCH 1668/1894] NFS: nfs_delegation_find_inode_server must first reference the superblock Before referencing the inode, we must ensure that the superblock can be referenced. Otherwise, we can end up with iput() calling superblock operations that are no longer valid or accessible. Fixes: e39d8a186ed0 ("NFSv4: Fix an Oops during delegation callbacks") Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 816e1427f17eb..04bf8066980c1 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -1011,22 +1011,24 @@ nfs_delegation_find_inode_server(struct nfs_server *server, const struct nfs_fh *fhandle) { struct nfs_delegation *delegation; - struct inode *freeme, *res = NULL; + struct super_block *freeme = NULL; + struct inode *res = NULL; list_for_each_entry_rcu(delegation, &server->delegations, super_list) { spin_lock(&delegation->lock); if (delegation->inode != NULL && !test_bit(NFS_DELEGATION_REVOKED, &delegation->flags) && nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) { - freeme = igrab(delegation->inode); - if (freeme && nfs_sb_active(freeme->i_sb)) - res = freeme; + if (nfs_sb_active(server->super)) { + freeme = server->super; + res = igrab(delegation->inode); + } spin_unlock(&delegation->lock); if (res != NULL) return res; if (freeme) { rcu_read_unlock(); - iput(freeme); + nfs_sb_deactive(freeme); rcu_read_lock(); } return ERR_PTR(-EAGAIN); -- GitLab From 896567ee7f17a8a736cda8a28cc987228410a2ac Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 10 Jan 2021 15:58:08 -0500 Subject: [PATCH 1669/1894] NFS: nfs_igrab_and_active must first reference the superblock Before referencing the inode, we must ensure that the superblock can be referenced. Otherwise, we can end up with iput() calling superblock operations that are no longer valid or accessible. Fixes: ea7c38fef0b7 ("NFSv4: Ensure we reference the inode for return-on-close in delegreturn") Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 6bdee7ab3a6c2..62d3189745cdc 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -599,12 +599,14 @@ extern void nfs4_test_session_trunk(struct rpc_clnt *clnt, static inline struct inode *nfs_igrab_and_active(struct inode *inode) { - inode = igrab(inode); - if (inode != NULL && !nfs_sb_active(inode->i_sb)) { - iput(inode); - inode = NULL; + struct super_block *sb = inode->i_sb; + + if (sb && nfs_sb_active(sb)) { + if (igrab(inode)) + return inode; + nfs_sb_deactive(sb); } - return inode; + return NULL; } static inline void nfs_iput_and_deactive(struct inode *inode) -- GitLab From 7c53f6b671f4aba70ff15e1b05148b10d58c2837 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 10 Jan 2021 14:34:50 -0800 Subject: [PATCH 1670/1894] Linux 5.11-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 8b2c3f88ee5ea..9e73f82e0d863 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 11 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Kleptomaniac Octopus # *DOCUMENTATION* -- GitLab From 869f4fdaf4ca7bb6e0d05caf6fa1108dddc346a7 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Sat, 9 Jan 2021 20:01:21 +0800 Subject: [PATCH 1671/1894] netfilter: nf_nat: Fix memleak in nf_nat_init When register_pernet_subsys() fails, nf_nat_bysource should be freed just like when nf_ct_extend_register() fails. Fixes: 1cd472bf036ca ("netfilter: nf_nat: add nat hook register functions to nf_nat") Signed-off-by: Dinghao Liu Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_nat_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index ea923f8cf9c42..b7c3c902290f1 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -1174,6 +1174,7 @@ static int __init nf_nat_init(void) ret = register_pernet_subsys(&nat_net_ops); if (ret < 0) { nf_ct_extend_unregister(&nat_extend); + kvfree(nf_nat_bysource); return ret; } -- GitLab From 00cb645fd7e29bdd20967cd20fa8f77bcdf422f9 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 18 Nov 2020 13:40:58 +0100 Subject: [PATCH 1672/1894] drm/i915/dsi: Use unconditional msleep for the panel_on_delay when there is no reset-deassert MIPI-sequence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 25b4620ee822 ("drm/i915/dsi: Skip delays for v3 VBTs in vid-mode") added an intel_dsi_msleep() helper which skips sleeping if the MIPI-sequences have a version of 3 or newer and the panel is in vid-mode; and it moved a bunch of msleep-s over to this new helper. This was based on my reading of the big comment around line 730 which starts with "Panel enable/disable sequences from the VBT spec.", where the "v3 video mode seq" column does not have any wait t# entries. Given that this code has been used on a lot of different devices without issues until now, it seems that my interpretation of the spec here is mostly correct. But now I have encountered one device, an Acer Aspire Switch 10 E SW3-016, where the panel will not light up unless we do actually honor the panel_on_delay after exexuting the MIPI_SEQ_PANEL_ON sequence. What seems to set this model apart is that it is lacking a MIPI_SEQ_DEASSERT_RESET sequence, which is where the power-on delay usually happens. Fix the panel not lighting up on this model by using an unconditional msleep(panel_on_delay) instead of intel_dsi_msleep() when there is no MIPI_SEQ_DEASSERT_RESET sequence. Fixes: 25b4620ee822 ("drm/i915/dsi: Skip delays for v3 VBTs in vid-mode") Signed-off-by: Hans de Goede Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20201118124058.26021-1-hdegoede@redhat.com (cherry picked from commit 6fdb335f1c9c0845b50625de1624d8445c4c4a07) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/vlv_dsi.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index d52f9c1779081..f94025ec603a6 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -812,10 +812,20 @@ static void intel_dsi_pre_enable(struct intel_atomic_state *state, intel_dsi_prepare(encoder, pipe_config); intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_POWER_ON); - intel_dsi_msleep(intel_dsi, intel_dsi->panel_on_delay); - /* Deassert reset */ - intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DEASSERT_RESET); + /* + * Give the panel time to power-on and then deassert its reset. + * Depending on the VBT MIPI sequences version the deassert-seq + * may contain the necessary delay, intel_dsi_msleep() will skip + * the delay in that case. If there is no deassert-seq, then an + * unconditional msleep is used to give the panel time to power-on. + */ + if (dev_priv->vbt.dsi.sequence[MIPI_SEQ_DEASSERT_RESET]) { + intel_dsi_msleep(intel_dsi, intel_dsi->panel_on_delay); + intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DEASSERT_RESET); + } else { + msleep(intel_dsi->panel_on_delay); + } if (IS_GEMINILAKE(dev_priv)) { glk_cold_boot = glk_dsi_enable_io(encoder); -- GitLab From 057fe3535eb35696ad5a849d01d61efa930d2182 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 4 Jan 2021 20:39:05 +0000 Subject: [PATCH 1673/1894] drm/i915: Disable RPM wakeref assertions during driver shutdown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As with the regular suspend paths, also disable the wakeref assertions as we disable the driver during shutdown. Reported-by: Hans de Goede Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2899 Fixes: fe0f1e3bfdfe ("drm/i915: Shut down displays gracefully on reboot") Signed-off-by: Chris Wilson Cc: Ville Syrjälä Cc: Hans de Goede Tested-by: Hans de Goede Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210104203905.19248-1-chris@chris-wilson.co.uk (cherry picked from commit 19fe4ac6f0e7163daf9375a4d39947389ae465fa) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_drv.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 88ad754962af1..99eb0d7bbc447 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1047,6 +1047,8 @@ static void intel_shutdown_encoders(struct drm_i915_private *dev_priv) void i915_driver_shutdown(struct drm_i915_private *i915) { + disable_rpm_wakeref_asserts(&i915->runtime_pm); + i915_gem_suspend(i915); drm_kms_helper_poll_disable(&i915->drm); @@ -1060,6 +1062,8 @@ void i915_driver_shutdown(struct drm_i915_private *i915) intel_suspend_encoders(i915); intel_shutdown_encoders(i915); + + enable_rpm_wakeref_asserts(&i915->runtime_pm); } static bool suspend_to_idle(struct drm_i915_private *dev_priv) -- GitLab From bb83d5fb550bb7db75b29e6342417fda2bbb691c Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 8 Jan 2021 17:28:41 +0200 Subject: [PATCH 1674/1894] drm/i915/backlight: fix CPU mode backlight takeover on LPT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pch_get_backlight(), lpt_get_backlight(), and lpt_set_backlight() functions operate directly on the hardware registers. If inverting the value is needed, using intel_panel_compute_brightness(), it should only be done in the interface between hardware registers and panel->backlight.level. The CPU mode takeover code added in commit 5b1ec9ac7ab5 ("drm/i915/backlight: Fix backlight takeover on LPT, v3.") reads the hardware register and converts to panel->backlight.level correctly, however the value written back should remain in the hardware register "domain". This hasn't been an issue, because GM45 machines are the only known users of i915.invert_brightness and the brightness invert quirk, and without one of them no conversion is made. It's likely nobody's ever hit the problem. Fixes: 5b1ec9ac7ab5 ("drm/i915/backlight: Fix backlight takeover on LPT, v3.") Cc: Maarten Lankhorst Cc: Ville Syrjälä Cc: Lyude Paul Cc: # v5.1+ Reviewed-by: Lyude Paul Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210108152841.6944-1-jani.nikula@intel.com (cherry picked from commit 0d4ced1c5bfe649196877d90442d4fd618e19153) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_panel.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_panel.c b/drivers/gpu/drm/i915/display/intel_panel.c index 9f23bac0d7924..d64fce1a17cbc 100644 --- a/drivers/gpu/drm/i915/display/intel_panel.c +++ b/drivers/gpu/drm/i915/display/intel_panel.c @@ -1650,16 +1650,13 @@ static int lpt_setup_backlight(struct intel_connector *connector, enum pipe unus val = pch_get_backlight(connector); else val = lpt_get_backlight(connector); - val = intel_panel_compute_brightness(connector, val); - panel->backlight.level = clamp(val, panel->backlight.min, - panel->backlight.max); if (cpu_mode) { drm_dbg_kms(&dev_priv->drm, "CPU backlight register was enabled, switching to PCH override\n"); /* Write converted CPU PWM value to PCH override register */ - lpt_set_backlight(connector->base.state, panel->backlight.level); + lpt_set_backlight(connector->base.state, val); intel_de_write(dev_priv, BLC_PWM_PCH_CTL1, pch_ctl1 | BLM_PCH_OVERRIDE_ENABLE); @@ -1667,6 +1664,10 @@ static int lpt_setup_backlight(struct intel_connector *connector, enum pipe unus cpu_ctl2 & ~BLM_PWM_ENABLE); } + val = intel_panel_compute_brightness(connector, val); + panel->backlight.level = clamp(val, panel->backlight.min, + panel->backlight.max); + return 0; } -- GitLab From d434ab6db524ab1efd0afad4ffa1ee65ca6ac097 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 11 Jan 2021 04:00:30 +0000 Subject: [PATCH 1675/1894] io_uring: drop mm and files after task_work_run __io_req_task_submit() run by task_work can set mm and files, but io_sq_thread() in some cases, and because __io_sq_thread_acquire_mm() and __io_sq_thread_acquire_files() do a simple current->mm/files check it may end up submitting IO with mm/files of another task. We also need to drop it after in the end to drop potentially grabbed references to them. Cc: stable@vger.kernel.org # 5.9+ Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 2f305c097bd5b..7af74c1ec9091 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7056,6 +7056,7 @@ static int io_sq_thread(void *data) if (sqt_spin || !time_after(jiffies, timeout)) { io_run_task_work(); + io_sq_thread_drop_mm_files(); cond_resched(); if (sqt_spin) timeout = jiffies + sqd->sq_thread_idle; @@ -7093,6 +7094,7 @@ static int io_sq_thread(void *data) } io_run_task_work(); + io_sq_thread_drop_mm_files(); if (cur_css) io_sq_thread_unassociate_blkcg(); -- GitLab From 621fadc22365f3cf307bcd9048e3372e9ee9cdcc Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 11 Jan 2021 04:00:31 +0000 Subject: [PATCH 1676/1894] io_uring: don't take files/mm for a dead task In rare cases a task may be exiting while io_ring_exit_work() trying to cancel/wait its requests. It's ok for __io_sq_thread_acquire_mm() because of SQPOLL check, but is not for __io_sq_thread_acquire_files(). Play safe and fail for both of them. Cc: stable@vger.kernel.org # 5.5+ Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 7af74c1ec9091..b0e6d8e607a33 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1106,6 +1106,9 @@ static void io_sq_thread_drop_mm_files(void) static int __io_sq_thread_acquire_files(struct io_ring_ctx *ctx) { + if (current->flags & PF_EXITING) + return -EFAULT; + if (!current->files) { struct files_struct *files; struct nsproxy *nsproxy; @@ -1133,6 +1136,8 @@ static int __io_sq_thread_acquire_mm(struct io_ring_ctx *ctx) { struct mm_struct *mm; + if (current->flags & PF_EXITING) + return -EFAULT; if (current->mm) return 0; -- GitLab From 2af5268180410b874fc06be91a1b2fbb22b1be0c Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 9 Dec 2020 17:39:52 +0200 Subject: [PATCH 1677/1894] drm/i915/icl: Fix initing the DSI DSC power refcount during HW readout For an enabled DSC during HW readout the corresponding power reference is taken along the CRTC power domain references in get_crtc_power_domains(). Remove the incorrect get ref from the DSI encoder hook. Fixes: 2b68392e638d ("drm/i915/dsi: add support for DSC") Cc: Vandita Kulkarni Cc: Jani Nikula Signed-off-by: Imre Deak Reviewed-by: Anshuman Gupta Link: https://patchwork.freedesktop.org/patch/msgid/20201209153952.3397959-1-imre.deak@intel.com (cherry picked from commit 3a9ec563a4ff770ae647f6ee539810f1866866c9) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/icl_dsi.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index a9439b4156037..b3533a32f8ba2 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -1616,10 +1616,6 @@ static void gen11_dsi_get_power_domains(struct intel_encoder *encoder, get_dsi_io_power_domains(i915, enc_to_intel_dsi(encoder)); - - if (crtc_state->dsc.compression_enable) - intel_display_power_get(i915, - intel_dsc_power_domain(crtc_state)); } static bool gen11_dsi_get_hw_state(struct intel_encoder *encoder, -- GitLab From a58015d638cd4e4555297b04bec9b49028369075 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 7 Jan 2021 23:23:48 -0800 Subject: [PATCH 1678/1894] ACPI: scan: Harden acpi_device_add() against device ID overflows Linux VM on Hyper-V crashes with the latest mainline: [ 4.069624] detected buffer overflow in strcpy [ 4.077733] kernel BUG at lib/string.c:1149! .. [ 4.085819] RIP: 0010:fortify_panic+0xf/0x11 ... [ 4.085819] Call Trace: [ 4.085819] acpi_device_add.cold.15+0xf2/0xfb [ 4.085819] acpi_add_single_object+0x2a6/0x690 [ 4.085819] acpi_bus_check_add+0xc6/0x280 [ 4.085819] acpi_ns_walk_namespace+0xda/0x1aa [ 4.085819] acpi_walk_namespace+0x9a/0xc2 [ 4.085819] acpi_bus_scan+0x78/0x90 [ 4.085819] acpi_scan_init+0xfa/0x248 [ 4.085819] acpi_init+0x2c1/0x321 [ 4.085819] do_one_initcall+0x44/0x1d0 [ 4.085819] kernel_init_freeable+0x1ab/0x1f4 This is because of the recent buffer overflow detection in the commit 6a39e62abbaf ("lib: string.h: detect intra-object overflow in fortified string functions") Here acpi_device_bus_id->bus_id can only hold 14 characters, while the the acpi_device_hid(device) returns a 22-char string "HYPER_V_GEN_COUNTER_V1". Per ACPI Spec v6.2, Section 6.1.5 _HID (Hardware ID), if the ID is a string, it must be of the form AAA#### or NNNN####, i.e. 7 chars or 8 chars. The field bus_id in struct acpi_device_bus_id was originally defined as char bus_id[9], and later was enlarged to char bus_id[15] in 2007 in the commit bb0958544f3c ("ACPI: use more understandable bus_id for ACPI devices") Fix the issue by changing the field bus_id to const char *, and use kstrdup_const() to initialize it. Signed-off-by: Dexuan Cui Tested-By: Jethro Beekman [ rjw: Subject change, whitespace adjustment ] Cc: All applicable Signed-off-by: Rafael J. Wysocki --- drivers/acpi/internal.h | 2 +- drivers/acpi/scan.c | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index cb229e24c5637..e6a5d997241c4 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -97,7 +97,7 @@ void acpi_scan_table_handler(u32 event, void *table, void *context); extern struct list_head acpi_bus_id_list; struct acpi_device_bus_id { - char bus_id[15]; + const char *bus_id; unsigned int instance_no; struct list_head node; }; diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 80b668c80073a..58ff36340cd7c 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -486,6 +486,7 @@ static void acpi_device_del(struct acpi_device *device) acpi_device_bus_id->instance_no--; else { list_del(&acpi_device_bus_id->node); + kfree_const(acpi_device_bus_id->bus_id); kfree(acpi_device_bus_id); } break; @@ -674,7 +675,14 @@ int acpi_device_add(struct acpi_device *device, } if (!found) { acpi_device_bus_id = new_bus_id; - strcpy(acpi_device_bus_id->bus_id, acpi_device_hid(device)); + acpi_device_bus_id->bus_id = + kstrdup_const(acpi_device_hid(device), GFP_KERNEL); + if (!acpi_device_bus_id->bus_id) { + pr_err(PREFIX "Memory allocation error for bus id\n"); + result = -ENOMEM; + goto err_free_new_bus_id; + } + acpi_device_bus_id->instance_no = 0; list_add_tail(&acpi_device_bus_id->node, &acpi_bus_id_list); } @@ -709,6 +717,11 @@ int acpi_device_add(struct acpi_device *device, if (device->parent) list_del(&device->node); list_del(&device->wakeup_list); + + err_free_new_bus_id: + if (!found) + kfree(new_bus_id); + mutex_unlock(&acpi_device_lock); err_detach: -- GitLab From 843010a815e87b45fc6b64848f02e42f6aee3f22 Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Mon, 11 Jan 2021 11:40:33 -0500 Subject: [PATCH 1679/1894] drm/ttm: Fix address passed to dma_mapping_error() in ttm_pool_map() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit check_unmap() is producing a warning about a missing map error check. The return value from dma_map_page() should be checked for an error, not the caller-provided dma_addr. Fixes: d099fc8f540a ("drm/ttm: new TT backend allocation pool v3") Signed-off-by: Jeremy Cline Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/413432/ Signed-off-by: Christian König --- drivers/gpu/drm/ttm/ttm_pool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index a00b7ab9c14cf..255c457349799 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -190,7 +190,7 @@ static int ttm_pool_map(struct ttm_pool *pool, unsigned int order, size_t size = (1ULL << order) * PAGE_SIZE; addr = dma_map_page(pool->dev, p, 0, size, DMA_BIDIRECTIONAL); - if (dma_mapping_error(pool->dev, **dma_addr)) + if (dma_mapping_error(pool->dev, addr)) return -EFAULT; } -- GitLab From 7bb83f6fc4ee84e95d0ac0d14452c2619fb3fe70 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 8 Jan 2021 13:19:38 +0900 Subject: [PATCH 1680/1894] tracing/kprobes: Do the notrace functions check without kprobes on ftrace Enable the notrace function check on the architecture which doesn't support kprobes on ftrace but support dynamic ftrace. This notrace function check is not only for the kprobes on ftrace but also sw-breakpoint based kprobes. Thus there is no reason to limit this check for the arch which supports kprobes on ftrace. This also changes the dependency of Kconfig. Because kprobe event uses the function tracer's address list for identifying notrace function, if the CONFIG_DYNAMIC_FTRACE=n, it can not check whether the target function is notrace or not. Link: https://lkml.kernel.org/r/20210105065730.2634785-1-naveen.n.rao@linux.vnet.ibm.com Link: https://lkml.kernel.org/r/161007957862.114704.4512260007555399463.stgit@devnote2 Cc: stable@vger.kernel.org Fixes: 45408c4f92506 ("tracing: kprobes: Prohibit probing on notrace function") Acked-by: Naveen N. Rao Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/Kconfig | 2 +- kernel/trace/trace_kprobe.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index d5a19413d4f8a..c1a62ae7e8128 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -538,7 +538,7 @@ config KPROBE_EVENTS config KPROBE_EVENTS_ON_NOTRACE bool "Do NOT protect notrace function from kprobe events" depends on KPROBE_EVENTS - depends on KPROBES_ON_FTRACE + depends on DYNAMIC_FTRACE default n help This is only for the developers who want to debug ftrace itself diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 9c31f42245e93..e6fba1798771b 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -434,7 +434,7 @@ static int disable_trace_kprobe(struct trace_event_call *call, return 0; } -#if defined(CONFIG_KPROBES_ON_FTRACE) && \ +#if defined(CONFIG_DYNAMIC_FTRACE) && \ !defined(CONFIG_KPROBE_EVENTS_ON_NOTRACE) static bool __within_notrace_func(unsigned long addr) { -- GitLab From a5e92ef3c3fd46320d4e293bdec0cdd4b80a6e0f Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Sun, 10 Jan 2021 03:11:42 +0100 Subject: [PATCH 1681/1894] drm: Check actual format for legacy pageflip. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With modifiers one can actually have different format_info structs for the same format, which now matters for AMDGPU since we convert implicit modifiers to explicit modifiers with multiple planes. I checked other drivers and it doesn't look like they end up triggering this case so I think this is safe to relax. Signed-off-by: Bas Nieuwenhuizen Reviewed-by: Daniel Vetter Reviewed-by: Zhan Liu Acked-by: Christian König Acked-by: Alex Deucher Fixes: 816853f9dc40 ("drm/amd/display: Set new format info for converted metadata.") Signed-off-by: Alex Deucher Link: https://patchwork.freedesktop.org/patch/msgid/20210110021142.28221-1-bas@basnieuwenhuizen.nl --- drivers/gpu/drm/drm_plane.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c index e6231947f9872..a0cb746bcb0a9 100644 --- a/drivers/gpu/drm/drm_plane.c +++ b/drivers/gpu/drm/drm_plane.c @@ -1163,7 +1163,14 @@ retry: if (ret) goto out; - if (old_fb->format != fb->format) { + /* + * Only check the FOURCC format code, excluding modifiers. This is + * enough for all legacy drivers. Atomic drivers have their own + * checks in their ->atomic_check implementation, which will + * return -EINVAL if any hw or driver constraint is violated due + * to modifier changes. + */ + if (old_fb->format->format != fb->format->format) { DRM_DEBUG_KMS("Page flip is not allowed to change frame buffer format.\n"); ret = -EINVAL; goto out; -- GitLab From 2896c93811e39d63a4d9b63ccf12a8fbc226e5e4 Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Tue, 3 Nov 2020 02:21:58 +0100 Subject: [PATCH 1682/1894] scsi: target: Fix XCOPY NAA identifier lookup When attempting to match EXTENDED COPY CSCD descriptors with corresponding se_devices, target_xcopy_locate_se_dev_e4() currently iterates over LIO's global devices list which includes all configured backstores. This change ensures that only initiator-accessible backstores are considered during CSCD descriptor lookup, according to the session's se_node_acl LUN list. To avoid LUN removal race conditions, device pinning is changed from being configfs based to instead using the se_node_acl lun_ref. Reference: CVE-2020-28374 Fixes: cbf031f425fd ("target: Add support for EXTENDED_COPY copy offload emulation") Reviewed-by: Lee Duncan Signed-off-by: David Disseldorp Signed-off-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/target/target_core_xcopy.c | 119 +++++++++++++++++------------ drivers/target/target_core_xcopy.h | 1 + 2 files changed, 71 insertions(+), 49 deletions(-) diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index 44e15d7fb2f09..66d6f1d06f219 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -46,60 +46,83 @@ static int target_xcopy_gen_naa_ieee(struct se_device *dev, unsigned char *buf) return 0; } -struct xcopy_dev_search_info { - const unsigned char *dev_wwn; - struct se_device *found_dev; -}; - +/** + * target_xcopy_locate_se_dev_e4_iter - compare XCOPY NAA device identifiers + * + * @se_dev: device being considered for match + * @dev_wwn: XCOPY requested NAA dev_wwn + * @return: 1 on match, 0 on no-match + */ static int target_xcopy_locate_se_dev_e4_iter(struct se_device *se_dev, - void *data) + const unsigned char *dev_wwn) { - struct xcopy_dev_search_info *info = data; unsigned char tmp_dev_wwn[XCOPY_NAA_IEEE_REGEX_LEN]; int rc; - if (!se_dev->dev_attrib.emulate_3pc) + if (!se_dev->dev_attrib.emulate_3pc) { + pr_debug("XCOPY: emulate_3pc disabled on se_dev %p\n", se_dev); return 0; + } memset(&tmp_dev_wwn[0], 0, XCOPY_NAA_IEEE_REGEX_LEN); target_xcopy_gen_naa_ieee(se_dev, &tmp_dev_wwn[0]); - rc = memcmp(&tmp_dev_wwn[0], info->dev_wwn, XCOPY_NAA_IEEE_REGEX_LEN); - if (rc != 0) - return 0; - - info->found_dev = se_dev; - pr_debug("XCOPY 0xe4: located se_dev: %p\n", se_dev); - - rc = target_depend_item(&se_dev->dev_group.cg_item); + rc = memcmp(&tmp_dev_wwn[0], dev_wwn, XCOPY_NAA_IEEE_REGEX_LEN); if (rc != 0) { - pr_err("configfs_depend_item attempt failed: %d for se_dev: %p\n", - rc, se_dev); - return rc; + pr_debug("XCOPY: skip non-matching: %*ph\n", + XCOPY_NAA_IEEE_REGEX_LEN, tmp_dev_wwn); + return 0; } + pr_debug("XCOPY 0xe4: located se_dev: %p\n", se_dev); - pr_debug("Called configfs_depend_item for se_dev: %p se_dev->se_dev_group: %p\n", - se_dev, &se_dev->dev_group); return 1; } -static int target_xcopy_locate_se_dev_e4(const unsigned char *dev_wwn, - struct se_device **found_dev) +static int target_xcopy_locate_se_dev_e4(struct se_session *sess, + const unsigned char *dev_wwn, + struct se_device **_found_dev, + struct percpu_ref **_found_lun_ref) { - struct xcopy_dev_search_info info; - int ret; - - memset(&info, 0, sizeof(info)); - info.dev_wwn = dev_wwn; - - ret = target_for_each_device(target_xcopy_locate_se_dev_e4_iter, &info); - if (ret == 1) { - *found_dev = info.found_dev; - return 0; - } else { - pr_debug_ratelimited("Unable to locate 0xe4 descriptor for EXTENDED_COPY\n"); - return -EINVAL; + struct se_dev_entry *deve; + struct se_node_acl *nacl; + struct se_lun *this_lun = NULL; + struct se_device *found_dev = NULL; + + /* cmd with NULL sess indicates no associated $FABRIC_MOD */ + if (!sess) + goto err_out; + + pr_debug("XCOPY 0xe4: searching for: %*ph\n", + XCOPY_NAA_IEEE_REGEX_LEN, dev_wwn); + + nacl = sess->se_node_acl; + rcu_read_lock(); + hlist_for_each_entry_rcu(deve, &nacl->lun_entry_hlist, link) { + struct se_device *this_dev; + int rc; + + this_lun = rcu_dereference(deve->se_lun); + this_dev = rcu_dereference_raw(this_lun->lun_se_dev); + + rc = target_xcopy_locate_se_dev_e4_iter(this_dev, dev_wwn); + if (rc) { + if (percpu_ref_tryget_live(&this_lun->lun_ref)) + found_dev = this_dev; + break; + } } + rcu_read_unlock(); + if (found_dev == NULL) + goto err_out; + + pr_debug("lun_ref held for se_dev: %p se_dev->se_dev_group: %p\n", + found_dev, &found_dev->dev_group); + *_found_dev = found_dev; + *_found_lun_ref = &this_lun->lun_ref; + return 0; +err_out: + pr_debug_ratelimited("Unable to locate 0xe4 descriptor for EXTENDED_COPY\n"); + return -EINVAL; } static int target_xcopy_parse_tiddesc_e4(struct se_cmd *se_cmd, struct xcopy_op *xop, @@ -246,12 +269,16 @@ static int target_xcopy_parse_target_descriptors(struct se_cmd *se_cmd, switch (xop->op_origin) { case XCOL_SOURCE_RECV_OP: - rc = target_xcopy_locate_se_dev_e4(xop->dst_tid_wwn, - &xop->dst_dev); + rc = target_xcopy_locate_se_dev_e4(se_cmd->se_sess, + xop->dst_tid_wwn, + &xop->dst_dev, + &xop->remote_lun_ref); break; case XCOL_DEST_RECV_OP: - rc = target_xcopy_locate_se_dev_e4(xop->src_tid_wwn, - &xop->src_dev); + rc = target_xcopy_locate_se_dev_e4(se_cmd->se_sess, + xop->src_tid_wwn, + &xop->src_dev, + &xop->remote_lun_ref); break; default: pr_err("XCOPY CSCD descriptor IDs not found in CSCD list - " @@ -391,18 +418,12 @@ static int xcopy_pt_get_cmd_state(struct se_cmd *se_cmd) static void xcopy_pt_undepend_remotedev(struct xcopy_op *xop) { - struct se_device *remote_dev; - if (xop->op_origin == XCOL_SOURCE_RECV_OP) - remote_dev = xop->dst_dev; + pr_debug("putting dst lun_ref for %p\n", xop->dst_dev); else - remote_dev = xop->src_dev; - - pr_debug("Calling configfs_undepend_item for" - " remote_dev: %p remote_dev->dev_group: %p\n", - remote_dev, &remote_dev->dev_group.cg_item); + pr_debug("putting src lun_ref for %p\n", xop->src_dev); - target_undepend_item(&remote_dev->dev_group.cg_item); + percpu_ref_put(xop->remote_lun_ref); } static void xcopy_pt_release_cmd(struct se_cmd *se_cmd) diff --git a/drivers/target/target_core_xcopy.h b/drivers/target/target_core_xcopy.h index c56a1bde9417b..e5f20005179a8 100644 --- a/drivers/target/target_core_xcopy.h +++ b/drivers/target/target_core_xcopy.h @@ -27,6 +27,7 @@ struct xcopy_op { struct se_device *dst_dev; unsigned char dst_tid_wwn[XCOPY_NAA_IEEE_REGEX_LEN]; unsigned char local_dev_wwn[XCOPY_NAA_IEEE_REGEX_LEN]; + struct percpu_ref *remote_lun_ref; sector_t src_lba; sector_t dst_lba; -- GitLab From 938288349ca8a9d4b936bf5d2f6dd4526a598974 Mon Sep 17 00:00:00 2001 From: Seb Laveze Date: Mon, 11 Jan 2021 09:14:07 +0100 Subject: [PATCH 1683/1894] dt-bindings: net: dwmac: fix queue priority documentation The priority field is not the queue priority (queue priority is fixed) but a bitmask of priorities assigned to this queue. In receive, priorities relate to tagged frames priorities. In transmit, priorities relate to PFC frames. Signed-off-by: Seb Laveze Link: https://lore.kernel.org/r/20210111081406.1348622-1-sebastien.laveze@oss.nxp.com Signed-off-by: Jakub Kicinski --- Documentation/devicetree/bindings/net/snps,dwmac.yaml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml index b2f6083f556af..dfbf5fe4547ab 100644 --- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml +++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml @@ -161,7 +161,8 @@ properties: * snps,route-dcbcp, DCB Control Packets * snps,route-up, Untagged Packets * snps,route-multi-broad, Multicast & Broadcast Packets - * snps,priority, RX queue priority (Range 0x0 to 0xF) + * snps,priority, bitmask of the tagged frames priorities assigned to + the queue snps,mtl-tx-config: $ref: /schemas/types.yaml#/definitions/phandle @@ -188,7 +189,10 @@ properties: * snps,idle_slope, unlock on WoL * snps,high_credit, max write outstanding req. limit * snps,low_credit, max read outstanding req. limit - * snps,priority, TX queue priority (Range 0x0 to 0xF) + * snps,priority, bitmask of the priorities assigned to the queue. + When a PFC frame is received with priorities matching the bitmask, + the queue is blocked from transmitting for the pause time specified + in the PFC frame. snps,reset-gpio: deprecated: true -- GitLab From 6f83802a1a06e74eafbdbc9b52c05516d3083d02 Mon Sep 17 00:00:00 2001 From: Stefan Chulski Date: Sun, 10 Jan 2021 21:23:02 +0200 Subject: [PATCH 1684/1894] net: mvpp2: Remove Pause and Asym_Pause support Packet Processor hardware not connected to MAC flow control unit and cannot support TX flow control. This patch disable flow control support. Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit") Signed-off-by: Stefan Chulski Acked-by: Marcin Wojtas Link: https://lore.kernel.org/r/1610306582-16641-1-git-send-email-stefanc@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 4b1808acef581..358119d983582 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -5882,8 +5882,6 @@ static void mvpp2_phylink_validate(struct phylink_config *config, phylink_set(mask, Autoneg); phylink_set_port_modes(mask); - phylink_set(mask, Pause); - phylink_set(mask, Asym_Pause); switch (state->interface) { case PHY_INTERFACE_MODE_10GBASER: -- GitLab From e56b3d94d939f52d46209b9e1b6700c5bfff3123 Mon Sep 17 00:00:00 2001 From: Andrey Zhizhikin Date: Fri, 8 Jan 2021 09:58:39 +0000 Subject: [PATCH 1685/1894] rndis_host: set proper input size for OID_GEN_PHYSICAL_MEDIUM request MSFT ActiveSync implementation requires that the size of the response for incoming query is to be provided in the request input length. Failure to set the input size proper results in failed request transfer, where the ActiveSync counterpart reports the NDIS_STATUS_INVALID_LENGTH (0xC0010014L) error. Set the input size for OID_GEN_PHYSICAL_MEDIUM query to the expected size of the response in order for the ActiveSync to properly respond to the request. Fixes: 039ee17d1baa ("rndis_host: Add RNDIS physical medium checking into generic_rndis_bind()") Signed-off-by: Andrey Zhizhikin Link: https://lore.kernel.org/r/20210108095839.3335-1-andrey.zhizhikin@leica-geosystems.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/rndis_host.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c index 6609d21ef8942..f813ca9dec531 100644 --- a/drivers/net/usb/rndis_host.c +++ b/drivers/net/usb/rndis_host.c @@ -387,7 +387,7 @@ generic_rndis_bind(struct usbnet *dev, struct usb_interface *intf, int flags) reply_len = sizeof *phym; retval = rndis_query(dev, intf, u.buf, RNDIS_OID_GEN_PHYSICAL_MEDIUM, - 0, (void **) &phym, &reply_len); + reply_len, (void **)&phym, &reply_len); if (retval != 0 || !phym) { /* OID is optional so don't fail here. */ phym_unspec = cpu_to_le32(RNDIS_PHYSICAL_MEDIUM_UNSPECIFIED); -- GitLab From 29766bcffad03da66892bef82674883e31f78fec Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sat, 9 Jan 2021 17:18:32 -0500 Subject: [PATCH 1686/1894] net: support kmap_local forced debugging in skb_frag_foreach Skb frags may be backed by highmem and/or compound pages. Highmem pages need kmap_atomic mappings to access. But kmap_atomic maps a single page, not the entire compound page. skb_foreach_page iterates over an skb frag, in one step in the common case, page by page only if kmap_atomic must be called for each page. The decision logic is captured in skb_frag_must_loop. CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP extends kmap from highmem to all pages, to increase code coverage. Extend skb_frag_must_loop to this new condition. Link: https://lore.kernel.org/linux-mm/20210106180132.41dc249d@gandalf.local.home/ Fixes: 0e91a0c6984c ("mm/highmem: Provide CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP") Reported-by: Steven Rostedt (VMware) Signed-off-by: Linus Torvalds Signed-off-by: Willem de Bruijn Tested-by: Steven Rostedt (VMware) Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 333bcdc39635b..c858adfb5a825 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -366,7 +366,7 @@ static inline void skb_frag_size_sub(skb_frag_t *frag, int delta) static inline bool skb_frag_must_loop(struct page *p) { #if defined(CONFIG_HIGHMEM) - if (PageHighMem(p)) + if (IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP) || PageHighMem(p)) return true; #endif return false; -- GitLab From 97550f6fa59254435d864b92603de3ca4b5a99f8 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sat, 9 Jan 2021 17:18:33 -0500 Subject: [PATCH 1687/1894] net: compound page support in skb_seq_read skb_seq_read iterates over an skb, returning pointer and length of the next data range with each call. It relies on kmap_atomic to access highmem pages when needed. An skb frag may be backed by a compound page, but kmap_atomic maps only a single page. There are not enough kmap slots to always map all pages concurrently. Instead, if kmap_atomic is needed, iterate over each page. As this increases the number of calls, avoid this unless needed. The necessary condition is captured in skb_frag_must_loop. I tried to make the change as obvious as possible. It should be easy to verify that nothing changes if skb_frag_must_loop returns false. Tested: On an x86 platform with CONFIG_HIGHMEM=y CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP=y CONFIG_NETFILTER_XT_MATCH_STRING=y Run ip link set dev lo mtu 1500 iptables -A OUTPUT -m string --string 'badstring' -algo bm -j ACCEPT dd if=/dev/urandom of=in bs=1M count=20 nc -l -p 8000 > /dev/null & nc -w 1 -q 0 localhost 8000 < in Signed-off-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 1 + net/core/skbuff.c | 28 +++++++++++++++++++++++----- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c858adfb5a825..5f60c9e907c9d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1203,6 +1203,7 @@ struct skb_seq_state { struct sk_buff *root_skb; struct sk_buff *cur_skb; __u8 *frag_data; + __u32 frag_off; }; void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from, diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b6f2b520a9b74..0da035c1e53f1 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3442,6 +3442,7 @@ void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from, st->root_skb = st->cur_skb = skb; st->frag_idx = st->stepped_offset = 0; st->frag_data = NULL; + st->frag_off = 0; } EXPORT_SYMBOL(skb_prepare_seq_read); @@ -3496,14 +3497,27 @@ next_skb: st->stepped_offset += skb_headlen(st->cur_skb); while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) { + unsigned int pg_idx, pg_off, pg_sz; + frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx]; - block_limit = skb_frag_size(frag) + st->stepped_offset; + pg_idx = 0; + pg_off = skb_frag_off(frag); + pg_sz = skb_frag_size(frag); + + if (skb_frag_must_loop(skb_frag_page(frag))) { + pg_idx = (pg_off + st->frag_off) >> PAGE_SHIFT; + pg_off = offset_in_page(pg_off + st->frag_off); + pg_sz = min_t(unsigned int, pg_sz - st->frag_off, + PAGE_SIZE - pg_off); + } + + block_limit = pg_sz + st->stepped_offset; if (abs_offset < block_limit) { if (!st->frag_data) - st->frag_data = kmap_atomic(skb_frag_page(frag)); + st->frag_data = kmap_atomic(skb_frag_page(frag) + pg_idx); - *data = (u8 *) st->frag_data + skb_frag_off(frag) + + *data = (u8 *)st->frag_data + pg_off + (abs_offset - st->stepped_offset); return block_limit - abs_offset; @@ -3514,8 +3528,12 @@ next_skb: st->frag_data = NULL; } - st->frag_idx++; - st->stepped_offset += skb_frag_size(frag); + st->stepped_offset += pg_sz; + st->frag_off += pg_sz; + if (st->frag_off == skb_frag_size(frag)) { + st->frag_off = 0; + st->frag_idx++; + } } if (st->frag_data) { -- GitLab From 9bd6b629c39e3fa9e14243a6d8820492be1a5b2e Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sat, 9 Jan 2021 17:18:34 -0500 Subject: [PATCH 1688/1894] esp: avoid unneeded kmap_atomic call esp(6)_output_head uses skb_page_frag_refill to allocate a buffer for the esp trailer. It accesses the page with kmap_atomic to handle highmem. But skb_page_frag_refill can return compound pages, of which kmap_atomic only maps the first underlying page. skb_page_frag_refill does not return highmem, because flag __GFP_HIGHMEM is not set. ESP uses it in the same manner as TCP. That also does not call kmap_atomic, but directly uses page_address, in skb_copy_to_page_nocache. Do the same for ESP. This issue has become easier to trigger with recent kmap local debugging feature CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP. Fixes: cac2661c53f3 ("esp4: Avoid skb_cow_data whenever possible") Fixes: 03e2a30f6a27 ("esp6: Avoid skb_cow_data whenever possible") Signed-off-by: Willem de Bruijn Acked-by: Steffen Klassert Signed-off-by: Jakub Kicinski --- net/ipv4/esp4.c | 7 +------ net/ipv6/esp6.c | 7 +------ 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 8b07f3a4f2db2..a3271ec3e1627 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -443,7 +443,6 @@ static int esp_output_encap(struct xfrm_state *x, struct sk_buff *skb, int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp) { u8 *tail; - u8 *vaddr; int nfrags; int esph_offset; struct page *page; @@ -485,14 +484,10 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * page = pfrag->page; get_page(page); - vaddr = kmap_atomic(page); - - tail = vaddr + pfrag->offset; + tail = page_address(page) + pfrag->offset; esp_output_fill_trailer(tail, esp->tfclen, esp->plen, esp->proto); - kunmap_atomic(vaddr); - nfrags = skb_shinfo(skb)->nr_frags; __skb_fill_page_desc(skb, nfrags, page, pfrag->offset, diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 52c2f063529fb..2b804fcebcc65 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -478,7 +478,6 @@ static int esp6_output_encap(struct xfrm_state *x, struct sk_buff *skb, int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp) { u8 *tail; - u8 *vaddr; int nfrags; int esph_offset; struct page *page; @@ -519,14 +518,10 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info page = pfrag->page; get_page(page); - vaddr = kmap_atomic(page); - - tail = vaddr + pfrag->offset; + tail = page_address(page) + pfrag->offset; esp_output_fill_trailer(tail, esp->tfclen, esp->plen, esp->proto); - kunmap_atomic(vaddr); - nfrags = skb_shinfo(skb)->nr_frags; __skb_fill_page_desc(skb, nfrags, page, pfrag->offset, -- GitLab From 2225a8dda263edc35a0e8b858fe2945cf6240fde Mon Sep 17 00:00:00 2001 From: Ariel Marcovitch Date: Sat, 2 Jan 2021 22:11:56 +0200 Subject: [PATCH 1689/1894] powerpc: Fix alignment bug within the init sections This is a bug that causes early crashes in builds with an .exit.text section smaller than a page and an .init.text section that ends in the beginning of a physical page (this is kinda random, which might explain why this wasn't really encountered before). The init sections are ordered like this: .init.text .exit.text .init.data Currently, these sections aren't page aligned. Because the init code might become read-only at runtime and because the .init.text section can potentially reside on the same physical page as .init.data, the beginning of .init.data might be mapped read-only along with .init.text. Then when the kernel tries to modify a variable in .init.data (like kthreadd_done, used in kernel_init()) the kernel panics. To avoid this, make _einittext page aligned and also align .exit.text to make sure .init.data is always seperated from the text segments. Fixes: 060ef9d89d18 ("powerpc32: PAGE_EXEC required for inittext") Signed-off-by: Ariel Marcovitch Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210102201156.10805-1-ariel.marcovitch@gmail.com --- arch/powerpc/kernel/vmlinux.lds.S | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 8e0b1298bf19b..4ab426b8b0e02 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -187,6 +187,12 @@ SECTIONS .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { _sinittext = .; INIT_TEXT + + /* + *.init.text might be RO so we must ensure this section ends on + * a page boundary. + */ + . = ALIGN(PAGE_SIZE); _einittext = .; #ifdef CONFIG_PPC64 *(.tramp.ftrace.init); @@ -200,6 +206,8 @@ SECTIONS EXIT_TEXT } + . = ALIGN(PAGE_SIZE); + INIT_DATA_SECTION(16) . = ALIGN(8); -- GitLab From 3e096a2112b7b407549020cf095e2a425f00fabb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Fri, 1 Jan 2021 23:19:42 +0100 Subject: [PATCH 1690/1894] ALSA: doc: Fix reference to mixart.rst MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MIXART.txt has been converted to ReST and renamed. Fix the reference in alsa-configuration.rst. Fixes: 3d8e81862ce4 ("ALSA: doc: ReSTize MIXART.txt") Signed-off-by: Jonathan Neuschäfer Cc: Link: https://lore.kernel.org/r/20210101221942.1068388-1-j.neuschaefer@gmx.net Signed-off-by: Takashi Iwai --- Documentation/sound/alsa-configuration.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/sound/alsa-configuration.rst b/Documentation/sound/alsa-configuration.rst index fe52c314b7639..b36af65a08edf 100644 --- a/Documentation/sound/alsa-configuration.rst +++ b/Documentation/sound/alsa-configuration.rst @@ -1501,7 +1501,7 @@ Module for Digigram miXart8 sound cards. This module supports multiple cards. Note: One miXart8 board will be represented as 4 alsa cards. -See MIXART.txt for details. +See Documentation/sound/cards/mixart.rst for details. When the driver is compiled as a module and the hotplug firmware is supported, the firmware data is loaded via hotplug automatically. -- GitLab From bb52cb0dec8d2fecdb22843a805131478a180728 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 11 Jan 2021 14:44:57 +0100 Subject: [PATCH 1691/1894] drm/ttm: make the pool shrinker lock a mutex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit set_pages_wb() might sleep and so we can't do this in an atomic context. Signed-off-by: Christian König Reported-by: Mikhail Gavrilov Tested-by: Mikhail Gavrilov Fixes: d099fc8f540a ("drm/ttm: new TT backend allocation pool v3") Reviewed-by: Huang Rui Link: https://patchwork.freedesktop.org/patch/413409/ --- drivers/gpu/drm/ttm/ttm_pool.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 255c457349799..8cd776adc592b 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -66,7 +66,7 @@ static struct ttm_pool_type global_uncached[MAX_ORDER]; static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER]; static struct ttm_pool_type global_dma32_uncached[MAX_ORDER]; -static spinlock_t shrinker_lock; +static struct mutex shrinker_lock; static struct list_head shrinker_list; static struct shrinker mm_shrinker; @@ -249,9 +249,9 @@ static void ttm_pool_type_init(struct ttm_pool_type *pt, struct ttm_pool *pool, spin_lock_init(&pt->lock); INIT_LIST_HEAD(&pt->pages); - spin_lock(&shrinker_lock); + mutex_lock(&shrinker_lock); list_add_tail(&pt->shrinker_list, &shrinker_list); - spin_unlock(&shrinker_lock); + mutex_unlock(&shrinker_lock); } /* Remove a pool_type from the global shrinker list and free all pages */ @@ -259,9 +259,9 @@ static void ttm_pool_type_fini(struct ttm_pool_type *pt) { struct page *p, *tmp; - spin_lock(&shrinker_lock); + mutex_lock(&shrinker_lock); list_del(&pt->shrinker_list); - spin_unlock(&shrinker_lock); + mutex_unlock(&shrinker_lock); list_for_each_entry_safe(p, tmp, &pt->pages, lru) ttm_pool_free_page(pt->pool, pt->caching, pt->order, p); @@ -302,7 +302,7 @@ static unsigned int ttm_pool_shrink(void) unsigned int num_freed; struct page *p; - spin_lock(&shrinker_lock); + mutex_lock(&shrinker_lock); pt = list_first_entry(&shrinker_list, typeof(*pt), shrinker_list); p = ttm_pool_type_take(pt); @@ -314,7 +314,7 @@ static unsigned int ttm_pool_shrink(void) } list_move_tail(&pt->shrinker_list, &shrinker_list); - spin_unlock(&shrinker_lock); + mutex_unlock(&shrinker_lock); return num_freed; } @@ -564,7 +564,7 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m) { unsigned int i; - spin_lock(&shrinker_lock); + mutex_lock(&shrinker_lock); seq_puts(m, "\t "); for (i = 0; i < MAX_ORDER; ++i) @@ -600,7 +600,7 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m) seq_printf(m, "\ntotal\t: %8lu of %8lu\n", atomic_long_read(&allocated_pages), page_pool_size); - spin_unlock(&shrinker_lock); + mutex_unlock(&shrinker_lock); return 0; } @@ -644,7 +644,7 @@ int ttm_pool_mgr_init(unsigned long num_pages) if (!page_pool_size) page_pool_size = num_pages; - spin_lock_init(&shrinker_lock); + mutex_init(&shrinker_lock); INIT_LIST_HEAD(&shrinker_list); for (i = 0; i < MAX_ORDER; ++i) { -- GitLab From f4eccc7fea203cfb35205891eced1ab51836f362 Mon Sep 17 00:00:00 2001 From: Peter Geis Date: Fri, 8 Jan 2021 13:59:12 +0000 Subject: [PATCH 1692/1894] clk: tegra30: Add hda clock default rates to clock driver Current implementation defaults the hda clocks to clk_m. This causes hda to run too slow to operate correctly. Fix this by defaulting to pll_p and setting the frequency to the correct rate. This matches upstream t124 and downstream t30. Acked-by: Jon Hunter Tested-by: Ion Agorria Acked-by: Sameer Pujar Acked-by: Thierry Reding Signed-off-by: Peter Geis Link: https://lore.kernel.org/r/20210108135913.2421585-2-pgwipeout@gmail.com Signed-off-by: Takashi Iwai --- drivers/clk/tegra/clk-tegra30.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/tegra/clk-tegra30.c b/drivers/clk/tegra/clk-tegra30.c index 37244a7e68c22..9cf249c344d9e 100644 --- a/drivers/clk/tegra/clk-tegra30.c +++ b/drivers/clk/tegra/clk-tegra30.c @@ -1256,6 +1256,8 @@ static struct tegra_clk_init_table init_table[] __initdata = { { TEGRA30_CLK_I2S3_SYNC, TEGRA30_CLK_CLK_MAX, 24000000, 0 }, { TEGRA30_CLK_I2S4_SYNC, TEGRA30_CLK_CLK_MAX, 24000000, 0 }, { TEGRA30_CLK_VIMCLK_SYNC, TEGRA30_CLK_CLK_MAX, 24000000, 0 }, + { TEGRA30_CLK_HDA, TEGRA30_CLK_PLL_P, 102000000, 0 }, + { TEGRA30_CLK_HDA2CODEC_2X, TEGRA30_CLK_PLL_P, 48000000, 0 }, /* must be the last entry */ { TEGRA30_CLK_CLK_MAX, TEGRA30_CLK_CLK_MAX, 0, 0 }, }; -- GitLab From 615d435400435876ac68c1de37e9526a9164eaec Mon Sep 17 00:00:00 2001 From: Peter Geis Date: Fri, 8 Jan 2021 13:59:13 +0000 Subject: [PATCH 1693/1894] ALSA: hda/tegra: fix tegra-hda on tegra30 soc Currently hda on tegra30 fails to open a stream with an input/output error. For example: speaker-test -Dhw:0,3 -c 2 speaker-test 1.2.2 Playback device is hw:0,3 Stream parameters are 48000Hz, S16_LE, 2 channels Using 16 octaves of pink noise Rate set to 48000Hz (requested 48000Hz) Buffer size range from 64 to 16384 Period size range from 32 to 8192 Using max buffer size 16384 Periods = 4 was set period_size = 4096 was set buffer_size = 16384 0 - Front Left Write error: -5,Input/output error xrun_recovery failed: -5,Input/output error Transfer failed: Input/output error The tegra-hda device was introduced in tegra30 but only utilized in tegra124 until recent chips. Tegra210/186 work only due to a hardware change. For this reason it is unknown when this issue first manifested. Discussions with the hardware team show this applies to all current tegra chips. It has been resolved in the tegra234, which does not have hda support at this time. The explanation from the hardware team is this: Below is the striping formula referenced from HD audio spec. { ((num_channels * bits_per_sample) / number of SDOs) >= 8 } The current issue is seen because Tegra HW has a problem with boundary condition (= 8) for striping. The reason why it is not seen on Tegra210/Tegra186 is because it uses max 2SDO lines. Max SDO lines is read from GCAP register. For the given stream (channels = 2, bps = 16); ratio = (channels * bps) / NSDO = 32 / NSDO; On Tegra30, ratio = 32/4 = 8 (FAIL) On Tegra210/186, ratio = 32/2 = 16 (PASS) On Tegra194, ratio = 32/4 = 8 (FAIL) ==> Earlier workaround was applied for it If Tegra210/186 is forced to use 4SDO, it fails there as well. So the behavior is consistent across all these chips. Applying the fix in [1] universally resolves this issue on tegra30-hda. Tested on the Ouya game console and the tf201 tablet. [1] commit 60019d8c650d ("ALSA: hda/tegra: workaround playback failure on Tegra194") Reviewed-by: Jon Hunter Tested-by: Ion Agorria Reviewed-by: Sameer Pujar Acked-by: Thierry Reding Signed-off-by: Peter Geis Link: https://lore.kernel.org/r/20210108135913.2421585-3-pgwipeout@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_tegra.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_tegra.c b/sound/pci/hda/hda_tegra.c index 70164d1428d40..361cf2041911a 100644 --- a/sound/pci/hda/hda_tegra.c +++ b/sound/pci/hda/hda_tegra.c @@ -388,7 +388,7 @@ static int hda_tegra_first_init(struct azx *chip, struct platform_device *pdev) * in powers of 2, next available ratio is 16 which can be * used as a limiting factor here. */ - if (of_device_is_compatible(np, "nvidia,tegra194-hda")) + if (of_device_is_compatible(np, "nvidia,tegra30-hda")) chip->bus.core.sdo_limit = 16; /* codec detection */ -- GitLab From e7c22eeaff8565d9a8374f320238c251ca31480b Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 11 Jan 2021 14:02:50 +0100 Subject: [PATCH 1694/1894] ALSA: fireface: Fix integer overflow in transmit_midi_msg() As snd_ff.rx_bytes[] is unsigned int, and NSEC_PER_SEC is 1000000000L, the second multiplication in ff->rx_bytes[port] * 8 * NSEC_PER_SEC / 31250 always overflows on 32-bit platforms, truncating the result. Fix this by precalculating "NSEC_PER_SEC / 31250", which is an integer constant. Note that this assumes ff->rx_bytes[port] <= 16777. Fixes: 19174295788de77d ("ALSA: fireface: add transaction support") Reviewed-by: Takashi Sakamoto Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20210111130251.361335-2-geert+renesas@glider.be Signed-off-by: Takashi Iwai --- sound/firewire/fireface/ff-transaction.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/firewire/fireface/ff-transaction.c b/sound/firewire/fireface/ff-transaction.c index 7f82762ccc8c8..ee7122c461d46 100644 --- a/sound/firewire/fireface/ff-transaction.c +++ b/sound/firewire/fireface/ff-transaction.c @@ -88,7 +88,7 @@ static void transmit_midi_msg(struct snd_ff *ff, unsigned int port) /* Set interval to next transaction. */ ff->next_ktime[port] = ktime_add_ns(ktime_get(), - ff->rx_bytes[port] * 8 * NSEC_PER_SEC / 31250); + ff->rx_bytes[port] * 8 * (NSEC_PER_SEC / 31250)); if (quad_count == 1) tcode = TCODE_WRITE_QUADLET_REQUEST; -- GitLab From 9f65df9c589f249435255da37a5dd11f1bc86f4d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 11 Jan 2021 14:02:51 +0100 Subject: [PATCH 1695/1894] ALSA: firewire-tascam: Fix integer overflow in midi_port_work() As snd_fw_async_midi_port.consume_bytes is unsigned int, and NSEC_PER_SEC is 1000000000L, the second multiplication in port->consume_bytes * 8 * NSEC_PER_SEC / 31250 always overflows on 32-bit platforms, truncating the result. Fix this by precalculating "NSEC_PER_SEC / 31250", which is an integer constant. Note that this assumes port->consume_bytes <= 16777. Fixes: 531f471834227d03 ("ALSA: firewire-lib/firewire-tascam: localize async midi port") Reviewed-by: Takashi Sakamoto Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20210111130251.361335-3-geert+renesas@glider.be Signed-off-by: Takashi Iwai --- sound/firewire/tascam/tascam-transaction.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/firewire/tascam/tascam-transaction.c b/sound/firewire/tascam/tascam-transaction.c index 90288b4b46379..a073cece4a7d5 100644 --- a/sound/firewire/tascam/tascam-transaction.c +++ b/sound/firewire/tascam/tascam-transaction.c @@ -209,7 +209,7 @@ static void midi_port_work(struct work_struct *work) /* Set interval to next transaction. */ port->next_ktime = ktime_add_ns(ktime_get(), - port->consume_bytes * 8 * NSEC_PER_SEC / 31250); + port->consume_bytes * 8 * (NSEC_PER_SEC / 31250)); /* Start this transaction. */ port->idling = false; -- GitLab From 20c7842ed8374e1c3ee750b2fe7ca8cdd071bda6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 5 Jan 2021 12:52:45 -0500 Subject: [PATCH 1696/1894] ALSA: hda/hdmi - enable runtime pm for CI AMD display audio We are able to power down the GPU and audio via the GPU driver so flag these asics as supporting runtime pm. Reviewed-by: Evan Quan Signed-off-by: Alex Deucher Link: https://lore.kernel.org/r/20210105175245.963451-1-alexander.deucher@amd.com Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 770ad25f1907c..e4dd2ff5e4737 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2598,7 +2598,8 @@ static const struct pci_device_id azx_ids[] = { .driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_AMD_SB }, /* ATI HDMI */ { PCI_DEVICE(0x1002, 0x0002), - .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS }, + .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS | + AZX_DCAPS_PM_RUNTIME }, { PCI_DEVICE(0x1002, 0x1308), .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS }, { PCI_DEVICE(0x1002, 0x157a), @@ -2660,9 +2661,11 @@ static const struct pci_device_id azx_ids[] = { { PCI_DEVICE(0x1002, 0xaab0), .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS }, { PCI_DEVICE(0x1002, 0xaac0), - .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS }, + .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS | + AZX_DCAPS_PM_RUNTIME }, { PCI_DEVICE(0x1002, 0xaac8), - .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS }, + .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS | + AZX_DCAPS_PM_RUNTIME }, { PCI_DEVICE(0x1002, 0xaad8), .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS | AZX_DCAPS_PM_RUNTIME }, -- GitLab From 2d6ffc63f12417b979955a5b22ad9a76d2af5de9 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 31 Dec 2020 08:53:20 +0800 Subject: [PATCH 1697/1894] iommu/vt-d: Fix unaligned addresses for intel_flush_svm_range_dev() The VT-d hardware will ignore those Addr bits which have been masked by the AM field in the PASID-based-IOTLB invalidation descriptor. As the result, if the starting address in the descriptor is not aligned with the address mask, some IOTLB caches might not invalidate. Hence people will see below errors. [ 1093.704661] dmar_fault: 29 callbacks suppressed [ 1093.704664] DMAR: DRHD: handling fault status reg 3 [ 1093.712738] DMAR: [DMA Read] Request device [7a:02.0] PASID 2 fault addr 7f81c968d000 [fault reason 113] SM: Present bit in first-level paging entry is clear Fix this by using aligned address for PASID-based-IOTLB invalidation. Fixes: 1c4f88b7f1f9 ("iommu/vt-d: Shared virtual address in scalable mode") Reported-and-tested-by: Guo Kaijie Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20201231005323.2178523-2-baolu.lu@linux.intel.com Signed-off-by: Will Deacon --- drivers/iommu/intel/svm.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 790ef3497e7e3..18a9f05df4079 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -118,8 +118,10 @@ void intel_svm_check(struct intel_iommu *iommu) iommu->flags |= VTD_FLAG_SVM_CAPABLE; } -static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_dev *sdev, - unsigned long address, unsigned long pages, int ih) +static void __flush_svm_range_dev(struct intel_svm *svm, + struct intel_svm_dev *sdev, + unsigned long address, + unsigned long pages, int ih) { struct qi_desc desc; @@ -170,6 +172,22 @@ static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_d } } +static void intel_flush_svm_range_dev(struct intel_svm *svm, + struct intel_svm_dev *sdev, + unsigned long address, + unsigned long pages, int ih) +{ + unsigned long shift = ilog2(__roundup_pow_of_two(pages)); + unsigned long align = (1ULL << (VTD_PAGE_SHIFT + shift)); + unsigned long start = ALIGN_DOWN(address, align); + unsigned long end = ALIGN(address + (pages << VTD_PAGE_SHIFT), align); + + while (start < end) { + __flush_svm_range_dev(svm, sdev, start, align >> VTD_PAGE_SHIFT, ih); + start += align; + } +} + static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address, unsigned long pages, int ih) { -- GitLab From b812834b5329fe78d643c9a61350d227db904361 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Sat, 9 Jan 2021 17:56:21 +0100 Subject: [PATCH 1698/1894] iommu: arm-smmu-qcom: Add sdm630/msm8998 compatibles for qcom quirks SDM630 and MSM8998 are among the SoCs that use Qualcomm's implementation of SMMUv2 which has already proven to be problematic over the years. Add their compatibles to the lookup list to prevent the platforms from being shut down by the hypervisor at MMU probe. Signed-off-by: Konrad Dybcio Signed-off-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20210109165622.149777-1-konrad.dybcio@somainline.org Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 1b83d140742f3..bcda17012aee8 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -325,7 +325,9 @@ static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu, } static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { + { .compatible = "qcom,msm8998-smmu-v2" }, { .compatible = "qcom,sc7180-smmu-500" }, + { .compatible = "qcom,sdm630-smmu-v2" }, { .compatible = "qcom,sdm845-smmu-500" }, { .compatible = "qcom,sm8150-smmu-500" }, { .compatible = "qcom,sm8250-smmu-500" }, -- GitLab From 694a1c0adebee9152a9ba0320468f7921aca647d Mon Sep 17 00:00:00 2001 From: Tian Tao Date: Mon, 28 Dec 2020 09:26:14 +0800 Subject: [PATCH 1699/1894] iommu/vt-d: Fix duplicate included linux/dma-map-ops.h linux/dma-map-ops.h is included more than once, Remove the one that isn't necessary. Signed-off-by: Tian Tao Acked-by: Lu Baolu Link: https://lore.kernel.org/r/1609118774-10083-1-git-send-email-tiantao6@hisilicon.com Signed-off-by: Will Deacon --- drivers/iommu/intel/iommu.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 65cf06d70bf4e..f665322a09919 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -38,7 +38,6 @@ #include #include #include -#include #include #include #include -- GitLab From ffaf97899c4a58b9fefb11534f730785443611a8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 11 Jan 2021 22:52:18 +0000 Subject: [PATCH 1700/1894] drm/i915/gt: Limit VFE threads based on GT MEDIA_STATE_VFE only accepts the 'maximum number of threads' in the range [0, n-1] where n is #EU * (#threads/EU) with the number of threads based on plaform and the number of EU based on the number of slices and subslices. This is a fixed number per platform/gt, so appropriately limit the number of threads we spawn to match the device. v2: Oversaturate the system with tasks to force execution on every HW thread; if the thread idles it is returned to the pool and may be reused again before an unused thread. v3: Fix more state commands, which was causing Baytrail to barf. v4: STATE_CACHE_INVALIDATE requires a stall on Ivybridge Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2024 Fixes: 47f8253d2b89 ("drm/i915/gen7: Clear all EU/L3 residual contexts") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Prathap Kumar Valsan Cc: Akeem G Abodunrin Cc: Jon Bloomfield Cc: Rodrigo Vivi Cc: Randy Wright Cc: stable@vger.kernel.org # v5.7+ Reviewed-by: Akeem G Abodunrin Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20210111225220.3483-1-chris@chris-wilson.co.uk (cherry picked from commit eebfb32e26851662d24ea86dd381fd0f83cd4b47) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/gen7_renderclear.c | 157 ++++++++++++--------- 1 file changed, 94 insertions(+), 63 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/gen7_renderclear.c b/drivers/gpu/drm/i915/gt/gen7_renderclear.c index d93d85cd30270..94465374ca2fe 100644 --- a/drivers/gpu/drm/i915/gt/gen7_renderclear.c +++ b/drivers/gpu/drm/i915/gt/gen7_renderclear.c @@ -7,8 +7,6 @@ #include "i915_drv.h" #include "intel_gpu_commands.h" -#define MAX_URB_ENTRIES 64 -#define STATE_SIZE (4 * 1024) #define GT3_INLINE_DATA_DELAYS 0x1E00 #define batch_advance(Y, CS) GEM_BUG_ON((Y)->end != (CS)) @@ -34,38 +32,59 @@ struct batch_chunk { }; struct batch_vals { - u32 max_primitives; - u32 max_urb_entries; - u32 cmd_size; - u32 state_size; + u32 max_threads; u32 state_start; - u32 batch_size; + u32 surface_start; u32 surface_height; u32 surface_width; - u32 scratch_size; - u32 max_size; + u32 size; }; +static inline int num_primitives(const struct batch_vals *bv) +{ + /* + * We need to saturate the GPU with work in order to dispatch + * a shader on every HW thread, and clear the thread-local registers. + * In short, we have to dispatch work faster than the shaders can + * run in order to fill the EU and occupy each HW thread. + */ + return bv->max_threads; +} + static void batch_get_defaults(struct drm_i915_private *i915, struct batch_vals *bv) { if (IS_HASWELL(i915)) { - bv->max_primitives = 280; - bv->max_urb_entries = MAX_URB_ENTRIES; + switch (INTEL_INFO(i915)->gt) { + default: + case 1: + bv->max_threads = 70; + break; + case 2: + bv->max_threads = 140; + break; + case 3: + bv->max_threads = 280; + break; + } bv->surface_height = 16 * 16; bv->surface_width = 32 * 2 * 16; } else { - bv->max_primitives = 128; - bv->max_urb_entries = MAX_URB_ENTRIES / 2; + switch (INTEL_INFO(i915)->gt) { + default: + case 1: /* including vlv */ + bv->max_threads = 36; + break; + case 2: + bv->max_threads = 128; + break; + } bv->surface_height = 16 * 8; bv->surface_width = 32 * 16; } - bv->cmd_size = bv->max_primitives * 4096; - bv->state_size = STATE_SIZE; - bv->state_start = bv->cmd_size; - bv->batch_size = bv->cmd_size + bv->state_size; - bv->scratch_size = bv->surface_height * bv->surface_width; - bv->max_size = bv->batch_size + bv->scratch_size; + bv->state_start = round_up(SZ_1K + num_primitives(bv) * 64, SZ_4K); + bv->surface_start = bv->state_start + SZ_4K; + bv->size = bv->surface_start + bv->surface_height * bv->surface_width; } static void batch_init(struct batch_chunk *bc, @@ -155,7 +174,8 @@ static u32 gen7_fill_binding_table(struct batch_chunk *state, const struct batch_vals *bv) { - u32 surface_start = gen7_fill_surface_state(state, bv->batch_size, bv); + u32 surface_start = + gen7_fill_surface_state(state, bv->surface_start, bv); u32 *cs = batch_alloc_items(state, 32, 8); u32 offset = batch_offset(state, cs); @@ -214,9 +234,9 @@ static void gen7_emit_state_base_address(struct batch_chunk *batch, u32 surface_state_base) { - u32 *cs = batch_alloc_items(batch, 0, 12); + u32 *cs = batch_alloc_items(batch, 0, 10); - *cs++ = STATE_BASE_ADDRESS | (12 - 2); + *cs++ = STATE_BASE_ADDRESS | (10 - 2); /* general */ *cs++ = batch_addr(batch) | BASE_ADDRESS_MODIFY; /* surface */ @@ -233,8 +253,6 @@ gen7_emit_state_base_address(struct batch_chunk *batch, *cs++ = BASE_ADDRESS_MODIFY; *cs++ = 0; *cs++ = BASE_ADDRESS_MODIFY; - *cs++ = 0; - *cs++ = 0; batch_advance(batch, cs); } @@ -244,8 +262,7 @@ gen7_emit_vfe_state(struct batch_chunk *batch, u32 urb_size, u32 curbe_size, u32 mode) { - u32 urb_entries = bv->max_urb_entries; - u32 threads = bv->max_primitives - 1; + u32 threads = bv->max_threads - 1; u32 *cs = batch_alloc_items(batch, 32, 8); *cs++ = MEDIA_VFE_STATE | (8 - 2); @@ -254,7 +271,7 @@ gen7_emit_vfe_state(struct batch_chunk *batch, *cs++ = 0; /* number of threads & urb entries for GPGPU vs Media Mode */ - *cs++ = threads << 16 | urb_entries << 8 | mode << 2; + *cs++ = threads << 16 | 1 << 8 | mode << 2; *cs++ = 0; @@ -293,17 +310,12 @@ gen7_emit_media_object(struct batch_chunk *batch, { unsigned int x_offset = (media_object_index % 16) * 64; unsigned int y_offset = (media_object_index / 16) * 16; - unsigned int inline_data_size; - unsigned int media_batch_size; - unsigned int i; + unsigned int pkt = 6 + 3; u32 *cs; - inline_data_size = 112 * 8; - media_batch_size = inline_data_size + 6; - - cs = batch_alloc_items(batch, 8, media_batch_size); + cs = batch_alloc_items(batch, 8, pkt); - *cs++ = MEDIA_OBJECT | (media_batch_size - 2); + *cs++ = MEDIA_OBJECT | (pkt - 2); /* interface descriptor offset */ *cs++ = 0; @@ -317,25 +329,44 @@ gen7_emit_media_object(struct batch_chunk *batch, *cs++ = 0; /* inline */ - *cs++ = (y_offset << 16) | (x_offset); + *cs++ = y_offset << 16 | x_offset; *cs++ = 0; *cs++ = GT3_INLINE_DATA_DELAYS; - for (i = 3; i < inline_data_size; i++) - *cs++ = 0; batch_advance(batch, cs); } static void gen7_emit_pipeline_flush(struct batch_chunk *batch) { - u32 *cs = batch_alloc_items(batch, 0, 5); + u32 *cs = batch_alloc_items(batch, 0, 4); - *cs++ = GFX_OP_PIPE_CONTROL(5); - *cs++ = PIPE_CONTROL_STATE_CACHE_INVALIDATE | - PIPE_CONTROL_GLOBAL_GTT_IVB; + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_DC_FLUSH_ENABLE | + PIPE_CONTROL_CS_STALL; *cs++ = 0; *cs++ = 0; + + batch_advance(batch, cs); +} + +static void gen7_emit_pipeline_invalidate(struct batch_chunk *batch) +{ + u32 *cs = batch_alloc_items(batch, 0, 8); + + /* ivb: Stall before STATE_CACHE_INVALIDATE */ + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = PIPE_CONTROL_STALL_AT_SCOREBOARD | + PIPE_CONTROL_CS_STALL; + *cs++ = 0; + *cs++ = 0; + + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = PIPE_CONTROL_STATE_CACHE_INVALIDATE; *cs++ = 0; + *cs++ = 0; + batch_advance(batch, cs); } @@ -344,34 +375,34 @@ static void emit_batch(struct i915_vma * const vma, const struct batch_vals *bv) { struct drm_i915_private *i915 = vma->vm->i915; - unsigned int desc_count = 64; - const u32 urb_size = 112; + const unsigned int desc_count = 1; + const unsigned int urb_size = 1; struct batch_chunk cmds, state; - u32 interface_descriptor; + u32 descriptors; unsigned int i; - batch_init(&cmds, vma, start, 0, bv->cmd_size); - batch_init(&state, vma, start, bv->state_start, bv->state_size); + batch_init(&cmds, vma, start, 0, bv->state_start); + batch_init(&state, vma, start, bv->state_start, SZ_4K); - interface_descriptor = - gen7_fill_interface_descriptor(&state, bv, - IS_HASWELL(i915) ? - &cb_kernel_hsw : - &cb_kernel_ivb, - desc_count); - gen7_emit_pipeline_flush(&cmds); + descriptors = gen7_fill_interface_descriptor(&state, bv, + IS_HASWELL(i915) ? + &cb_kernel_hsw : + &cb_kernel_ivb, + desc_count); + + gen7_emit_pipeline_invalidate(&cmds); batch_add(&cmds, PIPELINE_SELECT | PIPELINE_SELECT_MEDIA); batch_add(&cmds, MI_NOOP); - gen7_emit_state_base_address(&cmds, interface_descriptor); + gen7_emit_pipeline_invalidate(&cmds); + gen7_emit_pipeline_flush(&cmds); + gen7_emit_state_base_address(&cmds, descriptors); + gen7_emit_pipeline_invalidate(&cmds); gen7_emit_vfe_state(&cmds, bv, urb_size - 1, 0, 0); + gen7_emit_interface_descriptor_load(&cmds, descriptors, desc_count); - gen7_emit_interface_descriptor_load(&cmds, - interface_descriptor, - desc_count); - - for (i = 0; i < bv->max_primitives; i++) + for (i = 0; i < num_primitives(bv); i++) gen7_emit_media_object(&cmds, i); batch_add(&cmds, MI_BATCH_BUFFER_END); @@ -385,15 +416,15 @@ int gen7_setup_clear_gpr_bb(struct intel_engine_cs * const engine, batch_get_defaults(engine->i915, &bv); if (!vma) - return bv.max_size; + return bv.size; - GEM_BUG_ON(vma->obj->base.size < bv.max_size); + GEM_BUG_ON(vma->obj->base.size < bv.size); batch = i915_gem_object_pin_map(vma->obj, I915_MAP_WC); if (IS_ERR(batch)) return PTR_ERR(batch); - emit_batch(vma, memset(batch, 0, bv.max_size), &bv); + emit_batch(vma, memset(batch, 0, bv.size), &bv); i915_gem_object_flush_map(vma->obj); __i915_gem_object_release_map(vma->obj); -- GitLab From 09aa9e45863e9e25dfbf350bae89fc3c2964482c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 11 Jan 2021 22:52:19 +0000 Subject: [PATCH 1701/1894] drm/i915/gt: Restore clear-residual mitigations for Ivybridge, Baytrail The mitigation is required for all gen7 platforms, now that it does not cause GPU hangs, restore it for Ivybridge and Baytrail. Fixes: 47f8253d2b89 ("drm/i915/gen7: Clear all EU/L3 residual contexts") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Prathap Kumar Valsan Cc: Akeem G Abodunrin Cc: Bloomfield Jon Reviewed-by: Akeem G Abodunrin Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20210111225220.3483-2-chris@chris-wilson.co.uk (cherry picked from commit 008ead6ef8f588a8c832adfe9db201d9be5fd410) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_ring_submission.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index a41b43f445b8a..d809789f9cfb7 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -1290,7 +1290,7 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine) GEM_BUG_ON(timeline->hwsp_ggtt != engine->status_page.vma); - if (IS_HASWELL(engine->i915) && engine->class == RENDER_CLASS) { + if (IS_GEN(engine->i915, 7) && engine->class == RENDER_CLASS) { err = gen7_ctx_switch_bb_init(engine); if (err) goto err_ring_unpin; -- GitLab From 984cadea032b103c5824a5f29d0a36b3e9df6333 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 11 Jan 2021 22:52:20 +0000 Subject: [PATCH 1702/1894] drm/i915: Allow the sysadmin to override security mitigations The clear-residuals mitigation is a relatively heavy hammer and under some circumstances the user may wish to forgo the context isolation in order to meet some performance requirement. Introduce a generic module parameter to allow selectively enabling/disabling different mitigations. To disable just the clear-residuals mitigation (on Ivybridge, Baytrail, or Haswell) use the module parameter: i915.mitigations=auto,!residuals Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/1858 Fixes: 47f8253d2b89 ("drm/i915/gen7: Clear all EU/L3 residual contexts") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Jon Bloomfield Cc: Rodrigo Vivi Cc: stable@vger.kernel.org # v5.7 Reviewed-by: Jon Bloomfield Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20210111225220.3483-3-chris@chris-wilson.co.uk (cherry picked from commit f7452c7cbd5b5dfb9a6c84cb20bea04c89be50cd) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/Makefile | 1 + .../gpu/drm/i915/gt/intel_ring_submission.c | 4 +- drivers/gpu/drm/i915/i915_mitigations.c | 146 ++++++++++++++++++ drivers/gpu/drm/i915/i915_mitigations.h | 13 ++ 4 files changed, 163 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/i915/i915_mitigations.c create mode 100644 drivers/gpu/drm/i915/i915_mitigations.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index e5574e506a5cc..6d9e81ea67f4b 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -38,6 +38,7 @@ i915-y += i915_drv.o \ i915_config.o \ i915_irq.o \ i915_getparam.o \ + i915_mitigations.o \ i915_params.o \ i915_pci.o \ i915_scatterlist.o \ diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index d809789f9cfb7..ecf3a6118a6d7 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -32,6 +32,7 @@ #include "gen6_ppgtt.h" #include "gen7_renderclear.h" #include "i915_drv.h" +#include "i915_mitigations.h" #include "intel_breadcrumbs.h" #include "intel_context.h" #include "intel_gt.h" @@ -886,7 +887,8 @@ static int switch_context(struct i915_request *rq) GEM_BUG_ON(HAS_EXECLISTS(engine->i915)); if (engine->wa_ctx.vma && ce != engine->kernel_context) { - if (engine->wa_ctx.vma->private != ce) { + if (engine->wa_ctx.vma->private != ce && + i915_mitigate_clear_residuals()) { ret = clear_residuals(rq); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/i915_mitigations.c b/drivers/gpu/drm/i915/i915_mitigations.c new file mode 100644 index 0000000000000..84f12598d1458 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_mitigations.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include +#include +#include +#include + +#include "i915_drv.h" +#include "i915_mitigations.h" + +static unsigned long mitigations __read_mostly = ~0UL; + +enum { + CLEAR_RESIDUALS = 0, +}; + +static const char * const names[] = { + [CLEAR_RESIDUALS] = "residuals", +}; + +bool i915_mitigate_clear_residuals(void) +{ + return READ_ONCE(mitigations) & BIT(CLEAR_RESIDUALS); +} + +static int mitigations_set(const char *val, const struct kernel_param *kp) +{ + unsigned long new = ~0UL; + char *str, *sep, *tok; + bool first = true; + int err = 0; + + BUILD_BUG_ON(ARRAY_SIZE(names) >= BITS_PER_TYPE(mitigations)); + + str = kstrdup(val, GFP_KERNEL); + if (!str) + return -ENOMEM; + + for (sep = str; (tok = strsep(&sep, ","));) { + bool enable = true; + int i; + + /* Be tolerant of leading/trailing whitespace */ + tok = strim(tok); + + if (first) { + first = false; + + if (!strcmp(tok, "auto")) + continue; + + new = 0; + if (!strcmp(tok, "off")) + continue; + } + + if (*tok == '!') { + enable = !enable; + tok++; + } + + if (!strncmp(tok, "no", 2)) { + enable = !enable; + tok += 2; + } + + if (*tok == '\0') + continue; + + for (i = 0; i < ARRAY_SIZE(names); i++) { + if (!strcmp(tok, names[i])) { + if (enable) + new |= BIT(i); + else + new &= ~BIT(i); + break; + } + } + if (i == ARRAY_SIZE(names)) { + pr_err("Bad \"%s.mitigations=%s\", '%s' is unknown\n", + DRIVER_NAME, val, tok); + err = -EINVAL; + break; + } + } + kfree(str); + if (err) + return err; + + WRITE_ONCE(mitigations, new); + return 0; +} + +static int mitigations_get(char *buffer, const struct kernel_param *kp) +{ + unsigned long local = READ_ONCE(mitigations); + int count, i; + bool enable; + + if (!local) + return scnprintf(buffer, PAGE_SIZE, "%s\n", "off"); + + if (local & BIT(BITS_PER_LONG - 1)) { + count = scnprintf(buffer, PAGE_SIZE, "%s,", "auto"); + enable = false; + } else { + enable = true; + count = 0; + } + + for (i = 0; i < ARRAY_SIZE(names); i++) { + if ((local & BIT(i)) != enable) + continue; + + count += scnprintf(buffer + count, PAGE_SIZE - count, + "%s%s,", enable ? "" : "!", names[i]); + } + + buffer[count - 1] = '\n'; + return count; +} + +static const struct kernel_param_ops ops = { + .set = mitigations_set, + .get = mitigations_get, +}; + +module_param_cb_unsafe(mitigations, &ops, NULL, 0600); +MODULE_PARM_DESC(mitigations, +"Selectively enable security mitigations for all Intel® GPUs in the system.\n" +"\n" +" auto -- enables all mitigations required for the platform [default]\n" +" off -- disables all mitigations\n" +"\n" +"Individual mitigations can be enabled by passing a comma-separated string,\n" +"e.g. mitigations=residuals to enable only clearing residuals or\n" +"mitigations=auto,noresiduals to disable only the clear residual mitigation.\n" +"Either '!' or 'no' may be used to switch from enabling the mitigation to\n" +"disabling it.\n" +"\n" +"Active mitigations for Ivybridge, Baytrail, Haswell:\n" +" residuals -- clear all thread-local registers between contexts" +); diff --git a/drivers/gpu/drm/i915/i915_mitigations.h b/drivers/gpu/drm/i915/i915_mitigations.h new file mode 100644 index 0000000000000..1359d8135287a --- /dev/null +++ b/drivers/gpu/drm/i915/i915_mitigations.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef __I915_MITIGATIONS_H__ +#define __I915_MITIGATIONS_H__ + +#include + +bool i915_mitigate_clear_residuals(void); + +#endif /* __I915_MITIGATIONS_H__ */ -- GitLab From d78050ee35440d7879ed94011c52994b8932e96e Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 7 Jan 2021 14:40:08 +0000 Subject: [PATCH 1703/1894] arm64: Remove arm64_dma32_phys_limit and its uses With the introduction of a dynamic ZONE_DMA range based on DT or IORT information, there's no need for CMA allocations from the wider ZONE_DMA32 since on most platforms ZONE_DMA will cover the 32-bit addressable range. Remove the arm64_dma32_phys_limit and set arm64_dma_phys_limit to cover the smallest DMA range required on the platform. CMA allocation and crashkernel reservation now go in the dynamically sized ZONE_DMA, allowing correct functionality on RPi4. Signed-off-by: Catalin Marinas Cc: Chen Zhou Reviewed-by: Nicolas Saenz Julienne Tested-by: Nicolas Saenz Julienne # On RPi4B --- arch/arm64/include/asm/processor.h | 3 +-- arch/arm64/mm/init.c | 33 ++++++++++++++++-------------- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 69ad25fbeae4a..ca2cd75d32863 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -94,8 +94,7 @@ #endif /* CONFIG_ARM64_FORCE_52BIT */ extern phys_addr_t arm64_dma_phys_limit; -extern phys_addr_t arm64_dma32_phys_limit; -#define ARCH_LOW_ADDRESS_LIMIT ((arm64_dma_phys_limit ? : arm64_dma32_phys_limit) - 1) +#define ARCH_LOW_ADDRESS_LIMIT (arm64_dma_phys_limit - 1) struct debug_info { #ifdef CONFIG_HAVE_HW_BREAKPOINT diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 7deddf56f7c3e..709d98fea90cc 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -53,13 +53,13 @@ s64 memstart_addr __ro_after_init = -1; EXPORT_SYMBOL(memstart_addr); /* - * We create both ZONE_DMA and ZONE_DMA32. ZONE_DMA covers the first 1G of - * memory as some devices, namely the Raspberry Pi 4, have peripherals with - * this limited view of the memory. ZONE_DMA32 will cover the rest of the 32 - * bit addressable memory area. + * If the corresponding config options are enabled, we create both ZONE_DMA + * and ZONE_DMA32. By default ZONE_DMA covers the 32-bit addressable memory + * unless restricted on specific platforms (e.g. 30-bit on Raspberry Pi 4). + * In such case, ZONE_DMA32 covers the rest of the 32-bit addressable memory, + * otherwise it is empty. */ phys_addr_t arm64_dma_phys_limit __ro_after_init; -phys_addr_t arm64_dma32_phys_limit __ro_after_init; #ifdef CONFIG_KEXEC_CORE /* @@ -84,7 +84,7 @@ static void __init reserve_crashkernel(void) if (crash_base == 0) { /* Current arm64 boot protocol requires 2MB alignment */ - crash_base = memblock_find_in_range(0, arm64_dma32_phys_limit, + crash_base = memblock_find_in_range(0, arm64_dma_phys_limit, crash_size, SZ_2M); if (crash_base == 0) { pr_warn("cannot allocate crashkernel (size:0x%llx)\n", @@ -196,6 +196,7 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) unsigned long max_zone_pfns[MAX_NR_ZONES] = {0}; unsigned int __maybe_unused acpi_zone_dma_bits; unsigned int __maybe_unused dt_zone_dma_bits; + phys_addr_t __maybe_unused dma32_phys_limit = max_zone_phys(32); #ifdef CONFIG_ZONE_DMA acpi_zone_dma_bits = fls64(acpi_iort_dma_get_max_cpu_address()); @@ -205,8 +206,12 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) max_zone_pfns[ZONE_DMA] = PFN_DOWN(arm64_dma_phys_limit); #endif #ifdef CONFIG_ZONE_DMA32 - max_zone_pfns[ZONE_DMA32] = PFN_DOWN(arm64_dma32_phys_limit); + max_zone_pfns[ZONE_DMA32] = PFN_DOWN(dma32_phys_limit); + if (!arm64_dma_phys_limit) + arm64_dma_phys_limit = dma32_phys_limit; #endif + if (!arm64_dma_phys_limit) + arm64_dma_phys_limit = PHYS_MASK + 1; max_zone_pfns[ZONE_NORMAL] = max; free_area_init(max_zone_pfns); @@ -394,16 +399,9 @@ void __init arm64_memblock_init(void) early_init_fdt_scan_reserved_mem(); - if (IS_ENABLED(CONFIG_ZONE_DMA32)) - arm64_dma32_phys_limit = max_zone_phys(32); - else - arm64_dma32_phys_limit = PHYS_MASK + 1; - reserve_elfcorehdr(); high_memory = __va(memblock_end_of_DRAM() - 1) + 1; - - dma_contiguous_reserve(arm64_dma32_phys_limit); } void __init bootmem_init(void) @@ -438,6 +436,11 @@ void __init bootmem_init(void) sparse_init(); zone_sizes_init(min, max); + /* + * Reserve the CMA area after arm64_dma_phys_limit was initialised. + */ + dma_contiguous_reserve(arm64_dma_phys_limit); + /* * request_standard_resources() depends on crashkernel's memory being * reserved, so do it here. @@ -455,7 +458,7 @@ void __init bootmem_init(void) void __init mem_init(void) { if (swiotlb_force == SWIOTLB_FORCE || - max_pfn > PFN_DOWN(arm64_dma_phys_limit ? : arm64_dma32_phys_limit)) + max_pfn > PFN_DOWN(arm64_dma_phys_limit)) swiotlb_init(1); else swiotlb_force = SWIOTLB_NO_FORCE; -- GitLab From 8e14f610159d524cd7aac37982826d3ef75c09e8 Mon Sep 17 00:00:00 2001 From: Ignat Korchagin Date: Sat, 9 Jan 2021 15:17:06 +0000 Subject: [PATCH 1704/1894] dm crypt: do not call bio_endio() from the dm-crypt tasklet Sometimes, when dm-crypt executes decryption in a tasklet, we may get "BUG: KASAN: use-after-free in tasklet_action_common.constprop..." with a kasan-enabled kernel. When the decryption fully completes in the tasklet, dm-crypt will call bio_endio(), which in turn will call clone_endio() from dm.c core code. That function frees the resources associated with the bio, including per bio private structures. For dm-crypt it will free the current struct dm_crypt_io, which contains our tasklet object, causing use-after-free, when the tasklet is being dequeued by the kernel. To avoid this, do not call bio_endio() from the current tasklet context, but delay its execution to the dm-crypt IO workqueue. Fixes: 39d42fa96ba1 ("dm crypt: add flags to optionally bypass kcryptd workqueues") Cc: # v5.9+ Signed-off-by: Ignat Korchagin Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index d2d6d3000f5bd..b4d7418b50368 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1730,6 +1730,12 @@ static void crypt_inc_pending(struct dm_crypt_io *io) atomic_inc(&io->io_pending); } +static void kcryptd_io_bio_endio(struct work_struct *work) +{ + struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work); + bio_endio(io->base_bio); +} + /* * One of the bios was finished. Check for completion of * the whole request and correctly clean up the buffer. @@ -1752,7 +1758,23 @@ static void crypt_dec_pending(struct dm_crypt_io *io) kfree(io->integrity_metadata); base_bio->bi_status = error; - bio_endio(base_bio); + + /* + * If we are running this function from our tasklet, + * we can't call bio_endio() here, because it will call + * clone_endio() from dm.c, which in turn will + * free the current struct dm_crypt_io structure with + * our tasklet. In this case we need to delay bio_endio() + * execution to after the tasklet is done and dequeued. + */ + if (tasklet_trylock(&io->tasklet)) { + tasklet_unlock(&io->tasklet); + bio_endio(base_bio); + return; + } + + INIT_WORK(&io->work, kcryptd_io_bio_endio); + queue_work(cc->io_queue, &io->work); } /* -- GitLab From 17ffc193cdc6dc7a613d00d8ad47fc1f801b9bf0 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 12 Jan 2021 14:54:47 -0500 Subject: [PATCH 1705/1894] dm integrity: fix the maximum number of arguments Advance the maximum number of arguments from 9 to 15 to account for all potential feature flags that may be supplied. Linux 4.19 added "meta_device" (356d9d52e1221ba0c9f10b8b38652f78a5298329) and "recalculate" (a3fcf7253139609bf9ff901fbf955fba047e75dd) flags. Commit 468dfca38b1a6fbdccd195d875599cb7c8875cd9 added "sectors_per_bit" and "bitmap_flush_interval". Commit 84597a44a9d86ac949900441cea7da0af0f2f473 added "allow_discards". And the commit d537858ac8aaf4311b51240893add2fc62003b97 added "fix_padding". Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org # v4.19+ Signed-off-by: Mike Snitzer --- drivers/md/dm-integrity.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 11c7c538f7a98..81df019ab284a 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -3792,7 +3792,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) unsigned extra_args; struct dm_arg_set as; static const struct dm_arg _args[] = { - {0, 9, "Invalid number of feature args"}, + {0, 15, "Invalid number of feature args"}, }; unsigned journal_sectors, interleave_sectors, buffer_sectors, journal_watermark, sync_msec; bool should_write_sb; -- GitLab From df85bc140a4d6cbaa78d8e9c35154e1a2f0622c7 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 11 Jan 2021 18:07:07 +0100 Subject: [PATCH 1706/1894] net: dcb: Accept RTM_GETDCB messages carrying set-like DCB commands In commit 826f328e2b7e ("net: dcb: Validate netlink message in DCB handler"), Linux started rejecting RTM_GETDCB netlink messages if they contained a set-like DCB_CMD_ command. The reason was that privileges were only verified for RTM_SETDCB messages, but the value that determined the action to be taken is the command, not the message type. And validation of message type against the DCB command was the obvious missing piece. Unfortunately it turns out that mlnx_qos, a somewhat widely deployed tool for configuration of DCB, accesses the DCB set-like APIs through RTM_GETDCB. Therefore do not bounce the discrepancy between message type and command. Instead, in addition to validating privileges based on the actual message type, validate them also based on the expected message type. This closes the loophole of allowing DCB configuration on non-admin accounts, while maintaining backward compatibility. Fixes: 2f90b8657ec9 ("ixgbe: this patch adds support for DCB to the kernel and ixgbe driver") Fixes: 826f328e2b7e ("net: dcb: Validate netlink message in DCB handler") Signed-off-by: Petr Machata Link: https://lore.kernel.org/r/a3edcfda0825f2aa2591801c5232f2bbf2d8a554.1610384801.git.me@pmachata.org Signed-off-by: Jakub Kicinski --- net/dcb/dcbnl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 7d49b6fd6cef9..653e3bc9c87b9 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1765,7 +1765,7 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, fn = &reply_funcs[dcb->cmd]; if (!fn->cb) return -EOPNOTSUPP; - if (fn->type != nlh->nlmsg_type) + if (fn->type == RTM_SETDCB && !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; if (!tb[DCB_ATTR_IFNAME]) -- GitLab From 8ff60eb052eeba95cfb3efe16b08c9199f8121cf Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 12 Jan 2021 15:49:04 -0800 Subject: [PATCH 1707/1894] mm, slub: consider rest of partial list if acquire_slab() fails acquire_slab() fails if there is contention on the freelist of the page (probably because some other CPU is concurrently freeing an object from the page). In that case, it might make sense to look for a different page (since there might be more remote frees to the page from other CPUs, and we don't want contention on struct page). However, the current code accidentally stops looking at the partial list completely in that case. Especially on kernels without CONFIG_NUMA set, this means that get_partial() fails and new_slab_objects() falls back to new_slab(), allocating new pages. This could lead to an unnecessary increase in memory fragmentation. Link: https://lkml.kernel.org/r/20201228130853.1871516-1-jannh@google.com Fixes: 7ced37197196 ("slub: Acquire_slab() avoid loop") Signed-off-by: Jann Horn Acked-by: David Rientjes Acked-by: Joonsoo Kim Cc: Christoph Lameter Cc: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/slub.c b/mm/slub.c index dc5b42e700b85..d9e4e10683cc1 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1973,7 +1973,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, t = acquire_slab(s, n, page, object == NULL, &objects); if (!t) - break; + continue; /* cmpxchg raced */ available += objects; if (!object) { -- GitLab From ce8f86ee94fabcc98537ddccd7e82cfd360a4dc5 Mon Sep 17 00:00:00 2001 From: Hailong liu Date: Tue, 12 Jan 2021 15:49:08 -0800 Subject: [PATCH 1708/1894] mm/page_alloc: add a missing mm_page_alloc_zone_locked() tracepoint The trace point *trace_mm_page_alloc_zone_locked()* in __rmqueue() does not currently cover all branches. Add the missing tracepoint and check the page before do that. [akpm@linux-foundation.org: use IS_ENABLED() to suppress warning] Link: https://lkml.kernel.org/r/20201228132901.41523-1-carver4lio@163.com Signed-off-by: Hailong liu Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index bdbec4c981738..027f6481ba59b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2862,20 +2862,20 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype, { struct page *page; -#ifdef CONFIG_CMA - /* - * Balance movable allocations between regular and CMA areas by - * allocating from CMA when over half of the zone's free memory - * is in the CMA area. - */ - if (alloc_flags & ALLOC_CMA && - zone_page_state(zone, NR_FREE_CMA_PAGES) > - zone_page_state(zone, NR_FREE_PAGES) / 2) { - page = __rmqueue_cma_fallback(zone, order); - if (page) - return page; + if (IS_ENABLED(CONFIG_CMA)) { + /* + * Balance movable allocations between regular and CMA areas by + * allocating from CMA when over half of the zone's free memory + * is in the CMA area. + */ + if (alloc_flags & ALLOC_CMA && + zone_page_state(zone, NR_FREE_CMA_PAGES) > + zone_page_state(zone, NR_FREE_PAGES) / 2) { + page = __rmqueue_cma_fallback(zone, order); + if (page) + goto out; + } } -#endif retry: page = __rmqueue_smallest(zone, order, migratetype); if (unlikely(!page)) { @@ -2886,8 +2886,9 @@ retry: alloc_flags)) goto retry; } - - trace_mm_page_alloc_zone_locked(page, order, migratetype); +out: + if (page) + trace_mm_page_alloc_zone_locked(page, order, migratetype); return page; } -- GitLab From 7ea510b92c7c9b4eb5ff72e6b4bbad4b0407a914 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 12 Jan 2021 15:49:11 -0800 Subject: [PATCH 1709/1894] mm/memcontrol: fix warning in mem_cgroup_page_lruvec() Boot a CONFIG_MEMCG=y kernel with "cgroup_disabled=memory" and you are met by a series of warnings from the VM_WARN_ON_ONCE_PAGE(!memcg, page) recently added to the inline mem_cgroup_page_lruvec(). An earlier attempt to place that warning, in mem_cgroup_lruvec(), had been careful to do so after weeding out the mem_cgroup_disabled() case; but was itself invalid because of the mem_cgroup_lruvec(NULL, pgdat) in clear_pgdat_congested() and age_active_anon(). Warning in mem_cgroup_page_lruvec() was once useful in detecting a KSM charge bug, so may be worth keeping: but skip if mem_cgroup_disabled(). Link: https://lkml.kernel.org/r/alpine.LSU.2.11.2101032056260.1093@eggly.anvils Fixes: 9a1ac2288cf1 ("mm/memcontrol:rewrite mem_cgroup_page_lruvec()") Signed-off-by: Hugh Dickins Reviewed-by: Alex Shi Acked-by: Roman Gushchin Acked-by: Chris Down Reviewed-by: Baoquan He Acked-by: Vlastimil Babka Cc: Hui Su Cc: Lorenzo Stoakes Cc: Michal Hocko Cc: Johannes Weiner Cc: Shakeel Butt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index d827bd7f3bfe3..eeb0b52203e92 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -665,7 +665,7 @@ static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page, { struct mem_cgroup *memcg = page_memcg(page); - VM_WARN_ON_ONCE_PAGE(!memcg, page); + VM_WARN_ON_ONCE_PAGE(!memcg && !mem_cgroup_disabled(), page); return mem_cgroup_lruvec(memcg, pgdat); } -- GitLab From 29970dc24faf0078beb4efab5455b4f504d2198d Mon Sep 17 00:00:00 2001 From: Hailong Liu Date: Tue, 12 Jan 2021 15:49:14 -0800 Subject: [PATCH 1710/1894] arm/kasan: fix the array size of kasan_early_shadow_pte[] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The size of kasan_early_shadow_pte[] now is PTRS_PER_PTE which defined to 512 for arm. This means that it only covers the prev Linux pte entries, but not the HWTABLE pte entries for arm. The reason it currently works is that the symbol kasan_early_shadow_page immediately following kasan_early_shadow_pte in memory is page aligned, which makes kasan_early_shadow_pte look like a 4KB size array. But we can't ensure the order is always right with different compiler/linker, or if more bss symbols are introduced. We had a test with QEMU + vexpress:put a 512KB-size symbol with attribute __section(".bss..page_aligned") after kasan_early_shadow_pte, and poisoned it after kasan_early_init(). Then enabled CONFIG_KASAN, it failed to boot up. Link: https://lkml.kernel.org/r/20210109044622.8312-1-hailongliiu@yeah.net Signed-off-by: Hailong Liu Signed-off-by: Ziliang Guo Reviewed-by: Linus Walleij Cc: Andrey Ryabinin Cc: Russell King Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Ard Biesheuvel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 6 +++++- mm/kasan/init.c | 3 ++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 5e0655fb2a6f7..fe1ae73ff8b57 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -35,8 +35,12 @@ struct kunit_kasan_expectation { #define KASAN_SHADOW_INIT 0 #endif +#ifndef PTE_HWTABLE_PTRS +#define PTE_HWTABLE_PTRS 0 +#endif + extern unsigned char kasan_early_shadow_page[PAGE_SIZE]; -extern pte_t kasan_early_shadow_pte[PTRS_PER_PTE]; +extern pte_t kasan_early_shadow_pte[PTRS_PER_PTE + PTE_HWTABLE_PTRS]; extern pmd_t kasan_early_shadow_pmd[PTRS_PER_PMD]; extern pud_t kasan_early_shadow_pud[PTRS_PER_PUD]; extern p4d_t kasan_early_shadow_p4d[MAX_PTRS_PER_P4D]; diff --git a/mm/kasan/init.c b/mm/kasan/init.c index bc0ad208b3a7a..7ca0b92d5886d 100644 --- a/mm/kasan/init.c +++ b/mm/kasan/init.c @@ -64,7 +64,8 @@ static inline bool kasan_pmd_table(pud_t pud) return false; } #endif -pte_t kasan_early_shadow_pte[PTRS_PER_PTE] __page_aligned_bss; +pte_t kasan_early_shadow_pte[PTRS_PER_PTE + PTE_HWTABLE_PTRS] + __page_aligned_bss; static inline bool kasan_pte_table(pmd_t pmd) { -- GitLab From c22ee5284cf58017fa8c6d21d8f8c68159b6faab Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Tue, 12 Jan 2021 15:49:18 -0800 Subject: [PATCH 1711/1894] mm/vmalloc.c: fix potential memory leak In VM_MAP_PUT_PAGES case, we should put pages and free array in vfree. But we missed to set area->nr_pages in vmap(). So we would fail to put pages in __vunmap() because area->nr_pages = 0. Link: https://lkml.kernel.org/r/20210107123541.39206-1-linmiaohe@huawei.com Fixes: b944afc9d64d ("mm: add a VM_MAP_PUT_PAGES flag for vmap") Signed-off-by: Shijie Luo Signed-off-by: Miaohe Lin Reviewed-by: Uladzislau Rezki (Sony) Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 4d88fe5a277ac..e6f352bf04982 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2420,8 +2420,10 @@ void *vmap(struct page **pages, unsigned int count, return NULL; } - if (flags & VM_MAP_PUT_PAGES) + if (flags & VM_MAP_PUT_PAGES) { area->pages = pages; + area->nr_pages = count; + } return area->addr; } EXPORT_SYMBOL(vmap); -- GitLab From f555befd185dc097ede887eb7b308c2e1c1369d4 Mon Sep 17 00:00:00 2001 From: Jan Stancek Date: Tue, 12 Jan 2021 15:49:21 -0800 Subject: [PATCH 1712/1894] mm: migrate: initialize err in do_migrate_pages After commit 236c32eb1096 ("mm: migrate: clean up migrate_prep{_local}")', do_migrate_pages can return uninitialized variable 'err' (which is propagated to user-space as error) when 'from' and 'to' nodesets are identical. This can be reproduced with LTP migrate_pages01, which calls migrate_pages() with same set for both old/new_nodes. Add 'err' initialization back. Link: https://lkml.kernel.org/r/456a021c7ef3636d7668cec9dcb4a446a4244812.1609855564.git.jstancek@redhat.com Fixes: 236c32eb1096 ("mm: migrate: clean up migrate_prep{_local}") Signed-off-by: Jan Stancek Acked-by: Michal Hocko Acked-by: Yang Shi Cc: Zi Yan Cc: Jan Kara Cc: Matthew Wilcox Cc: Mel Gorman Cc: Song Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 8cf96bd21341c..2c3a865020534 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1111,7 +1111,7 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from, const nodemask_t *to, int flags) { int busy = 0; - int err; + int err = 0; nodemask_t tmp; migrate_prep(); -- GitLab From 0eb98f1588c2cc7a79816d84ab18a55d254f481c Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Tue, 12 Jan 2021 15:49:24 -0800 Subject: [PATCH 1713/1894] mm/hugetlb: fix potential missing huge page size info The huge page size is encoded for VM_FAULT_HWPOISON errors only. So if we return VM_FAULT_HWPOISON, huge page size would just be ignored. Link: https://lkml.kernel.org/r/20210107123449.38481-1-linmiaohe@huawei.com Fixes: aa50d3a7aa81 ("Encode huge page size for VM_FAULT_HWPOISON errors") Signed-off-by: Miaohe Lin Reviewed-by: Mike Kravetz Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index a2602969873dc..18f6ee3179002 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4371,7 +4371,7 @@ retry: * So we need to block hugepage fault by PG_hwpoison bit check. */ if (unlikely(PageHWPoison(page))) { - ret = VM_FAULT_HWPOISON | + ret = VM_FAULT_HWPOISON_LARGE | VM_FAULT_SET_HINDEX(hstate_index(h)); goto backout_unlocked; } -- GitLab From 7e5f1126b54a29c078c07a5fe245e269f3c05500 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 12 Jan 2021 15:49:27 -0800 Subject: [PATCH 1714/1894] MAINTAINERS: add Vlastimil as slab allocators maintainer I would like to help with slab allocators maintenance, from the perspective of being responsible for SLAB and more recently also SLUB in an enterprise distro kernel and supporting its users. Recently I've been focusing on improving SLUB's debugging features, and patch review in the area, including the kmemcg accounting rewrite last year. Link: https://lkml.kernel.org/r/20210108110353.19971-1-vbabka@suse.cz Signed-off-by: Vlastimil Babka Acked-by: Christoph Lameter Acked-by: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index cc1e6a5ee6e67..28cbe0ec66105 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16319,6 +16319,7 @@ M: Pekka Enberg M: David Rientjes M: Joonsoo Kim M: Andrew Morton +M: Vlastimil Babka L: linux-mm@kvack.org S: Maintained F: include/linux/sl?b*.h -- GitLab From 6696d2a6f38c0beedf03c381edfc392ecf7631b4 Mon Sep 17 00:00:00 2001 From: Oscar Salvador Date: Tue, 12 Jan 2021 15:49:30 -0800 Subject: [PATCH 1715/1894] mm,hwpoison: fix printing of page flags Format %pG expects a lower case 'p' in order to print the flags. Fix it. Link: https://lkml.kernel.org/r/20210108085202.4506-1-osalvador@suse.de Fixes: 8295d535e2aa ("mm,hwpoison: refactor get_any_page") Signed-off-by: Oscar Salvador Reported-by: Dan Carpenter Reviewed-by: Anshuman Khandual Acked-by: Naoya Horiguchi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 5a38e9eade946..04d9f154a130d 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1940,7 +1940,7 @@ retry: goto retry; } } else if (ret == -EIO) { - pr_info("%s: %#lx: unknown page type: %lx (%pGP)\n", + pr_info("%s: %#lx: unknown page type: %lx (%pGp)\n", __func__, pfn, page->flags, &page->flags); } -- GitLab From eb351d75ce1e75b4f793d609efac08426ca50acd Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 12 Jan 2021 15:49:33 -0800 Subject: [PATCH 1716/1894] mm/process_vm_access.c: include compat.h Fix the build error: mm/process_vm_access.c:277:5: error: implicit declaration of function 'in_compat_syscall'; did you mean 'in_ia32_syscall'? [-Werror=implicit-function-declaration] Fixes: 38dc5079da7081e "Fix compat regression in process_vm_rw()" Reported-by: syzbot+5b0d0de84d6c65b8dd2b@syzkaller.appspotmail.com Cc: Kyle Huey Cc: Jens Axboe Cc: Al Viro Cc: Christoph Hellwig Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/process_vm_access.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c index 4bcc119580890..f5fee9cf90f8b 100644 --- a/mm/process_vm_access.c +++ b/mm/process_vm_access.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include -- GitLab From a18caa97b1bda0a3d126a7be165ddcfc56c2dde6 Mon Sep 17 00:00:00 2001 From: Marco Felsch Date: Mon, 11 Jan 2021 09:59:32 +0100 Subject: [PATCH 1717/1894] net: phy: smsc: fix clk error handling Commit bedd8d78aba3 ("net: phy: smsc: LAN8710/20: add phy refclk in support") added the phy clk support. The commit already checks if clk_get_optional() throw an error but instead of returning the error it ignores it. Fixes: bedd8d78aba3 ("net: phy: smsc: LAN8710/20: add phy refclk in support") Suggested-by: Jakub Kicinski Signed-off-by: Marco Felsch Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20210111085932.28680-1-m.felsch@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/phy/smsc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c index 33372756a451f..ddb78fb4d6dc3 100644 --- a/drivers/net/phy/smsc.c +++ b/drivers/net/phy/smsc.c @@ -317,7 +317,8 @@ static int smsc_phy_probe(struct phy_device *phydev) /* Make clk optional to keep DTB backward compatibility. */ priv->refclk = clk_get_optional(dev, NULL); if (IS_ERR(priv->refclk)) - dev_err_probe(dev, PTR_ERR(priv->refclk), "Failed to request clock\n"); + return dev_err_probe(dev, PTR_ERR(priv->refclk), + "Failed to request clock\n"); ret = clk_prepare_enable(priv->refclk); if (ret) -- GitLab From 07b90056cb15ff9877dca0d8f1b6583d1051f724 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 12 Jan 2021 01:09:43 +0200 Subject: [PATCH 1718/1894] net: dsa: unbind all switches from tree when DSA master unbinds Currently the following happens when a DSA master driver unbinds while there are DSA switches attached to it: $ echo 0000:00:00.5 > /sys/bus/pci/drivers/mscc_felix/unbind ------------[ cut here ]------------ WARNING: CPU: 0 PID: 392 at net/core/dev.c:9507 Call trace: rollback_registered_many+0x5fc/0x688 unregister_netdevice_queue+0x98/0x120 dsa_slave_destroy+0x4c/0x88 dsa_port_teardown.part.16+0x78/0xb0 dsa_tree_teardown_switches+0x58/0xc0 dsa_unregister_switch+0x104/0x1b8 felix_pci_remove+0x24/0x48 pci_device_remove+0x48/0xf0 device_release_driver_internal+0x118/0x1e8 device_driver_detach+0x28/0x38 unbind_store+0xd0/0x100 Located at the above location is this WARN_ON: /* Notifier chain MUST detach us all upper devices. */ WARN_ON(netdev_has_any_upper_dev(dev)); Other stacked interfaces, like VLAN, do indeed listen for NETDEV_UNREGISTER on the real_dev and also unregister themselves at that time, which is clearly the behavior that rollback_registered_many expects. But DSA interfaces are not VLAN. They have backing hardware (platform devices, PCI devices, MDIO, SPI etc) which have a life cycle of their own and we can't just trigger an unregister from the DSA framework when we receive a netdev notifier that the master unregisters. Luckily, there is something we can do, and that is to inform the driver core that we have a runtime dependency to the DSA master interface's device, and create a device link where that is the supplier and we are the consumer. Having this device link will make the DSA switch unbind before the DSA master unbinds, which is enough to avoid the WARN_ON from rollback_registered_many. Note that even before the blamed commit, DSA did nothing intelligent when the master interface got unregistered either. See the discussion here: https://lore.kernel.org/netdev/20200505210253.20311-1-f.fainelli@gmail.com/ But this time, at least the WARN_ON is loud enough that the upper_dev_link commit can be blamed. The advantage with this approach vs dev_hold(master) in the attached link is that the latter is not meant for long term reference counting. With dev_hold, the only thing that will happen is that when the user attempts an unbind of the DSA master, netdev_wait_allrefs will keep waiting and waiting, due to DSA keeping the refcount forever. DSA would not access freed memory corresponding to the master interface, but the unbind would still result in a freeze. Whereas with device links, graceful teardown is ensured. It even works with cascaded DSA trees. $ echo 0000:00:00.2 > /sys/bus/pci/drivers/fsl_enetc/unbind [ 1818.797546] device swp0 left promiscuous mode [ 1819.301112] sja1105 spi2.0: Link is Down [ 1819.307981] DSA: tree 1 torn down [ 1819.312408] device eno2 left promiscuous mode [ 1819.656803] mscc_felix 0000:00:00.5: Link is Down [ 1819.667194] DSA: tree 0 torn down [ 1819.711557] fsl_enetc 0000:00:00.2 eno2: Link is Down This approach allows us to keep the DSA framework absolutely unchanged, and the driver core will just know to unbind us first when the master goes away - as opposed to the large (and probably impossible) rework required if attempting to listen for NETDEV_UNREGISTER. As per the documentation at Documentation/driver-api/device_link.rst, specifying the DL_FLAG_AUTOREMOVE_CONSUMER flag causes the device link to be automatically purged when the consumer fails to probe or later unbinds. So we don't need to keep the consumer_link variable in struct dsa_switch. Fixes: 2f1e8ea726e9 ("net: dsa: link interfaces with the DSA master to get rid of lockdep warnings") Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Tested-by: Florian Fainelli Link: https://lore.kernel.org/r/20210111230943.3701806-1-olteanv@gmail.com Signed-off-by: Jakub Kicinski --- net/dsa/master.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/dsa/master.c b/net/dsa/master.c index 5a0f6fec4271d..cb3a5cf99b258 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -309,8 +309,18 @@ static struct lock_class_key dsa_master_addr_list_lock_key; int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) { int mtu = ETH_DATA_LEN + cpu_dp->tag_ops->overhead; + struct dsa_switch *ds = cpu_dp->ds; + struct device_link *consumer_link; int ret; + /* The DSA master must use SET_NETDEV_DEV for this to work. */ + consumer_link = device_link_add(ds->dev, dev->dev.parent, + DL_FLAG_AUTOREMOVE_CONSUMER); + if (!consumer_link) + netdev_err(dev, + "Failed to create a device link to DSA switch %s\n", + dev_name(ds->dev)); + rtnl_lock(); ret = dev_set_mtu(dev, mtu); rtnl_unlock(); -- GitLab From 91158e1680b164c8d101144ca916a3dca10c3e17 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 12 Jan 2021 02:48:31 +0200 Subject: [PATCH 1719/1894] net: dsa: clear devlink port type before unregistering slave netdevs Florian reported a use-after-free bug in devlink_nl_port_fill found with KASAN: (devlink_nl_port_fill) (devlink_port_notify) (devlink_port_unregister) (dsa_switch_teardown.part.3) (dsa_tree_teardown_switches) (dsa_unregister_switch) (bcm_sf2_sw_remove) (platform_remove) (device_release_driver_internal) (device_links_unbind_consumers) (device_release_driver_internal) (device_driver_detach) (unbind_store) Allocated by task 31: alloc_netdev_mqs+0x5c/0x50c dsa_slave_create+0x110/0x9c8 dsa_register_switch+0xdb0/0x13a4 b53_switch_register+0x47c/0x6dc bcm_sf2_sw_probe+0xaa4/0xc98 platform_probe+0x90/0xf4 really_probe+0x184/0x728 driver_probe_device+0xa4/0x278 __device_attach_driver+0xe8/0x148 bus_for_each_drv+0x108/0x158 Freed by task 249: free_netdev+0x170/0x194 dsa_slave_destroy+0xac/0xb0 dsa_port_teardown.part.2+0xa0/0xb4 dsa_tree_teardown_switches+0x50/0xc4 dsa_unregister_switch+0x124/0x250 bcm_sf2_sw_remove+0x98/0x13c platform_remove+0x44/0x5c device_release_driver_internal+0x150/0x254 device_links_unbind_consumers+0xf8/0x12c device_release_driver_internal+0x84/0x254 device_driver_detach+0x30/0x34 unbind_store+0x90/0x134 What happens is that devlink_port_unregister emits a netlink DEVLINK_CMD_PORT_DEL message which associates the devlink port that is getting unregistered with the ifindex of its corresponding net_device. Only trouble is, the net_device has already been unregistered. It looks like we can stub out the search for a corresponding net_device if we clear the devlink_port's type. This looks like a bit of a hack, but also seems to be the reason why the devlink_port_type_clear function exists in the first place. Fixes: 3122433eb533 ("net: dsa: Register devlink ports before calling DSA driver setup()") Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Tested-by: Florian fainelli Reported-by: Florian Fainelli Link: https://lore.kernel.org/r/20210112004831.3778323-1-olteanv@gmail.com Signed-off-by: Jakub Kicinski --- net/dsa/dsa2.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 183003e45762a..a47e0f9b20d0a 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -353,9 +353,13 @@ static int dsa_port_devlink_setup(struct dsa_port *dp) static void dsa_port_teardown(struct dsa_port *dp) { + struct devlink_port *dlp = &dp->devlink_port; + if (!dp->setup) return; + devlink_port_type_clear(dlp); + switch (dp->type) { case DSA_PORT_TYPE_UNUSED: break; -- GitLab From cb82a54904a99df9e8f9e9d282046055dae5a730 Mon Sep 17 00:00:00 2001 From: Leon Schuermann Date: Mon, 11 Jan 2021 20:03:13 +0100 Subject: [PATCH 1720/1894] r8152: Add Lenovo Powered USB-C Travel Hub This USB-C Hub (17ef:721e) based on the Realtek RTL8153B chip used to use the cdc_ether driver. However, using this driver, with the system suspended the device constantly sends pause-frames as soon as the receive buffer fills up. This causes issues with other devices, where some Ethernet switches stop forwarding packets altogether. Using the Realtek driver (r8152) fixes this issue. Pause frames are no longer sent while the host system is suspended. Signed-off-by: Leon Schuermann Tested-by: Leon Schuermann Link: https://lore.kernel.org/r/20210111190312.12589-2-leon@is.currently.online Signed-off-by: Jakub Kicinski --- drivers/net/usb/cdc_ether.c | 7 +++++++ drivers/net/usb/r8152.c | 1 + 2 files changed, 8 insertions(+) diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 8c1d61c2cbacb..6aaa0675c28a3 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -793,6 +793,13 @@ static const struct usb_device_id products[] = { .driver_info = 0, }, +/* Lenovo Powered USB-C Travel Hub (4X90S92381, based on Realtek RTL8153) */ +{ + USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0x721e, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = 0, +}, + /* ThinkPad USB-C Dock Gen 2 (based on Realtek RTL8153) */ { USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0xa387, USB_CLASS_COMM, diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index c448d60898216..67cd6986634fb 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -6877,6 +6877,7 @@ static const struct usb_device_id rtl8152_table[] = { {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x7205)}, {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x720c)}, {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x7214)}, + {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x721e)}, {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0xa387)}, {REALTEK_USB_DEVICE(VENDOR_ID_LINKSYS, 0x0041)}, {REALTEK_USB_DEVICE(VENDOR_ID_NVIDIA, 0x09ff)}, -- GitLab From 2284bbd0cf3981462dc6d729c89851c66b05a66a Mon Sep 17 00:00:00 2001 From: Leon Schuermann Date: Mon, 11 Jan 2021 20:03:15 +0100 Subject: [PATCH 1721/1894] r8153_ecm: Add Lenovo Powered USB-C Hub as a fallback of r8152 This commit enables the use of the r8153_ecm driver, introduced with commit c1aedf015ebdd0 ("net/usb/r8153_ecm: support ECM mode for RTL8153") for the Lenovo Powered USB-C Hub (17ef:721e) based on the Realtek RTL8153B chip. This results in the following driver preference: - if r8152 is available, use the r8152 driver - if r8152 is not available, use the r8153_ecm driver This is done to prevent the NIC from constantly sending pause frames when the host system enters standby (fixed by using the r8152 driver in "r8152: Add Lenovo Powered USB-C Travel Hub"), while still allowing the device to work with the r8153_ecm driver as a fallback. Signed-off-by: Leon Schuermann Tested-by: Leon Schuermann Link: https://lore.kernel.org/r/20210111190312.12589-3-leon@is.currently.online Signed-off-by: Jakub Kicinski --- drivers/net/usb/r8153_ecm.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/usb/r8153_ecm.c b/drivers/net/usb/r8153_ecm.c index 2c3fabd38b163..20b2df8d74ae1 100644 --- a/drivers/net/usb/r8153_ecm.c +++ b/drivers/net/usb/r8153_ecm.c @@ -122,12 +122,20 @@ static const struct driver_info r8153_info = { }; static const struct usb_device_id products[] = { +/* Realtek RTL8153 Based USB 3.0 Ethernet Adapters */ { USB_DEVICE_AND_INTERFACE_INFO(VENDOR_ID_REALTEK, 0x8153, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&r8153_info, }, +/* Lenovo Powered USB-C Travel Hub (4X90S92381, based on Realtek RTL8153) */ +{ + USB_DEVICE_AND_INTERFACE_INFO(VENDOR_ID_LENOVO, 0x721e, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&r8153_info, +}, + { }, /* END */ }; MODULE_DEVICE_TABLE(usb, products); -- GitLab From 869c4d5eb1e6fbda66aa790c48bdb946d71494a0 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 11 Jan 2021 04:26:39 -0500 Subject: [PATCH 1722/1894] bnxt_en: Improve stats context resource accounting with RDMA driver loaded. The function bnxt_get_ulp_stat_ctxs() does not count the stats contexts used by the RDMA driver correctly when the RDMA driver is freeing the MSIX vectors. It assumes that if the RDMA driver is registered, the additional stats contexts will be needed. This is not true when the RDMA driver is about to unregister and frees the MSIX vectors. This slight error leads to over accouting of the stats contexts needed after the RDMA driver has unloaded. This will cause some firmware warning and error messages in dmesg during subsequent config. changes or ifdown/ifup. Fix it by properly accouting for extra stats contexts only if the RDMA driver is registered and MSIX vectors have been successfully requested. Fixes: c027c6b4e91f ("bnxt_en: get rid of num_stat_ctxs variable") Reviewed-by: Yongping Zhang Reviewed-by: Pavan Chebbi Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index 8c8368c2f335c..64dbbb04b0434 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -222,8 +222,12 @@ int bnxt_get_ulp_msix_base(struct bnxt *bp) int bnxt_get_ulp_stat_ctxs(struct bnxt *bp) { - if (bnxt_ulp_registered(bp->edev, BNXT_ROCE_ULP)) - return BNXT_MIN_ROCE_STAT_CTXS; + if (bnxt_ulp_registered(bp->edev, BNXT_ROCE_ULP)) { + struct bnxt_en_dev *edev = bp->edev; + + if (edev->ulp_tbl[BNXT_ROCE_ULP].msix_requested) + return BNXT_MIN_ROCE_STAT_CTXS; + } return 0; } -- GitLab From 687487751814a493fba953efb9b1542b2f90614c Mon Sep 17 00:00:00 2001 From: Pavan Chebbi Date: Mon, 11 Jan 2021 04:26:40 -0500 Subject: [PATCH 1723/1894] bnxt_en: Clear DEFRAG flag in firmware message when retry flashing. When the FW tells the driver to retry the INSTALL_UPDATE command after it has cleared the NVM area, the driver is not clearing the previously used ALLOWED_TO_DEFRAG flag. As a result the FW tries to defrag the NVM area a second time in a loop and can fail the request. Fixes: 1432c3f6a6ca ("bnxt_en: Retry installing FW package under NO_SPACE error condition.") Signed-off-by: Pavan Chebbi Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 9ff79d5d14c4c..2f8b193a772da 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -2532,7 +2532,7 @@ int bnxt_flash_package_from_fw_obj(struct net_device *dev, const struct firmware if (rc && ((struct hwrm_err_output *)&resp)->cmd_err == NVM_INSTALL_UPDATE_CMD_ERR_CODE_FRAG_ERR) { - install.flags |= + install.flags = cpu_to_le16(NVM_INSTALL_UPDATE_REQ_FLAGS_ALLOWED_TO_DEFRAG); rc = _hwrm_send_message_silent(bp, &install, @@ -2546,6 +2546,7 @@ int bnxt_flash_package_from_fw_obj(struct net_device *dev, const struct firmware * UPDATE directory and try the flash again */ defrag_attempted = true; + install.flags = 0; rc = __bnxt_flash_nvram(bp->dev, BNX_DIR_TYPE_UPDATE, BNX_DIR_ORDINAL_FIRST, -- GitLab From 20bc80b6f582ad1151c52ca09ab66b472768c9c8 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 12 Jan 2021 18:25:23 +0100 Subject: [PATCH 1724/1894] mptcp: more strict state checking for acks Syzkaller found a way to trigger division by zero in mptcp_subflow_cleanup_rbuf(). The current checks implemented into tcp_can_send_ack() are too week, let's be more accurate. Reported-by: Christoph Paasch Fixes: ea4ca586b16f ("mptcp: refine MPTCP-level ack scheduling") Fixes: fd8976790a6c ("mptcp: be careful on MPTCP-level ack.") Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 6628d8d742030..2ff8c7caf74fc 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -427,7 +427,7 @@ static bool mptcp_subflow_active(struct mptcp_subflow_context *subflow) static bool tcp_can_send_ack(const struct sock *ssk) { return !((1 << inet_sk_state_load(ssk)) & - (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_TIME_WAIT | TCPF_CLOSE)); + (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_TIME_WAIT | TCPF_CLOSE | TCPF_LISTEN)); } static void mptcp_send_ack(struct mptcp_sock *msk) -- GitLab From 76e2a55d16259b51116767b28b19d759bff43f72 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 12 Jan 2021 18:25:24 +0100 Subject: [PATCH 1725/1894] mptcp: better msk-level shutdown. Instead of re-implementing most of inet_shutdown, re-use such helper, and implement the MPTCP-specific bits at the 'proto' level. The msk-level disconnect() can now be invoked, lets provide a suitable implementation. As a side effect, this fixes bad state management for listener sockets. The latter could lead to division by 0 oops since commit ea4ca586b16f ("mptcp: refine MPTCP-level ack scheduling"). Fixes: 43b54c6ee382 ("mptcp: Use full MPTCP-level disconnect state machine") Fixes: ea4ca586b16f ("mptcp: refine MPTCP-level ack scheduling") Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 62 ++++++++++++-------------------------------- 1 file changed, 17 insertions(+), 45 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 2ff8c7caf74fc..81faeff8f3bb7 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2642,11 +2642,12 @@ static void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk) static int mptcp_disconnect(struct sock *sk, int flags) { - /* Should never be called. - * inet_stream_connect() calls ->disconnect, but that - * refers to the subflow socket, not the mptcp one. - */ - WARN_ON_ONCE(1); + struct mptcp_subflow_context *subflow; + struct mptcp_sock *msk = mptcp_sk(sk); + + __mptcp_flush_join_list(msk); + mptcp_for_each_subflow(msk, subflow) + tcp_disconnect(mptcp_subflow_tcp_sock(subflow), flags); return 0; } @@ -3089,6 +3090,14 @@ bool mptcp_finish_join(struct sock *ssk) return true; } +static void mptcp_shutdown(struct sock *sk, int how) +{ + pr_debug("sk=%p, how=%d", sk, how); + + if ((how & SEND_SHUTDOWN) && mptcp_close_state(sk)) + __mptcp_wr_shutdown(sk); +} + static struct proto mptcp_prot = { .name = "MPTCP", .owner = THIS_MODULE, @@ -3098,7 +3107,7 @@ static struct proto mptcp_prot = { .accept = mptcp_accept, .setsockopt = mptcp_setsockopt, .getsockopt = mptcp_getsockopt, - .shutdown = tcp_shutdown, + .shutdown = mptcp_shutdown, .destroy = mptcp_destroy, .sendmsg = mptcp_sendmsg, .recvmsg = mptcp_recvmsg, @@ -3344,43 +3353,6 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock, return mask; } -static int mptcp_shutdown(struct socket *sock, int how) -{ - struct mptcp_sock *msk = mptcp_sk(sock->sk); - struct sock *sk = sock->sk; - int ret = 0; - - pr_debug("sk=%p, how=%d", msk, how); - - lock_sock(sk); - - how++; - if ((how & ~SHUTDOWN_MASK) || !how) { - ret = -EINVAL; - goto out_unlock; - } - - if (sock->state == SS_CONNECTING) { - if ((1 << sk->sk_state) & - (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_CLOSE)) - sock->state = SS_DISCONNECTING; - else - sock->state = SS_CONNECTED; - } - - sk->sk_shutdown |= how; - if ((how & SEND_SHUTDOWN) && mptcp_close_state(sk)) - __mptcp_wr_shutdown(sk); - - /* Wake up anyone sleeping in poll. */ - sk->sk_state_change(sk); - -out_unlock: - release_sock(sk); - - return ret; -} - static const struct proto_ops mptcp_stream_ops = { .family = PF_INET, .owner = THIS_MODULE, @@ -3394,7 +3366,7 @@ static const struct proto_ops mptcp_stream_ops = { .ioctl = inet_ioctl, .gettstamp = sock_gettstamp, .listen = mptcp_listen, - .shutdown = mptcp_shutdown, + .shutdown = inet_shutdown, .setsockopt = sock_common_setsockopt, .getsockopt = sock_common_getsockopt, .sendmsg = inet_sendmsg, @@ -3444,7 +3416,7 @@ static const struct proto_ops mptcp_v6_stream_ops = { .ioctl = inet6_ioctl, .gettstamp = sock_gettstamp, .listen = mptcp_listen, - .shutdown = mptcp_shutdown, + .shutdown = inet_shutdown, .setsockopt = sock_common_setsockopt, .getsockopt = sock_common_getsockopt, .sendmsg = inet6_sendmsg, -- GitLab From 7cd1af107a92eb63b93a96dc07406dcbc5269436 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Fri, 18 Dec 2020 16:20:51 -0800 Subject: [PATCH 1726/1894] riscv: Trace irq on only interrupt is enabled We should call irq trace only if interrupt is going to be enabled during excecption handling. Otherwise, it results in following warning during boot with lock debugging enabled. [ 0.000000] ------------[ cut here ]------------ [ 0.000000] DEBUG_LOCKS_WARN_ON(early_boot_irqs_disabled) [ 0.000000] WARNING: CPU: 0 PID: 0 at kernel/locking/lockdep.c:4085 lockdep_hardirqs_on_prepare+0x22a/0x22e [ 0.000000] Modules linked in: [ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 5.10.0-00022-ge20097fb37e2-dirty #548 [ 0.000000] epc: c005d5d4 ra : c005d5d4 sp : c1c01e80 [ 0.000000] gp : c1d456e0 tp : c1c0a980 t0 : 00000000 [ 0.000000] t1 : ffffffff t2 : 00000000 s0 : c1c01ea0 [ 0.000000] s1 : c100f360 a0 : 0000002d a1 : c00666ee [ 0.000000] a2 : 00000000 a3 : 00000000 a4 : 00000000 [ 0.000000] a5 : 00000000 a6 : c1c6b390 a7 : 3ffff00e [ 0.000000] s2 : c2384fe8 s3 : 00000000 s4 : 00000001 [ 0.000000] s5 : c1c0a980 s6 : c1d48000 s7 : c1613b4c [ 0.000000] s8 : 00000fff s9 : 80000200 s10: c1613b40 [ 0.000000] s11: 00000000 t3 : 00000000 t4 : 00000000 [ 0.000000] t5 : 00000001 t6 : 00000000 Fixes: 3c4697982982 ("riscv:Enable LOCKDEP_SUPPORT & fixup TRACE_IRQFLAGS_SUPPORT") Signed-off-by: Atish Patra Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/entry.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 48de316c68c18..744f3209c48d0 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -124,15 +124,15 @@ skip_context_tracking: REG_L a1, (a1) jr a1 1: -#ifdef CONFIG_TRACE_IRQFLAGS - call trace_hardirqs_on -#endif /* * Exceptions run with interrupts enabled or disabled depending on the * state of SR_PIE in m/sstatus. */ andi t0, s1, SR_PIE beqz t0, 1f +#ifdef CONFIG_TRACE_IRQFLAGS + call trace_hardirqs_on +#endif csrs CSR_STATUS, SR_IE 1: -- GitLab From 25fe2c9c4cd2e97c5f5b69f3aefe69aad3057936 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 12 Jan 2021 17:21:21 +0100 Subject: [PATCH 1727/1894] smc: fix out of bound access in smc_nl_get_sys_info() smc_clc_get_hostname() sets the host pointer to a buffer which is not NULL-terminated (see smc_clc_init()). Reported-by: syzbot+f4708c391121cfc58396@syzkaller.appspotmail.com Fixes: 099b990bd11a ("net/smc: Add support for obtaining system information") Signed-off-by: Karsten Graul Signed-off-by: Jakub Kicinski --- net/smc/smc_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 59342b519e347..8d866b4ed8f61 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -246,7 +246,8 @@ int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb) goto errattr; smc_clc_get_hostname(&host); if (host) { - snprintf(hostname, sizeof(hostname), "%s", host); + memcpy(hostname, host, SMC_MAX_HOSTNAME_LEN); + hostname[SMC_MAX_HOSTNAME_LEN] = 0; if (nla_put_string(skb, SMC_NLA_SYS_LOCAL_HOST, hostname)) goto errattr; } -- GitLab From 8a4465368964b4fbaf084760c94c7aabf61059fb Mon Sep 17 00:00:00 2001 From: Guvenc Gulce Date: Tue, 12 Jan 2021 17:21:22 +0100 Subject: [PATCH 1728/1894] net/smc: use memcpy instead of snprintf to avoid out of bounds read Using snprintf() to convert not null-terminated strings to null terminated strings may cause out of bounds read in the source string. Therefore use memcpy() and terminate the target string with a null afterwards. Fixes: a3db10efcc4c ("net/smc: Add support for obtaining SMCR device list") Signed-off-by: Guvenc Gulce Signed-off-by: Karsten Graul Signed-off-by: Jakub Kicinski --- net/smc/smc_core.c | 17 +++++++++++------ net/smc/smc_ib.c | 6 +++--- net/smc/smc_ism.c | 3 ++- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 8d866b4ed8f61..0df85a12651e9 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -258,7 +258,8 @@ int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb) smc_ism_get_system_eid(smcd_dev, &seid); mutex_unlock(&smcd_dev_list.mutex); if (seid && smc_ism_is_v2_capable()) { - snprintf(smc_seid, sizeof(smc_seid), "%s", seid); + memcpy(smc_seid, seid, SMC_MAX_EID_LEN); + smc_seid[SMC_MAX_EID_LEN] = 0; if (nla_put_string(skb, SMC_NLA_SYS_SEID, smc_seid)) goto errattr; } @@ -296,7 +297,8 @@ static int smc_nl_fill_lgr(struct smc_link_group *lgr, goto errattr; if (nla_put_u8(skb, SMC_NLA_LGR_R_VLAN_ID, lgr->vlan_id)) goto errattr; - snprintf(smc_target, sizeof(smc_target), "%s", lgr->pnet_id); + memcpy(smc_target, lgr->pnet_id, SMC_MAX_PNETID_LEN); + smc_target[SMC_MAX_PNETID_LEN] = 0; if (nla_put_string(skb, SMC_NLA_LGR_R_PNETID, smc_target)) goto errattr; @@ -313,7 +315,7 @@ static int smc_nl_fill_lgr_link(struct smc_link_group *lgr, struct sk_buff *skb, struct netlink_callback *cb) { - char smc_ibname[IB_DEVICE_NAME_MAX + 1]; + char smc_ibname[IB_DEVICE_NAME_MAX]; u8 smc_gid_target[41]; struct nlattr *attrs; u32 link_uid = 0; @@ -462,7 +464,8 @@ static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr, goto errattr; if (nla_put_u32(skb, SMC_NLA_LGR_D_CHID, smc_ism_get_chid(lgr->smcd))) goto errattr; - snprintf(smc_pnet, sizeof(smc_pnet), "%s", lgr->smcd->pnetid); + memcpy(smc_pnet, lgr->smcd->pnetid, SMC_MAX_PNETID_LEN); + smc_pnet[SMC_MAX_PNETID_LEN] = 0; if (nla_put_string(skb, SMC_NLA_LGR_D_PNETID, smc_pnet)) goto errattr; @@ -475,10 +478,12 @@ static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr, goto errv2attr; if (nla_put_u8(skb, SMC_NLA_LGR_V2_OS, lgr->peer_os)) goto errv2attr; - snprintf(smc_host, sizeof(smc_host), "%s", lgr->peer_hostname); + memcpy(smc_host, lgr->peer_hostname, SMC_MAX_HOSTNAME_LEN); + smc_host[SMC_MAX_HOSTNAME_LEN] = 0; if (nla_put_string(skb, SMC_NLA_LGR_V2_PEER_HOST, smc_host)) goto errv2attr; - snprintf(smc_eid, sizeof(smc_eid), "%s", lgr->negotiated_eid); + memcpy(smc_eid, lgr->negotiated_eid, SMC_MAX_EID_LEN); + smc_eid[SMC_MAX_EID_LEN] = 0; if (nla_put_string(skb, SMC_NLA_LGR_V2_NEG_EID, smc_eid)) goto errv2attr; diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index ddd7fac98b1d6..7d7ba0320d5ae 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -371,8 +371,8 @@ static int smc_nl_handle_dev_port(struct sk_buff *skb, if (nla_put_u8(skb, SMC_NLA_DEV_PORT_PNET_USR, smcibdev->pnetid_by_user[port])) goto errattr; - snprintf(smc_pnet, sizeof(smc_pnet), "%s", - (char *)&smcibdev->pnetid[port]); + memcpy(smc_pnet, &smcibdev->pnetid[port], SMC_MAX_PNETID_LEN); + smc_pnet[SMC_MAX_PNETID_LEN] = 0; if (nla_put_string(skb, SMC_NLA_DEV_PORT_PNETID, smc_pnet)) goto errattr; if (nla_put_u32(skb, SMC_NLA_DEV_PORT_NETDEV, @@ -414,7 +414,7 @@ static int smc_nl_handle_smcr_dev(struct smc_ib_device *smcibdev, struct sk_buff *skb, struct netlink_callback *cb) { - char smc_ibname[IB_DEVICE_NAME_MAX + 1]; + char smc_ibname[IB_DEVICE_NAME_MAX]; struct smc_pci_dev smc_pci_dev; struct pci_dev *pci_dev; unsigned char is_crit; diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c index 524ef64a191a5..9c6e95882553e 100644 --- a/net/smc/smc_ism.c +++ b/net/smc/smc_ism.c @@ -250,7 +250,8 @@ static int smc_nl_handle_smcd_dev(struct smcd_dev *smcd, goto errattr; if (nla_put_u8(skb, SMC_NLA_DEV_PORT_PNET_USR, smcd->pnetid_by_user)) goto errportattr; - snprintf(smc_pnet, sizeof(smc_pnet), "%s", smcd->pnetid); + memcpy(smc_pnet, smcd->pnetid, SMC_MAX_PNETID_LEN); + smc_pnet[SMC_MAX_PNETID_LEN] = 0; if (nla_put_string(skb, SMC_NLA_DEV_PORT_PNETID, smc_pnet)) goto errportattr; -- GitLab From 80709af7325d179b433817f421c85449f2454046 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Wed, 23 Dec 2020 00:01:52 +0800 Subject: [PATCH 1729/1894] riscv: cacheinfo: Fix using smp_processor_id() in preemptible Use raw_smp_processor_id instead of smp_processor_id() to fix warning, BUG: using smp_processor_id() in preemptible [00000000] code: init/1 caller is debug_smp_processor_id+0x1c/0x26 CPU: 0 PID: 1 Comm: init Not tainted 5.10.0-rc4 #211 Call Trace: walk_stackframe+0x0/0xaa show_stack+0x32/0x3e dump_stack+0x76/0x90 check_preemption_disabled+0xaa/0xac debug_smp_processor_id+0x1c/0x26 get_cache_size+0x18/0x68 load_elf_binary+0x868/0xece bprm_execve+0x224/0x498 kernel_execve+0xdc/0x142 run_init_process+0x90/0x9e try_to_run_init_process+0x12/0x3c kernel_init+0xb4/0xf8 ret_from_exception+0x0/0xc The issue is found when CONFIG_DEBUG_PREEMPT enabled. Reviewed-by: Atish Patra Tested-by: Atish Patra Signed-off-by: Kefeng Wang [Palmer: Added a comment.] Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/cacheinfo.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c index de59dd457b415..d867813570442 100644 --- a/arch/riscv/kernel/cacheinfo.c +++ b/arch/riscv/kernel/cacheinfo.c @@ -26,7 +26,16 @@ cache_get_priv_group(struct cacheinfo *this_leaf) static struct cacheinfo *get_cacheinfo(u32 level, enum cache_type type) { - struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(smp_processor_id()); + /* + * Using raw_smp_processor_id() elides a preemptability check, but this + * is really indicative of a larger problem: the cacheinfo UABI assumes + * that cores have a homonogenous view of the cache hierarchy. That + * happens to be the case for the current set of RISC-V systems, but + * likely won't be true in general. Since there's no way to provide + * correct information for these systems via the current UABI we're + * just eliding the check for now. + */ + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(raw_smp_processor_id()); struct cacheinfo *this_leaf; int index; -- GitLab From 0aa2ec8a475fb505fd98d93bbcf4e03beeeebcb6 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Sat, 2 Jan 2021 13:24:34 +0000 Subject: [PATCH 1730/1894] riscv: Fixup CONFIG_GENERIC_TIME_VSYSCALL The patch fix commit: ad5d112 ("riscv: use vDSO common flow to reduce the latency of the time-related functions"). The GENERIC_TIME_VSYSCALL should be CONFIG_GENERIC_TIME_VSYSCALL or vgettimeofday won't work. Signed-off-by: Guo Ren Reviewed-by: Pekka Enberg Fixes: ad5d1122b82f ("riscv: use vDSO common flow to reduce the latency of the time-related functions") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/vdso.h | 2 +- arch/riscv/kernel/vdso.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/asm/vdso.h b/arch/riscv/include/asm/vdso.h index 8454f746bbfd0..1453a2f563bcc 100644 --- a/arch/riscv/include/asm/vdso.h +++ b/arch/riscv/include/asm/vdso.h @@ -10,7 +10,7 @@ #include -#ifndef GENERIC_TIME_VSYSCALL +#ifndef CONFIG_GENERIC_TIME_VSYSCALL struct vdso_data { }; #endif diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c index 678204231700c..3f1d35e7c98a6 100644 --- a/arch/riscv/kernel/vdso.c +++ b/arch/riscv/kernel/vdso.c @@ -12,7 +12,7 @@ #include #include #include -#ifdef GENERIC_TIME_VSYSCALL +#ifdef CONFIG_GENERIC_TIME_VSYSCALL #include #else #include -- GitLab From 69e976831cd53f9ba304fd20305b2025ecc78eab Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Sun, 10 Jan 2021 14:21:05 +0000 Subject: [PATCH 1731/1894] MIPS: relocatable: fix possible boot hangup with KASLR enabled LLVM-built Linux triggered a boot hangup with KASLR enabled. arch/mips/kernel/relocate.c:get_random_boot() uses linux_banner, which is a string constant, as a random seed, but accesses it as an array of unsigned long (in rotate_xor()). When the address of linux_banner is not aligned to sizeof(long), such access emits unaligned access exception and hangs the kernel. Use PTR_ALIGN() to align input address to sizeof(long) and also align down the input length to prevent possible access-beyond-end. Fixes: 405bc8fd12f5 ("MIPS: Kernel: Implement KASLR using CONFIG_RELOCATABLE") Cc: stable@vger.kernel.org # 4.7+ Signed-off-by: Alexander Lobakin Tested-by: Nathan Chancellor Reviewed-by: Kees Cook Signed-off-by: Thomas Bogendoerfer --- arch/mips/kernel/relocate.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/mips/kernel/relocate.c b/arch/mips/kernel/relocate.c index 47aeb3350a760..0e365b7c742d9 100644 --- a/arch/mips/kernel/relocate.c +++ b/arch/mips/kernel/relocate.c @@ -187,8 +187,14 @@ static int __init relocate_exception_table(long offset) static inline __init unsigned long rotate_xor(unsigned long hash, const void *area, size_t size) { - size_t i; - unsigned long *ptr = (unsigned long *)area; + const typeof(hash) *ptr = PTR_ALIGN(area, sizeof(hash)); + size_t diff, i; + + diff = (void *)ptr - area; + if (unlikely(size < diff + sizeof(hash))) + return hash; + + size = ALIGN_DOWN(size - diff, sizeof(hash)); for (i = 0; i < size / sizeof(hash); i++) { /* Rotate by odd number of bits and XOR. */ -- GitLab From 7b490a8ab0f2d3ab8d838a4ff22ae86edafd34a1 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Mon, 11 Jan 2021 05:27:25 -0800 Subject: [PATCH 1732/1894] MIPS: OCTEON: fix unreachable code in octeon_irq_init_ciu The type of 'r' in octeon_irq_init_ciu is 'unsigned int', so 'r < 0' can't be true. Fix this by change the type of 'r' and 'i' from 'unsigned int' to 'int'. As 'i' won't be negative, this change works. Fixes: 99fbc70f8547 ("MIPS: Octeon: irq: Alloc desc before configuring IRQ") Signed-off-by: Menglong Dong Reviewed-by: Alexander Sverdlin Signed-off-by: Thomas Bogendoerfer --- arch/mips/cavium-octeon/octeon-irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c index bd47e15d02c73..be5d4afcd30f9 100644 --- a/arch/mips/cavium-octeon/octeon-irq.c +++ b/arch/mips/cavium-octeon/octeon-irq.c @@ -1444,7 +1444,7 @@ static void octeon_irq_setup_secondary_ciu2(void) static int __init octeon_irq_init_ciu( struct device_node *ciu_node, struct device_node *parent) { - unsigned int i, r; + int i, r; struct irq_chip *chip; struct irq_chip *chip_edge; struct irq_chip *chip_mbox; -- GitLab From ef3a575baf53571dc405ee4028e26f50856898e7 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Tue, 12 Jan 2021 12:53:58 +0100 Subject: [PATCH 1733/1894] xen/privcmd: allow fetching resource sizes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow issuing an IOCTL_PRIVCMD_MMAP_RESOURCE ioctl with num = 0 and addr = 0 in order to fetch the size of a specific resource. Add a shortcut to the default map resource path, since fetching the size requires no address to be passed in, and thus no VMA to setup. This is missing from the initial implementation, and causes issues when mapping resources that don't have fixed or known sizes. Signed-off-by: Roger Pau Monné Reviewed-by: Juergen Gross Tested-by: Andrew Cooper Cc: stable@vger.kernel.org # >= 4.18 Link: https://lore.kernel.org/r/20210112115358.23346-1-roger.pau@citrix.com Signed-off-by: Juergen Gross --- drivers/xen/privcmd.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index b0c73c58f9874..720a7b7abd46d 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -717,14 +717,15 @@ static long privcmd_ioctl_restrict(struct file *file, void __user *udata) return 0; } -static long privcmd_ioctl_mmap_resource(struct file *file, void __user *udata) +static long privcmd_ioctl_mmap_resource(struct file *file, + struct privcmd_mmap_resource __user *udata) { struct privcmd_data *data = file->private_data; struct mm_struct *mm = current->mm; struct vm_area_struct *vma; struct privcmd_mmap_resource kdata; xen_pfn_t *pfns = NULL; - struct xen_mem_acquire_resource xdata; + struct xen_mem_acquire_resource xdata = { }; int rc; if (copy_from_user(&kdata, udata, sizeof(kdata))) @@ -734,6 +735,22 @@ static long privcmd_ioctl_mmap_resource(struct file *file, void __user *udata) if (data->domid != DOMID_INVALID && data->domid != kdata.dom) return -EPERM; + /* Both fields must be set or unset */ + if (!!kdata.addr != !!kdata.num) + return -EINVAL; + + xdata.domid = kdata.dom; + xdata.type = kdata.type; + xdata.id = kdata.id; + + if (!kdata.addr && !kdata.num) { + /* Query the size of the resource. */ + rc = HYPERVISOR_memory_op(XENMEM_acquire_resource, &xdata); + if (rc) + return rc; + return __put_user(xdata.nr_frames, &udata->num); + } + mmap_write_lock(mm); vma = find_vma(mm, kdata.addr); @@ -768,10 +785,6 @@ static long privcmd_ioctl_mmap_resource(struct file *file, void __user *udata) } else vma->vm_private_data = PRIV_VMA_LOCKED; - memset(&xdata, 0, sizeof(xdata)); - xdata.domid = kdata.dom; - xdata.type = kdata.type; - xdata.id = kdata.id; xdata.frame = kdata.idx; xdata.nr_frames = kdata.num; set_xen_guest_handle(xdata.frame_list, pfns); -- GitLab From df06824767cc9a32fbdb0e3d3b7e169292a5b5fe Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 7 Jan 2021 14:53:10 +0000 Subject: [PATCH 1734/1894] arm64: entry: remove redundant IRQ flag tracing All EL0 returns go via ret_to_user(), which masks IRQs and notifies lockdep and tracing before calling into do_notify_resume(). Therefore, there's no need for do_notify_resume() to call trace_hardirqs_off(), and the comment is stale. The call is simply redundant. In ret_to_user() we call exit_to_user_mode(), which notifies lockdep and tracing the IRQs will be enabled in userspace, so there's no need for el0_svc_common() to call trace_hardirqs_on() before returning. Further, at the start of ret_to_user() we call trace_hardirqs_off(), so not only is this redundant, but it is immediately undone. In addition to being redundant, the trace_hardirqs_on() in el0_svc_common() leaves lockdep inconsistent with the hardware state, and is liable to cause issues for any C code or instrumentation between this and the call to trace_hardirqs_off() which undoes it in ret_to_user(). This patch removes the redundant tracing calls and associated stale comments. Fixes: 23529049c684 ("arm64: entry: fix non-NMI user<->kernel transitions") Signed-off-by: Mark Rutland Acked-by: Will Deacon Cc: James Morse Cc: Will Deacon Link: https://lore.kernel.org/r/20210107145310.44616-1-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/signal.c | 7 ------- arch/arm64/kernel/syscall.c | 9 +-------- 2 files changed, 1 insertion(+), 15 deletions(-) diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index f71d6ce4673f5..6237486ff6bb7 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -914,13 +914,6 @@ static void do_signal(struct pt_regs *regs) asmlinkage void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags) { - /* - * The assembly code enters us with IRQs off, but it hasn't - * informed the tracing code of that for efficiency reasons. - * Update the trace code with the current status. - */ - trace_hardirqs_off(); - do { if (thread_flags & _TIF_NEED_RESCHED) { /* Unmask Debug and SError for the next task */ diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index f61e9d8cc55a1..0bfac95fe4643 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -165,15 +165,8 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, if (!has_syscall_work(flags) && !IS_ENABLED(CONFIG_DEBUG_RSEQ)) { local_daif_mask(); flags = current_thread_info()->flags; - if (!has_syscall_work(flags) && !(flags & _TIF_SINGLESTEP)) { - /* - * We're off to userspace, where interrupts are - * always enabled after we restore the flags from - * the SPSR. - */ - trace_hardirqs_on(); + if (!has_syscall_work(flags) && !(flags & _TIF_SINGLESTEP)) return; - } local_daif_restore(DAIF_PROCCTX); } -- GitLab From b90d72a6bfdb5e5c62cd223a8cdf4045bfbcb94d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 12 Jan 2021 22:18:55 +0000 Subject: [PATCH 1735/1894] Revert "arm64: Enable perf events based hard lockup detector" This reverts commit 367c820ef08082e68df8a3bc12e62393af21e4b5. lockup_detector_init() makes heavy use of per-cpu variables and must be called with preemption disabled. Usually, it's handled early during boot in kernel_init_freeable(), before SMP has been initialised. Since we do not know whether or not our PMU interrupt can be signalled as an NMI until considerably later in the boot process, the Arm PMU driver attempts to re-initialise the lockup detector off the back of a device_initcall(). Unfortunately, this is called from preemptible context and results in the following splat: | BUG: using smp_processor_id() in preemptible [00000000] code: swapper/0/1 | caller is debug_smp_processor_id+0x20/0x2c | CPU: 2 PID: 1 Comm: swapper/0 Not tainted 5.10.0+ #276 | Hardware name: linux,dummy-virt (DT) | Call trace: | dump_backtrace+0x0/0x3c0 | show_stack+0x20/0x6c | dump_stack+0x2f0/0x42c | check_preemption_disabled+0x1cc/0x1dc | debug_smp_processor_id+0x20/0x2c | hardlockup_detector_event_create+0x34/0x18c | hardlockup_detector_perf_init+0x2c/0x134 | watchdog_nmi_probe+0x18/0x24 | lockup_detector_init+0x44/0xa8 | armv8_pmu_driver_init+0x54/0x78 | do_one_initcall+0x184/0x43c | kernel_init_freeable+0x368/0x380 | kernel_init+0x1c/0x1cc | ret_from_fork+0x10/0x30 Rather than bodge this with raw_smp_processor_id() or randomly disabling preemption, simply revert the culprit for now until we figure out how to do this properly. Reported-by: Lecopzer Chen Signed-off-by: Will Deacon Acked-by: Mark Rutland Cc: Sumit Garg Cc: Alexandru Elisei Link: https://lore.kernel.org/r/20201221162249.3119-1-lecopzer.chen@mediatek.com Link: https://lore.kernel.org/r/20210112221855.10666-1-will@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 2 -- arch/arm64/kernel/perf_event.c | 41 ++-------------------------------- drivers/perf/arm_pmu.c | 5 ----- include/linux/perf/arm_pmu.h | 2 -- 4 files changed, 2 insertions(+), 48 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 05e17351e4f33..f39568b28ec1c 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -174,8 +174,6 @@ config ARM64 select HAVE_NMI select HAVE_PATA_PLATFORM select HAVE_PERF_EVENTS - select HAVE_PERF_EVENTS_NMI if ARM64_PSEUDO_NMI && HW_PERF_EVENTS - select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 38bb07eff8720..3605f77ad4df1 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -23,8 +23,6 @@ #include #include #include -#include -#include /* ARMv8 Cortex-A53 specific event types. */ #define ARMV8_A53_PERFCTR_PREF_LINEFILL 0xC2 @@ -1250,21 +1248,10 @@ static struct platform_driver armv8_pmu_driver = { static int __init armv8_pmu_driver_init(void) { - int ret; - if (acpi_disabled) - ret = platform_driver_register(&armv8_pmu_driver); + return platform_driver_register(&armv8_pmu_driver); else - ret = arm_pmu_acpi_probe(armv8_pmuv3_init); - - /* - * Try to re-initialize lockup detector after PMU init in - * case PMU events are triggered via NMIs. - */ - if (ret == 0 && arm_pmu_irq_is_nmi()) - lockup_detector_init(); - - return ret; + return arm_pmu_acpi_probe(armv8_pmuv3_init); } device_initcall(armv8_pmu_driver_init) @@ -1322,27 +1309,3 @@ void arch_perf_update_userpage(struct perf_event *event, userpg->cap_user_time_zero = 1; userpg->cap_user_time_short = 1; } - -#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF -/* - * Safe maximum CPU frequency in case a particular platform doesn't implement - * cpufreq driver. Although, architecture doesn't put any restrictions on - * maximum frequency but 5 GHz seems to be safe maximum given the available - * Arm CPUs in the market which are clocked much less than 5 GHz. On the other - * hand, we can't make it much higher as it would lead to a large hard-lockup - * detection timeout on parts which are running slower (eg. 1GHz on - * Developerbox) and doesn't possess a cpufreq driver. - */ -#define SAFE_MAX_CPU_FREQ 5000000000UL // 5 GHz -u64 hw_nmi_get_sample_period(int watchdog_thresh) -{ - unsigned int cpu = smp_processor_id(); - unsigned long max_cpu_freq; - - max_cpu_freq = cpufreq_get_hw_max_freq(cpu) * 1000UL; - if (!max_cpu_freq) - max_cpu_freq = SAFE_MAX_CPU_FREQ; - - return (u64)max_cpu_freq * watchdog_thresh; -} -#endif diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 794a37d508537..cb2f55f450e4a 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -726,11 +726,6 @@ static int armpmu_get_cpu_irq(struct arm_pmu *pmu, int cpu) return per_cpu(hw_events->irq, cpu); } -bool arm_pmu_irq_is_nmi(void) -{ - return has_nmi; -} - /* * PMU hardware loses all context when a CPU goes offline. * When a CPU is hotplugged back in, since some hardware registers are diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index bf7966776c557..505480217cf1a 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -163,8 +163,6 @@ int arm_pmu_acpi_probe(armpmu_init_fn init_fn); static inline int arm_pmu_acpi_probe(armpmu_init_fn init_fn) { return 0; } #endif -bool arm_pmu_irq_is_nmi(void); - /* Internal functions only for core arm_pmu code */ struct arm_pmu *armpmu_alloc(void); struct arm_pmu *armpmu_alloc_atomic(void); -- GitLab From 71e70184f1d1314ad56e834d1befc07daa2af8e6 Mon Sep 17 00:00:00 2001 From: Jianlin Lv Date: Tue, 12 Jan 2021 09:58:13 +0800 Subject: [PATCH 1736/1894] arm64: rename S_FRAME_SIZE to PT_REGS_SIZE S_FRAME_SIZE is the size of the pt_regs structure, no longer the size of the kernel stack frame, the name is misleading. In keeping with arm32, rename S_FRAME_SIZE to PT_REGS_SIZE. Signed-off-by: Jianlin Lv Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20210112015813.2340969-1-Jianlin.Lv@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/asm-offsets.c | 2 +- arch/arm64/kernel/entry-ftrace.S | 12 ++++++------ arch/arm64/kernel/entry.S | 14 +++++++------- arch/arm64/kernel/probes/kprobes_trampoline.S | 6 +++--- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index f42fd9e339815..3017844635874 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -75,7 +75,7 @@ int main(void) DEFINE(S_SDEI_TTBR1, offsetof(struct pt_regs, sdei_ttbr1)); DEFINE(S_PMR_SAVE, offsetof(struct pt_regs, pmr_save)); DEFINE(S_STACKFRAME, offsetof(struct pt_regs, stackframe)); - DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs)); + DEFINE(PT_REGS_SIZE, sizeof(struct pt_regs)); BLANK(); #ifdef CONFIG_COMPAT DEFINE(COMPAT_SIGFRAME_REGS_OFFSET, offsetof(struct compat_sigframe, uc.uc_mcontext.arm_r0)); diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S index a338f40e64d39..b3e4f9a088b1a 100644 --- a/arch/arm64/kernel/entry-ftrace.S +++ b/arch/arm64/kernel/entry-ftrace.S @@ -35,7 +35,7 @@ */ .macro ftrace_regs_entry, allregs=0 /* Make room for pt_regs, plus a callee frame */ - sub sp, sp, #(S_FRAME_SIZE + 16) + sub sp, sp, #(PT_REGS_SIZE + 16) /* Save function arguments (and x9 for simplicity) */ stp x0, x1, [sp, #S_X0] @@ -61,15 +61,15 @@ .endif /* Save the callsite's SP and LR */ - add x10, sp, #(S_FRAME_SIZE + 16) + add x10, sp, #(PT_REGS_SIZE + 16) stp x9, x10, [sp, #S_LR] /* Save the PC after the ftrace callsite */ str x30, [sp, #S_PC] /* Create a frame record for the callsite above pt_regs */ - stp x29, x9, [sp, #S_FRAME_SIZE] - add x29, sp, #S_FRAME_SIZE + stp x29, x9, [sp, #PT_REGS_SIZE] + add x29, sp, #PT_REGS_SIZE /* Create our frame record within pt_regs. */ stp x29, x30, [sp, #S_STACKFRAME] @@ -120,7 +120,7 @@ ftrace_common_return: ldr x9, [sp, #S_PC] /* Restore the callsite's SP */ - add sp, sp, #S_FRAME_SIZE + 16 + add sp, sp, #PT_REGS_SIZE + 16 ret x9 SYM_CODE_END(ftrace_common) @@ -130,7 +130,7 @@ SYM_CODE_START(ftrace_graph_caller) ldr x0, [sp, #S_PC] sub x0, x0, #AARCH64_INSN_SIZE // ip (callsite's BL insn) add x1, sp, #S_LR // parent_ip (callsite's LR) - ldr x2, [sp, #S_FRAME_SIZE] // parent fp (callsite's FP) + ldr x2, [sp, #PT_REGS_SIZE] // parent fp (callsite's FP) bl prepare_ftrace_return b ftrace_common_return SYM_CODE_END(ftrace_graph_caller) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index a8c3e7aaca749..c9bae73f2621a 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -75,7 +75,7 @@ alternative_else_nop_endif .endif #endif - sub sp, sp, #S_FRAME_SIZE + sub sp, sp, #PT_REGS_SIZE #ifdef CONFIG_VMAP_STACK /* * Test whether the SP has overflowed, without corrupting a GPR. @@ -96,7 +96,7 @@ alternative_else_nop_endif * userspace, and can clobber EL0 registers to free up GPRs. */ - /* Stash the original SP (minus S_FRAME_SIZE) in tpidr_el0. */ + /* Stash the original SP (minus PT_REGS_SIZE) in tpidr_el0. */ msr tpidr_el0, x0 /* Recover the original x0 value and stash it in tpidrro_el0 */ @@ -253,7 +253,7 @@ alternative_else_nop_endif scs_load tsk, x20 .else - add x21, sp, #S_FRAME_SIZE + add x21, sp, #PT_REGS_SIZE get_current_task tsk .endif /* \el == 0 */ mrs x22, elr_el1 @@ -377,7 +377,7 @@ alternative_else_nop_endif ldp x26, x27, [sp, #16 * 13] ldp x28, x29, [sp, #16 * 14] ldr lr, [sp, #S_LR] - add sp, sp, #S_FRAME_SIZE // restore sp + add sp, sp, #PT_REGS_SIZE // restore sp .if \el == 0 alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0 @@ -580,12 +580,12 @@ __bad_stack: /* * Store the original GPRs to the new stack. The orginal SP (minus - * S_FRAME_SIZE) was stashed in tpidr_el0 by kernel_ventry. + * PT_REGS_SIZE) was stashed in tpidr_el0 by kernel_ventry. */ - sub sp, sp, #S_FRAME_SIZE + sub sp, sp, #PT_REGS_SIZE kernel_entry 1 mrs x0, tpidr_el0 - add x0, x0, #S_FRAME_SIZE + add x0, x0, #PT_REGS_SIZE str x0, [sp, #S_SP] /* Stash the regs for handle_bad_stack */ diff --git a/arch/arm64/kernel/probes/kprobes_trampoline.S b/arch/arm64/kernel/probes/kprobes_trampoline.S index 890ca72c5a514..288a84e253ccb 100644 --- a/arch/arm64/kernel/probes/kprobes_trampoline.S +++ b/arch/arm64/kernel/probes/kprobes_trampoline.S @@ -25,7 +25,7 @@ stp x24, x25, [sp, #S_X24] stp x26, x27, [sp, #S_X26] stp x28, x29, [sp, #S_X28] - add x0, sp, #S_FRAME_SIZE + add x0, sp, #PT_REGS_SIZE stp lr, x0, [sp, #S_LR] /* * Construct a useful saved PSTATE @@ -62,7 +62,7 @@ .endm SYM_CODE_START(kretprobe_trampoline) - sub sp, sp, #S_FRAME_SIZE + sub sp, sp, #PT_REGS_SIZE save_all_base_regs @@ -76,7 +76,7 @@ SYM_CODE_START(kretprobe_trampoline) restore_all_base_regs - add sp, sp, #S_FRAME_SIZE + add sp, sp, #PT_REGS_SIZE ret SYM_CODE_END(kretprobe_trampoline) -- GitLab From c35a824c31834d947fb99b0c608c1b9f922b4ba0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 8 Jan 2021 10:19:56 +0100 Subject: [PATCH 1737/1894] arm64: make atomic helpers __always_inline With UBSAN enabled and building with clang, there are occasionally warnings like WARNING: modpost: vmlinux.o(.text+0xc533ec): Section mismatch in reference from the function arch_atomic64_or() to the variable .init.data:numa_nodes_parsed The function arch_atomic64_or() references the variable __initdata numa_nodes_parsed. This is often because arch_atomic64_or lacks a __initdata annotation or the annotation of numa_nodes_parsed is wrong. for functions that end up not being inlined as intended but operating on __initdata variables. Mark these as __always_inline, along with the corresponding asm-generic wrappers. Signed-off-by: Arnd Bergmann Acked-by: Will Deacon Link: https://lore.kernel.org/r/20210108092024.4034860-1-arnd@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/atomic.h | 10 +++++----- include/asm-generic/bitops/atomic.h | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index 015ddffaf6caa..b56a4b2bc2486 100644 --- a/arch/arm64/include/asm/atomic.h +++ b/arch/arm64/include/asm/atomic.h @@ -17,7 +17,7 @@ #include #define ATOMIC_OP(op) \ -static inline void arch_##op(int i, atomic_t *v) \ +static __always_inline void arch_##op(int i, atomic_t *v) \ { \ __lse_ll_sc_body(op, i, v); \ } @@ -32,7 +32,7 @@ ATOMIC_OP(atomic_sub) #undef ATOMIC_OP #define ATOMIC_FETCH_OP(name, op) \ -static inline int arch_##op##name(int i, atomic_t *v) \ +static __always_inline int arch_##op##name(int i, atomic_t *v) \ { \ return __lse_ll_sc_body(op##name, i, v); \ } @@ -56,7 +56,7 @@ ATOMIC_FETCH_OPS(atomic_sub_return) #undef ATOMIC_FETCH_OPS #define ATOMIC64_OP(op) \ -static inline void arch_##op(long i, atomic64_t *v) \ +static __always_inline void arch_##op(long i, atomic64_t *v) \ { \ __lse_ll_sc_body(op, i, v); \ } @@ -71,7 +71,7 @@ ATOMIC64_OP(atomic64_sub) #undef ATOMIC64_OP #define ATOMIC64_FETCH_OP(name, op) \ -static inline long arch_##op##name(long i, atomic64_t *v) \ +static __always_inline long arch_##op##name(long i, atomic64_t *v) \ { \ return __lse_ll_sc_body(op##name, i, v); \ } @@ -94,7 +94,7 @@ ATOMIC64_FETCH_OPS(atomic64_sub_return) #undef ATOMIC64_FETCH_OP #undef ATOMIC64_FETCH_OPS -static inline long arch_atomic64_dec_if_positive(atomic64_t *v) +static __always_inline long arch_atomic64_dec_if_positive(atomic64_t *v) { return __lse_ll_sc_body(atomic64_dec_if_positive, v); } diff --git a/include/asm-generic/bitops/atomic.h b/include/asm-generic/bitops/atomic.h index dd90c9792909d..0e7316a86240b 100644 --- a/include/asm-generic/bitops/atomic.h +++ b/include/asm-generic/bitops/atomic.h @@ -11,19 +11,19 @@ * See Documentation/atomic_bitops.txt for details. */ -static inline void set_bit(unsigned int nr, volatile unsigned long *p) +static __always_inline void set_bit(unsigned int nr, volatile unsigned long *p) { p += BIT_WORD(nr); atomic_long_or(BIT_MASK(nr), (atomic_long_t *)p); } -static inline void clear_bit(unsigned int nr, volatile unsigned long *p) +static __always_inline void clear_bit(unsigned int nr, volatile unsigned long *p) { p += BIT_WORD(nr); atomic_long_andnot(BIT_MASK(nr), (atomic_long_t *)p); } -static inline void change_bit(unsigned int nr, volatile unsigned long *p) +static __always_inline void change_bit(unsigned int nr, volatile unsigned long *p) { p += BIT_WORD(nr); atomic_long_xor(BIT_MASK(nr), (atomic_long_t *)p); -- GitLab From 3499ba8198cad47b731792e5e56b9ec2a78a83a2 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 13 Jan 2021 13:26:02 +0000 Subject: [PATCH 1738/1894] xen: Fix event channel callback via INTX/GSI For a while, event channel notification via the PCI platform device has been broken, because we attempt to communicate with xenstore before we even have notifications working, with the xs_reset_watches() call in xs_init(). We tend to get away with this on Xen versions below 4.0 because we avoid calling xs_reset_watches() anyway, because xenstore might not cope with reading a non-existent key. And newer Xen *does* have the vector callback support, so we rarely fall back to INTX/GSI delivery. To fix it, clean up a bit of the mess of xs_init() and xenbus_probe() startup. Call xs_init() directly from xenbus_init() only in the !XS_HVM case, deferring it to be called from xenbus_probe() in the XS_HVM case instead. Then fix up the invocation of xenbus_probe() to happen either from its device_initcall if the callback is available early enough, or when the callback is finally set up. This means that the hack of calling xenbus_probe() from a workqueue after the first interrupt, or directly from the PCI platform device setup, is no longer needed. Signed-off-by: David Woodhouse Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20210113132606.422794-2-dwmw2@infradead.org Signed-off-by: Juergen Gross --- arch/arm/xen/enlighten.c | 2 +- drivers/xen/events/events_base.c | 10 ---- drivers/xen/platform-pci.c | 1 - drivers/xen/xenbus/xenbus.h | 1 + drivers/xen/xenbus/xenbus_comms.c | 8 --- drivers/xen/xenbus/xenbus_probe.c | 81 +++++++++++++++++++++++++------ include/xen/xenbus.h | 2 +- 7 files changed, 70 insertions(+), 35 deletions(-) diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 60e901cd0de6a..5a957a9a09843 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -371,7 +371,7 @@ static int __init xen_guest_init(void) } gnttab_init(); if (!xen_initial_domain()) - xenbus_probe(NULL); + xenbus_probe(); /* * Making sure board specific code will not set up ops for diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 6038c4c35db5a..bbebe248b7264 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -2010,16 +2010,6 @@ static struct irq_chip xen_percpu_chip __read_mostly = { .irq_ack = ack_dynirq, }; -int xen_set_callback_via(uint64_t via) -{ - struct xen_hvm_param a; - a.domid = DOMID_SELF; - a.index = HVM_PARAM_CALLBACK_IRQ; - a.value = via; - return HYPERVISOR_hvm_op(HVMOP_set_param, &a); -} -EXPORT_SYMBOL_GPL(xen_set_callback_via); - #ifdef CONFIG_XEN_PVHVM /* Vector callbacks are better than PCI interrupts to receive event * channel notifications because we can receive vector callbacks on any diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c index dd911e1ff782c..9db557b76511b 100644 --- a/drivers/xen/platform-pci.c +++ b/drivers/xen/platform-pci.c @@ -149,7 +149,6 @@ static int platform_pci_probe(struct pci_dev *pdev, ret = gnttab_init(); if (ret) goto grant_out; - xenbus_probe(NULL); return 0; grant_out: gnttab_free_auto_xlat_frames(); diff --git a/drivers/xen/xenbus/xenbus.h b/drivers/xen/xenbus/xenbus.h index 2a93b7c9c1599..dc15373354144 100644 --- a/drivers/xen/xenbus/xenbus.h +++ b/drivers/xen/xenbus/xenbus.h @@ -115,6 +115,7 @@ int xenbus_probe_node(struct xen_bus_type *bus, const char *type, const char *nodename); int xenbus_probe_devices(struct xen_bus_type *bus); +void xenbus_probe(void); void xenbus_dev_changed(const char *node, struct xen_bus_type *bus); diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c index eb5151fc8efab..e5fda0256feb3 100644 --- a/drivers/xen/xenbus/xenbus_comms.c +++ b/drivers/xen/xenbus/xenbus_comms.c @@ -57,16 +57,8 @@ DEFINE_MUTEX(xs_response_mutex); static int xenbus_irq; static struct task_struct *xenbus_task; -static DECLARE_WORK(probe_work, xenbus_probe); - - static irqreturn_t wake_waiting(int irq, void *unused) { - if (unlikely(xenstored_ready == 0)) { - xenstored_ready = 1; - schedule_work(&probe_work); - } - wake_up(&xb_waitq); return IRQ_HANDLED; } diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 44634d970a5ca..c8f0282bb6497 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -683,29 +683,76 @@ void unregister_xenstore_notifier(struct notifier_block *nb) } EXPORT_SYMBOL_GPL(unregister_xenstore_notifier); -void xenbus_probe(struct work_struct *unused) +void xenbus_probe(void) { xenstored_ready = 1; + /* + * In the HVM case, xenbus_init() deferred its call to + * xs_init() in case callbacks were not operational yet. + * So do it now. + */ + if (xen_store_domain_type == XS_HVM) + xs_init(); + /* Notify others that xenstore is up */ blocking_notifier_call_chain(&xenstore_chain, 0, NULL); } -EXPORT_SYMBOL_GPL(xenbus_probe); -static int __init xenbus_probe_initcall(void) +/* + * Returns true when XenStore init must be deferred in order to + * allow the PCI platform device to be initialised, before we + * can actually have event channel interrupts working. + */ +static bool xs_hvm_defer_init_for_callback(void) { - if (!xen_domain()) - return -ENODEV; +#ifdef CONFIG_XEN_PVHVM + return xen_store_domain_type == XS_HVM && + !xen_have_vector_callback; +#else + return false; +#endif +} - if (xen_initial_domain() || xen_hvm_domain()) - return 0; +static int __init xenbus_probe_initcall(void) +{ + /* + * Probe XenBus here in the XS_PV case, and also XS_HVM unless we + * need to wait for the platform PCI device to come up. + */ + if (xen_store_domain_type == XS_PV || + (xen_store_domain_type == XS_HVM && + !xs_hvm_defer_init_for_callback())) + xenbus_probe(); - xenbus_probe(NULL); return 0; } - device_initcall(xenbus_probe_initcall); +int xen_set_callback_via(uint64_t via) +{ + struct xen_hvm_param a; + int ret; + + a.domid = DOMID_SELF; + a.index = HVM_PARAM_CALLBACK_IRQ; + a.value = via; + + ret = HYPERVISOR_hvm_op(HVMOP_set_param, &a); + if (ret) + return ret; + + /* + * If xenbus_probe_initcall() deferred the xenbus_probe() + * due to the callback not functioning yet, we can do it now. + */ + if (!xenstored_ready && xs_hvm_defer_init_for_callback()) + xenbus_probe(); + + return ret; +} +EXPORT_SYMBOL_GPL(xen_set_callback_via); + /* Set up event channel for xenstored which is run as a local process * (this is normally used only in dom0) */ @@ -818,11 +865,17 @@ static int __init xenbus_init(void) break; } - /* Initialize the interface to xenstore. */ - err = xs_init(); - if (err) { - pr_warn("Error initializing xenstore comms: %i\n", err); - goto out_error; + /* + * HVM domains may not have a functional callback yet. In that + * case let xs_init() be called from xenbus_probe(), which will + * get invoked at an appropriate time. + */ + if (xen_store_domain_type != XS_HVM) { + err = xs_init(); + if (err) { + pr_warn("Error initializing xenstore comms: %i\n", err); + goto out_error; + } } if ((xen_store_domain_type != XS_LOCAL) && diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index 00c7235ae93e7..2c43b0ef1e4d5 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -192,7 +192,7 @@ void xs_suspend_cancel(void); struct work_struct; -void xenbus_probe(struct work_struct *); +void xenbus_probe(void); #define XENBUS_IS_ERR_READ(str) ({ \ if (!IS_ERR(str) && strlen(str) == 0) { \ -- GitLab From 8f4fd86aa5d6aa122619623910065d236592e37c Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 6 Jan 2021 15:39:55 +0000 Subject: [PATCH 1739/1894] xen: Set platform PCI device INTX affinity to CPU0 With INTX or GSI delivery, Xen uses the event channel structures of CPU0. If the interrupt gets handled by Linux on a different CPU, then no events are seen as pending. Rather than introducing locking to allow other CPUs to process CPU0's events, just ensure that the PCI interrupts happens only on CPU0. Signed-off-by: David Woodhouse Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20210106153958.584169-3-dwmw2@infradead.org Signed-off-by: Juergen Gross --- drivers/xen/platform-pci.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c index 9db557b76511b..18f0ed8b1f93b 100644 --- a/drivers/xen/platform-pci.c +++ b/drivers/xen/platform-pci.c @@ -132,6 +132,13 @@ static int platform_pci_probe(struct pci_dev *pdev, dev_warn(&pdev->dev, "request_irq failed err=%d\n", ret); goto out; } + /* + * It doesn't strictly *have* to run on CPU0 but it sure + * as hell better process the event channel ports delivered + * to CPU0. + */ + irq_set_affinity(pdev->irq, cpumask_of(0)); + callback_via = get_callback_via(pdev); ret = xen_set_callback_via(callback_via); if (ret) { -- GitLab From b36b0fe96af13460278bf9b173beced1bd15f85d Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 6 Jan 2021 15:39:56 +0000 Subject: [PATCH 1740/1894] x86/xen: Add xen_no_vector_callback option to test PCI INTX delivery It's useful to be able to test non-vector event channel delivery, to make sure Linux will work properly on older Xen which doesn't have it. It's also useful for those working on Xen and Xen-compatible hypervisors, because there are guest kernels still in active use which use PCI INTX even when vector delivery is available. Signed-off-by: David Woodhouse Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20210106153958.584169-4-dwmw2@infradead.org Signed-off-by: Juergen Gross --- Documentation/admin-guide/kernel-parameters.txt | 4 ++++ arch/x86/xen/enlighten_hvm.c | 11 ++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 44fde25bb221e..035d27b6272cd 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5964,6 +5964,10 @@ This option is obsoleted by the "nopv" option, which has equivalent effect for XEN platform. + xen_no_vector_callback + [KNL,X86,XEN] Disable the vector callback for Xen + event channel interrupts. + xen_scrub_pages= [XEN] Boolean option to control scrubbing pages before giving them back to Xen, for use by other domains. Can be also changed at runtime diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index 9e87ab010c82b..ec50b7423a4c8 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -188,6 +188,8 @@ static int xen_cpu_dead_hvm(unsigned int cpu) return 0; } +static bool no_vector_callback __initdata; + static void __init xen_hvm_guest_init(void) { if (xen_pv_domain()) @@ -207,7 +209,7 @@ static void __init xen_hvm_guest_init(void) xen_panic_handler_init(); - if (xen_feature(XENFEAT_hvm_callback_vector)) + if (!no_vector_callback && xen_feature(XENFEAT_hvm_callback_vector)) xen_have_vector_callback = 1; xen_hvm_smp_init(); @@ -233,6 +235,13 @@ static __init int xen_parse_nopv(char *arg) } early_param("xen_nopv", xen_parse_nopv); +static __init int xen_parse_no_vector_callback(char *arg) +{ + no_vector_callback = true; + return 0; +} +early_param("xen_no_vector_callback", xen_parse_no_vector_callback); + bool __init xen_hvm_need_lapic(void) { if (xen_pv_domain()) -- GitLab From 4621dc6a5bf1235249e92231db30c96dfd1a18b9 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 6 Jan 2021 15:39:57 +0000 Subject: [PATCH 1741/1894] x86/xen: Don't register Xen IPIs when they aren't going to be used In the case where xen_have_vector_callback is false, we still register the IPI vectors in xen_smp_intr_init() for the secondary CPUs even though they aren't going to be used. Stop doing that. Signed-off-by: David Woodhouse Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20210106153958.584169-5-dwmw2@infradead.org Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten_hvm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index ec50b7423a4c8..e68ea5f4ad1ce 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -164,10 +164,10 @@ static int xen_cpu_up_prepare_hvm(unsigned int cpu) else per_cpu(xen_vcpu_id, cpu) = cpu; rc = xen_vcpu_setup(cpu); - if (rc) + if (rc || !xen_have_vector_callback) return rc; - if (xen_have_vector_callback && xen_feature(XENFEAT_hvm_safe_pvclock)) + if (xen_feature(XENFEAT_hvm_safe_pvclock)) xen_setup_timer(cpu); rc = xen_smp_intr_init(cpu); -- GitLab From 3d7746bea92530e8695258a3cf3ddec7a135edd6 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 6 Jan 2021 15:39:58 +0000 Subject: [PATCH 1742/1894] x86/xen: Fix xen_hvm_smp_init() when vector callback not available MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only the IPI-related functions in the smp_ops should be conditional on the vector callback being available. The rest should still happen: • xen_hvm_smp_prepare_boot_cpu() This function does two things, both of which should still happen if there is no vector callback support. The call to xen_vcpu_setup() for vCPU0 should still happen as it just sets up the vcpu_info for CPU0. That does happen for the secondary vCPUs too, from xen_cpu_up_prepare_hvm(). The second thing it does is call xen_init_spinlocks(), which perhaps counter-intuitively should *also* still be happening in the case without vector callbacks, so that it can clear its local xen_pvspin flag and disable the virt_spin_lock_key accordingly. Checking xen_have_vector_callback in xen_init_spinlocks() itself would affect PV guests, so set the global nopvspin flag in xen_hvm_smp_init() instead, when vector callbacks aren't available. • xen_hvm_smp_prepare_cpus() This does some IPI-related setup by calling xen_smp_intr_init() and xen_init_lock_cpu(), which can be made conditional. And it sets the xen_vcpu_id to XEN_VCPU_ID_INVALID for all possible CPUS, which does need to happen. • xen_smp_cpus_done() This offlines any vCPUs which doesn't fit in the global shared_info page, if separate vcpu_info placement isn't available. That part also needs to happen regardless of vector callback support. • xen_hvm_cpu_die() This doesn't actually do anything other than commin_cpu_die() right right now in the !vector_callback case; all three teardown functions it calls should be no-ops. But to guard against future regressions it's useful to call it anyway, and for it to explicitly check for xen_have_vector_callback before calling those additional functions. Signed-off-by: David Woodhouse Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20210106153958.584169-6-dwmw2@infradead.org Signed-off-by: Juergen Gross --- arch/x86/xen/smp_hvm.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/arch/x86/xen/smp_hvm.c b/arch/x86/xen/smp_hvm.c index f5e7db4f82abb..056430a1080bb 100644 --- a/arch/x86/xen/smp_hvm.c +++ b/arch/x86/xen/smp_hvm.c @@ -33,9 +33,11 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) int cpu; native_smp_prepare_cpus(max_cpus); - WARN_ON(xen_smp_intr_init(0)); - xen_init_lock_cpu(0); + if (xen_have_vector_callback) { + WARN_ON(xen_smp_intr_init(0)); + xen_init_lock_cpu(0); + } for_each_possible_cpu(cpu) { if (cpu == 0) @@ -50,9 +52,11 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) static void xen_hvm_cpu_die(unsigned int cpu) { if (common_cpu_die(cpu) == 0) { - xen_smp_intr_free(cpu); - xen_uninit_lock_cpu(cpu); - xen_teardown_timer(cpu); + if (xen_have_vector_callback) { + xen_smp_intr_free(cpu); + xen_uninit_lock_cpu(cpu); + xen_teardown_timer(cpu); + } } } #else @@ -64,14 +68,17 @@ static void xen_hvm_cpu_die(unsigned int cpu) void __init xen_hvm_smp_init(void) { - if (!xen_have_vector_callback) + smp_ops.smp_prepare_boot_cpu = xen_hvm_smp_prepare_boot_cpu; + smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus; + smp_ops.smp_cpus_done = xen_smp_cpus_done; + smp_ops.cpu_die = xen_hvm_cpu_die; + + if (!xen_have_vector_callback) { + nopvspin = true; return; + } - smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus; smp_ops.smp_send_reschedule = xen_smp_send_reschedule; - smp_ops.cpu_die = xen_hvm_cpu_die; smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi; smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi; - smp_ops.smp_prepare_boot_cpu = xen_hvm_smp_prepare_boot_cpu; - smp_ops.smp_cpus_done = xen_smp_cpus_done; } -- GitLab From b4411616c26f26c4017b8fa4d3538b1a02028733 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 13 Jan 2021 12:42:24 +0000 Subject: [PATCH 1743/1894] io_uring: fix null-deref in io_disable_sqo_submit general protection fault, probably for non-canonical address 0xdffffc0000000022: 0000 [#1] KASAN: null-ptr-deref in range [0x0000000000000110-0x0000000000000117] RIP: 0010:io_ring_set_wakeup_flag fs/io_uring.c:6929 [inline] RIP: 0010:io_disable_sqo_submit+0xdb/0x130 fs/io_uring.c:8891 Call Trace: io_uring_create fs/io_uring.c:9711 [inline] io_uring_setup+0x12b1/0x38e0 fs/io_uring.c:9739 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 io_disable_sqo_submit() might be called before user rings were allocated, don't do io_ring_set_wakeup_flag() in those cases. Reported-by: syzbot+ab412638aeb652ded540@syzkaller.appspotmail.com Fixes: d9d05217cb69 ("io_uring: stop SQPOLL submit on creator's death") Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index b0e6d8e607a33..66db2c46ab824 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8895,7 +8895,8 @@ static void io_disable_sqo_submit(struct io_ring_ctx *ctx) mutex_unlock(&ctx->uring_lock); /* make sure callers enter the ring to get error */ - io_ring_set_wakeup_flag(ctx); + if (ctx->rings) + io_ring_set_wakeup_flag(ctx); } /* -- GitLab From 06585c497b55045ec21aa8128e340f6a6587351c Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 13 Jan 2021 12:42:25 +0000 Subject: [PATCH 1744/1894] io_uring: do sqo disable on install_fd error WARNING: CPU: 0 PID: 8494 at fs/io_uring.c:8717 io_ring_ctx_wait_and_kill+0x4f2/0x600 fs/io_uring.c:8717 Call Trace: io_uring_release+0x3e/0x50 fs/io_uring.c:8759 __fput+0x283/0x920 fs/file_table.c:280 task_work_run+0xdd/0x190 kernel/task_work.c:140 tracehook_notify_resume include/linux/tracehook.h:189 [inline] exit_to_user_mode_loop kernel/entry/common.c:174 [inline] exit_to_user_mode_prepare+0x249/0x250 kernel/entry/common.c:201 __syscall_exit_to_user_mode_work kernel/entry/common.c:291 [inline] syscall_exit_to_user_mode+0x19/0x50 kernel/entry/common.c:302 entry_SYSCALL_64_after_hwframe+0x44/0xa9 failed io_uring_install_fd() is a special case, we don't do io_ring_ctx_wait_and_kill() directly but defer it to fput, though still need to io_disable_sqo_submit() before. note: it doesn't fix any real problem, just a warning. That's because sqring won't be available to the userspace in this case and so SQPOLL won't submit anything. Reported-by: syzbot+9c9c35374c0ecac06516@syzkaller.appspotmail.com Fixes: d9d05217cb69 ("io_uring: stop SQPOLL submit on creator's death") Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 66db2c46ab824..372be9caf3402 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -9708,6 +9708,7 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p, */ ret = io_uring_install_fd(ctx, file); if (ret < 0) { + io_disable_sqo_submit(ctx); /* fput will clean it up */ fput(file); return ret; -- GitLab From d52e419ac8b50c8bef41b398ed13528e75d7ad48 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 12 Jan 2021 15:23:51 +0000 Subject: [PATCH 1745/1894] rxrpc: Fix handling of an unsupported token type in rxrpc_read() Clang static analysis reports the following: net/rxrpc/key.c:657:11: warning: Assigned value is garbage or undefined toksize = toksizes[tok++]; ^ ~~~~~~~~~~~~~~~ rxrpc_read() contains two consecutive loops. The first loop calculates the token sizes and stores the results in toksizes[] and the second one uses the array. When there is an error in identifying the token in the first loop, the token is skipped, no change is made to the toksizes[] array. When the same error happens in the second loop, the token is not skipped. This will cause the toksizes[] array to be out of step and will overrun past the calculated sizes. Fix this by making both loops log a message and return an error in this case. This should only happen if a new token type is incompletely implemented, so it should normally be impossible to trigger this. Fixes: 9a059cd5ca7d ("rxrpc: Downgrade the BUG() for unsupported token type in rxrpc_read()") Reported-by: Tom Rix Signed-off-by: David Howells Reviewed-by: Tom Rix Link: https://lore.kernel.org/r/161046503122.2445787.16714129930607546635.stgit@warthog.procyon.org.uk Signed-off-by: Jakub Kicinski --- net/rxrpc/key.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c index 9631aa8543b51..8d2073e0e3da5 100644 --- a/net/rxrpc/key.c +++ b/net/rxrpc/key.c @@ -598,7 +598,7 @@ static long rxrpc_read(const struct key *key, default: /* we have a ticket we can't encode */ pr_err("Unsupported key token type (%u)\n", token->security_index); - continue; + return -ENOPKG; } _debug("token[%u]: toksize=%u", ntoks, toksize); @@ -674,7 +674,9 @@ static long rxrpc_read(const struct key *key, break; default: - break; + pr_err("Unsupported key token type (%u)\n", + token->security_index); + return -ENOPKG; } ASSERTCMP((unsigned long)xdr - (unsigned long)oldxdr, ==, -- GitLab From a95d25dd7b94a5ba18246da09b4218f132fed60e Mon Sep 17 00:00:00 2001 From: Baptiste Lepers Date: Tue, 12 Jan 2021 15:59:15 +0000 Subject: [PATCH 1746/1894] rxrpc: Call state should be read with READ_ONCE() under some circumstances The call state may be changed at any time by the data-ready routine in response to received packets, so if the call state is to be read and acted upon several times in a function, READ_ONCE() must be used unless the call state lock is held. As it happens, we used READ_ONCE() to read the state a few lines above the unmarked read in rxrpc_input_data(), so use that value rather than re-reading it. Fixes: a158bdd3247b ("rxrpc: Fix call timeouts") Signed-off-by: Baptiste Lepers Signed-off-by: David Howells Link: https://lore.kernel.org/r/161046715522.2450566.488819910256264150.stgit@warthog.procyon.org.uk Signed-off-by: Jakub Kicinski --- net/rxrpc/input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 667c44aa5a63c..dc201363f2c48 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -430,7 +430,7 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) return; } - if (call->state == RXRPC_CALL_SERVER_RECV_REQUEST) { + if (state == RXRPC_CALL_SERVER_RECV_REQUEST) { unsigned long timo = READ_ONCE(call->next_req_timo); unsigned long now, expect_req_by; -- GitLab From 77b6ec01c29aade01701aa30bf1469acc7f2be76 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Tue, 5 Jan 2021 12:21:26 -0800 Subject: [PATCH 1747/1894] cifs: check pointer before freeing clang static analysis reports this problem dfs_cache.c:591:2: warning: Argument to kfree() is a constant address (18446744073709551614), which is not memory allocated by malloc() kfree(vi); ^~~~~~~~~ In dfs_cache_del_vol() the volume info pointer 'vi' being freed is the return of a call to find_vol(). The large constant address is find_vol() returning an error. Add an error check to dfs_cache_del_vol() similar to the one done in dfs_cache_update_vol(). Fixes: 54be1f6c1c37 ("cifs: Add DFS cache routines") Signed-off-by: Tom Rix Reviewed-by: Nathan Chancellor CC: # v5.0+ Signed-off-by: Steve French --- fs/cifs/dfs_cache.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c index 6ad6ba5f6ebee..0fdb0de7ff861 100644 --- a/fs/cifs/dfs_cache.c +++ b/fs/cifs/dfs_cache.c @@ -1260,7 +1260,8 @@ void dfs_cache_del_vol(const char *fullpath) vi = find_vol(fullpath); spin_unlock(&vol_list_lock); - kref_put(&vi->refcnt, vol_release); + if (!IS_ERR(vi)) + kref_put(&vi->refcnt, vol_release); } /** -- GitLab From 2659d3bff3e1b000f49907d0839178b101a89887 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Wed, 13 Jan 2021 14:16:16 -0300 Subject: [PATCH 1748/1894] cifs: fix interrupted close commands Retry close command if it gets interrupted to not leak open handles on the server. Signed-off-by: Paulo Alcantara (SUSE) Reported-by: Duncan Findlay Suggested-by: Pavel Shilovsky Fixes: 6988a619f5b7 ("cifs: allow syscalls to be restarted in __smb_send_rqst()") Cc: stable@vger.kernel.org Reviewd-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 067eb44c7baa8..794fc3b68b4f9 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -3248,7 +3248,7 @@ close_exit: free_rsp_buf(resp_buftype, rsp); /* retry close in a worker thread if this one is interrupted */ - if (rc == -EINTR) { + if (is_interrupt_error(rc)) { int tmp_rc; tmp_rc = smb2_handle_cancelled_close(tcon, persistent_fid, -- GitLab From c13e7af042270724b42a466edc48a70a43f571f2 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Tue, 12 Jan 2021 01:13:40 -0800 Subject: [PATCH 1749/1894] fs: cifs: remove unneeded variable in smb3_fs_context_dup 'rc' in smb3_fs_context_dup is not used and can be removed. Signed-off-by: Menglong Dong Reviewed-by: Aurelien Aptel Signed-off-by: Steve French --- fs/cifs/fs_context.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c index 0afccbbed2e65..076bcadc756a7 100644 --- a/fs/cifs/fs_context.c +++ b/fs/cifs/fs_context.c @@ -303,8 +303,6 @@ do { \ int smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx) { - int rc = 0; - memcpy(new_ctx, ctx, sizeof(*ctx)); new_ctx->prepath = NULL; new_ctx->mount_options = NULL; @@ -327,7 +325,7 @@ smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx DUP_CTX_STR(nodename); DUP_CTX_STR(iocharset); - return rc; + return 0; } static int -- GitLab From ed6b1920f84bc5c3d666dc383ff3bbc60f0f62a5 Mon Sep 17 00:00:00 2001 From: YANG LI Date: Mon, 11 Jan 2021 17:15:28 +0800 Subject: [PATCH 1750/1894] cifs: connect: style: Simplify bool comparison Fix the following coccicheck warning: ./fs/cifs/connect.c:3740:6-21: WARNING: Comparison of 0/1 to bool variable Signed-off-by: YANG LI Reported-by: Abaci Robot Signed-off-by: Steve French --- fs/cifs/connect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index b9df85506938d..5d39129406ea6 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3740,7 +3740,7 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, if (!ses->binding) { ses->capabilities = server->capabilities; - if (linuxExtEnabled == 0) + if (!linuxExtEnabled) ses->capabilities &= (~server->vals->cap_unix); if (ses->auth_key.response) { -- GitLab From e54fd0716c3db20c0cba73fee2c3a4274b08c24e Mon Sep 17 00:00:00 2001 From: YANG LI Date: Wed, 30 Dec 2020 14:35:45 +0800 Subject: [PATCH 1751/1894] cifs: style: replace one-element array with flexible-array There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use "flexible array members"[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://www.kernel.org/doc/html/v5.9/process/ deprecated.html#zero-length-and-one-element-arrays Signed-off-by: YANG LI Reported-by: Abaci Signed-off-by: Steve French --- fs/cifs/smb2pdu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 204a622b89ed3..d85edf5d14294 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -424,7 +424,7 @@ struct smb2_rdma_transform_capabilities_context { __le16 TransformCount; __u16 Reserved1; __u32 Reserved2; - __le16 RDMATransformIds[1]; + __le16 RDMATransformIds[]; } __packed; /* Signing algorithms */ -- GitLab From b42b3a2744b3e8f427de79896720c72823af91ad Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 12 Jan 2021 10:16:43 +0100 Subject: [PATCH 1752/1894] can: isotp: isotp_getname(): fix kernel information leak Initialize the sockaddr_can structure to prevent a data leak to user space. Suggested-by: Cong Wang Reported-by: syzbot+057884e2f453e8afebc8@syzkaller.appspotmail.com Fixes: e057dd3fc20f ("can: add ISO 15765-2:2016 transport protocol") Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/r/20210112091643.11789-1-socketcan@hartkopp.net Signed-off-by: Marc Kleine-Budde --- net/can/isotp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/can/isotp.c b/net/can/isotp.c index 7839c3b9e5bea..3ef7f78e553bc 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -1155,6 +1155,7 @@ static int isotp_getname(struct socket *sock, struct sockaddr *uaddr, int peer) if (peer) return -EOPNOTSUPP; + memset(addr, 0, sizeof(*addr)); addr->can_family = AF_CAN; addr->can_ifindex = so->ifindex; addr->can_addr.tp.rx_id = so->rxid; -- GitLab From ca4c6ebeeb50112f5178f14bfb6d9e8ddf148545 Mon Sep 17 00:00:00 2001 From: Qinglang Miao Date: Wed, 13 Jan 2021 15:31:00 +0800 Subject: [PATCH 1753/1894] can: mcp251xfd: mcp251xfd_handle_rxif_one(): fix wrong NULL pointer check If alloc_canfd_skb() returns NULL, 'cfg' is an uninitialized variable, so we should check 'skb' rather than 'cfd' after calling alloc_canfd_skb(priv->ndev, &cfd). Fixes: 55e5b97f003e ("can: mcp25xxfd: add driver for Microchip MCP25xxFD SPI CAN") Signed-off-by: Qinglang Miao Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20210113073100.79552-1-miaoqinglang@huawei.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 36235afb0bc66..f07e8b737d31e 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -1491,7 +1491,7 @@ mcp251xfd_handle_rxif_one(struct mcp251xfd_priv *priv, else skb = alloc_can_skb(priv->ndev, (struct can_frame **)&cfd); - if (!cfd) { + if (!skb) { stats->rx_dropped++; return 0; } -- GitLab From 1f02efd1bb35bee95feed6aab46d1217f29d555b Mon Sep 17 00:00:00 2001 From: Seb Laveze Date: Tue, 12 Jan 2021 15:01:22 +0100 Subject: [PATCH 1754/1894] net: stmmac: use __napi_schedule() for PREEMPT_RT Use of __napi_schedule_irqoff() is not safe with PREEMPT_RT in which hard interrupts are not disabled while running the threaded interrupt. Using __napi_schedule() works for both PREEMPT_RT and mainline Linux, just at the cost of an additional check if interrupts are disabled for mainline (since they are already disabled). Similar to the fix done for enetc commit 215602a8d212 ("enetc: use napi_schedule to be compatible with PREEMPT_RT") Signed-off-by: Seb Laveze Link: https://lore.kernel.org/r/20210112140121.1487619-1-sebastien.laveze@oss.nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 5b1c12ff98c05..2d90d6856ec58 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2184,7 +2184,7 @@ static int stmmac_napi_check(struct stmmac_priv *priv, u32 chan) spin_lock_irqsave(&ch->lock, flags); stmmac_disable_dma_irq(priv, priv->ioaddr, chan, 1, 0); spin_unlock_irqrestore(&ch->lock, flags); - __napi_schedule_irqoff(&ch->rx_napi); + __napi_schedule(&ch->rx_napi); } } @@ -2193,7 +2193,7 @@ static int stmmac_napi_check(struct stmmac_priv *priv, u32 chan) spin_lock_irqsave(&ch->lock, flags); stmmac_disable_dma_irq(priv, priv->ioaddr, chan, 0, 1); spin_unlock_irqrestore(&ch->lock, flags); - __napi_schedule_irqoff(&ch->tx_napi); + __napi_schedule(&ch->tx_napi); } } -- GitLab From 7128c834d30e6b2cf649f14d8fc274941786d0e1 Mon Sep 17 00:00:00 2001 From: Cristian Dumitrescu Date: Mon, 11 Jan 2021 18:11:38 +0000 Subject: [PATCH 1755/1894] i40e: fix potential NULL pointer dereferencing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, the function i40e_construct_skb_zc only frees the input xdp buffer when the output skb is successfully built. On error, the function i40e_clean_rx_irq_zc does not commit anything for the current packet descriptor and simply exits the packet descriptor processing loop, with the plan to restart the processing of this descriptor on the next invocation. Therefore, on error the ring next-to-clean pointer should not advance, the xdp i.e. *bi buffer should not be freed and the current buffer info should not be invalidated by setting *bi to NULL. Therefore, the *bi should only be set to NULL when the function i40e_construct_skb_zc is successful, otherwise a NULL *bi will be dereferenced when the work for the current descriptor is eventually restarted. Fixes: 3b4f0b66c2b3 ("i40e, xsk: Migrate to new MEM_TYPE_XSK_BUFF_POOL") Signed-off-by: Cristian Dumitrescu Acked-by: Björn Töpel Link: https://lore.kernel.org/r/20210111181138.49757-1-cristian.dumitrescu@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/i40e/i40e_xsk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index 47eb9c584a123..492ce213208d2 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -348,12 +348,12 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget) * SBP is *not* set in PRT_SBPVSI (default not set). */ skb = i40e_construct_skb_zc(rx_ring, *bi); - *bi = NULL; if (!skb) { rx_ring->rx_stats.alloc_buff_failed++; break; } + *bi = NULL; cleaned_count++; i40e_inc_ntc(rx_ring); -- GitLab From 8ad2a970d2010add3963e7219eb50367ab3fa4eb Mon Sep 17 00:00:00 2001 From: Ayush Sawal Date: Tue, 12 Jan 2021 11:06:00 +0530 Subject: [PATCH 1756/1894] cxgb4/chtls: Fix tid stuck due to wrong update of qid TID stuck is seen when there is a race in CPL_PASS_ACCEPT_RPL/CPL_ABORT_REQ and abort is arriving before the accept reply, which sets the queue number. In this case HW ends up sending CPL_ABORT_RPL_RSS to an incorrect ingress queue. V1->V2: - Removed the unused variable len in chtls_set_quiesce_ctrl(). V2->V3: - As kfree_skb() has a check for null skb, so removed this check before calling kfree_skb() in func chtls_send_reset(). Fixes: cc35c88ae4db ("crypto : chtls - CPL handler definition") Signed-off-by: Rohit Maheshwari Signed-off-by: Ayush Sawal Link: https://lore.kernel.org/r/20210112053600.24590-1-ayush.sawal@chelsio.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h | 7 ++++ .../chelsio/inline_crypto/chtls/chtls.h | 4 ++ .../chelsio/inline_crypto/chtls/chtls_cm.c | 32 ++++++++++++++- .../chelsio/inline_crypto/chtls/chtls_hw.c | 41 +++++++++++++++++++ 4 files changed, 82 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h b/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h index 92473dda55d9f..22a0220123ade 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h @@ -40,6 +40,13 @@ #define TCB_L2T_IX_M 0xfffULL #define TCB_L2T_IX_V(x) ((x) << TCB_L2T_IX_S) +#define TCB_T_FLAGS_W 1 +#define TCB_T_FLAGS_S 0 +#define TCB_T_FLAGS_M 0xffffffffffffffffULL +#define TCB_T_FLAGS_V(x) ((__u64)(x) << TCB_T_FLAGS_S) + +#define TCB_FIELD_COOKIE_TFLAG 1 + #define TCB_SMAC_SEL_W 0 #define TCB_SMAC_SEL_S 24 #define TCB_SMAC_SEL_M 0xffULL diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h index 72bb123d53db7..9e23780136421 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h @@ -575,7 +575,11 @@ int send_tx_flowc_wr(struct sock *sk, int compl, void chtls_tcp_push(struct sock *sk, int flags); int chtls_push_frames(struct chtls_sock *csk, int comp); int chtls_set_tcb_tflag(struct sock *sk, unsigned int bit_pos, int val); +void chtls_set_tcb_field_rpl_skb(struct sock *sk, u16 word, + u64 mask, u64 val, u8 cookie, + int through_l2t); int chtls_setkey(struct chtls_sock *csk, u32 keylen, u32 mode, int cipher_type); +void chtls_set_quiesce_ctrl(struct sock *sk, int val); void skb_entail(struct sock *sk, struct sk_buff *skb, int flags); unsigned int keyid_to_addr(int start_addr, int keyid); void free_tls_keyid(struct sock *sk); diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c index 51dd030b3b366..e5cfbe196ba66 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c @@ -32,6 +32,7 @@ #include "chtls.h" #include "chtls_cm.h" #include "clip_tbl.h" +#include "t4_tcb.h" /* * State transitions and actions for close. Note that if we are in SYN_SENT @@ -267,7 +268,9 @@ static void chtls_send_reset(struct sock *sk, int mode, struct sk_buff *skb) if (sk->sk_state != TCP_SYN_RECV) chtls_send_abort(sk, mode, skb); else - goto out; + chtls_set_tcb_field_rpl_skb(sk, TCB_T_FLAGS_W, + TCB_T_FLAGS_V(TCB_T_FLAGS_M), 0, + TCB_FIELD_COOKIE_TFLAG, 1); return; out: @@ -1949,6 +1952,8 @@ static void chtls_close_con_rpl(struct sock *sk, struct sk_buff *skb) else if (tcp_sk(sk)->linger2 < 0 && !csk_flag_nochk(csk, CSK_ABORT_SHUTDOWN)) chtls_abort_conn(sk, skb); + else if (csk_flag_nochk(csk, CSK_TX_DATA_SENT)) + chtls_set_quiesce_ctrl(sk, 0); break; default: pr_info("close_con_rpl in bad state %d\n", sk->sk_state); @@ -2292,6 +2297,28 @@ static int chtls_wr_ack(struct chtls_dev *cdev, struct sk_buff *skb) return 0; } +static int chtls_set_tcb_rpl(struct chtls_dev *cdev, struct sk_buff *skb) +{ + struct cpl_set_tcb_rpl *rpl = cplhdr(skb) + RSS_HDR; + unsigned int hwtid = GET_TID(rpl); + struct sock *sk; + + sk = lookup_tid(cdev->tids, hwtid); + + /* return EINVAL if socket doesn't exist */ + if (!sk) + return -EINVAL; + + /* Reusing the skb as size of cpl_set_tcb_field structure + * is greater than cpl_abort_req + */ + if (TCB_COOKIE_G(rpl->cookie) == TCB_FIELD_COOKIE_TFLAG) + chtls_send_abort(sk, CPL_ABORT_SEND_RST, NULL); + + kfree_skb(skb); + return 0; +} + chtls_handler_func chtls_handlers[NUM_CPL_CMDS] = { [CPL_PASS_OPEN_RPL] = chtls_pass_open_rpl, [CPL_CLOSE_LISTSRV_RPL] = chtls_close_listsrv_rpl, @@ -2304,5 +2331,6 @@ chtls_handler_func chtls_handlers[NUM_CPL_CMDS] = { [CPL_CLOSE_CON_RPL] = chtls_conn_cpl, [CPL_ABORT_REQ_RSS] = chtls_conn_cpl, [CPL_ABORT_RPL_RSS] = chtls_conn_cpl, - [CPL_FW4_ACK] = chtls_wr_ack, + [CPL_FW4_ACK] = chtls_wr_ack, + [CPL_SET_TCB_RPL] = chtls_set_tcb_rpl, }; diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_hw.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_hw.c index a4fb463af22ac..1e67140b0f801 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_hw.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_hw.c @@ -88,6 +88,24 @@ static int chtls_set_tcb_field(struct sock *sk, u16 word, u64 mask, u64 val) return ret < 0 ? ret : 0; } +void chtls_set_tcb_field_rpl_skb(struct sock *sk, u16 word, + u64 mask, u64 val, u8 cookie, + int through_l2t) +{ + struct sk_buff *skb; + unsigned int wrlen; + + wrlen = sizeof(struct cpl_set_tcb_field) + sizeof(struct ulptx_idata); + wrlen = roundup(wrlen, 16); + + skb = alloc_skb(wrlen, GFP_KERNEL | __GFP_NOFAIL); + if (!skb) + return; + + __set_tcb_field(sk, skb, word, mask, val, cookie, 0); + send_or_defer(sk, tcp_sk(sk), skb, through_l2t); +} + /* * Set one of the t_flags bits in the TCB. */ @@ -113,6 +131,29 @@ static int chtls_set_tcb_quiesce(struct sock *sk, int val) TF_RX_QUIESCE_V(val)); } +void chtls_set_quiesce_ctrl(struct sock *sk, int val) +{ + struct chtls_sock *csk; + struct sk_buff *skb; + unsigned int wrlen; + int ret; + + wrlen = sizeof(struct cpl_set_tcb_field) + sizeof(struct ulptx_idata); + wrlen = roundup(wrlen, 16); + + skb = alloc_skb(wrlen, GFP_ATOMIC); + if (!skb) + return; + + csk = rcu_dereference_sk_user_data(sk); + + __set_tcb_field(sk, skb, 1, TF_RX_QUIESCE_V(1), 0, 0, 1); + set_wr_txq(skb, CPL_PRIORITY_CONTROL, csk->port_id); + ret = cxgb4_ofld_send(csk->egress_dev, skb); + if (ret < 0) + kfree_skb(skb); +} + /* TLS Key bitmap processing */ int chtls_init_kmap(struct chtls_dev *cdev, struct cxgb4_lld_info *lldi) { -- GitLab From 5b55299eed78538cc4746e50ee97103a1643249c Mon Sep 17 00:00:00 2001 From: David Wu Date: Wed, 13 Jan 2021 11:41:09 +0800 Subject: [PATCH 1757/1894] net: stmmac: Fixed mtu channged by cache aligned Since the original mtu is not used when the mtu is updated, the mtu is aligned with cache, this will get an incorrect. For example, if you want to configure the mtu to be 1500, but mtu 1536 is configured in fact. Fixed: eaf4fac478077 ("net: stmmac: Do not accept invalid MTU values") Signed-off-by: David Wu Link: https://lore.kernel.org/r/20210113034109.27865-1-david.wu@rock-chips.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 2d90d6856ec58..26b971cd4da5a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4026,6 +4026,7 @@ static int stmmac_change_mtu(struct net_device *dev, int new_mtu) { struct stmmac_priv *priv = netdev_priv(dev); int txfifosz = priv->plat->tx_fifo_size; + const int mtu = new_mtu; if (txfifosz == 0) txfifosz = priv->dma_cap.tx_fifo_size; @@ -4043,7 +4044,7 @@ static int stmmac_change_mtu(struct net_device *dev, int new_mtu) if ((txfifosz < new_mtu) || (new_mtu > BUF_SIZE_16KiB)) return -EINVAL; - dev->mtu = new_mtu; + dev->mtu = mtu; netdev_update_features(dev); -- GitLab From c25a053e15778f6b4d6553708673736e27a6c2cf Mon Sep 17 00:00:00 2001 From: Nick Hu Date: Wed, 13 Jan 2021 10:24:10 +0800 Subject: [PATCH 1758/1894] riscv: Fix KASAN memory mapping. Use virtual address instead of physical address when translating the address to shadow memory by kasan_mem_to_shadow(). Signed-off-by: Nick Hu Signed-off-by: Nylon Chen Fixes: b10d6bca8720 ("arch, drivers: replace for_each_membock() with for_each_mem_range()") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/kasan_init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c index 12ddd1f6bf70c..a8a2ffd9114aa 100644 --- a/arch/riscv/mm/kasan_init.c +++ b/arch/riscv/mm/kasan_init.c @@ -93,8 +93,8 @@ void __init kasan_init(void) VMALLOC_END)); for_each_mem_range(i, &_start, &_end) { - void *start = (void *)_start; - void *end = (void *)_end; + void *start = (void *)__va(_start); + void *end = (void *)__va(_end); if (start >= end) break; -- GitLab From 41131a5e54ae7ba5a2bb8d7b30d1818b3f5b13d2 Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Tue, 12 Jan 2021 11:55:15 +0000 Subject: [PATCH 1759/1894] powerpc/vdso: Fix clock_gettime_fallback for vdso32 The second argument of __kernel_clock_gettime64 points to a struct __kernel_timespec, with 64-bit time_t, so use the clock_gettime64 syscall in the fallback function for the 32-bit VDSO. Similarly, clock_getres_fallback should use the clock_getres_time64 syscall, though it isn't yet called from the 32-bit VDSO. Fixes: d0e3fc69d00d ("powerpc/vdso: Provide __kernel_clock_gettime64() on vdso32") Signed-off-by: Andreas Schwab [chleroy: Moved into a single #ifdef __powerpc64__ block] Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/0c0ab0eb3cc80687c326f76ff0dd5762b8812ecc.1610452505.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/vdso/gettimeofday.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/vdso/gettimeofday.h b/arch/powerpc/include/asm/vdso/gettimeofday.h index 81671aa365b34..77c635c2c90d4 100644 --- a/arch/powerpc/include/asm/vdso/gettimeofday.h +++ b/arch/powerpc/include/asm/vdso/gettimeofday.h @@ -103,6 +103,8 @@ int gettimeofday_fallback(struct __kernel_old_timeval *_tv, struct timezone *_tz return do_syscall_2(__NR_gettimeofday, (unsigned long)_tv, (unsigned long)_tz); } +#ifdef __powerpc64__ + static __always_inline int clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) { @@ -115,10 +117,22 @@ int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) return do_syscall_2(__NR_clock_getres, _clkid, (unsigned long)_ts); } -#ifdef CONFIG_VDSO32 +#else #define BUILD_VDSO32 1 +static __always_inline +int clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + return do_syscall_2(__NR_clock_gettime64, _clkid, (unsigned long)_ts); +} + +static __always_inline +int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + return do_syscall_2(__NR_clock_getres_time64, _clkid, (unsigned long)_ts); +} + static __always_inline int clock_gettime32_fallback(clockid_t _clkid, struct old_timespec32 *_ts) { -- GitLab From be969b7cfbcfa8a835a528f1dc467f0975c6d883 Mon Sep 17 00:00:00 2001 From: Sagar Shrikant Kadam Date: Tue, 10 Nov 2020 07:22:10 -0800 Subject: [PATCH 1760/1894] dts: phy: fix missing mdio device and probe failure of vsc8541-01 device HiFive unleashed A00 board has VSC8541-01 ethernet phy, this device is identified as a Revision B device as described in device identification registers. In order to use this phy in the unmanaged mode, it requires a specific reset sequence of logical 0-1-0-1 transition on the NRESET pin as documented here [1]. Currently, the bootloader (fsbl or u-boot-spl) takes care of the phy reset. If due to some reason the phy device hasn't received the reset by the prior stages before the linux macb driver comes into the picture, the MACB mii bus gets probed but the mdio scan fails and is not even able to read the phy ID registers. It gives an error message: "libphy: MACB_mii_bus: probed mdio_bus 10090000.ethernet-ffffffff: MDIO device at address 0 is missing." Thus adding the device OUI (Organizationally Unique Identifier) to the phy device node helps to probe the phy device. [1]: VSC8541-01 datasheet: https://www.mouser.com/ds/2/523/Microsemi_VSC8541-01_Datasheet_10496_V40-1148034.pdf Signed-off-by: Sagar Shrikant Kadam Signed-off-by: Palmer Dabbelt --- arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts index 4a2729f5ca3f0..60846e88ae4b1 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts @@ -88,6 +88,7 @@ phy-mode = "gmii"; phy-handle = <&phy0>; phy0: ethernet-phy@0 { + compatible = "ethernet-phy-id0007.0771"; reg = <0>; }; }; -- GitLab From a0fa9d727043da2238432471e85de0bdb8a8df65 Mon Sep 17 00:00:00 2001 From: Sagar Shrikant Kadam Date: Tue, 10 Nov 2020 07:22:11 -0800 Subject: [PATCH 1761/1894] dts: phy: add GPIO number and active state used for phy reset The GEMGXL_RST line on HiFive Unleashed is pulled low and is using GPIO number 12. Add these reset-gpio details to dt-node using which the linux phylib can reset the phy. Signed-off-by: Sagar Shrikant Kadam Signed-off-by: Palmer Dabbelt --- arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts index 60846e88ae4b1..24d75a146e02d 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts @@ -90,6 +90,7 @@ phy0: ethernet-phy@0 { compatible = "ethernet-phy-id0007.0771"; reg = <0>; + reset-gpios = <&gpio 12 GPIO_ACTIVE_LOW>; }; }; -- GitLab From 0983834a83931606a647c275e5d4165ce4e7b49f Mon Sep 17 00:00:00 2001 From: Sagar Shrikant Kadam Date: Tue, 10 Nov 2020 07:22:12 -0800 Subject: [PATCH 1762/1894] riscv: defconfig: enable gpio support for HiFive Unleashed Ethernet phy VSC8541-01 on HiFive Unleashed has its reset line connected to a gpio, so enable GPIO driver's required to reset the phy. Signed-off-by: Sagar Shrikant Kadam Signed-off-by: Palmer Dabbelt --- arch/riscv/configs/defconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index d222d353d86d4..8c3d1e4517031 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -64,6 +64,8 @@ CONFIG_HW_RANDOM=y CONFIG_HW_RANDOM_VIRTIO=y CONFIG_SPI=y CONFIG_SPI_SIFIVE=y +CONFIG_GPIOLIB=y +CONFIG_GPIO_SIFIVE=y # CONFIG_PTP_1588_CLOCK is not set CONFIG_POWER_RESET=y CONFIG_DRM=y -- GitLab From 101c2fae5108d78915517d0279323ee215e70df2 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 5 Jan 2021 15:14:29 -0500 Subject: [PATCH 1763/1894] MAINTAINERS: update radeon/amdgpu/amdkfd git trees MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit FDO is out of space, so move to gitlab. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index cc1e6a5ee6e67..82a47081f62a5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -907,7 +907,7 @@ AMD KFD M: Felix Kuehling L: amd-gfx@lists.freedesktop.org S: Supported -T: git git://people.freedesktop.org/~agd5f/linux +T: git https://gitlab.freedesktop.org/agd5f/linux.git F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd*.[ch] F: drivers/gpu/drm/amd/amdkfd/ F: drivers/gpu/drm/amd/include/cik_structs.h @@ -14818,7 +14818,7 @@ M: Alex Deucher M: Christian König L: amd-gfx@lists.freedesktop.org S: Supported -T: git git://people.freedesktop.org/~agd5f/linux +T: git https://gitlab.freedesktop.org/agd5f/linux.git F: drivers/gpu/drm/amd/ F: drivers/gpu/drm/radeon/ F: include/uapi/drm/amdgpu_drm.h -- GitLab From ff9346dbabbb6595c5c20d90d88ae4a2247487a9 Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Thu, 7 Jan 2021 18:53:03 -0500 Subject: [PATCH 1764/1894] drm/amdgpu: fix DRM_INFO flood if display core is not supported (bug 210921) This fix bug 210921 where DRM_INFO floods log when hitting an unsupported ASIC in amdgpu_device_asic_has_dc_support(). This info should be only called once. Bug: https://bugzilla.kernel.org/show_bug.cgi?id=210921 Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index b69c34074d8d7..087afab67e221 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3034,7 +3034,7 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) #endif default: if (amdgpu_dc > 0) - DRM_INFO("Display Core has been requested via kernel parameter " + DRM_INFO_ONCE("Display Core has been requested via kernel parameter " "but isn't supported by ASIC, ignoring\n"); return false; } -- GitLab From f14a5c34d143f6627f0be70c0de1d962f3a6ff1c Mon Sep 17 00:00:00 2001 From: Victor Zhao Date: Tue, 5 Jan 2021 15:04:01 +0800 Subject: [PATCH 1765/1894] drm/amdgpu/psp: fix psp gfx ctrl cmds psp GFX_CTRL_CMD_ID_CONSUME_CMD different for windows and linux, according to psp, linux cmds are not correct. v2: only correct GFX_CTRL_CMD_ID_CONSUME_CMD. Signed-off-by: Victor Zhao Reviewed-by: Emily.Deng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index d65a5339d354a..3ba7bdfde65d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -47,7 +47,7 @@ enum psp_gfx_crtl_cmd_id GFX_CTRL_CMD_ID_DISABLE_INT = 0x00060000, /* disable PSP-to-Gfx interrupt */ GFX_CTRL_CMD_ID_MODE1_RST = 0x00070000, /* trigger the Mode 1 reset */ GFX_CTRL_CMD_ID_GBR_IH_SET = 0x00080000, /* set Gbr IH_RB_CNTL registers */ - GFX_CTRL_CMD_ID_CONSUME_CMD = 0x000A0000, /* send interrupt to psp for updating write pointer of vf */ + GFX_CTRL_CMD_ID_CONSUME_CMD = 0x00090000, /* send interrupt to psp for updating write pointer of vf */ GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING = 0x000C0000, /* destroy GPCOM ring */ GFX_CTRL_CMD_ID_MAX = 0x000F0000, /* max command ID */ -- GitLab From 73644143b31cb95866c19e0d94be9e3127ec3a6b Mon Sep 17 00:00:00 2001 From: Qingqing Zhuo Date: Wed, 9 Dec 2020 18:04:04 -0500 Subject: [PATCH 1766/1894] drm/amd/display: NULL pointer hang [Why] In dc_link_dp_set_test_pattern, we assume all pipes have a stream, which can cause null pointer dereference. [How] Add a null pointer check before accessing stream. Tested-by: Daniel Wheeler Signed-off-by: Qingqing Zhuo Reviewed-by: Nicholas Kazlauskas Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 2fc12239b22cb..1bd1a09352906 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -3992,7 +3992,7 @@ bool dc_link_dp_set_test_pattern( unsigned int cust_pattern_size) { struct pipe_ctx *pipes = link->dc->current_state->res_ctx.pipe_ctx; - struct pipe_ctx *pipe_ctx = &pipes[0]; + struct pipe_ctx *pipe_ctx = NULL; unsigned int lane; unsigned int i; unsigned char link_qual_pattern[LANE_COUNT_DP_MAX] = {0}; @@ -4002,12 +4002,18 @@ bool dc_link_dp_set_test_pattern( memset(&training_pattern, 0, sizeof(training_pattern)); for (i = 0; i < MAX_PIPES; i++) { + if (pipes[i].stream == NULL) + continue; + if (pipes[i].stream->link == link && !pipes[i].top_pipe && !pipes[i].prev_odm_pipe) { pipe_ctx = &pipes[i]; break; } } + if (pipe_ctx == NULL) + return false; + /* Reset CRTC Test Pattern if it is currently running and request is VideoMode */ if (link->test_pattern_enabled && test_pattern == DP_TEST_PATTERN_VIDEO_MODE) { -- GitLab From 4336be4b07ed3b03a18ac35564c3127eeea05ab6 Mon Sep 17 00:00:00 2001 From: Wesley Chalmers Date: Mon, 14 Dec 2020 15:35:02 -0500 Subject: [PATCH 1767/1894] drm/amd/display: Initialize stack variable [WHY] The stack variable "val" is potentially unpopulate it, so initialize it with the value 0xf (indicating an invalid mux) Tested-by: Daniel Wheeler Signed-off-by: Wesley Chalmers Reviewed-by: Martin Leung Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c index 100ce0e28fd5a..b096011acb490 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c @@ -470,7 +470,7 @@ void mpc1_cursor_lock(struct mpc *mpc, int opp_id, bool lock) unsigned int mpc1_get_mpc_out_mux(struct mpc *mpc, int opp_id) { struct dcn10_mpc *mpc10 = TO_DCN10_MPC(mpc); - uint32_t val = 0; + uint32_t val = 0xf; if (opp_id < MAX_OPP && REG(MUX[opp_id])) REG_GET(MUX[opp_id], MPC_OUT_MUX, &val); -- GitLab From 0eb31a82e378cab17beec1d213e1414e9fea1767 Mon Sep 17 00:00:00 2001 From: Nikola Cornij Date: Tue, 29 Dec 2020 23:33:32 -0500 Subject: [PATCH 1768/1894] drm/amd/display: Add a missing DCN3.01 API mapping [why] Required for DSC MST Tested-by: Daniel Wheeler Signed-off-by: Nikola Cornij Reviewed-by: Zhan Liu Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c index 4825c5c1c6ed6..35f5bf08ae96e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c @@ -1731,6 +1731,7 @@ static struct resource_funcs dcn301_res_pool_funcs = { .populate_dml_pipes = dcn30_populate_dml_pipes_from_context, .acquire_idle_pipe_for_layer = dcn20_acquire_idle_pipe_for_layer, .add_stream_to_ctx = dcn30_add_stream_to_ctx, + .add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource, .remove_stream_from_ctx = dcn20_remove_stream_from_ctx, .populate_dml_writeback_from_context = dcn30_populate_dml_writeback_from_context, .set_mcif_arb_params = dcn30_set_mcif_arb_params, -- GitLab From 9d03bb102028b4a3f4a64d6069b219e2e1c1f306 Mon Sep 17 00:00:00 2001 From: "Li, Roman" Date: Wed, 30 Dec 2020 18:03:02 +0000 Subject: [PATCH 1769/1894] drm/amd/display: disable dcn10 pipe split by default [Why] The initial purpose of dcn10 pipe split is to support some high bandwidth mode which requires dispclk greater than max dispclk. By initial bring up power measurement data, it showed power consumption is less with pipe split for dcn block. This could be reason for enable pipe split by default. By battery life measurement of some Chromebooks, result shows battery life is longer with pipe split disabled. [How] Disable pipe split by default. Pipe split could be still enabled when required dispclk is greater than max dispclk. Tested-by: Daniel Wheeler Signed-off-by: Hersen Wu Signed-off-by: Roman Li Reviewed-by: Roman Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index 36745193c391c..90e912fef2b36 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -608,8 +608,8 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_pplib_clock_request = false, .disable_pplib_wm_range = false, .pplib_wm_report_mode = WM_REPORT_DEFAULT, - .pipe_split_policy = MPC_SPLIT_DYNAMIC, - .force_single_disp_pipe_split = true, + .pipe_split_policy = MPC_SPLIT_AVOID, + .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .voltage_align_fclk = true, .disable_stereo_support = true, -- GitLab From 4eec66c014e9a406d8d453de958f6791d05427e4 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Mon, 11 Jan 2021 11:31:51 -0500 Subject: [PATCH 1770/1894] Revert "drm/amd/display: Fixed Intermittent blue screen on OLED panel" commit a861736dae64 ("drm/amd/display: Fixed Intermittent blue screen on OLED panel") causes power regression for many users. It seems that this change causes the MCLK to get forced high; this creates a regression for many users since their devices were not able to drop to a low state after this change. For this reason, this reverts commit a861736dae644a0d7abbca0c638ae6aad28feeb8. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1407 Cc: Aurabindo Pillai Cc: Alex Deucher Cc: Harry Wentland Cc: Naveed Ashfaq Cc: Hersen Wu Cc: Roman Li Acked-by: Alex Deucher Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- .../amd/display/dc/dml/dcn20/display_mode_vba_20v2.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c index 860e72a51534c..80170f9721ce9 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c @@ -2635,14 +2635,15 @@ static void dml20v2_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndP } if (mode_lib->vba.DRAMClockChangeSupportsVActive && - mode_lib->vba.MinActiveDRAMClockChangeMargin > 60 && - mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 0) { + mode_lib->vba.MinActiveDRAMClockChangeMargin > 60) { mode_lib->vba.DRAMClockChangeWatermark += 25; for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - if (mode_lib->vba.DRAMClockChangeWatermark > - dml_max(mode_lib->vba.StutterEnterPlusExitWatermark, mode_lib->vba.UrgentWatermark)) - mode_lib->vba.MinTTUVBlank[k] += 25; + if (mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 0) { + if (mode_lib->vba.DRAMClockChangeWatermark > + dml_max(mode_lib->vba.StutterEnterPlusExitWatermark, mode_lib->vba.UrgentWatermark)) + mode_lib->vba.MinTTUVBlank[k] += 25; + } } mode_lib->vba.DRAMClockChangeSupport[0][0] = dm_dram_clock_change_vactive; -- GitLab From 8b335bff643f3b39935c7377dbcd361c5b605d98 Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Mon, 11 Jan 2021 16:05:28 -0500 Subject: [PATCH 1771/1894] drm/amdkfd: Fix out-of-bounds read in kdf_create_vcrat_image_cpu() KASAN reported a slab-out-of-bounds read of size 1 in kdf_create_vcrat_image_cpu(). This occurs when, for example, when on an x86_64 with a single NUMA node because kfd_fill_iolink_info_for_cpu() is a no-op, but afterwards the sub_type_hdr->length, which is out-of-bounds, is read and multiplied by entries. Fortunately, entries is 0 in this case so the overall crat_table->length is still correct. Check if there were any entries before de-referencing sub_type_hdr which may be pointing to out-of-bounds memory. Fixes: b7b6c38529c9 ("drm/amdkfd: Calculate CPU VCRAT size dynamically (v2)") Suggested-by: Felix Kuehling Signed-off-by: Jeremy Cline Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 8cac497c2c459..a5640a6138cf2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -1040,11 +1040,14 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size) (struct crat_subtype_iolink *)sub_type_hdr); if (ret < 0) return ret; - crat_table->length += (sub_type_hdr->length * entries); - crat_table->total_entries += entries; - sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + - sub_type_hdr->length * entries); + if (entries) { + crat_table->length += (sub_type_hdr->length * entries); + crat_table->total_entries += entries; + + sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + + sub_type_hdr->length * entries); + } #else pr_info("IO link not available for non x86 platforms\n"); #endif -- GitLab From 04eb6e773e9f3167a5921d74e8ad99cdcc4166c3 Mon Sep 17 00:00:00 2001 From: chen gong Date: Fri, 8 Jan 2021 12:46:44 +0800 Subject: [PATCH 1772/1894] drm/amdgpu/gfx10: add updated GOLDEN_TSC_COUNT_UPPER/LOWER register offsets for VGH The address of the GOLDEN_TSC_COUNT_UPPER/GOLDEN_TSC_COUNT_LOWER for Vnagogh are different from the others. The offset of the GOLDEN_TSC_COUNT_UPPER for Vangogh is 0x0025 by calculation. The offset of the GOLDEN_TSC_COUNT_LOWER for Vangogh is 0x0026 by calculation. Signed-off-by: chen gong Acked-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index ba10867845255..34141b785aa4c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -99,6 +99,10 @@ #define mmGCR_GENERAL_CNTL_Sienna_Cichlid 0x1580 #define mmGCR_GENERAL_CNTL_Sienna_Cichlid_BASE_IDX 0 +#define mmGOLDEN_TSC_COUNT_UPPER_Vangogh 0x0025 +#define mmGOLDEN_TSC_COUNT_UPPER_Vangogh_BASE_IDX 1 +#define mmGOLDEN_TSC_COUNT_LOWER_Vangogh 0x0026 +#define mmGOLDEN_TSC_COUNT_LOWER_Vangogh_BASE_IDX 1 #define mmSPI_CONFIG_CNTL_1_Vangogh 0x2441 #define mmSPI_CONFIG_CNTL_1_Vangogh_BASE_IDX 1 #define mmVGT_TF_MEMORY_BASE_HI_Vangogh 0x2261 @@ -7377,8 +7381,16 @@ static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev) amdgpu_gfx_off_ctrl(adev, false); mutex_lock(&adev->gfx.gpu_clock_mutex); - clock = (uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER) | - ((uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER) << 32ULL); + switch (adev->asic_type) { + case CHIP_VANGOGH: + clock = (uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Vangogh) | + ((uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Vangogh) << 32ULL); + break; + default: + clock = (uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER) | + ((uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER) << 32ULL); + break; + } mutex_unlock(&adev->gfx.gpu_clock_mutex); amdgpu_gfx_off_ctrl(adev, true); return clock; -- GitLab From 12f2df72205fe348481d941c3e593e8068d2d23d Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Wed, 13 Jan 2021 20:13:17 +0800 Subject: [PATCH 1773/1894] drm/amdgpu: fix vram type and bandwidth error for DDR5 and DDR4 This patch is to update atomfirmware parser for the memory type and bandwidth of DDR5 and DDR4. Signed-off-by: Huang Rui Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c | 53 +++++++++++++------ 1 file changed, 36 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index 306077884a679..6107ac91db250 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -112,6 +112,7 @@ int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev) union igp_info { struct atom_integrated_system_info_v1_11 v11; struct atom_integrated_system_info_v1_12 v12; + struct atom_integrated_system_info_v2_1 v21; }; union umc_info { @@ -209,24 +210,42 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev, if (adev->flags & AMD_IS_APU) { igp_info = (union igp_info *) (mode_info->atom_context->bios + data_offset); - switch (crev) { - case 11: - mem_channel_number = igp_info->v11.umachannelnumber; - /* channel width is 64 */ - if (vram_width) - *vram_width = mem_channel_number * 64; - mem_type = igp_info->v11.memorytype; - if (vram_type) - *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); + switch (frev) { + case 1: + switch (crev) { + case 11: + case 12: + mem_channel_number = igp_info->v11.umachannelnumber; + if (!mem_channel_number) + mem_channel_number = 1; + /* channel width is 64 */ + if (vram_width) + *vram_width = mem_channel_number * 64; + mem_type = igp_info->v11.memorytype; + if (vram_type) + *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); + break; + default: + return -EINVAL; + } break; - case 12: - mem_channel_number = igp_info->v12.umachannelnumber; - /* channel width is 64 */ - if (vram_width) - *vram_width = mem_channel_number * 64; - mem_type = igp_info->v12.memorytype; - if (vram_type) - *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); + case 2: + switch (crev) { + case 1: + case 2: + mem_channel_number = igp_info->v21.umachannelnumber; + if (!mem_channel_number) + mem_channel_number = 1; + /* channel width is 64 */ + if (vram_width) + *vram_width = mem_channel_number * 64; + mem_type = igp_info->v21.memorytype; + if (vram_type) + *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); + break; + default: + return -EINVAL; + } break; default: return -EINVAL; -- GitLab From 21702c8cae51535e09b91341a069503c6ef3d2a3 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Fri, 2 Oct 2020 10:58:55 -0400 Subject: [PATCH 1774/1894] drm/amdgpu: add green_sardine device id (v2) Add green_sardine PCI id support and map it to renoir asic type. v2: add apu flag Signed-off-by: Prike Liang Reviewed-by: Alex Deucher Reviewed-by: Huang Rui Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 5.10.x --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 72efd579ec5ee..e191383f34f78 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1085,6 +1085,7 @@ static const struct pci_device_id pciidlist[] = { /* Renoir */ {0x1002, 0x1636, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU}, + {0x1002, 0x1638, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU}, /* Navi12 */ {0x1002, 0x7360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12}, -- GitLab From 53f1e7f6a1720f8299b5283857eedc8f07d29533 Mon Sep 17 00:00:00 2001 From: mengwang Date: Wed, 12 Aug 2020 11:49:29 +0800 Subject: [PATCH 1775/1894] drm/amdgpu: add new device id for Renior add DID 0x164C into pciidlist under CHIP_RENOIR family. Signed-off-by: mengwang Reviewed-by: Huang Rui Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 5.10.x --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 1 + drivers/gpu/drm/amd/amdgpu/soc15.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index e191383f34f78..7169fb5e3d9c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1086,6 +1086,7 @@ static const struct pci_device_id pciidlist[] = { /* Renoir */ {0x1002, 0x1636, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU}, {0x1002, 0x1638, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU}, + {0x1002, 0x164C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU}, /* Navi12 */ {0x1002, 0x7360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12}, diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 8a23636ecc27f..0b3516c4eefb3 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1239,7 +1239,8 @@ static int soc15_common_early_init(void *handle) break; case CHIP_RENOIR: adev->asic_funcs = &soc15_asic_funcs; - if (adev->pdev->device == 0x1636) + if ((adev->pdev->device == 0x1636) || + (adev->pdev->device == 0x164c)) adev->apu_flags |= AMD_APU_IS_RENOIR; else adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE; -- GitLab From 3c516e038f0cc3915825bdac619d448c2b1811f2 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Thu, 14 Jan 2021 15:19:23 +0800 Subject: [PATCH 1776/1894] Documentation: ACPI: EINJ: Fix error type values for PCIe errors Fix the error type value for PCI Express uncorrectable non-fatal error to 0x00000080 and fix the error type value for PCI Express uncorrectable fatal error to 0x00000100. See Advanced Configuration and Power Interface Specification, version 6.2, table "18-409 Error Type Definition". Signed-off-by: Qiuxu Zhuo Reported-by: Lijian Zhao [ rjw: Subject edits ] Signed-off-by: Rafael J. Wysocki --- Documentation/firmware-guide/acpi/apei/einj.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/firmware-guide/acpi/apei/einj.rst b/Documentation/firmware-guide/acpi/apei/einj.rst index e588bccf51583..c042176e17078 100644 --- a/Documentation/firmware-guide/acpi/apei/einj.rst +++ b/Documentation/firmware-guide/acpi/apei/einj.rst @@ -50,8 +50,8 @@ The following files belong to it: 0x00000010 Memory Uncorrectable non-fatal 0x00000020 Memory Uncorrectable fatal 0x00000040 PCI Express Correctable - 0x00000080 PCI Express Uncorrectable fatal - 0x00000100 PCI Express Uncorrectable non-fatal + 0x00000080 PCI Express Uncorrectable non-fatal + 0x00000100 PCI Express Uncorrectable fatal 0x00000200 Platform Correctable 0x00000400 Platform Uncorrectable non-fatal 0x00000800 Platform Uncorrectable fatal -- GitLab From 7de843dbaaa68aa514090e6226ed7c6374fd7e49 Mon Sep 17 00:00:00 2001 From: Nicholas Miell Date: Sun, 10 Jan 2021 22:09:25 -0800 Subject: [PATCH 1777/1894] HID: logitech-hidpp: Add product ID for MX Ergo in Bluetooth mode The Logitech MX Ergo trackball supports HID++ 4.5 over Bluetooth. Add its product ID to the table so we can get battery monitoring support. (The hid-logitech-hidpp driver already recognizes it when connected via a Unifying Receiver.) [jkosina@suse.cz: fix whitespace damage] Signed-off-by: Nicholas Miell Reviewed-by: Hans de Goede Signed-off-by: Jiri Kosina --- drivers/hid/hid-logitech-hidpp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index f85781464807d..7eb9a6ddb46a6 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -4053,6 +4053,8 @@ static const struct hid_device_id hidpp_devices[] = { { /* MX Master mouse over Bluetooth */ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb012), .driver_data = HIDPP_QUIRK_HI_RES_SCROLL_X2121 }, + { /* MX Ergo trackball over Bluetooth */ + HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb01d) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb01e), .driver_data = HIDPP_QUIRK_HI_RES_SCROLL_X2121 }, { /* MX Master 3 mouse over Bluetooth */ -- GitLab From c87a95dc28b1431c7e77e2c0c983cf37698089d2 Mon Sep 17 00:00:00 2001 From: Ignat Korchagin Date: Wed, 13 Jan 2021 19:17:17 +0000 Subject: [PATCH 1778/1894] dm crypt: defer decryption to a tasklet if interrupts disabled On some specific hardware on early boot we occasionally get: [ 1193.920255][ T0] BUG: sleeping function called from invalid context at mm/mempool.c:381 [ 1193.936616][ T0] in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 0, name: swapper/69 [ 1193.953233][ T0] no locks held by swapper/69/0. [ 1193.965871][ T0] irq event stamp: 575062 [ 1193.977724][ T0] hardirqs last enabled at (575061): [] tick_nohz_idle_exit+0xe2/0x3e0 [ 1194.002762][ T0] hardirqs last disabled at (575062): [] flush_smp_call_function_from_idle+0x4f/0x80 [ 1194.029035][ T0] softirqs last enabled at (575050): [] asm_call_irq_on_stack+0x12/0x20 [ 1194.054227][ T0] softirqs last disabled at (575043): [] asm_call_irq_on_stack+0x12/0x20 [ 1194.079389][ T0] CPU: 69 PID: 0 Comm: swapper/69 Not tainted 5.10.6-cloudflare-kasan-2021.1.4-dev #1 [ 1194.104103][ T0] Hardware name: NULL R162-Z12-CD/MZ12-HD4-CD, BIOS R10 06/04/2020 [ 1194.119591][ T0] Call Trace: [ 1194.130233][ T0] dump_stack+0x9a/0xcc [ 1194.141617][ T0] ___might_sleep.cold+0x180/0x1b0 [ 1194.153825][ T0] mempool_alloc+0x16b/0x300 [ 1194.165313][ T0] ? remove_element+0x160/0x160 [ 1194.176961][ T0] ? blk_mq_end_request+0x4b/0x490 [ 1194.188778][ T0] crypt_convert+0x27f6/0x45f0 [dm_crypt] [ 1194.201024][ T0] ? rcu_read_lock_sched_held+0x3f/0x70 [ 1194.212906][ T0] ? module_assert_mutex_or_preempt+0x3e/0x70 [ 1194.225318][ T0] ? __module_address.part.0+0x1b/0x3a0 [ 1194.237212][ T0] ? is_kernel_percpu_address+0x5b/0x190 [ 1194.249238][ T0] ? crypt_iv_tcw_ctr+0x4a0/0x4a0 [dm_crypt] [ 1194.261593][ T0] ? is_module_address+0x25/0x40 [ 1194.272905][ T0] ? static_obj+0x8a/0xc0 [ 1194.283582][ T0] ? lockdep_init_map_waits+0x26a/0x700 [ 1194.295570][ T0] ? __raw_spin_lock_init+0x39/0x110 [ 1194.307330][ T0] kcryptd_crypt_read_convert+0x31c/0x560 [dm_crypt] [ 1194.320496][ T0] ? kcryptd_queue_crypt+0x1be/0x380 [dm_crypt] [ 1194.333203][ T0] blk_update_request+0x6d7/0x1500 [ 1194.344841][ T0] ? blk_mq_trigger_softirq+0x190/0x190 [ 1194.356831][ T0] blk_mq_end_request+0x4b/0x490 [ 1194.367994][ T0] ? blk_mq_trigger_softirq+0x190/0x190 [ 1194.379693][ T0] flush_smp_call_function_queue+0x24b/0x560 [ 1194.391847][ T0] flush_smp_call_function_from_idle+0x59/0x80 [ 1194.403969][ T0] do_idle+0x287/0x450 [ 1194.413891][ T0] ? arch_cpu_idle_exit+0x40/0x40 [ 1194.424716][ T0] ? lockdep_hardirqs_on_prepare+0x286/0x3f0 [ 1194.436399][ T0] ? _raw_spin_unlock_irqrestore+0x39/0x40 [ 1194.447759][ T0] cpu_startup_entry+0x19/0x20 [ 1194.458038][ T0] secondary_startup_64_no_verify+0xb0/0xbb IO completion can be queued to a different CPU by the block subsystem as a "call single function/data". The CPU may run these routines from the idle task, but it does so with interrupts disabled. It is not a good idea to do decryption with irqs disabled even in an idle task context, so just defer it to a tasklet (as is done with requests from hard irqs). Fixes: 39d42fa96ba1 ("dm crypt: add flags to optionally bypass kcryptd workqueues") Cc: stable@vger.kernel.org # v5.9+ Signed-off-by: Ignat Korchagin Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index b4d7418b50368..8c874710f0bcf 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -2221,8 +2221,12 @@ static void kcryptd_queue_crypt(struct dm_crypt_io *io) if ((bio_data_dir(io->base_bio) == READ && test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags)) || (bio_data_dir(io->base_bio) == WRITE && test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags))) { - if (in_irq()) { - /* Crypto API's "skcipher_walk_first() refuses to work in hard IRQ context */ + /* + * in_irq(): Crypto API's skcipher_walk_first() refuses to work in hard IRQ context. + * irqs_disabled(): the kernel may run some IO completion from the idle thread, but + * it is being executed with irqs disabled. + */ + if (in_irq() || irqs_disabled()) { tasklet_init(&io->tasklet, kcryptd_crypt_tasklet, (unsigned long)&io->work); tasklet_schedule(&io->tasklet); return; -- GitLab From 55ed4560774d81d7343223b8fd2784c530a9c6c1 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 9 Dec 2020 14:27:44 +0900 Subject: [PATCH 1779/1894] tools/bootconfig: Add tracing_on support to helper scripts Add ftrace.instance.INSTANCE.tracing_on support to ftrace2bconf.sh and bconf2ftrace.sh. commit 8490db06f914 ("tracing/boot: Add per-instance tracing_on option support") added the per-instance tracing_on option, but forgot to update the helper scripts. Link: https://lkml.kernel.org/r/160749166410.3497930.14204335886811029800.stgit@devnote2 Cc: stable@vger.kernel.org Fixes: 8490db06f914 ("tracing/boot: Add per-instance tracing_on option support") Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- tools/bootconfig/scripts/bconf2ftrace.sh | 1 + tools/bootconfig/scripts/ftrace2bconf.sh | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/tools/bootconfig/scripts/bconf2ftrace.sh b/tools/bootconfig/scripts/bconf2ftrace.sh index 595e164dc352f..feb30c2c78815 100755 --- a/tools/bootconfig/scripts/bconf2ftrace.sh +++ b/tools/bootconfig/scripts/bconf2ftrace.sh @@ -152,6 +152,7 @@ setup_instance() { # [instance] set_array_of ${instance}.options ${instancedir}/trace_options set_value_of ${instance}.trace_clock ${instancedir}/trace_clock set_value_of ${instance}.cpumask ${instancedir}/tracing_cpumask + set_value_of ${instance}.tracing_on ${instancedir}/tracing_on set_value_of ${instance}.tracer ${instancedir}/current_tracer set_array_of ${instance}.ftrace.filters \ ${instancedir}/set_ftrace_filter diff --git a/tools/bootconfig/scripts/ftrace2bconf.sh b/tools/bootconfig/scripts/ftrace2bconf.sh index 6c0d4b61e0c26..a0c3bcc6da4f3 100755 --- a/tools/bootconfig/scripts/ftrace2bconf.sh +++ b/tools/bootconfig/scripts/ftrace2bconf.sh @@ -221,6 +221,10 @@ instance_options() { # [instance-name] if [ `echo $val | sed -e s/f//g`x != x ]; then emit_kv $PREFIX.cpumask = $val fi + val=`cat $INSTANCE/tracing_on` + if [ `echo $val | sed -e s/f//g`x != x ]; then + emit_kv $PREFIX.tracing_on = $val + fi val= for i in `cat $INSTANCE/set_event`; do -- GitLab From b79f2dc5ffe17b03ec8c55f0d63f65e87bcac676 Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Wed, 13 Jan 2021 14:16:59 +0200 Subject: [PATCH 1780/1894] RDMA/umem: Avoid undefined behavior of rounddown_pow_of_two() rounddown_pow_of_two() is undefined when the input is 0. Therefore we need to avoid it in ib_umem_find_best_pgsz and return 0. Otherwise, it could result in not rejecting an invalid page size which eventually causes a kernel oops due to the logical inconsistency. Fixes: 3361c29e9279 ("RDMA/umem: Use simpler logic for ib_umem_find_best_pgsz()") Link: https://lore.kernel.org/r/20210113121703.559778-2-leon@kernel.org Signed-off-by: Aharon Landau Reviewed-by: Jason Gunthorpe Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/umem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 7ca4112e3e8f7..917338db7ac13 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -135,7 +135,7 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, */ if (mask) pgsz_bitmap &= GENMASK(count_trailing_zeros(mask), 0); - return rounddown_pow_of_two(pgsz_bitmap); + return pgsz_bitmap ? rounddown_pow_of_two(pgsz_bitmap) : 0; } EXPORT_SYMBOL(ib_umem_find_best_pgsz); -- GitLab From 2cb091f6293df898b47f4e0f2e54324e2bbaf816 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 13 Jan 2021 14:17:00 +0200 Subject: [PATCH 1781/1894] IB/mlx5: Fix error unwinding when set_has_smi_cap fails When set_has_smi_cap() fails, multiport master cleanup is missed. Fix it by doing the correct error unwinding goto. Fixes: a989ea01cb10 ("RDMA/mlx5: Move SMI caps logic") Link: https://lore.kernel.org/r/20210113121703.559778-3-leon@kernel.org Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 3bae9ba0ead85..fbe3b75f866b1 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3956,7 +3956,7 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) err = set_has_smi_cap(dev); if (err) - return err; + goto err_mp; if (!mlx5_core_mp_enabled(mdev)) { for (i = 1; i <= dev->num_ports; i++) { -- GitLab From 1c3aa6bd0b823105c2030af85d92d158e815d669 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Wed, 13 Jan 2021 14:17:03 +0200 Subject: [PATCH 1782/1894] RDMA/mlx5: Fix wrong free of blue flame register on error If the allocation of the fast path blue flame register fails, the driver should free the regular blue flame register allocated a statement above, not the one that it just failed to allocate. Fixes: 16c1975f1032 ("IB/mlx5: Create profile infrastructure to add and remove stages") Link: https://lore.kernel.org/r/20210113121703.559778-6-leon@kernel.org Reported-by: Hans Petter Selasky Signed-off-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index fbe3b75f866b1..d26f3f3e0462a 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -4319,7 +4319,7 @@ static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev) err = mlx5_alloc_bfreg(dev->mdev, &dev->fp_bfreg, false, true); if (err) - mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg); + mlx5_free_bfreg(dev->mdev, &dev->bfreg); return err; } -- GitLab From 7c7b3e5d9aeed31d35c5dab0bf9c0fd4c8923206 Mon Sep 17 00:00:00 2001 From: Neta Ostrovsky Date: Wed, 13 Jan 2021 15:02:14 +0200 Subject: [PATCH 1783/1894] RDMA/cma: Fix error flow in default_roce_mode_store In default_roce_mode_store(), we took a reference to cma_dev, but didn't return it with cma_dev_put in the error flow. Fixes: 1c15b4f2a42f ("RDMA/core: Modify enum ib_gid_type and enum rdma_network_type") Link: https://lore.kernel.org/r/20210113130214.562108-1-leon@kernel.org Signed-off-by: Neta Ostrovsky Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma_configfs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c index 7f70e5a7de105..97a77ea8d3c9c 100644 --- a/drivers/infiniband/core/cma_configfs.c +++ b/drivers/infiniband/core/cma_configfs.c @@ -131,8 +131,10 @@ static ssize_t default_roce_mode_store(struct config_item *item, return ret; gid_type = ib_cache_gid_parse_type_str(buf); - if (gid_type < 0) + if (gid_type < 0) { + cma_configfs_params_put(cma_dev); return -EINVAL; + } ret = cma_set_default_gid_type(cma_dev, group->port_num, gid_type); -- GitLab From 47e4bb147a96f1c9b4e7691e7e994e53838bfff8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Jan 2021 17:29:47 -0800 Subject: [PATCH 1784/1894] net: sit: unregister_netdevice on newlink's error path We need to unregister the netdevice if config failed. .ndo_uninit takes care of most of the heavy lifting. This was uncovered by recent commit c269a24ce057 ("net: make free_netdev() more lenient with unregistering devices"). Previously the partially-initialized device would be left in the system. Reported-and-tested-by: syzbot+2393580080a2da190f04@syzkaller.appspotmail.com Fixes: e2f1f072db8d ("sit: allow to configure 6rd tunnels via netlink") Acked-by: Nicolas Dichtel Link: https://lore.kernel.org/r/20210114012947.2515313-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/ipv6/sit.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 2da0ee7037795..93636867aee28 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1645,8 +1645,11 @@ static int ipip6_newlink(struct net *src_net, struct net_device *dev, } #ifdef CONFIG_IPV6_SIT_6RD - if (ipip6_netlink_6rd_parms(data, &ip6rd)) + if (ipip6_netlink_6rd_parms(data, &ip6rd)) { err = ipip6_tunnel_update_6rd(nt, &ip6rd); + if (err < 0) + unregister_netdevice_queue(dev, NULL); + } #endif return err; -- GitLab From 25764779298f23a659f3daf39f9e2b5975a7a89d Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 14 Jan 2021 09:04:48 +0100 Subject: [PATCH 1785/1894] net: tip: fix a couple kernel-doc markups A function has a different name between their prototype and its kernel-doc markup: ../net/tipc/link.c:2551: warning: expecting prototype for link_reset_stats(). Prototype was for tipc_link_reset_stats() instead ../net/tipc/node.c:1678: warning: expecting prototype for is the general link level function for message sending(). Prototype was for tipc_node_xmit() instead Signed-off-by: Mauro Carvalho Chehab Acked-by: Jon Maloy Signed-off-by: Jakub Kicinski --- net/tipc/link.c | 2 +- net/tipc/node.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index a6a694b789274..1151092594302 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2544,7 +2544,7 @@ void tipc_link_set_queue_limits(struct tipc_link *l, u32 min_win, u32 max_win) } /** - * link_reset_stats - reset link statistics + * tipc_link_reset_stats - reset link statistics * @l: pointer to link */ void tipc_link_reset_stats(struct tipc_link *l) diff --git a/net/tipc/node.c b/net/tipc/node.c index 83d9eb8305928..008670d1f43e1 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1665,7 +1665,7 @@ static void tipc_lxc_xmit(struct net *peer_net, struct sk_buff_head *list) } /** - * tipc_node_xmit() is the general link level function for message sending + * tipc_node_xmit() - general link level function for message sending * @net: the applicable net namespace * @list: chain of buffers containing message * @dnode: address of destination node -- GitLab From b76889ff51bfee318bea15891420e5aefd2833a0 Mon Sep 17 00:00:00 2001 From: Yannick Vignon Date: Wed, 13 Jan 2021 14:15:56 +0100 Subject: [PATCH 1786/1894] net: stmmac: fix taprio schedule configuration When configuring a 802.1Qbv schedule through the tc taprio qdisc on an NXP i.MX8MPlus device, the effective cycle time differed from the requested one by N*96ns, with N number of entries in the Qbv Gate Control List. This is because the driver was adding a 96ns margin to each interval of the GCL, apparently to account for the IPG. The problem was observed on NXP i.MX8MPlus devices but likely affected all devices relying on the same configuration callback (dwmac 4.00, 4.10, 5.10 variants). Fix the issue by removing the margins, and simply setup the MAC with the provided cycle time value. This is the behavior expected by the user-space API, as altering the Qbv schedule timings would break standards conformance. This is also the behavior of several other Ethernet MAC implementations supporting taprio, including the dwxgmac variant of stmmac. Fixes: 504723af0d85 ("net: stmmac: Add basic EST support for GMAC5+") Signed-off-by: Yannick Vignon Link: https://lore.kernel.org/r/20210113131557.24651-1-yannick.vignon@oss.nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac5.c | 52 ++------------------ 1 file changed, 4 insertions(+), 48 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c index 03e79a677c8bd..8f7ac24545efe 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c @@ -568,68 +568,24 @@ static int dwmac5_est_write(void __iomem *ioaddr, u32 reg, u32 val, bool gcl) int dwmac5_est_configure(void __iomem *ioaddr, struct stmmac_est *cfg, unsigned int ptp_rate) { - u32 speed, total_offset, offset, ctrl, ctr_low; - u32 extcfg = readl(ioaddr + GMAC_EXT_CONFIG); - u32 mac_cfg = readl(ioaddr + GMAC_CONFIG); int i, ret = 0x0; - u64 total_ctr; - - if (extcfg & GMAC_CONFIG_EIPG_EN) { - offset = (extcfg & GMAC_CONFIG_EIPG) >> GMAC_CONFIG_EIPG_SHIFT; - offset = 104 + (offset * 8); - } else { - offset = (mac_cfg & GMAC_CONFIG_IPG) >> GMAC_CONFIG_IPG_SHIFT; - offset = 96 - (offset * 8); - } - - speed = mac_cfg & (GMAC_CONFIG_PS | GMAC_CONFIG_FES); - speed = speed >> GMAC_CONFIG_FES_SHIFT; - - switch (speed) { - case 0x0: - offset = offset * 1000; /* 1G */ - break; - case 0x1: - offset = offset * 400; /* 2.5G */ - break; - case 0x2: - offset = offset * 100000; /* 10M */ - break; - case 0x3: - offset = offset * 10000; /* 100M */ - break; - default: - return -EINVAL; - } - - offset = offset / 1000; + u32 ctrl; ret |= dwmac5_est_write(ioaddr, BTR_LOW, cfg->btr[0], false); ret |= dwmac5_est_write(ioaddr, BTR_HIGH, cfg->btr[1], false); ret |= dwmac5_est_write(ioaddr, TER, cfg->ter, false); ret |= dwmac5_est_write(ioaddr, LLR, cfg->gcl_size, false); + ret |= dwmac5_est_write(ioaddr, CTR_LOW, cfg->ctr[0], false); + ret |= dwmac5_est_write(ioaddr, CTR_HIGH, cfg->ctr[1], false); if (ret) return ret; - total_offset = 0; for (i = 0; i < cfg->gcl_size; i++) { - ret = dwmac5_est_write(ioaddr, i, cfg->gcl[i] + offset, true); + ret = dwmac5_est_write(ioaddr, i, cfg->gcl[i], true); if (ret) return ret; - - total_offset += offset; } - total_ctr = cfg->ctr[0] + cfg->ctr[1] * 1000000000ULL; - total_ctr += total_offset; - - ctr_low = do_div(total_ctr, 1000000000); - - ret |= dwmac5_est_write(ioaddr, CTR_LOW, ctr_low, false); - ret |= dwmac5_est_write(ioaddr, CTR_HIGH, total_ctr, false); - if (ret) - return ret; - ctrl = readl(ioaddr + MTL_EST_CONTROL); ctrl &= ~PTOV; ctrl |= ((1000000000 / ptp_rate) * 6) << PTOV_SHIFT; -- GitLab From fe28c53ed71d463e187748b6b10e1130dd72ceeb Mon Sep 17 00:00:00 2001 From: Yannick Vignon Date: Wed, 13 Jan 2021 14:15:57 +0100 Subject: [PATCH 1787/1894] net: stmmac: fix taprio configuration when base_time is in the past The Synopsys TSN MAC supports Qbv base times in the past, but only up to a certain limit. As a result, a taprio qdisc configuration with a small base time (for example when treating the base time as a simple phase offset) is not applied by the hardware and silently ignored. This was observed on an NXP i.MX8MPlus device, but likely affects all TSN-variants of the MAC. Fix the issue by making sure the base time is in the future, pushing it by an integer amount of cycle times if needed. (a similar check is already done in several other taprio implementations, see for example drivers/net/ethernet/intel/igc/igc_tsn.c#L116 or drivers/net/dsa/sja1105/sja1105_ptp.h#L39). Fixes: b60189e0392f ("net: stmmac: Integrate EST with TAPRIO scheduler API") Signed-off-by: Yannick Vignon Link: https://lore.kernel.org/r/20210113131557.24651-2-yannick.vignon@oss.nxp.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_tc.c | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index f5bed4d26e804..8ed3b2c834a09 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -599,7 +599,8 @@ static int tc_setup_taprio(struct stmmac_priv *priv, { u32 size, wid = priv->dma_cap.estwid, dep = priv->dma_cap.estdep; struct plat_stmmacenet_data *plat = priv->plat; - struct timespec64 time; + struct timespec64 time, current_time; + ktime_t current_time_ns; bool fpe = false; int i, ret = 0; u64 ctr; @@ -694,7 +695,22 @@ static int tc_setup_taprio(struct stmmac_priv *priv, } /* Adjust for real system time */ - time = ktime_to_timespec64(qopt->base_time); + priv->ptp_clock_ops.gettime64(&priv->ptp_clock_ops, ¤t_time); + current_time_ns = timespec64_to_ktime(current_time); + if (ktime_after(qopt->base_time, current_time_ns)) { + time = ktime_to_timespec64(qopt->base_time); + } else { + ktime_t base_time; + s64 n; + + n = div64_s64(ktime_sub_ns(current_time_ns, qopt->base_time), + qopt->cycle_time); + base_time = ktime_add_ns(qopt->base_time, + (n + 1) * qopt->cycle_time); + + time = ktime_to_timespec64(base_time); + } + priv->plat->est->btr[0] = (u32)time.tv_nsec; priv->plat->est->btr[1] = (u32)time.tv_sec; -- GitLab From 7da17624e7948d5d9660b910f8079d26d26ce453 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 13 Jan 2021 15:43:09 +0100 Subject: [PATCH 1788/1894] nt: usb: USB_RTL8153_ECM should not default to y In general, device drivers should not be enabled by default. Fixes: 657bc1d10bfc23ac ("r8153_ecm: avoid to be prior to r8152 driver") Signed-off-by: Geert Uytterhoeven Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20210113144309.1384615-1-geert+renesas@glider.be Signed-off-by: Jakub Kicinski --- drivers/net/usb/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig index 1e37190287808..fbbe786436319 100644 --- a/drivers/net/usb/Kconfig +++ b/drivers/net/usb/Kconfig @@ -631,7 +631,6 @@ config USB_NET_AQC111 config USB_RTL8153_ECM tristate "RTL8153 ECM support" depends on USB_NET_CDCETHER && (USB_RTL8152 || USB_RTL8152=n) - default y help This option supports ECM mode for RTL8153 ethernet adapter, when CONFIG_USB_RTL8152 is not set, or the RTL8153 device is not -- GitLab From 3226b158e67cfaa677fd180152bfb28989cb2fac Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 13 Jan 2021 08:18:19 -0800 Subject: [PATCH 1789/1894] net: avoid 32 x truesize under-estimation for tiny skbs Both virtio net and napi_get_frags() allocate skbs with a very small skb->head While using page fragments instead of a kmalloc backed skb->head might give a small performance improvement in some cases, there is a huge risk of under estimating memory usage. For both GOOD_COPY_LEN and GRO_MAX_HEAD, we can fit at least 32 allocations per page (order-3 page in x86), or even 64 on PowerPC We have been tracking OOM issues on GKE hosts hitting tcp_mem limits but consuming far more memory for TCP buffers than instructed in tcp_mem[2] Even if we force napi_alloc_skb() to only use order-0 pages, the issue would still be there on arches with PAGE_SIZE >= 32768 This patch makes sure that small skb head are kmalloc backed, so that other objects in the slab page can be reused instead of being held as long as skbs are sitting in socket queues. Note that we might in the future use the sk_buff napi cache, instead of going through a more expensive __alloc_skb() Another idea would be to use separate page sizes depending on the allocated length (to never have more than 4 frags per page) I would like to thank Greg Thelen for his precious help on this matter, analysing crash dumps is always a time consuming task. Fixes: fd11a83dd363 ("net: Pull out core bits of __netdev_alloc_skb and add __napi_alloc_skb") Signed-off-by: Eric Dumazet Cc: Paolo Abeni Cc: Greg Thelen Reviewed-by: Alexander Duyck Acked-by: Michael S. Tsirkin Link: https://lore.kernel.org/r/20210113161819.1155526-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- net/core/skbuff.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 0da035c1e53f1..c1a6f262636a1 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -501,13 +501,17 @@ EXPORT_SYMBOL(__netdev_alloc_skb); struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, gfp_t gfp_mask) { - struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); + struct napi_alloc_cache *nc; struct sk_buff *skb; void *data; len += NET_SKB_PAD + NET_IP_ALIGN; - if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) || + /* If requested length is either too small or too big, + * we use kmalloc() for skb->head allocation. + */ + if (len <= SKB_WITH_OVERHEAD(1024) || + len > SKB_WITH_OVERHEAD(PAGE_SIZE) || (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) { skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE); if (!skb) @@ -515,6 +519,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, goto skb_success; } + nc = this_cpu_ptr(&napi_alloc_cache); len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); len = SKB_DATA_ALIGN(len); -- GitLab From 93089de91e85743942a5f804850d4f0846e5402b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Jan 2021 17:49:06 -0800 Subject: [PATCH 1790/1894] MAINTAINERS: altx: move Jay Cliburn to CREDITS Jay was not active in recent years and does not have plans to return to work on ATLX drivers. Subsystem ATLX ETHERNET DRIVERS Changes 20 / 116 (17%) Last activity: 2020-02-24 Jay Cliburn : Chris Snook : Tags ea973742140b 2020-02-24 00:00:00 1 Top reviewers: [4]: andrew@lunn.ch [2]: kuba@kernel.org [2]: o.rempel@pengutronix.de INACTIVE MAINTAINER Jay Cliburn Acked-by: Chris Snook Signed-off-by: Jakub Kicinski --- CREDITS | 4 ++++ MAINTAINERS | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CREDITS b/CREDITS index 090ed4b004a5b..59a704a45170d 100644 --- a/CREDITS +++ b/CREDITS @@ -710,6 +710,10 @@ S: Las Cuevas 2385 - Bo Guemes S: Las Heras, Mendoza CP 5539 S: Argentina +N: Jay Cliburn +E: jcliburn@gmail.com +D: ATLX Ethernet drivers + N: Steven P. Cole E: scole@lanl.gov E: elenstev@mesatop.com diff --git a/MAINTAINERS b/MAINTAINERS index b15514a770e3c..57e17762d4110 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2942,7 +2942,6 @@ S: Maintained F: drivers/hwmon/asus_atk0110.c ATLX ETHERNET DRIVERS -M: Jay Cliburn M: Chris Snook L: netdev@vger.kernel.org S: Maintained -- GitLab From 09cd3f4683a901d572ad17f0564cc9e3e989f0f4 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Jan 2021 17:49:07 -0800 Subject: [PATCH 1791/1894] MAINTAINERS: net: move Alexey Kuznetsov to CREDITS Move Alexey to CREDITS. I am probably not giving him enough justice with the description line.. Subsystem NETWORKING [IPv4/IPv6] Changes 1535 / 5111 (30%) Last activity: 2020-12-10 "David S. Miller" : Author b7e4ba9a91df 2020-12-09 00:00:00 407 Committer e0fecb289ad3 2020-12-10 00:00:00 3992 Tags e0fecb289ad3 2020-12-10 00:00:00 3978 Alexey Kuznetsov : Hideaki YOSHIFUJI : Tags d5d8760b78d0 2016-06-16 00:00:00 8 Top reviewers: [225]: edumazet@google.com [222]: dsahern@gmail.com [176]: ncardwell@google.com INACTIVE MAINTAINER Alexey Kuznetsov Signed-off-by: Jakub Kicinski --- CREDITS | 4 ++++ MAINTAINERS | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CREDITS b/CREDITS index 59a704a45170d..3dceea7376947 100644 --- a/CREDITS +++ b/CREDITS @@ -2142,6 +2142,10 @@ E: seasons@falcon.sch.bme.hu E: seasons@makosteszta.sote.hu D: Original author of software suspend +N: Alexey Kuznetsov +E: kuznet@ms2.inr.ac.ru +D: Author and maintainer of large parts of the networking stack + N: Jaroslav Kysela E: perex@perex.cz W: https://www.perex.cz diff --git a/MAINTAINERS b/MAINTAINERS index 57e17762d4110..c6e7f6bf7f6db 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12417,7 +12417,6 @@ F: tools/testing/selftests/net/ipsec.c NETWORKING [IPv4/IPv6] M: "David S. Miller" -M: Alexey Kuznetsov M: Hideaki YOSHIFUJI L: netdev@vger.kernel.org S: Maintained -- GitLab From 5e62d124f75aae0e96fd8a588ad31659a2468710 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Jan 2021 17:49:08 -0800 Subject: [PATCH 1792/1894] MAINTAINERS: vrf: move Shrijeet to CREDITS Shrijeet has moved on from VRF-related work. Subsystem VRF Changes 30 / 120 (25%) Last activity: 2020-12-09 David Ahern : Author 1b6687e31a2d 2020-07-23 00:00:00 1 Tags 9125abe7b9cb 2020-12-09 00:00:00 4 Shrijeet Mukherjee : Top reviewers: [13]: dsahern@gmail.com [4]: dsa@cumulusnetworks.com INACTIVE MAINTAINER Shrijeet Mukherjee Signed-off-by: Jakub Kicinski --- CREDITS | 4 ++++ MAINTAINERS | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CREDITS b/CREDITS index 3dceea7376947..98e7485ec1067 100644 --- a/CREDITS +++ b/CREDITS @@ -2704,6 +2704,10 @@ N: Wolfgang Muees E: wolfgang@iksw-muees.de D: Auerswald USB driver +N: Shrijeet Mukherjee +E: shrijeet@gmail.com +D: Network routing domains (VRF). + N: Paul Mundt E: paul.mundt@gmail.com D: SuperH maintainer diff --git a/MAINTAINERS b/MAINTAINERS index c6e7f6bf7f6db..a06faf9e20184 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19056,7 +19056,6 @@ K: regulator_get_optional VRF M: David Ahern -M: Shrijeet Mukherjee L: netdev@vger.kernel.org S: Maintained F: Documentation/networking/vrf.rst -- GitLab From c41efbf2ad56280762d19a531eb7edbf2e6a9f84 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Jan 2021 17:49:09 -0800 Subject: [PATCH 1793/1894] MAINTAINERS: ena: remove Zorik Machulsky from reviewers While ENA has 3 reviewers and 2 maintainers, we mostly see review tags and comments from the maintainers. While we very much appreciate Zorik's invovment in the community let's trim the reviewer list down to folks we've seen tags from. Subsystem AMAZON ETHERNET DRIVERS Changes 13 / 269 (4%) Last activity: 2020-11-24 Netanel Belgazal : Author 24dee0c7478d 2019-12-10 00:00:00 43 Tags 0e3a3f6dacf0 2020-07-21 00:00:00 47 Arthur Kiyanovski : Author 0e3a3f6dacf0 2020-07-21 00:00:00 79 Tags 09323b3bca95 2020-11-24 00:00:00 104 Guy Tzalik : Tags 713865da3c62 2020-09-10 00:00:00 3 Saeed Bishara : Tags 470793a78ce3 2020-02-11 00:00:00 2 Zorik Machulsky : Top reviewers: [4]: sameehj@amazon.com [3]: snelson@pensando.io [3]: shayagr@amazon.com INACTIVE MAINTAINER Zorik Machulsky Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index a06faf9e20184..64dd19dfc9c3b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -820,7 +820,6 @@ M: Netanel Belgazal M: Arthur Kiyanovski R: Guy Tzalik R: Saeed Bishara -R: Zorik Machulsky L: netdev@vger.kernel.org S: Supported F: Documentation/networking/device_drivers/ethernet/amazon/ena.rst -- GitLab From 0e4ed0b62b5a1f60b72ab7aaa29efd735d4cb6a6 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Jan 2021 17:49:10 -0800 Subject: [PATCH 1794/1894] MAINTAINERS: tls: move Aviad to CREDITS Aviad wrote parts of the initial TLS implementation but hasn't been contributing to TLS since. Subsystem NETWORKING [TLS] Changes 123 / 308 (39%) Last activity: 2020-12-01 Boris Pismenny : Tags 138559b9f99d 2020-11-17 00:00:00 1 Aviad Yehezkel : John Fastabend : Author e91de6afa81c 2020-06-01 00:00:00 22 Tags e91de6afa81c 2020-06-01 00:00:00 29 Daniel Borkmann : Author c16ee04c9b30 2018-10-20 00:00:00 7 Committer b8e202d1d1d0 2020-02-21 00:00:00 19 Tags b8e202d1d1d0 2020-02-21 00:00:00 28 Jakub Kicinski : Author 5c39f26e67c9 2020-11-27 00:00:00 89 Committer d31c08007523 2020-12-01 00:00:00 15 Tags d31c08007523 2020-12-01 00:00:00 117 Top reviewers: [50]: dirk.vandermerwe@netronome.com [26]: simon.horman@netronome.com [14]: john.hurley@netronome.com INACTIVE MAINTAINER Aviad Yehezkel Signed-off-by: Jakub Kicinski --- CREDITS | 4 ++++ MAINTAINERS | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CREDITS b/CREDITS index 98e7485ec1067..90384691876c5 100644 --- a/CREDITS +++ b/CREDITS @@ -4122,6 +4122,10 @@ S: B-1206 Jingmao Guojigongyu S: 16 Baliqiao Nanjie, Beijing 101100 S: People's Repulic of China +N: Aviad Yehezkel +E: aviadye@nvidia.com +D: Kernel TLS implementation and offload support. + N: Victor Yodaiken E: yodaiken@fsmlabs.com D: RTLinux (RealTime Linux) diff --git a/MAINTAINERS b/MAINTAINERS index 64dd19dfc9c3b..92fdc134ca14f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12472,7 +12472,6 @@ F: net/ipv6/tcp*.c NETWORKING [TLS] M: Boris Pismenny -M: Aviad Yehezkel M: John Fastabend M: Daniel Borkmann M: Jakub Kicinski -- GitLab From 4f3786e011940d83d7a9c365730936db96a0b233 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Jan 2021 17:49:11 -0800 Subject: [PATCH 1795/1894] MAINTAINERS: ipvs: move Wensong Zhang to CREDITS Move Wensong Zhang to credits, we haven't heard from him in years. Subsystem IPVS Changes 83 / 226 (36%) Last activity: 2020-11-27 Wensong Zhang : Simon Horman : Committer c24b75e0f923 2019-10-24 00:00:00 33 Tags 7980d2eabde8 2020-10-12 00:00:00 76 Julian Anastasov : Author 7980d2eabde8 2020-10-12 00:00:00 26 Tags 4bc3c8dc9f5f 2020-11-27 00:00:00 78 Top reviewers: [6]: horms+renesas@verge.net.au INACTIVE MAINTAINER Wensong Zhang Signed-off-by: Jakub Kicinski --- CREDITS | 4 ++++ MAINTAINERS | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CREDITS b/CREDITS index 90384691876c5..ce8eae8c5aa48 100644 --- a/CREDITS +++ b/CREDITS @@ -4183,6 +4183,10 @@ S: 1507 145th Place SE #B5 S: Bellevue, Washington 98007 S: USA +N: Wensong Zhang +E: wensong@linux-vs.org +D: IP virtual server (IPVS). + N: Haojian Zhuang E: haojian.zhuang@gmail.com D: MMP support diff --git a/MAINTAINERS b/MAINTAINERS index 92fdc134ca14f..18e75e29c6729 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9325,7 +9325,6 @@ W: http://www.adaptec.com/ F: drivers/scsi/ips* IPVS -M: Wensong Zhang M: Simon Horman M: Julian Anastasov L: netdev@vger.kernel.org -- GitLab From 054c4610bd05e7bf677efefa880da2da340599fc Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Jan 2021 17:49:12 -0800 Subject: [PATCH 1796/1894] MAINTAINERS: dccp: move Gerrit Renker to CREDITS As far as I can tell we haven't heard from Gerrit for roughly 5 years now. DCCP patch would really benefit from some review. Gerrit was the last maintainer so mark this entry as orphaned. Subsystem DCCP PROTOCOL Changes 38 / 166 (22%) (No activity) Top reviewers: [6]: kstewart@linuxfoundation.org [6]: allison@lohutok.net [5]: edumazet@google.com INACTIVE MAINTAINER Gerrit Renker Signed-off-by: Jakub Kicinski --- CREDITS | 4 ++++ MAINTAINERS | 3 +-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/CREDITS b/CREDITS index ce8eae8c5aa48..9add7e6a4fa02 100644 --- a/CREDITS +++ b/CREDITS @@ -1288,6 +1288,10 @@ D: Major kbuild rework during the 2.5 cycle D: ISDN Maintainer S: USA +N: Gerrit Renker +E: gerrit@erg.abdn.ac.uk +D: DCCP protocol support. + N: Philip Gladstone E: philip@gladstonefamily.net D: Kernel / timekeeping stuff diff --git a/MAINTAINERS b/MAINTAINERS index 18e75e29c6729..2a6dc5bfa08c4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4920,9 +4920,8 @@ F: Documentation/scsi/dc395x.rst F: drivers/scsi/dc395x.* DCCP PROTOCOL -M: Gerrit Renker L: dccp@vger.kernel.org -S: Maintained +S: Orphan W: http://www.linuxfoundation.org/collaborate/workgroups/networking/dccp F: include/linux/dccp.h F: include/linux/tfrc.h -- GitLab From 25537d71e2d007faf42a244a75e5a2bb7c356234 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 14 Jan 2021 17:12:15 +0200 Subject: [PATCH 1797/1894] net: Allow NETIF_F_HW_TLS_TX if IP_CSUM && IPV6_CSUM Cited patch below blocked the TLS TX device offload unless HW_CSUM is set. This broke devices that use IP_CSUM && IP6_CSUM. Here we fix it. Note that the single HW_TLS_TX feature flag indicates support for both IPv4/6, hence it should still be disabled in case only one of (IP_CSUM | IPV6_CSUM) is set. Fixes: ae0b04b238e2 ("net: Disable NETIF_F_HW_TLS_TX when HW_CSUM is disabled") Signed-off-by: Tariq Toukan Reported-by: Rohit Maheshwari Reviewed-by: Maxim Mikityanskiy Link: https://lore.kernel.org/r/20210114151215.7061-1-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- Documentation/networking/tls-offload.rst | 2 +- net/core/dev.c | 12 +++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/Documentation/networking/tls-offload.rst b/Documentation/networking/tls-offload.rst index 0f55c6d540f97..9af3334d9ad08 100644 --- a/Documentation/networking/tls-offload.rst +++ b/Documentation/networking/tls-offload.rst @@ -530,7 +530,7 @@ TLS device feature flags only control adding of new TLS connection offloads, old connections will remain active after flags are cleared. TLS encryption cannot be offloaded to devices without checksum calculation -offload. Hence, TLS TX device feature flag requires NETIF_F_HW_CSUM being set. +offload. Hence, TLS TX device feature flag requires TX csum offload being set. Disabling the latter implies clearing the former. Disabling TX checksum offload should not affect old connections, and drivers should make sure checksum calculation does not break for them. diff --git a/net/core/dev.c b/net/core/dev.c index 0071a11a6dc3c..c360bb5367e24 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9661,9 +9661,15 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, } } - if ((features & NETIF_F_HW_TLS_TX) && !(features & NETIF_F_HW_CSUM)) { - netdev_dbg(dev, "Dropping TLS TX HW offload feature since no CSUM feature.\n"); - features &= ~NETIF_F_HW_TLS_TX; + if (features & NETIF_F_HW_TLS_TX) { + bool ip_csum = (features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) == + (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); + bool hw_csum = features & NETIF_F_HW_CSUM; + + if (!ip_csum && !hw_csum) { + netdev_dbg(dev, "Dropping TLS TX HW offload feature since no CSUM feature.\n"); + features &= ~NETIF_F_HW_TLS_TX; + } } return features; -- GitLab From 4369376ba91c97a1b2dd74abeec18c0c0ddf4ac9 Mon Sep 17 00:00:00 2001 From: Likun Gao Date: Fri, 8 Jan 2021 16:34:31 +0800 Subject: [PATCH 1798/1894] drm/amdgpu: set power brake sequence Add function to set power brake sequence. Signed-off-by: Likun Gao Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 32 ++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 34141b785aa4c..619d34c041ee0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -164,6 +164,9 @@ #define mmGCVM_L2_CGTT_CLK_CTRL_Sienna_Cichlid 0x15db #define mmGCVM_L2_CGTT_CLK_CTRL_Sienna_Cichlid_BASE_IDX 0 +#define mmGC_THROTTLE_CTRL_Sienna_Cichlid 0x2030 +#define mmGC_THROTTLE_CTRL_Sienna_Cichlid_BASE_IDX 0 + MODULE_FIRMWARE("amdgpu/navi10_ce.bin"); MODULE_FIRMWARE("amdgpu/navi10_pfp.bin"); MODULE_FIRMWARE("amdgpu/navi10_me.bin"); @@ -3328,6 +3331,7 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume); static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure); static u32 gfx_v10_3_get_disabled_sa(struct amdgpu_device *adev); static void gfx_v10_3_program_pbb_mode(struct amdgpu_device *adev); +static void gfx_v10_3_set_power_brake_sequence(struct amdgpu_device *adev); static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask) { @@ -7196,6 +7200,9 @@ static int gfx_v10_0_hw_init(void *handle) if (adev->asic_type == CHIP_SIENNA_CICHLID) gfx_v10_3_program_pbb_mode(adev); + if (adev->asic_type >= CHIP_SIENNA_CICHLID) + gfx_v10_3_set_power_brake_sequence(adev); + return r; } @@ -9181,6 +9188,31 @@ static void gfx_v10_3_program_pbb_mode(struct amdgpu_device *adev) } } +static void gfx_v10_3_set_power_brake_sequence(struct amdgpu_device *adev) +{ + WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, + (0x1 << GRBM_GFX_INDEX__SA_BROADCAST_WRITES__SHIFT) | + (0x1 << GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES__SHIFT) | + (0x1 << GRBM_GFX_INDEX__SE_BROADCAST_WRITES__SHIFT)); + + WREG32_SOC15(GC, 0, mmGC_CAC_IND_INDEX, ixPWRBRK_STALL_PATTERN_CTRL); + WREG32_SOC15(GC, 0, mmGC_CAC_IND_DATA, + (0x1 << PWRBRK_STALL_PATTERN_CTRL__PWRBRK_STEP_INTERVAL__SHIFT) | + (0x12 << PWRBRK_STALL_PATTERN_CTRL__PWRBRK_BEGIN_STEP__SHIFT) | + (0x13 << PWRBRK_STALL_PATTERN_CTRL__PWRBRK_END_STEP__SHIFT) | + (0xf << PWRBRK_STALL_PATTERN_CTRL__PWRBRK_THROTTLE_PATTERN_BIT_NUMS__SHIFT)); + + WREG32_SOC15(GC, 0, mmGC_THROTTLE_CTRL_Sienna_Cichlid, + (0x1 << GC_THROTTLE_CTRL__PWRBRK_STALL_EN__SHIFT) | + (0x1 << GC_THROTTLE_CTRL__PATTERN_MODE__SHIFT) | + (0x5 << GC_THROTTLE_CTRL__RELEASE_STEP_INTERVAL__SHIFT)); + + WREG32_SOC15(GC, 0, mmDIDT_IND_INDEX, ixDIDT_SQ_THROTTLE_CTRL); + + WREG32_SOC15(GC, 0, mmDIDT_IND_DATA, + (0x1 << DIDT_SQ_THROTTLE_CTRL__PWRBRK_STALL_EN__SHIFT)); +} + const struct amdgpu_ip_block_version gfx_v10_0_ip_block = { .type = AMD_IP_BLOCK_TYPE_GFX, -- GitLab From 55df908bd663ead7d85bd64dd49562d5ac3889ef Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Thu, 7 Jan 2021 14:50:51 -0500 Subject: [PATCH 1799/1894] Revert "drm/amd/display: Fix unused variable warning" This reverts commit f01afd1ee48816457fb22e201f1d0cfb14589904. Cc: Wayne Lin Cc: Alexander Deucher Cc: Harry Wentland Cc: Roman Li Cc: Bindu R Cc: Daniel Vetter Acked-by: Daniel Vetter Acked-by: Alex Deucher Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 +++- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 146486071d01d..8fac6116591bf 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -8378,7 +8378,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) acrtc->dm_irq_params.stream = dm_new_crtc_state->stream; manage_dm_interrupts(adev, acrtc, true); } - if (IS_ENABLED(CONFIG_DEBUG_FS) && new_crtc_state->active && +#ifdef CONFIG_DEBUG_FS + if (new_crtc_state->active && amdgpu_dm_is_valid_crc_source(dm_new_crtc_state->crc_src)) { /** * Frontend may have changed so reapply the CRC capture @@ -8399,6 +8400,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) amdgpu_dm_crtc_configure_crc_source( crtc, dm_new_crtc_state, dm_new_crtc_state->crc_src); } +#endif } for_each_new_crtc_in_state(state, crtc, new_crtc_state, j) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h index eba2f1d35d075..0235bfb246e5d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h @@ -46,13 +46,13 @@ static inline bool amdgpu_dm_is_valid_crc_source(enum amdgpu_dm_pipe_crc_source } /* amdgpu_dm_crc.c */ +#ifdef CONFIG_DEBUG_FS bool amdgpu_dm_crc_window_is_default(struct dm_crtc_state *dm_crtc_state); bool amdgpu_dm_crc_window_changed(struct dm_crtc_state *dm_new_crtc_state, struct dm_crtc_state *dm_old_crtc_state); int amdgpu_dm_crtc_configure_crc_source(struct drm_crtc *crtc, struct dm_crtc_state *dm_crtc_state, enum amdgpu_dm_pipe_crc_source source); -#ifdef CONFIG_DEBUG_FS int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name); int amdgpu_dm_crtc_verify_crc_source(struct drm_crtc *crtc, const char *src_name, -- GitLab From 3c517ca5212faab4604e1725b4d31e290945ff87 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Thu, 7 Jan 2021 14:51:49 -0500 Subject: [PATCH 1800/1894] Revert "drm/amdgpu/disply: fix documentation warnings in display manager" This reverts commit 6ae09fa49147e557eb6aebbb5b2059b63706d454. Cc: Wayne Lin Cc: Alexander Deucher Cc: Harry Wentland Cc: Roman Li Cc: Bindu R Cc: Daniel Vetter Acked-by: Daniel Vetter Acked-by: Alex Deucher Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 21 +------------------ 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 2ee6edb3df931..0b31779a04856 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -337,29 +337,10 @@ struct amdgpu_display_manager { const struct gpu_info_soc_bounding_box_v1_0 *soc_bounding_box; #ifdef CONFIG_DEBUG_FS - /** - * @crc_win_x_start_property: - * - * X start of the crc calculation window - */ + /* set the crc calculation window*/ struct drm_property *crc_win_x_start_property; - /** - * @crc_win_y_start_property: - * - * Y start of the crc calculation window - */ struct drm_property *crc_win_y_start_property; - /** - * @crc_win_x_end_property: - * - * X end of the crc calculation window - */ struct drm_property *crc_win_x_end_property; - /** - * @crc_win_y_end_property: - * - * Y end of the crc calculation window - */ struct drm_property *crc_win_y_end_property; #endif /** -- GitLab From a7ddd22151fc2910c7b2faad64680cc2bb699b03 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Thu, 7 Jan 2021 15:09:30 -0500 Subject: [PATCH 1801/1894] Revert "drm/amd/display: Expose new CRC window property" This reverts commit c920888c604d72799d057bbcd9e28a6c003ccfbe. Cc: Wayne Lin Cc: Alexander Deucher Cc: Harry Wentland Cc: Roman Li Cc: Bindu R Cc: Daniel Vetter Acked-by: Daniel Vetter Acked-by: Alex Deucher Reviewed-by: Wayne Lin Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 142 +----------------- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 19 --- .../drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c | 56 +------ .../drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h | 3 - 4 files changed, 10 insertions(+), 210 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 8fac6116591bf..c6da89df055de 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -939,41 +939,6 @@ static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_ } #endif -#ifdef CONFIG_DEBUG_FS -static int create_crtc_crc_properties(struct amdgpu_display_manager *dm) -{ - dm->crc_win_x_start_property = - drm_property_create_range(adev_to_drm(dm->adev), - DRM_MODE_PROP_ATOMIC, - "AMD_CRC_WIN_X_START", 0, U16_MAX); - if (!dm->crc_win_x_start_property) - return -ENOMEM; - - dm->crc_win_y_start_property = - drm_property_create_range(adev_to_drm(dm->adev), - DRM_MODE_PROP_ATOMIC, - "AMD_CRC_WIN_Y_START", 0, U16_MAX); - if (!dm->crc_win_y_start_property) - return -ENOMEM; - - dm->crc_win_x_end_property = - drm_property_create_range(adev_to_drm(dm->adev), - DRM_MODE_PROP_ATOMIC, - "AMD_CRC_WIN_X_END", 0, U16_MAX); - if (!dm->crc_win_x_end_property) - return -ENOMEM; - - dm->crc_win_y_end_property = - drm_property_create_range(adev_to_drm(dm->adev), - DRM_MODE_PROP_ATOMIC, - "AMD_CRC_WIN_Y_END", 0, U16_MAX); - if (!dm->crc_win_y_end_property) - return -ENOMEM; - - return 0; -} -#endif - static int amdgpu_dm_init(struct amdgpu_device *adev) { struct dc_init_data init_data; @@ -1120,10 +1085,6 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) dc_init_callbacks(adev->dm.dc, &init_params); } -#endif -#ifdef CONFIG_DEBUG_FS - if (create_crtc_crc_properties(&adev->dm)) - DRM_ERROR("amdgpu: failed to create crc property.\n"); #endif if (amdgpu_dm_initialize_drm_device(adev)) { DRM_ERROR( @@ -5333,64 +5294,12 @@ dm_crtc_duplicate_state(struct drm_crtc *crtc) state->crc_src = cur->crc_src; state->cm_has_degamma = cur->cm_has_degamma; state->cm_is_degamma_srgb = cur->cm_is_degamma_srgb; -#ifdef CONFIG_DEBUG_FS - state->crc_window = cur->crc_window; -#endif + /* TODO Duplicate dc_stream after objects are stream object is flattened */ return &state->base; } -#ifdef CONFIG_DEBUG_FS -static int amdgpu_dm_crtc_atomic_set_property(struct drm_crtc *crtc, - struct drm_crtc_state *crtc_state, - struct drm_property *property, - uint64_t val) -{ - struct drm_device *dev = crtc->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - struct dm_crtc_state *dm_new_state = - to_dm_crtc_state(crtc_state); - - if (property == adev->dm.crc_win_x_start_property) - dm_new_state->crc_window.x_start = val; - else if (property == adev->dm.crc_win_y_start_property) - dm_new_state->crc_window.y_start = val; - else if (property == adev->dm.crc_win_x_end_property) - dm_new_state->crc_window.x_end = val; - else if (property == adev->dm.crc_win_y_end_property) - dm_new_state->crc_window.y_end = val; - else - return -EINVAL; - - return 0; -} - -static int amdgpu_dm_crtc_atomic_get_property(struct drm_crtc *crtc, - const struct drm_crtc_state *state, - struct drm_property *property, - uint64_t *val) -{ - struct drm_device *dev = crtc->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - struct dm_crtc_state *dm_state = - to_dm_crtc_state(state); - - if (property == adev->dm.crc_win_x_start_property) - *val = dm_state->crc_window.x_start; - else if (property == adev->dm.crc_win_y_start_property) - *val = dm_state->crc_window.y_start; - else if (property == adev->dm.crc_win_x_end_property) - *val = dm_state->crc_window.x_end; - else if (property == adev->dm.crc_win_y_end_property) - *val = dm_state->crc_window.y_end; - else - return -EINVAL; - - return 0; -} -#endif - static inline int dm_set_vupdate_irq(struct drm_crtc *crtc, bool enable) { enum dc_irq_source irq_source; @@ -5457,10 +5366,6 @@ static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = { .enable_vblank = dm_enable_vblank, .disable_vblank = dm_disable_vblank, .get_vblank_timestamp = drm_crtc_vblank_helper_get_vblank_timestamp, -#ifdef CONFIG_DEBUG_FS - .atomic_set_property = amdgpu_dm_crtc_atomic_set_property, - .atomic_get_property = amdgpu_dm_crtc_atomic_get_property, -#endif }; static enum drm_connector_status @@ -6662,25 +6567,6 @@ static int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, return 0; } -#ifdef CONFIG_DEBUG_FS -static void attach_crtc_crc_properties(struct amdgpu_display_manager *dm, - struct amdgpu_crtc *acrtc) -{ - drm_object_attach_property(&acrtc->base.base, - dm->crc_win_x_start_property, - 0); - drm_object_attach_property(&acrtc->base.base, - dm->crc_win_y_start_property, - 0); - drm_object_attach_property(&acrtc->base.base, - dm->crc_win_x_end_property, - 0); - drm_object_attach_property(&acrtc->base.base, - dm->crc_win_y_end_property, - 0); -} -#endif - static int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, struct drm_plane *plane, uint32_t crtc_index) @@ -6728,9 +6614,7 @@ static int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, drm_crtc_enable_color_mgmt(&acrtc->base, MAX_COLOR_LUT_ENTRIES, true, MAX_COLOR_LUT_ENTRIES); drm_mode_crtc_set_gamma_size(&acrtc->base, MAX_COLOR_LEGACY_LUT_ENTRIES); -#ifdef CONFIG_DEBUG_FS - attach_crtc_crc_properties(dm, acrtc); -#endif + return 0; fail: @@ -8367,7 +8251,6 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) */ for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); - bool configure_crc = false; dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); @@ -8377,30 +8260,21 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) dc_stream_retain(dm_new_crtc_state->stream); acrtc->dm_irq_params.stream = dm_new_crtc_state->stream; manage_dm_interrupts(adev, acrtc, true); - } + #ifdef CONFIG_DEBUG_FS - if (new_crtc_state->active && - amdgpu_dm_is_valid_crc_source(dm_new_crtc_state->crc_src)) { /** * Frontend may have changed so reapply the CRC capture * settings for the stream. */ dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); - dm_old_crtc_state = to_dm_crtc_state(old_crtc_state); - - if (amdgpu_dm_crc_window_is_default(dm_new_crtc_state)) { - if (!old_crtc_state->active || drm_atomic_crtc_needs_modeset(new_crtc_state)) - configure_crc = true; - } else { - if (amdgpu_dm_crc_window_changed(dm_new_crtc_state, dm_old_crtc_state)) - configure_crc = true; - } - if (configure_crc) + if (amdgpu_dm_is_valid_crc_source(dm_new_crtc_state->crc_src)) { amdgpu_dm_crtc_configure_crc_source( - crtc, dm_new_crtc_state, dm_new_crtc_state->crc_src); - } + crtc, dm_new_crtc_state, + dm_new_crtc_state->crc_src); + } #endif + } } for_each_new_crtc_in_state(state, crtc, new_crtc_state, j) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 0b31779a04856..1182dafcef022 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -336,13 +336,6 @@ struct amdgpu_display_manager { */ const struct gpu_info_soc_bounding_box_v1_0 *soc_bounding_box; -#ifdef CONFIG_DEBUG_FS - /* set the crc calculation window*/ - struct drm_property *crc_win_x_start_property; - struct drm_property *crc_win_y_start_property; - struct drm_property *crc_win_x_end_property; - struct drm_property *crc_win_y_end_property; -#endif /** * @mst_encoders: * @@ -429,15 +422,6 @@ struct dm_plane_state { struct dc_plane_state *dc_state; }; -#ifdef CONFIG_DEBUG_FS -struct crc_rec { - uint16_t x_start; - uint16_t y_start; - uint16_t x_end; - uint16_t y_end; - }; -#endif - struct dm_crtc_state { struct drm_crtc_state base; struct dc_stream_state *stream; @@ -460,9 +444,6 @@ struct dm_crtc_state { struct dc_info_packet vrr_infopacket; int abm_level; -#ifdef CONFIG_DEBUG_FS - struct crc_rec crc_window; -#endif }; #define to_dm_crtc_state(x) container_of(x, struct dm_crtc_state, base) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c index 7b886a779a8ca..c29dc11619f79 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c @@ -81,41 +81,6 @@ const char *const *amdgpu_dm_crtc_get_crc_sources(struct drm_crtc *crtc, return pipe_crc_sources; } -static void amdgpu_dm_set_crc_window_default(struct dm_crtc_state *dm_crtc_state) -{ - dm_crtc_state->crc_window.x_start = 0; - dm_crtc_state->crc_window.y_start = 0; - dm_crtc_state->crc_window.x_end = 0; - dm_crtc_state->crc_window.y_end = 0; -} - -bool amdgpu_dm_crc_window_is_default(struct dm_crtc_state *dm_crtc_state) -{ - bool ret = true; - - if ((dm_crtc_state->crc_window.x_start != 0) || - (dm_crtc_state->crc_window.y_start != 0) || - (dm_crtc_state->crc_window.x_end != 0) || - (dm_crtc_state->crc_window.y_end != 0)) - ret = false; - - return ret; -} - -bool amdgpu_dm_crc_window_changed(struct dm_crtc_state *dm_new_crtc_state, - struct dm_crtc_state *dm_old_crtc_state) -{ - bool ret = false; - - if ((dm_new_crtc_state->crc_window.x_start != dm_old_crtc_state->crc_window.x_start) || - (dm_new_crtc_state->crc_window.y_start != dm_old_crtc_state->crc_window.y_start) || - (dm_new_crtc_state->crc_window.x_end != dm_old_crtc_state->crc_window.x_end) || - (dm_new_crtc_state->crc_window.y_end != dm_old_crtc_state->crc_window.y_end)) - ret = true; - - return ret; -} - int amdgpu_dm_crtc_verify_crc_source(struct drm_crtc *crtc, const char *src_name, size_t *values_cnt) @@ -140,7 +105,6 @@ int amdgpu_dm_crtc_configure_crc_source(struct drm_crtc *crtc, struct dc_stream_state *stream_state = dm_crtc_state->stream; bool enable = amdgpu_dm_is_valid_crc_source(source); int ret = 0; - struct crc_params *crc_window = NULL, tmp_window; /* Configuration will be deferred to stream enable. */ if (!stream_state) @@ -149,25 +113,9 @@ int amdgpu_dm_crtc_configure_crc_source(struct drm_crtc *crtc, mutex_lock(&adev->dm.dc_lock); /* Enable CRTC CRC generation if necessary. */ - if (dm_is_crc_source_crtc(source) || source == AMDGPU_DM_PIPE_CRC_SOURCE_NONE) { - if (!enable) - amdgpu_dm_set_crc_window_default(dm_crtc_state); - - if (!amdgpu_dm_crc_window_is_default(dm_crtc_state)) { - crc_window = &tmp_window; - - tmp_window.windowa_x_start = dm_crtc_state->crc_window.x_start; - tmp_window.windowa_y_start = dm_crtc_state->crc_window.y_start; - tmp_window.windowa_x_end = dm_crtc_state->crc_window.x_end; - tmp_window.windowa_y_end = dm_crtc_state->crc_window.y_end; - tmp_window.windowb_x_start = dm_crtc_state->crc_window.x_start; - tmp_window.windowb_y_start = dm_crtc_state->crc_window.y_start; - tmp_window.windowb_x_end = dm_crtc_state->crc_window.x_end; - tmp_window.windowb_y_end = dm_crtc_state->crc_window.y_end; - } - + if (dm_is_crc_source_crtc(source)) { if (!dc_stream_configure_crc(stream_state->ctx->dc, - stream_state, crc_window, enable, enable)) { + stream_state, NULL, enable, enable)) { ret = -EINVAL; goto unlock; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h index 0235bfb246e5d..f7d731797d3fc 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h @@ -47,9 +47,6 @@ static inline bool amdgpu_dm_is_valid_crc_source(enum amdgpu_dm_pipe_crc_source /* amdgpu_dm_crc.c */ #ifdef CONFIG_DEBUG_FS -bool amdgpu_dm_crc_window_is_default(struct dm_crtc_state *dm_crtc_state); -bool amdgpu_dm_crc_window_changed(struct dm_crtc_state *dm_new_crtc_state, - struct dm_crtc_state *dm_old_crtc_state); int amdgpu_dm_crtc_configure_crc_source(struct drm_crtc *crtc, struct dm_crtc_state *dm_crtc_state, enum amdgpu_dm_pipe_crc_source source); -- GitLab From 2f0fa789f7b9fb022440f8f846cae175233987aa Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Tue, 24 Nov 2020 19:57:03 +0800 Subject: [PATCH 1802/1894] drm/amd/display: Fix to be able to stop crc calculation [Why] Find out when we try to disable CRC calculation, crc generation is still enabled. Main reason is that dc_stream_configure_crc() will never get called when the source is AMDGPU_DM_PIPE_CRC_SOURCE_NONE. [How] Add checking condition that when source is AMDGPU_DM_PIPE_CRC_SOURCE_NONE, we should also call dc_stream_configure_crc() to disable crc calculation. Acked-by: Alex Deucher Reviewed-by: Rodrigo Siqueira Signed-off-by: Wayne Lin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c index c29dc11619f79..66cb8730586b1 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c @@ -113,7 +113,7 @@ int amdgpu_dm_crtc_configure_crc_source(struct drm_crtc *crtc, mutex_lock(&adev->dm.dc_lock); /* Enable CRTC CRC generation if necessary. */ - if (dm_is_crc_source_crtc(source)) { + if (dm_is_crc_source_crtc(source) || source == AMDGPU_DM_PIPE_CRC_SOURCE_NONE) { if (!dc_stream_configure_crc(stream_state->ctx->dc, stream_state, NULL, enable, enable)) { ret = -EINVAL; -- GitLab From 13a9499e833387fcc7a53915bbe5cddf3c336b59 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 14 Jan 2021 16:37:37 +0100 Subject: [PATCH 1803/1894] mptcp: fix locking in mptcp_disconnect() tcp_disconnect() expects the caller acquires the sock lock, but mptcp_disconnect() is not doing that. Add the missing required lock. Reported-by: Eric Dumazet Fixes: 76e2a55d1625 ("mptcp: better msk-level shutdown.") Signed-off-by: Paolo Abeni Link: https://lore.kernel.org/r/f818e82b58a556feeb71dcccc8bf1c87aafc6175.1610638176.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 81faeff8f3bb7..f998a077c7dd0 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2646,8 +2646,13 @@ static int mptcp_disconnect(struct sock *sk, int flags) struct mptcp_sock *msk = mptcp_sk(sk); __mptcp_flush_join_list(msk); - mptcp_for_each_subflow(msk, subflow) - tcp_disconnect(mptcp_subflow_tcp_sock(subflow), flags); + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + + lock_sock(ssk); + tcp_disconnect(ssk, flags); + release_sock(ssk); + } return 0; } -- GitLab From 7a84665619bb5da8c8b6517157875a1fd7632014 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Sun, 10 Jan 2021 14:09:05 +0200 Subject: [PATCH 1804/1894] nvmet-rdma: Fix NULL deref when setting pi_enable and traddr INADDR_ANY When setting port traddr to INADDR_ANY, the listening cm_id->device is NULL. The associate IB device is known only when a connect request event arrives, so checking T10-PI device capability should be done at this stage. Fixes: b09160c3996c ("nvmet-rdma: add metadata/T10-PI support") Signed-off-by: Israel Rukshin Reviewed-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/target/rdma.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index bdfc22eb2a10f..06b6b742bb213 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -1220,6 +1220,14 @@ nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id) } ndev->inline_data_size = nport->inline_data_size; ndev->inline_page_count = inline_page_count; + + if (nport->pi_enable && !(cm_id->device->attrs.device_cap_flags & + IB_DEVICE_INTEGRITY_HANDOVER)) { + pr_warn("T10-PI is not supported by device %s. Disabling it\n", + cm_id->device->name); + nport->pi_enable = false; + } + ndev->device = cm_id->device; kref_init(&ndev->ref); @@ -1855,14 +1863,6 @@ static int nvmet_rdma_enable_port(struct nvmet_rdma_port *port) goto out_destroy_id; } - if (port->nport->pi_enable && - !(cm_id->device->attrs.device_cap_flags & - IB_DEVICE_INTEGRITY_HANDOVER)) { - pr_err("T10-PI is not supported for %pISpcs\n", addr); - ret = -EINVAL; - goto out_destroy_id; - } - port->cm_id = cm_id; return 0; -- GitLab From ada831772188192243f9ea437c46e37e97a5975d Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 13 Jan 2021 14:03:04 -0800 Subject: [PATCH 1805/1894] nvme-tcp: Fix warning with CONFIG_DEBUG_PREEMPT We shouldn't call smp_processor_id() in a preemptible context, but this is advisory at best, so instead call __smp_processor_id(). Fixes: db5ad6b7f8cd ("nvme-tcp: try to send request in queue_rq context") Reported-by: Or Gerlitz Reported-by: Yi Zhang Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 979ee31b8dd1c..b2e0865785ef4 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -286,7 +286,7 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req, * directly, otherwise queue io_work. Also, only do that if we * are on the same cpu, so we don't introduce contention. */ - if (queue->io_cpu == smp_processor_id() && + if (queue->io_cpu == __smp_processor_id() && sync && empty && mutex_trylock(&queue->send_mutex)) { queue->more_requests = !last; nvme_tcp_send_all(queue); -- GitLab From ca1ff67d0fb14f39cf0cc5102b1fbcc3b14f6fb9 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 13 Jan 2021 13:56:57 -0800 Subject: [PATCH 1806/1894] nvme-tcp: fix possible data corruption with bio merges When a bio merges, we can get a request that spans multiple bios, and the overall request payload size is the sum of all bios. When we calculate how much we need to send from the existing bio (and bvec), we did not take into account the iov_iter byte count cap. Since multipage bvecs support, bvecs can split in the middle which means that when we account for the last bvec send we should also take the iov_iter byte count cap as it might be lower than the last bvec size. Reported-by: Hao Wang Fixes: 3f2304f8c6d6 ("nvme-tcp: add NVMe over TCP host driver") Tested-by: Hao Wang Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index b2e0865785ef4..216619926563e 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -201,7 +201,7 @@ static inline size_t nvme_tcp_req_cur_offset(struct nvme_tcp_request *req) static inline size_t nvme_tcp_req_cur_length(struct nvme_tcp_request *req) { - return min_t(size_t, req->iter.bvec->bv_len - req->iter.iov_offset, + return min_t(size_t, iov_iter_single_seg_count(&req->iter), req->pdu_len - req->pdu_sent); } -- GitLab From 5ab25a32cd90ce561ac28b9302766e565d61304c Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 13 Jan 2021 16:00:22 -0800 Subject: [PATCH 1807/1894] nvme: don't intialize hwmon for discovery controllers Discovery controllers usually don't support smart log page command. So when we connect to the discovery controller we see this warning: nvme nvme0: Failed to read smart log (error 24577) nvme nvme0: new ctrl: NQN "nqn.2014-08.org.nvmexpress.discovery", addr 192.168.123.1:8009 nvme nvme0: Removing ctrl: NQN "nqn.2014-08.org.nvmexpress.discovery" Introduce a new helper to understand if the controller is a discovery controller and use this helper to skip nvme_init_hwmon (also use it in other places that we check if the controller is a discovery controller). Fixes: 400b6a7b13a3 ("nvme: Add hardware monitoring support") Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f320273fc6727..200bdd672c281 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2856,6 +2856,11 @@ static const struct attribute_group *nvme_subsys_attrs_groups[] = { NULL, }; +static inline bool nvme_discovery_ctrl(struct nvme_ctrl *ctrl) +{ + return ctrl->opts && ctrl->opts->discovery_nqn; +} + static bool nvme_validate_cntlid(struct nvme_subsystem *subsys, struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) { @@ -2875,7 +2880,7 @@ static bool nvme_validate_cntlid(struct nvme_subsystem *subsys, } if ((id->cmic & NVME_CTRL_CMIC_MULTI_CTRL) || - (ctrl->opts && ctrl->opts->discovery_nqn)) + nvme_discovery_ctrl(ctrl)) continue; dev_err(ctrl->device, @@ -3144,7 +3149,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) goto out_free; } - if (!ctrl->opts->discovery_nqn && !ctrl->kas) { + if (!nvme_discovery_ctrl(ctrl) && !ctrl->kas) { dev_err(ctrl->device, "keep-alive support is mandatory for fabrics\n"); ret = -EINVAL; @@ -3184,7 +3189,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) if (ret < 0) return ret; - if (!ctrl->identified) { + if (!ctrl->identified && !nvme_discovery_ctrl(ctrl)) { ret = nvme_hwmon_init(ctrl); if (ret < 0) return ret; -- GitLab From 402a89660e9dc880710b12773076a336c9dab3d7 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: [PATCH 1808/1894] drm/nouveau/bios: fix issue shadowing expansion ROMs This issue has generally been covered up by the presence of additional expansion ROMs after the ones we're interested in, with header fetches of subsequent images loading enough of the ROM to hide the issue. Noticed on GA102, which lacks a type 0x70 image compared to TU102,. [ 906.364197] nouveau 0000:09:00.0: bios: 00000000: type 00, 65024 bytes [ 906.381205] nouveau 0000:09:00.0: bios: 0000fe00: type 03, 91648 bytes [ 906.405213] nouveau 0000:09:00.0: bios: 00026400: type e0, 22016 bytes [ 906.410984] nouveau 0000:09:00.0: bios: 0002ba00: type e0, 366080 bytes vs [ 22.961901] nouveau 0000:09:00.0: bios: 00000000: type 00, 60416 bytes [ 22.984174] nouveau 0000:09:00.0: bios: 0000ec00: type 03, 71168 bytes [ 23.010446] nouveau 0000:09:00.0: bios: 00020200: type e0, 48128 bytes [ 23.028220] nouveau 0000:09:00.0: bios: 0002be00: type e0, 140800 bytes [ 23.080196] nouveau 0000:09:00.0: bios: 0004e400: type 70, 7168 bytes Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c index 7deb81b6dbac6..4b571cc6bc70f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c @@ -75,7 +75,7 @@ shadow_image(struct nvkm_bios *bios, int idx, u32 offset, struct shadow *mthd) nvkm_debug(subdev, "%08x: type %02x, %d bytes\n", image.base, image.type, image.size); - if (!shadow_fetch(bios, mthd, image.size)) { + if (!shadow_fetch(bios, mthd, image.base + image.size)) { nvkm_debug(subdev, "%08x: fetch failed\n", image.base); return 0; } -- GitLab From e05e06cd34f5311f677294a08b609acfbc315236 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: [PATCH 1809/1894] drm/nouveau/privring: ack interrupts the same way as RM Whatever it is that we were doing before doesn't work on Ampere. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gf100.c | 10 +++++++--- drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gk104.c | 10 +++++++--- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gf100.c index 2340040942c93..1115376bc85f5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gf100.c @@ -22,6 +22,7 @@ * Authors: Ben Skeggs */ #include "priv.h" +#include static void gf100_ibus_intr_hub(struct nvkm_subdev *ibus, int i) @@ -31,7 +32,6 @@ gf100_ibus_intr_hub(struct nvkm_subdev *ibus, int i) u32 data = nvkm_rd32(device, 0x122124 + (i * 0x0400)); u32 stat = nvkm_rd32(device, 0x122128 + (i * 0x0400)); nvkm_debug(ibus, "HUB%d: %06x %08x (%08x)\n", i, addr, data, stat); - nvkm_mask(device, 0x122128 + (i * 0x0400), 0x00000200, 0x00000000); } static void @@ -42,7 +42,6 @@ gf100_ibus_intr_rop(struct nvkm_subdev *ibus, int i) u32 data = nvkm_rd32(device, 0x124124 + (i * 0x0400)); u32 stat = nvkm_rd32(device, 0x124128 + (i * 0x0400)); nvkm_debug(ibus, "ROP%d: %06x %08x (%08x)\n", i, addr, data, stat); - nvkm_mask(device, 0x124128 + (i * 0x0400), 0x00000200, 0x00000000); } static void @@ -53,7 +52,6 @@ gf100_ibus_intr_gpc(struct nvkm_subdev *ibus, int i) u32 data = nvkm_rd32(device, 0x128124 + (i * 0x0400)); u32 stat = nvkm_rd32(device, 0x128128 + (i * 0x0400)); nvkm_debug(ibus, "GPC%d: %06x %08x (%08x)\n", i, addr, data, stat); - nvkm_mask(device, 0x128128 + (i * 0x0400), 0x00000200, 0x00000000); } void @@ -90,6 +88,12 @@ gf100_ibus_intr(struct nvkm_subdev *ibus) intr1 &= ~stat; } } + + nvkm_mask(device, 0x121c4c, 0x0000003f, 0x00000002); + nvkm_msec(device, 2000, + if (!(nvkm_rd32(device, 0x121c4c) & 0x0000003f)) + break; + ); } static int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gk104.c index f3915f85838ed..22e487b493ad1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gk104.c @@ -22,6 +22,7 @@ * Authors: Ben Skeggs */ #include "priv.h" +#include static void gk104_ibus_intr_hub(struct nvkm_subdev *ibus, int i) @@ -31,7 +32,6 @@ gk104_ibus_intr_hub(struct nvkm_subdev *ibus, int i) u32 data = nvkm_rd32(device, 0x122124 + (i * 0x0800)); u32 stat = nvkm_rd32(device, 0x122128 + (i * 0x0800)); nvkm_debug(ibus, "HUB%d: %06x %08x (%08x)\n", i, addr, data, stat); - nvkm_mask(device, 0x122128 + (i * 0x0800), 0x00000200, 0x00000000); } static void @@ -42,7 +42,6 @@ gk104_ibus_intr_rop(struct nvkm_subdev *ibus, int i) u32 data = nvkm_rd32(device, 0x124124 + (i * 0x0800)); u32 stat = nvkm_rd32(device, 0x124128 + (i * 0x0800)); nvkm_debug(ibus, "ROP%d: %06x %08x (%08x)\n", i, addr, data, stat); - nvkm_mask(device, 0x124128 + (i * 0x0800), 0x00000200, 0x00000000); } static void @@ -53,7 +52,6 @@ gk104_ibus_intr_gpc(struct nvkm_subdev *ibus, int i) u32 data = nvkm_rd32(device, 0x128124 + (i * 0x0800)); u32 stat = nvkm_rd32(device, 0x128128 + (i * 0x0800)); nvkm_debug(ibus, "GPC%d: %06x %08x (%08x)\n", i, addr, data, stat); - nvkm_mask(device, 0x128128 + (i * 0x0800), 0x00000200, 0x00000000); } void @@ -90,6 +88,12 @@ gk104_ibus_intr(struct nvkm_subdev *ibus) intr1 &= ~stat; } } + + nvkm_mask(device, 0x12004c, 0x0000003f, 0x00000002); + nvkm_msec(device, 2000, + if (!(nvkm_rd32(device, 0x12004c) & 0x0000003f)) + break; + ); } static int -- GitLab From b5510d1e21d80e2fa2286468ca8c2922f5895ef8 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: [PATCH 1810/1894] drm/nouveau/i2c/gk110: split out from i2c/gk104 No functional changes here yet. Signed-off-by: Ben Skeggs --- .../gpu/drm/nouveau/include/nvkm/subdev/i2c.h | 1 + .../gpu/drm/nouveau/nvkm/engine/device/base.c | 12 +++--- .../gpu/drm/nouveau/nvkm/subdev/i2c/Kbuild | 1 + .../gpu/drm/nouveau/nvkm/subdev/i2c/gk110.c | 38 +++++++++++++++++++ 4 files changed, 46 insertions(+), 6 deletions(-) create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gk110.c diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h index 81b977319640a..640f649ce497e 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h @@ -92,6 +92,7 @@ int g94_i2c_new(struct nvkm_device *, int, struct nvkm_i2c **); int gf117_i2c_new(struct nvkm_device *, int, struct nvkm_i2c **); int gf119_i2c_new(struct nvkm_device *, int, struct nvkm_i2c **); int gk104_i2c_new(struct nvkm_device *, int, struct nvkm_i2c **); +int gk110_i2c_new(struct nvkm_device *, int, struct nvkm_i2c **); int gm200_i2c_new(struct nvkm_device *, int, struct nvkm_i2c **); static inline int diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 7851bec5f0e5f..ffada13c416c5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -1815,7 +1815,7 @@ nvf0_chipset = { .fb = gk110_fb_new, .fuse = gf100_fuse_new, .gpio = gk104_gpio_new, - .i2c = gk104_i2c_new, + .i2c = gk110_i2c_new, .ibus = gk104_ibus_new, .iccsense = gf100_iccsense_new, .imem = nv50_instmem_new, @@ -1853,7 +1853,7 @@ nvf1_chipset = { .fb = gk110_fb_new, .fuse = gf100_fuse_new, .gpio = gk104_gpio_new, - .i2c = gk104_i2c_new, + .i2c = gk110_i2c_new, .ibus = gk104_ibus_new, .iccsense = gf100_iccsense_new, .imem = nv50_instmem_new, @@ -1891,7 +1891,7 @@ nv106_chipset = { .fb = gk110_fb_new, .fuse = gf100_fuse_new, .gpio = gk104_gpio_new, - .i2c = gk104_i2c_new, + .i2c = gk110_i2c_new, .ibus = gk104_ibus_new, .iccsense = gf100_iccsense_new, .imem = nv50_instmem_new, @@ -1929,7 +1929,7 @@ nv108_chipset = { .fb = gk110_fb_new, .fuse = gf100_fuse_new, .gpio = gk104_gpio_new, - .i2c = gk104_i2c_new, + .i2c = gk110_i2c_new, .ibus = gk104_ibus_new, .iccsense = gf100_iccsense_new, .imem = nv50_instmem_new, @@ -1967,7 +1967,7 @@ nv117_chipset = { .fb = gm107_fb_new, .fuse = gm107_fuse_new, .gpio = gk104_gpio_new, - .i2c = gk104_i2c_new, + .i2c = gk110_i2c_new, .ibus = gk104_ibus_new, .iccsense = gf100_iccsense_new, .imem = nv50_instmem_new, @@ -2003,7 +2003,7 @@ nv118_chipset = { .fb = gm107_fb_new, .fuse = gm107_fuse_new, .gpio = gk104_gpio_new, - .i2c = gk104_i2c_new, + .i2c = gk110_i2c_new, .ibus = gk104_ibus_new, .iccsense = gf100_iccsense_new, .imem = nv50_instmem_new, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/Kbuild index 723d0284caefc..819703913a00c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/Kbuild @@ -7,6 +7,7 @@ nvkm-y += nvkm/subdev/i2c/g94.o nvkm-y += nvkm/subdev/i2c/gf117.o nvkm-y += nvkm/subdev/i2c/gf119.o nvkm-y += nvkm/subdev/i2c/gk104.o +nvkm-y += nvkm/subdev/i2c/gk110.o nvkm-y += nvkm/subdev/i2c/gm200.o nvkm-y += nvkm/subdev/i2c/pad.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gk110.c b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gk110.c new file mode 100644 index 0000000000000..10eaf1d70f623 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gk110.c @@ -0,0 +1,38 @@ +/* + * Copyright 2021 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "priv.h" +#include "pad.h" + +static const struct nvkm_i2c_func +gk110_i2c = { + .pad_x_new = gf119_i2c_pad_x_new, + .pad_s_new = gf119_i2c_pad_s_new, + .aux = 4, + .aux_stat = gk104_aux_stat, + .aux_mask = gk104_aux_mask, +}; + +int +gk110_i2c_new(struct nvkm_device *device, int index, struct nvkm_i2c **pi2c) +{ + return nvkm_i2c_new_(&gk110_i2c, device, index, pi2c); +} -- GitLab From 8ad95edc39100c22c29ab1d2588332b99f387c8e Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: [PATCH 1811/1894] drm/nouveau/i2c/gk110-: disable hw-initiated dpcd reads RM does this around transactions, and it seemed to help while debugging AUXCH issues on GA102. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.h | 7 +++++++ drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxg94.c | 10 +++++++--- drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c | 9 +++++++-- drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gk110.c | 7 +++++++ drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gm200.c | 7 +++++++ drivers/gpu/drm/nouveau/nvkm/subdev/i2c/pad.h | 2 +- drivers/gpu/drm/nouveau/nvkm/subdev/i2c/priv.h | 4 ++++ 7 files changed, 40 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.h b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.h index 30b48896965eb..f920eabf8628d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.h @@ -3,6 +3,13 @@ #define __NVKM_I2C_AUX_H__ #include "pad.h" +static inline void +nvkm_i2c_aux_autodpcd(struct nvkm_i2c *i2c, int aux, bool enable) +{ + if (i2c->func->aux_autodpcd) + i2c->func->aux_autodpcd(i2c, aux, false); +} + struct nvkm_i2c_aux_func { bool address_only; int (*xfer)(struct nvkm_i2c_aux *, bool retry, u8 type, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxg94.c b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxg94.c index db7769cb33eba..47068f6f9c55d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxg94.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxg94.c @@ -77,7 +77,8 @@ g94_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry, u8 type, u32 addr, u8 *data, u8 *size) { struct g94_i2c_aux *aux = g94_i2c_aux(obj); - struct nvkm_device *device = aux->base.pad->i2c->subdev.device; + struct nvkm_i2c *i2c = aux->base.pad->i2c; + struct nvkm_device *device = i2c->subdev.device; const u32 base = aux->ch * 0x50; u32 ctrl, stat, timeout, retries = 0; u32 xbuf[4] = {}; @@ -96,6 +97,8 @@ g94_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry, goto out; } + nvkm_i2c_aux_autodpcd(i2c, aux->ch, false); + if (!(type & 1)) { memcpy(xbuf, data, *size); for (i = 0; i < 16; i += 4) { @@ -128,7 +131,7 @@ g94_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry, if (!timeout--) { AUX_ERR(&aux->base, "timeout %08x", ctrl); ret = -EIO; - goto out; + goto out_err; } } while (ctrl & 0x00010000); ret = 0; @@ -154,7 +157,8 @@ g94_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry, memcpy(data, xbuf, *size); *size = stat & 0x0000001f; } - +out_err: + nvkm_i2c_aux_autodpcd(i2c, aux->ch, true); out: g94_i2c_aux_fini(aux); return ret < 0 ? ret : (stat & 0x000f0000) >> 16; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c index edb6148cbca04..ab67b67fd2a53 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c @@ -77,7 +77,8 @@ gm200_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry, u8 type, u32 addr, u8 *data, u8 *size) { struct gm200_i2c_aux *aux = gm200_i2c_aux(obj); - struct nvkm_device *device = aux->base.pad->i2c->subdev.device; + struct nvkm_i2c *i2c = aux->base.pad->i2c; + struct nvkm_device *device = i2c->subdev.device; const u32 base = aux->ch * 0x50; u32 ctrl, stat, timeout, retries = 0; u32 xbuf[4] = {}; @@ -96,6 +97,8 @@ gm200_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry, goto out; } + nvkm_i2c_aux_autodpcd(i2c, aux->ch, false); + if (!(type & 1)) { memcpy(xbuf, data, *size); for (i = 0; i < 16; i += 4) { @@ -128,7 +131,7 @@ gm200_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry, if (!timeout--) { AUX_ERR(&aux->base, "timeout %08x", ctrl); ret = -EIO; - goto out; + goto out_err; } } while (ctrl & 0x00010000); ret = 0; @@ -155,6 +158,8 @@ gm200_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry, *size = stat & 0x0000001f; } +out_err: + nvkm_i2c_aux_autodpcd(i2c, aux->ch, true); out: gm200_i2c_aux_fini(aux); return ret < 0 ? ret : (stat & 0x000f0000) >> 16; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gk110.c b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gk110.c index 10eaf1d70f623..8e3bfa1af52a2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gk110.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gk110.c @@ -22,6 +22,12 @@ #include "priv.h" #include "pad.h" +static void +gk110_aux_autodpcd(struct nvkm_i2c *i2c, int aux, bool enable) +{ + nvkm_mask(i2c->subdev.device, 0x00e4f8 + (aux * 0x50), 0x00010000, enable << 16); +} + static const struct nvkm_i2c_func gk110_i2c = { .pad_x_new = gf119_i2c_pad_x_new, @@ -29,6 +35,7 @@ gk110_i2c = { .aux = 4, .aux_stat = gk104_aux_stat, .aux_mask = gk104_aux_mask, + .aux_autodpcd = gk110_aux_autodpcd, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gm200.c index a23c5f315221c..7b2375bff8a9c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gm200.c @@ -24,6 +24,12 @@ #include "priv.h" #include "pad.h" +static void +gm200_aux_autodpcd(struct nvkm_i2c *i2c, int aux, bool enable) +{ + nvkm_mask(i2c->subdev.device, 0x00d968 + (aux * 0x50), 0x00010000, enable << 16); +} + static const struct nvkm_i2c_func gm200_i2c = { .pad_x_new = gf119_i2c_pad_x_new, @@ -31,6 +37,7 @@ gm200_i2c = { .aux = 8, .aux_stat = gk104_aux_stat, .aux_mask = gk104_aux_mask, + .aux_autodpcd = gm200_aux_autodpcd, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/pad.h b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/pad.h index 461016814f4f2..44b7bb7d47776 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/pad.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/pad.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: MIT */ #ifndef __NVKM_I2C_PAD_H__ #define __NVKM_I2C_PAD_H__ -#include +#include "priv.h" struct nvkm_i2c_pad { const struct nvkm_i2c_pad_func *func; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/priv.h index bd86bc298ebe5..e35f6036fcfcb 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/priv.h @@ -23,6 +23,10 @@ struct nvkm_i2c_func { /* mask on/off interrupt types for a given set of auxch */ void (*aux_mask)(struct nvkm_i2c *, u32, u32, u32); + + /* enable/disable HW-initiated DPCD reads + */ + void (*aux_autodpcd)(struct nvkm_i2c *, int aux, bool enable); }; void g94_aux_stat(struct nvkm_i2c *, u32 *, u32 *, u32 *, u32 *); -- GitLab From ba6e9ab0fcf3d76e3952deb12b5f993991621d9c Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: [PATCH 1812/1894] drm/nouveau/i2c/gm200: increase width of aux semaphore owner fields Noticed while debugging GA102. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c index ab67b67fd2a53..8bd1d442e4654 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c @@ -33,7 +33,7 @@ static void gm200_i2c_aux_fini(struct gm200_i2c_aux *aux) { struct nvkm_device *device = aux->base.pad->i2c->subdev.device; - nvkm_mask(device, 0x00d954 + (aux->ch * 0x50), 0x00310000, 0x00000000); + nvkm_mask(device, 0x00d954 + (aux->ch * 0x50), 0x00710000, 0x00000000); } static int @@ -54,10 +54,10 @@ gm200_i2c_aux_init(struct gm200_i2c_aux *aux) AUX_ERR(&aux->base, "begin idle timeout %08x", ctrl); return -EBUSY; } - } while (ctrl & 0x03010000); + } while (ctrl & 0x07010000); /* set some magic, and wait up to 1ms for it to appear */ - nvkm_mask(device, 0x00d954 + (aux->ch * 0x50), 0x00300000, ureq); + nvkm_mask(device, 0x00d954 + (aux->ch * 0x50), 0x00700000, ureq); timeout = 1000; do { ctrl = nvkm_rd32(device, 0x00d954 + (aux->ch * 0x50)); @@ -67,7 +67,7 @@ gm200_i2c_aux_init(struct gm200_i2c_aux *aux) gm200_i2c_aux_fini(aux); return -EBUSY; } - } while ((ctrl & 0x03000000) != urep); + } while ((ctrl & 0x07000000) != urep); return 0; } -- GitLab From add42781ad76c5ae65127bf13852a4c6b2f08849 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: [PATCH 1813/1894] drm/nouveau/mmu: fix vram heap sizing Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c index de91e9a261725..6d5212ae2fd57 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c @@ -316,9 +316,9 @@ nvkm_mmu_vram(struct nvkm_mmu *mmu) { struct nvkm_device *device = mmu->subdev.device; struct nvkm_mm *mm = &device->fb->ram->vram; - const u32 sizeN = nvkm_mm_heap_size(mm, NVKM_RAM_MM_NORMAL); - const u32 sizeU = nvkm_mm_heap_size(mm, NVKM_RAM_MM_NOMAP); - const u32 sizeM = nvkm_mm_heap_size(mm, NVKM_RAM_MM_MIXED); + const u64 sizeN = nvkm_mm_heap_size(mm, NVKM_RAM_MM_NORMAL); + const u64 sizeU = nvkm_mm_heap_size(mm, NVKM_RAM_MM_NOMAP); + const u64 sizeM = nvkm_mm_heap_size(mm, NVKM_RAM_MM_MIXED); u8 type = NVKM_MEM_KIND * !!mmu->func->kind; u8 heap = NVKM_MEM_VRAM; int heapM, heapN, heapU; -- GitLab From 3b050680c84153d8e6f5ae3785922cd417f4b071 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: [PATCH 1814/1894] drm/nouveau/core: recognise GA10[024] GA100 hidden behind a module option, as it's not been as well verified since initial bring-up and may need additional changes. There's no display anyway, so this can wait for a bit. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/include/nvif/cl0080.h | 1 + .../drm/nouveau/include/nvkm/core/device.h | 1 + drivers/gpu/drm/nouveau/nouveau_backlight.c | 1 + .../gpu/drm/nouveau/nvkm/engine/device/base.c | 35 +++++++++++++++++-- .../gpu/drm/nouveau/nvkm/engine/device/user.c | 1 + 5 files changed, 36 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl0080.h b/drivers/gpu/drm/nouveau/include/nvif/cl0080.h index cd9a2e687bb61..57d4f457a7d4a 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/cl0080.h +++ b/drivers/gpu/drm/nouveau/include/nvif/cl0080.h @@ -33,6 +33,7 @@ struct nv_device_info_v0 { #define NV_DEVICE_INFO_V0_PASCAL 0x0a #define NV_DEVICE_INFO_V0_VOLTA 0x0b #define NV_DEVICE_INFO_V0_TURING 0x0c +#define NV_DEVICE_INFO_V0_AMPERE 0x0d __u8 family; __u8 pad06[2]; __u64 ram_size; diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h index 5c007ce62fc34..c920939a14679 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h @@ -120,6 +120,7 @@ struct nvkm_device { GP100 = 0x130, GV100 = 0x140, TU100 = 0x160, + GA100 = 0x170, } card_type; u32 chipset; u8 chiprev; diff --git a/drivers/gpu/drm/nouveau/nouveau_backlight.c b/drivers/gpu/drm/nouveau/nouveau_backlight.c index c7a94c94dbf37..72f35a2babcb2 100644 --- a/drivers/gpu/drm/nouveau/nouveau_backlight.c +++ b/drivers/gpu/drm/nouveau/nouveau_backlight.c @@ -256,6 +256,7 @@ nouveau_backlight_init(struct drm_connector *connector) case NV_DEVICE_INFO_V0_PASCAL: case NV_DEVICE_INFO_V0_VOLTA: case NV_DEVICE_INFO_V0_TURING: + case NV_DEVICE_INFO_V0_AMPERE: //XXX: not confirmed ret = nv50_backlight_init(nv_encoder, &props, &ops); break; default: diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index ffada13c416c5..b6b094bbd562f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2652,6 +2652,21 @@ nv168_chipset = { .sec2 = tu102_sec2_new, }; +static const struct nvkm_device_chip +nv170_chipset = { + .name = "GA100", +}; + +static const struct nvkm_device_chip +nv172_chipset = { + .name = "GA102", +}; + +static const struct nvkm_device_chip +nv174_chipset = { + .name = "GA104", +}; + static int nvkm_device_event_ctor(struct nvkm_object *object, void *data, u32 size, struct nvkm_notify *notify) @@ -3063,6 +3078,7 @@ nvkm_device_ctor(const struct nvkm_device_func *func, case 0x130: device->card_type = GP100; break; case 0x140: device->card_type = GV100; break; case 0x160: device->card_type = TU100; break; + case 0x170: device->card_type = GA100; break; default: break; } @@ -3160,10 +3176,23 @@ nvkm_device_ctor(const struct nvkm_device_func *func, case 0x166: device->chip = &nv166_chipset; break; case 0x167: device->chip = &nv167_chipset; break; case 0x168: device->chip = &nv168_chipset; break; + case 0x172: device->chip = &nv172_chipset; break; + case 0x174: device->chip = &nv174_chipset; break; default: - nvdev_error(device, "unknown chipset (%08x)\n", boot0); - ret = -ENODEV; - goto done; + if (nvkm_boolopt(device->cfgopt, "NvEnableUnsupportedChipsets", false)) { + switch (device->chipset) { + case 0x170: device->chip = &nv170_chipset; break; + default: + break; + } + } + + if (!device->chip) { + nvdev_error(device, "unknown chipset (%08x)\n", boot0); + ret = -ENODEV; + goto done; + } + break; } nvdev_info(device, "NVIDIA %s (%08x)\n", diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c index 03c6d9aef075c..1478947987860 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c @@ -176,6 +176,7 @@ nvkm_udevice_info(struct nvkm_udevice *udev, void *data, u32 size) case GP100: args->v0.family = NV_DEVICE_INFO_V0_PASCAL; break; case GV100: args->v0.family = NV_DEVICE_INFO_V0_VOLTA; break; case TU100: args->v0.family = NV_DEVICE_INFO_V0_TURING; break; + case GA100: args->v0.family = NV_DEVICE_INFO_V0_AMPERE; break; default: args->v0.family = 0; break; -- GitLab From caeb6ab899c3d36a74cda6e299c6e1c9c4e2a22e Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: [PATCH 1815/1894] drm/nouveau/kms/nv50-: fix case where notifier buffer is at offset 0 VRAM offset 0 is a valid address, triggered on GA102. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/dispnv50/disp.c | 4 ++-- drivers/gpu/drm/nouveau/dispnv50/disp.h | 2 +- drivers/gpu/drm/nouveau/dispnv50/wimmc37b.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index 33fff388dd83c..c6367035970eb 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -222,7 +222,7 @@ nv50_dmac_wait(struct nvif_push *push, u32 size) int nv50_dmac_create(struct nvif_device *device, struct nvif_object *disp, - const s32 *oclass, u8 head, void *data, u32 size, u64 syncbuf, + const s32 *oclass, u8 head, void *data, u32 size, s64 syncbuf, struct nv50_dmac *dmac) { struct nouveau_cli *cli = (void *)device->object.client; @@ -271,7 +271,7 @@ nv50_dmac_create(struct nvif_device *device, struct nvif_object *disp, if (ret) return ret; - if (!syncbuf) + if (syncbuf < 0) return 0; ret = nvif_object_ctor(&dmac->base.user, "kmsSyncCtxDma", NV50_DISP_HANDLE_SYNCBUF, diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.h b/drivers/gpu/drm/nouveau/dispnv50/disp.h index 92bddc0836171..38dec11e7dda5 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.h +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.h @@ -95,7 +95,7 @@ struct nv50_outp_atom { int nv50_dmac_create(struct nvif_device *device, struct nvif_object *disp, const s32 *oclass, u8 head, void *data, u32 size, - u64 syncbuf, struct nv50_dmac *dmac); + s64 syncbuf, struct nv50_dmac *dmac); void nv50_dmac_destroy(struct nv50_dmac *); /* diff --git a/drivers/gpu/drm/nouveau/dispnv50/wimmc37b.c b/drivers/gpu/drm/nouveau/dispnv50/wimmc37b.c index 685b708713242..b390029c69ec1 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wimmc37b.c +++ b/drivers/gpu/drm/nouveau/dispnv50/wimmc37b.c @@ -76,7 +76,7 @@ wimmc37b_init_(const struct nv50_wimm_func *func, struct nouveau_drm *drm, int ret; ret = nv50_dmac_create(&drm->client.device, &disp->disp->object, - &oclass, 0, &args, sizeof(args), 0, + &oclass, 0, &args, sizeof(args), -1, &wndw->wimm); if (ret) { NV_ERROR(drm, "wimm%04x allocation failed: %d\n", oclass, ret); -- GitLab From 70afbe4bdc0a7ccdb462a38216f5abc3db7e5c1b Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: [PATCH 1816/1894] drm/nouveau/pci/ga10[024]: initial support Appears to be compatible with GP100 code. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index b6b094bbd562f..95c470d13cb9f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2655,16 +2655,19 @@ nv168_chipset = { static const struct nvkm_device_chip nv170_chipset = { .name = "GA100", + .pci = gp100_pci_new, }; static const struct nvkm_device_chip nv172_chipset = { .name = "GA102", + .pci = gp100_pci_new, }; static const struct nvkm_device_chip nv174_chipset = { .name = "GA104", + .pci = gp100_pci_new, }; static int -- GitLab From a34632482f1ea768429a9d4c79a10d12f5093405 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: [PATCH 1817/1894] drm/nouveau/bios/ga10[024]: initial support Forcing PRAMIN-shadowing off for GA100, as it requires display, and we don't know if/where the fuse register for detecting its presence is. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 3 +++ drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowramin.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 95c470d13cb9f..7c35c32cdf734 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2655,18 +2655,21 @@ nv168_chipset = { static const struct nvkm_device_chip nv170_chipset = { .name = "GA100", + .bios = nvkm_bios_new, .pci = gp100_pci_new, }; static const struct nvkm_device_chip nv172_chipset = { .name = "GA102", + .bios = nvkm_bios_new, .pci = gp100_pci_new, }; static const struct nvkm_device_chip nv174_chipset = { .name = "GA104", + .bios = nvkm_bios_new, .pci = gp100_pci_new, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowramin.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowramin.c index 3634cd0630b81..023ddc7c5399a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowramin.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowramin.c @@ -64,6 +64,9 @@ pramin_init(struct nvkm_bios *bios, const char *name) return NULL; /* we can't get the bios image pointer without PDISP */ + if (device->card_type >= GA100) + addr = device->chipset == 0x170; /*XXX: find the fuse reg for this */ + else if (device->card_type >= GM100) addr = nvkm_rd32(device, 0x021c04); else -- GitLab From 7ddf5e9597faa6f939370e294e0f6d9516d2a431 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: [PATCH 1818/1894] drm/nouveau/devinit/ga10[024]: initial support VPLL regs changed a bit. There's more stuff to do around these, but it's less invasive to stick those changes into disp for now. None of that belongs here anymore anyhow - fix that someday. Signed-off-by: Ben Skeggs --- .../drm/nouveau/include/nvkm/subdev/devinit.h | 1 + .../gpu/drm/nouveau/nvkm/engine/device/base.c | 3 + .../drm/nouveau/nvkm/subdev/devinit/Kbuild | 1 + .../drm/nouveau/nvkm/subdev/devinit/ga100.c | 76 +++++++++++++++++++ .../drm/nouveau/nvkm/subdev/devinit/priv.h | 1 + .../drm/nouveau/nvkm/subdev/devinit/tu102.c | 2 +- 6 files changed, 83 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/devinit/ga100.c diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/devinit.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/devinit.h index 1a39e52e09e36..50cc7c05eac49 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/devinit.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/devinit.h @@ -32,4 +32,5 @@ int gm107_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **); int gm200_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **); int gv100_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **); int tu102_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **); +int ga100_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 7c35c32cdf734..76f1b2504f3a2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2656,6 +2656,7 @@ static const struct nvkm_device_chip nv170_chipset = { .name = "GA100", .bios = nvkm_bios_new, + .devinit = ga100_devinit_new, .pci = gp100_pci_new, }; @@ -2663,6 +2664,7 @@ static const struct nvkm_device_chip nv172_chipset = { .name = "GA102", .bios = nvkm_bios_new, + .devinit = ga100_devinit_new, .pci = gp100_pci_new, }; @@ -2670,6 +2672,7 @@ static const struct nvkm_device_chip nv174_chipset = { .name = "GA104", .bios = nvkm_bios_new, + .devinit = ga100_devinit_new, .pci = gp100_pci_new, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/Kbuild index b3429371ed824..d1abb64841dac 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/Kbuild @@ -15,3 +15,4 @@ nvkm-y += nvkm/subdev/devinit/gm107.o nvkm-y += nvkm/subdev/devinit/gm200.o nvkm-y += nvkm/subdev/devinit/gv100.o nvkm-y += nvkm/subdev/devinit/tu102.o +nvkm-y += nvkm/subdev/devinit/ga100.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/ga100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/ga100.c new file mode 100644 index 0000000000000..636a92128f6c8 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/ga100.c @@ -0,0 +1,76 @@ +/* + * Copyright 2021 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "nv50.h" + +#include +#include +#include + +static int +ga100_devinit_pll_set(struct nvkm_devinit *init, u32 type, u32 freq) +{ + struct nvkm_subdev *subdev = &init->subdev; + struct nvkm_device *device = subdev->device; + struct nvbios_pll info; + int head = type - PLL_VPLL0; + int N, fN, M, P; + int ret; + + ret = nvbios_pll_parse(device->bios, type, &info); + if (ret) + return ret; + + ret = gt215_pll_calc(subdev, &info, freq, &N, &fN, &M, &P); + if (ret < 0) + return ret; + + switch (info.type) { + case PLL_VPLL0: + case PLL_VPLL1: + case PLL_VPLL2: + case PLL_VPLL3: + nvkm_wr32(device, 0x00ef00 + (head * 0x40), 0x02080004); + nvkm_wr32(device, 0x00ef18 + (head * 0x40), (N << 16) | fN); + nvkm_wr32(device, 0x00ef04 + (head * 0x40), (P << 16) | M); + nvkm_wr32(device, 0x00e9c0 + (head * 0x04), 0x00000001); + break; + default: + nvkm_warn(subdev, "%08x/%dKhz unimplemented\n", type, freq); + ret = -EINVAL; + break; + } + + return ret; +} + +static const struct nvkm_devinit_func +ga100_devinit = { + .init = nv50_devinit_init, + .post = tu102_devinit_post, + .pll_set = ga100_devinit_pll_set, +}; + +int +ga100_devinit_new(struct nvkm_device *device, int index, struct nvkm_devinit **pinit) +{ + return nv50_devinit_new_(&ga100_devinit, device, index, pinit); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/priv.h index 94723352137a7..05961e6242647 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/priv.h @@ -19,4 +19,5 @@ void nvkm_devinit_ctor(const struct nvkm_devinit_func *, struct nvkm_device *, int index, struct nvkm_devinit *); int nv04_devinit_post(struct nvkm_devinit *, bool); +int tu102_devinit_post(struct nvkm_devinit *, bool); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/tu102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/tu102.c index 397670e72fff9..9a469bf482f2f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/tu102.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/tu102.c @@ -65,7 +65,7 @@ tu102_devinit_pll_set(struct nvkm_devinit *init, u32 type, u32 freq) return ret; } -static int +int tu102_devinit_post(struct nvkm_devinit *base, bool post) { struct nv50_devinit *init = nv50_devinit(base); -- GitLab From 5961c62d20753009408df4752e22991097386aa9 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: [PATCH 1819/1894] drm/nouveau/mc/ga10[024]: initial support Fortunately, all the interrupts we need to bring up basic display support are contained in a single leaf register, allowing this basic (but hackish) implementation. There's a bunch more invasive patches to come implementing all this in a better/more complete way, but trying to get a minimal series out first. Signed-off-by: Ben Skeggs --- .../gpu/drm/nouveau/include/nvkm/subdev/mc.h | 1 + .../gpu/drm/nouveau/nvkm/engine/device/base.c | 3 + drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild | 1 + .../gpu/drm/nouveau/nvkm/subdev/mc/ga100.c | 74 +++++++++++++++++++ 4 files changed, 79 insertions(+) create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/mc/ga100.c diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h index 6641fe4c252c6..e45ca45839670 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h @@ -32,4 +32,5 @@ int gk20a_mc_new(struct nvkm_device *, int, struct nvkm_mc **); int gp100_mc_new(struct nvkm_device *, int, struct nvkm_mc **); int gp10b_mc_new(struct nvkm_device *, int, struct nvkm_mc **); int tu102_mc_new(struct nvkm_device *, int, struct nvkm_mc **); +int ga100_mc_new(struct nvkm_device *, int, struct nvkm_mc **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 76f1b2504f3a2..fb551edc6a357 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2657,6 +2657,7 @@ nv170_chipset = { .name = "GA100", .bios = nvkm_bios_new, .devinit = ga100_devinit_new, + .mc = ga100_mc_new, .pci = gp100_pci_new, }; @@ -2665,6 +2666,7 @@ nv172_chipset = { .name = "GA102", .bios = nvkm_bios_new, .devinit = ga100_devinit_new, + .mc = ga100_mc_new, .pci = gp100_pci_new, }; @@ -2673,6 +2675,7 @@ nv174_chipset = { .name = "GA104", .bios = nvkm_bios_new, .devinit = ga100_devinit_new, + .mc = ga100_mc_new, .pci = gp100_pci_new, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild index 2585ef07532ac..ac2b34e9ac6ad 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild @@ -14,3 +14,4 @@ nvkm-y += nvkm/subdev/mc/gk20a.o nvkm-y += nvkm/subdev/mc/gp100.o nvkm-y += nvkm/subdev/mc/gp10b.o nvkm-y += nvkm/subdev/mc/tu102.o +nvkm-y += nvkm/subdev/mc/ga100.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/ga100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/ga100.c new file mode 100644 index 0000000000000..967eb3af11eb0 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/ga100.c @@ -0,0 +1,74 @@ +/* + * Copyright 2021 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "priv.h" + +static void +ga100_mc_intr_unarm(struct nvkm_mc *mc) +{ + nvkm_wr32(mc->subdev.device, 0xb81610, 0x00000004); +} + +static void +ga100_mc_intr_rearm(struct nvkm_mc *mc) +{ + nvkm_wr32(mc->subdev.device, 0xb81608, 0x00000004); +} + +static void +ga100_mc_intr_mask(struct nvkm_mc *mc, u32 mask, u32 intr) +{ + nvkm_wr32(mc->subdev.device, 0xb81210, mask & intr ); + nvkm_wr32(mc->subdev.device, 0xb81410, mask & ~(mask & intr)); +} + +static u32 +ga100_mc_intr_stat(struct nvkm_mc *mc) +{ + u32 intr_top = nvkm_rd32(mc->subdev.device, 0xb81600), intr = 0x00000000; + if (intr_top & 0x00000004) + intr = nvkm_mask(mc->subdev.device, 0xb81010, 0x00000000, 0x00000000); + return intr; +} + +static void +ga100_mc_init(struct nvkm_mc *mc) +{ + nv50_mc_init(mc); + nvkm_wr32(mc->subdev.device, 0xb81210, 0xffffffff); +} + +static const struct nvkm_mc_func +ga100_mc = { + .init = ga100_mc_init, + .intr = gp100_mc_intr, + .intr_unarm = ga100_mc_intr_unarm, + .intr_rearm = ga100_mc_intr_rearm, + .intr_mask = ga100_mc_intr_mask, + .intr_stat = ga100_mc_intr_stat, + .reset = gk104_mc_reset, +}; + +int +ga100_mc_new(struct nvkm_device *device, int index, struct nvkm_mc **pmc) +{ + return nvkm_mc_new_(&ga100_mc, device, index, pmc); +} -- GitLab From e0df4bbfc3365d7699e32bebb24647dc7a09b00c Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: [PATCH 1820/1894] drm/nouveau/privring/ga10[024]: initial support Appears to be compatible with GM200 code. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index fb551edc6a357..2f6e1b1ce0bb3 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2657,6 +2657,7 @@ nv170_chipset = { .name = "GA100", .bios = nvkm_bios_new, .devinit = ga100_devinit_new, + .ibus = gm200_ibus_new, .mc = ga100_mc_new, .pci = gp100_pci_new, }; @@ -2666,6 +2667,7 @@ nv172_chipset = { .name = "GA102", .bios = nvkm_bios_new, .devinit = ga100_devinit_new, + .ibus = gm200_ibus_new, .mc = ga100_mc_new, .pci = gp100_pci_new, }; @@ -2675,6 +2677,7 @@ nv174_chipset = { .name = "GA104", .bios = nvkm_bios_new, .devinit = ga100_devinit_new, + .ibus = gm200_ibus_new, .mc = ga100_mc_new, .pci = gp100_pci_new, }; -- GitLab From de4781d0f22b54fdbe7ac459eb67b585ca3ee430 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: [PATCH 1821/1894] drm/nouveau/imem/ga10[024]: initial support Appears to be compatible with NV50 code. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 2f6e1b1ce0bb3..d2be48fc5db1d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2658,6 +2658,7 @@ nv170_chipset = { .bios = nvkm_bios_new, .devinit = ga100_devinit_new, .ibus = gm200_ibus_new, + .imem = nv50_instmem_new, .mc = ga100_mc_new, .pci = gp100_pci_new, }; @@ -2668,6 +2669,7 @@ nv172_chipset = { .bios = nvkm_bios_new, .devinit = ga100_devinit_new, .ibus = gm200_ibus_new, + .imem = nv50_instmem_new, .mc = ga100_mc_new, .pci = gp100_pci_new, }; @@ -2678,6 +2680,7 @@ nv174_chipset = { .bios = nvkm_bios_new, .devinit = ga100_devinit_new, .ibus = gm200_ibus_new, + .imem = nv50_instmem_new, .mc = ga100_mc_new, .pci = gp100_pci_new, }; -- GitLab From 41ba806f40a9a4c4f4c04a474bf368160f1baa2c Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: [PATCH 1822/1894] drm/nouveau/fb/ga10[024]: initial support No VPR scrub. GA102 and GA104 have a new VRAM size detection method. Signed-off-by: Ben Skeggs --- .../gpu/drm/nouveau/include/nvkm/subdev/fb.h | 2 + .../gpu/drm/nouveau/nvkm/engine/device/base.c | 3 ++ drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild | 3 ++ .../gpu/drm/nouveau/nvkm/subdev/fb/ga100.c | 40 +++++++++++++++++++ .../gpu/drm/nouveau/nvkm/subdev/fb/ga102.c | 40 +++++++++++++++++++ .../gpu/drm/nouveau/nvkm/subdev/fb/gv100.c | 2 +- drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h | 2 + drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h | 1 + .../gpu/drm/nouveau/nvkm/subdev/fb/ramga102.c | 40 +++++++++++++++++++ 9 files changed, 132 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga100.c create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga102.c create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramga102.c diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h index 34b56b10218a8..2ecd52aec1d12 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h @@ -86,6 +86,8 @@ int gp100_fb_new(struct nvkm_device *, int, struct nvkm_fb **); int gp102_fb_new(struct nvkm_device *, int, struct nvkm_fb **); int gp10b_fb_new(struct nvkm_device *, int, struct nvkm_fb **); int gv100_fb_new(struct nvkm_device *, int, struct nvkm_fb **); +int ga100_fb_new(struct nvkm_device *, int, struct nvkm_fb **); +int ga102_fb_new(struct nvkm_device *, int, struct nvkm_fb **); #include #include diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index d2be48fc5db1d..4a5d43fe3f3ab 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2657,6 +2657,7 @@ nv170_chipset = { .name = "GA100", .bios = nvkm_bios_new, .devinit = ga100_devinit_new, + .fb = ga100_fb_new, .ibus = gm200_ibus_new, .imem = nv50_instmem_new, .mc = ga100_mc_new, @@ -2668,6 +2669,7 @@ nv172_chipset = { .name = "GA102", .bios = nvkm_bios_new, .devinit = ga100_devinit_new, + .fb = ga102_fb_new, .ibus = gm200_ibus_new, .imem = nv50_instmem_new, .mc = ga100_mc_new, @@ -2679,6 +2681,7 @@ nv174_chipset = { .name = "GA104", .bios = nvkm_bios_new, .devinit = ga100_devinit_new, + .fb = ga102_fb_new, .ibus = gm200_ibus_new, .imem = nv50_instmem_new, .mc = ga100_mc_new, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild index 43a42159a3d00..5d0bab8ecb433 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild @@ -32,6 +32,8 @@ nvkm-y += nvkm/subdev/fb/gp100.o nvkm-y += nvkm/subdev/fb/gp102.o nvkm-y += nvkm/subdev/fb/gp10b.o nvkm-y += nvkm/subdev/fb/gv100.o +nvkm-y += nvkm/subdev/fb/ga100.o +nvkm-y += nvkm/subdev/fb/ga102.o nvkm-y += nvkm/subdev/fb/ram.o nvkm-y += nvkm/subdev/fb/ramnv04.o @@ -52,6 +54,7 @@ nvkm-y += nvkm/subdev/fb/ramgk104.o nvkm-y += nvkm/subdev/fb/ramgm107.o nvkm-y += nvkm/subdev/fb/ramgm200.o nvkm-y += nvkm/subdev/fb/ramgp100.o +nvkm-y += nvkm/subdev/fb/ramga102.o nvkm-y += nvkm/subdev/fb/sddr2.o nvkm-y += nvkm/subdev/fb/sddr3.o nvkm-y += nvkm/subdev/fb/gddr3.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga100.c new file mode 100644 index 0000000000000..bf82686851cd4 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga100.c @@ -0,0 +1,40 @@ +/* + * Copyright 2021 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "gf100.h" +#include "ram.h" + +static const struct nvkm_fb_func +ga100_fb = { + .dtor = gf100_fb_dtor, + .oneinit = gf100_fb_oneinit, + .init = gp100_fb_init, + .init_page = gv100_fb_init_page, + .init_unkn = gp100_fb_init_unkn, + .ram_new = gp100_ram_new, + .default_bigpage = 16, +}; + +int +ga100_fb_new(struct nvkm_device *device, int index, struct nvkm_fb **pfb) +{ + return gp102_fb_new_(&ga100_fb, device, index, pfb); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga102.c new file mode 100644 index 0000000000000..bcecf84a6e679 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga102.c @@ -0,0 +1,40 @@ +/* + * Copyright 2021 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "gf100.h" +#include "ram.h" + +static const struct nvkm_fb_func +ga102_fb = { + .dtor = gf100_fb_dtor, + .oneinit = gf100_fb_oneinit, + .init = gp100_fb_init, + .init_page = gv100_fb_init_page, + .init_unkn = gp100_fb_init_unkn, + .ram_new = ga102_ram_new, + .default_bigpage = 16, +}; + +int +ga102_fb_new(struct nvkm_device *device, int index, struct nvkm_fb **pfb) +{ + return gp102_fb_new_(&ga102_fb, device, index, pfb); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c index 10ff5d053f7ea..feda86a5fba85 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c @@ -22,7 +22,7 @@ #include "gf100.h" #include "ram.h" -static int +int gv100_fb_init_page(struct nvkm_fb *fb) { return (fb->page == 16) ? 0 : -EINVAL; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h index 5be9c563350d7..66932ac10d15c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h @@ -82,4 +82,6 @@ int gp102_fb_new_(const struct nvkm_fb_func *, struct nvkm_device *, int, struct nvkm_fb **); bool gp102_fb_vpr_scrub_required(struct nvkm_fb *); int gp102_fb_vpr_scrub(struct nvkm_fb *); + +int gv100_fb_init_page(struct nvkm_fb *); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h index d723a9b4e3c47..ea7d66f3dd825 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h @@ -70,4 +70,5 @@ int gk104_ram_new(struct nvkm_fb *, struct nvkm_ram **); int gm107_ram_new(struct nvkm_fb *, struct nvkm_ram **); int gm200_ram_new(struct nvkm_fb *, struct nvkm_ram **); int gp100_ram_new(struct nvkm_fb *, struct nvkm_ram **); +int ga102_ram_new(struct nvkm_fb *, struct nvkm_ram **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramga102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramga102.c new file mode 100644 index 0000000000000..298c136cefe0c --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramga102.c @@ -0,0 +1,40 @@ +/* + * Copyright 2021 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ram.h" + +#include +#include +#include + +static const struct nvkm_ram_func +ga102_ram = { +}; + +int +ga102_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram) +{ + struct nvkm_device *device = fb->subdev.device; + enum nvkm_ram_type type = nvkm_fb_bios_memtype(device->bios); + u32 size = nvkm_rd32(device, 0x1183a4); + + return nvkm_ram_new_(&ga102_ram, fb, type, (u64)size << 20, pram); +} -- GitLab From 6f300e0a0ba8873f1225959089f8bb2897d93ec6 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: [PATCH 1823/1894] drm/nouveau/timer/ga10[024]: initial support Appears to be compatible with GK20A code. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 4a5d43fe3f3ab..6a1920965c019 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2662,6 +2662,7 @@ nv170_chipset = { .imem = nv50_instmem_new, .mc = ga100_mc_new, .pci = gp100_pci_new, + .timer = gk20a_timer_new, }; static const struct nvkm_device_chip @@ -2674,6 +2675,7 @@ nv172_chipset = { .imem = nv50_instmem_new, .mc = ga100_mc_new, .pci = gp100_pci_new, + .timer = gk20a_timer_new, }; static const struct nvkm_device_chip @@ -2686,6 +2688,7 @@ nv174_chipset = { .imem = nv50_instmem_new, .mc = ga100_mc_new, .pci = gp100_pci_new, + .timer = gk20a_timer_new, }; static int -- GitLab From a3abc23ac40111c76708119013d63451169e7838 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: [PATCH 1824/1894] drm/nouveau/mmu/ga10[024]: initial support Appears to be compatible with TU102 code. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 6a1920965c019..ad4cece038b47 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2661,6 +2661,7 @@ nv170_chipset = { .ibus = gm200_ibus_new, .imem = nv50_instmem_new, .mc = ga100_mc_new, + .mmu = tu102_mmu_new, .pci = gp100_pci_new, .timer = gk20a_timer_new, }; @@ -2674,6 +2675,7 @@ nv172_chipset = { .ibus = gm200_ibus_new, .imem = nv50_instmem_new, .mc = ga100_mc_new, + .mmu = tu102_mmu_new, .pci = gp100_pci_new, .timer = gk20a_timer_new, }; @@ -2687,6 +2689,7 @@ nv174_chipset = { .ibus = gm200_ibus_new, .imem = nv50_instmem_new, .mc = ga100_mc_new, + .mmu = tu102_mmu_new, .pci = gp100_pci_new, .timer = gk20a_timer_new, }; -- GitLab From f5cbe7c8bd1ac6f8c91179de381e10ee5f0f8809 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: [PATCH 1825/1894] drm/nouveau/bar/ga10[024]: initial support Appears to be compatible with TU102 code. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index ad4cece038b47..c9cc08ba14443 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2655,6 +2655,7 @@ nv168_chipset = { static const struct nvkm_device_chip nv170_chipset = { .name = "GA100", + .bar = tu102_bar_new, .bios = nvkm_bios_new, .devinit = ga100_devinit_new, .fb = ga100_fb_new, @@ -2669,6 +2670,7 @@ nv170_chipset = { static const struct nvkm_device_chip nv172_chipset = { .name = "GA102", + .bar = tu102_bar_new, .bios = nvkm_bios_new, .devinit = ga100_devinit_new, .fb = ga102_fb_new, @@ -2683,6 +2685,7 @@ nv172_chipset = { static const struct nvkm_device_chip nv174_chipset = { .name = "GA104", + .bar = tu102_bar_new, .bios = nvkm_bios_new, .devinit = ga100_devinit_new, .fb = ga102_fb_new, -- GitLab From c28efb15f9e51a96c6bce2b92c0f3a4da87db877 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: [PATCH 1826/1894] drm/nouveau/gpio/ga10[024]: initial support GA100 appears to be compatible with GK104 code, the others have some register moves. Signed-off-by: Ben Skeggs --- .../drm/nouveau/include/nvkm/subdev/gpio.h | 1 + .../gpu/drm/nouveau/nvkm/engine/device/base.c | 3 + .../gpu/drm/nouveau/nvkm/subdev/gpio/Kbuild | 1 + .../gpu/drm/nouveau/nvkm/subdev/gpio/ga102.c | 118 ++++++++++++++++++ 4 files changed, 123 insertions(+) create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/gpio/ga102.c diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gpio.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gpio.h index eaacf8d80527c..cdcce5ece6ff5 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gpio.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gpio.h @@ -37,4 +37,5 @@ int nv50_gpio_new(struct nvkm_device *, int, struct nvkm_gpio **); int g94_gpio_new(struct nvkm_device *, int, struct nvkm_gpio **); int gf119_gpio_new(struct nvkm_device *, int, struct nvkm_gpio **); int gk104_gpio_new(struct nvkm_device *, int, struct nvkm_gpio **); +int ga102_gpio_new(struct nvkm_device *, int, struct nvkm_gpio **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index c9cc08ba14443..20d947808ef77 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2659,6 +2659,7 @@ nv170_chipset = { .bios = nvkm_bios_new, .devinit = ga100_devinit_new, .fb = ga100_fb_new, + .gpio = gk104_gpio_new, .ibus = gm200_ibus_new, .imem = nv50_instmem_new, .mc = ga100_mc_new, @@ -2674,6 +2675,7 @@ nv172_chipset = { .bios = nvkm_bios_new, .devinit = ga100_devinit_new, .fb = ga102_fb_new, + .gpio = ga102_gpio_new, .ibus = gm200_ibus_new, .imem = nv50_instmem_new, .mc = ga100_mc_new, @@ -2689,6 +2691,7 @@ nv174_chipset = { .bios = nvkm_bios_new, .devinit = ga100_devinit_new, .fb = ga102_fb_new, + .gpio = ga102_gpio_new, .ibus = gm200_ibus_new, .imem = nv50_instmem_new, .mc = ga100_mc_new, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gpio/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/gpio/Kbuild index b2ad5922a1c2d..efbbaa080de51 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gpio/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gpio/Kbuild @@ -5,3 +5,4 @@ nvkm-y += nvkm/subdev/gpio/nv50.o nvkm-y += nvkm/subdev/gpio/g94.o nvkm-y += nvkm/subdev/gpio/gf119.o nvkm-y += nvkm/subdev/gpio/gk104.o +nvkm-y += nvkm/subdev/gpio/ga102.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gpio/ga102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gpio/ga102.c new file mode 100644 index 0000000000000..62c791baf4008 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gpio/ga102.c @@ -0,0 +1,118 @@ +/* + * Copyright 2021 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "priv.h" + +static void +ga102_gpio_reset(struct nvkm_gpio *gpio, u8 match) +{ + struct nvkm_device *device = gpio->subdev.device; + struct nvkm_bios *bios = device->bios; + u8 ver, len; + u16 entry; + int ent = -1; + + while ((entry = dcb_gpio_entry(bios, 0, ++ent, &ver, &len))) { + u32 data = nvbios_rd32(bios, entry); + u8 line = (data & 0x0000003f); + u8 defs = !!(data & 0x00000080); + u8 func = (data & 0x0000ff00) >> 8; + u8 unk0 = (data & 0x00ff0000) >> 16; + u8 unk1 = (data & 0x1f000000) >> 24; + + if ( func == DCB_GPIO_UNUSED || + (match != DCB_GPIO_UNUSED && match != func)) + continue; + + nvkm_gpio_set(gpio, 0, func, line, defs); + + nvkm_mask(device, 0x021200 + (line * 4), 0xff, unk0); + if (unk1--) + nvkm_mask(device, 0x00d740 + (unk1 * 4), 0xff, line); + } +} + +static int +ga102_gpio_drive(struct nvkm_gpio *gpio, int line, int dir, int out) +{ + struct nvkm_device *device = gpio->subdev.device; + u32 data = ((dir ^ 1) << 13) | (out << 12); + nvkm_mask(device, 0x021200 + (line * 4), 0x00003000, data); + nvkm_mask(device, 0x00d604, 0x00000001, 0x00000001); /* update? */ + return 0; +} + +static int +ga102_gpio_sense(struct nvkm_gpio *gpio, int line) +{ + struct nvkm_device *device = gpio->subdev.device; + return !!(nvkm_rd32(device, 0x021200 + (line * 4)) & 0x00004000); +} + +static void +ga102_gpio_intr_stat(struct nvkm_gpio *gpio, u32 *hi, u32 *lo) +{ + struct nvkm_device *device = gpio->subdev.device; + u32 intr0 = nvkm_rd32(device, 0x021640); + u32 intr1 = nvkm_rd32(device, 0x02164c); + u32 stat0 = nvkm_rd32(device, 0x021648) & intr0; + u32 stat1 = nvkm_rd32(device, 0x021654) & intr1; + *lo = (stat1 & 0xffff0000) | (stat0 >> 16); + *hi = (stat1 << 16) | (stat0 & 0x0000ffff); + nvkm_wr32(device, 0x021640, intr0); + nvkm_wr32(device, 0x02164c, intr1); +} + +static void +ga102_gpio_intr_mask(struct nvkm_gpio *gpio, u32 type, u32 mask, u32 data) +{ + struct nvkm_device *device = gpio->subdev.device; + u32 inte0 = nvkm_rd32(device, 0x021648); + u32 inte1 = nvkm_rd32(device, 0x021654); + if (type & NVKM_GPIO_LO) + inte0 = (inte0 & ~(mask << 16)) | (data << 16); + if (type & NVKM_GPIO_HI) + inte0 = (inte0 & ~(mask & 0xffff)) | (data & 0xffff); + mask >>= 16; + data >>= 16; + if (type & NVKM_GPIO_LO) + inte1 = (inte1 & ~(mask << 16)) | (data << 16); + if (type & NVKM_GPIO_HI) + inte1 = (inte1 & ~mask) | data; + nvkm_wr32(device, 0x021648, inte0); + nvkm_wr32(device, 0x021654, inte1); +} + +static const struct nvkm_gpio_func +ga102_gpio = { + .lines = 32, + .intr_stat = ga102_gpio_intr_stat, + .intr_mask = ga102_gpio_intr_mask, + .drive = ga102_gpio_drive, + .sense = ga102_gpio_sense, + .reset = ga102_gpio_reset, +}; + +int +ga102_gpio_new(struct nvkm_device *device, int index, struct nvkm_gpio **pgpio) +{ + return nvkm_gpio_new_(&ga102_gpio, device, index, pgpio); +} -- GitLab From 8a0412265f06490d93724bf8badf220180790ad1 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: [PATCH 1827/1894] drm/nouveau/i2c/ga10[024]: initial support Appears to be compatible with GM200 code. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 20d947808ef77..946b0001ca548 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2660,6 +2660,7 @@ nv170_chipset = { .devinit = ga100_devinit_new, .fb = ga100_fb_new, .gpio = gk104_gpio_new, + .i2c = gm200_i2c_new, .ibus = gm200_ibus_new, .imem = nv50_instmem_new, .mc = ga100_mc_new, @@ -2676,6 +2677,7 @@ nv172_chipset = { .devinit = ga100_devinit_new, .fb = ga102_fb_new, .gpio = ga102_gpio_new, + .i2c = gm200_i2c_new, .ibus = gm200_ibus_new, .imem = nv50_instmem_new, .mc = ga100_mc_new, @@ -2692,6 +2694,7 @@ nv174_chipset = { .devinit = ga100_devinit_new, .fb = ga102_fb_new, .gpio = ga102_gpio_new, + .i2c = gm200_i2c_new, .ibus = gm200_ibus_new, .imem = nv50_instmem_new, .mc = ga100_mc_new, -- GitLab From a6cf0320aad0c69a6b558dd41d3cb6891a6c9872 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: [PATCH 1828/1894] drm/nouveau/dmaobj/ga10[24]: initial support Appears to be compatible with GV100 code, and not required on GA100, as it shouldn't have display. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 946b0001ca548..bea3daf085700 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2684,6 +2684,7 @@ nv172_chipset = { .mmu = tu102_mmu_new, .pci = gp100_pci_new, .timer = gk20a_timer_new, + .dma = gv100_dma_new, }; static const struct nvkm_device_chip @@ -2701,6 +2702,7 @@ nv174_chipset = { .mmu = tu102_mmu_new, .pci = gp100_pci_new, .timer = gk20a_timer_new, + .dma = gv100_dma_new, }; static int -- GitLab From 8ef23b6f6a79e6fa2a169081d2d76011fffa0482 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: [PATCH 1829/1894] drm/nouveau/disp/ga10[24]: initial support UEFI/RM no longer use IED scripts from the VBIOS, though they appear to have been updated for use by the x86 VBIOS code, so we should be able to continue using them for the moment. Unfortunately, we require some hacks to do so, as the BeforeLinkTraining IED script became a pointer to an array of scripts instead, without a revbump of the relevant tables. There's also some changes to SOR clock divider fiddling, which are hopefully correct enough that things work as they should. AFAIK, GA100 shouldn't have display, so it hasn't been added. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/dispnv50/Kbuild | 1 + drivers/gpu/drm/nouveau/dispnv50/core.c | 1 + drivers/gpu/drm/nouveau/dispnv50/curs.c | 1 + drivers/gpu/drm/nouveau/dispnv50/wimm.c | 1 + drivers/gpu/drm/nouveau/dispnv50/wndw.c | 1 + drivers/gpu/drm/nouveau/dispnv50/wndw.h | 8 + drivers/gpu/drm/nouveau/dispnv50/wndwc57e.c | 10 +- drivers/gpu/drm/nouveau/dispnv50/wndwc67e.c | 106 +++++++++++++ drivers/gpu/drm/nouveau/include/nvif/class.h | 5 + .../drm/nouveau/include/nvkm/engine/disp.h | 1 + drivers/gpu/drm/nouveau/nvif/disp.c | 1 + .../gpu/drm/nouveau/nvkm/engine/device/base.c | 2 + .../gpu/drm/nouveau/nvkm/engine/disp/Kbuild | 3 + drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c | 33 ++++- .../gpu/drm/nouveau/nvkm/engine/disp/ga102.c | 46 ++++++ .../gpu/drm/nouveau/nvkm/engine/disp/ior.h | 4 + .../gpu/drm/nouveau/nvkm/engine/disp/nv50.h | 2 + .../drm/nouveau/nvkm/engine/disp/rootga102.c | 52 +++++++ .../drm/nouveau/nvkm/engine/disp/rootnv50.h | 1 + .../drm/nouveau/nvkm/engine/disp/sorga102.c | 140 ++++++++++++++++++ .../drm/nouveau/nvkm/engine/disp/sortu102.c | 2 +- .../gpu/drm/nouveau/nvkm/engine/disp/tu102.c | 2 +- 22 files changed, 410 insertions(+), 13 deletions(-) create mode 100644 drivers/gpu/drm/nouveau/dispnv50/wndwc67e.c create mode 100644 drivers/gpu/drm/nouveau/nvkm/engine/disp/ga102.c create mode 100644 drivers/gpu/drm/nouveau/nvkm/engine/disp/rootga102.c create mode 100644 drivers/gpu/drm/nouveau/nvkm/engine/disp/sorga102.c diff --git a/drivers/gpu/drm/nouveau/dispnv50/Kbuild b/drivers/gpu/drm/nouveau/dispnv50/Kbuild index 6fdddb266fb1b..4488e1c061b3d 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/Kbuild +++ b/drivers/gpu/drm/nouveau/dispnv50/Kbuild @@ -37,6 +37,7 @@ nouveau-y += dispnv50/wimmc37b.o nouveau-y += dispnv50/wndw.o nouveau-y += dispnv50/wndwc37e.o nouveau-y += dispnv50/wndwc57e.o +nouveau-y += dispnv50/wndwc67e.o nouveau-y += dispnv50/base.o nouveau-y += dispnv50/base507c.o diff --git a/drivers/gpu/drm/nouveau/dispnv50/core.c b/drivers/gpu/drm/nouveau/dispnv50/core.c index 27ea3f34706d4..abefc2343443b 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/core.c +++ b/drivers/gpu/drm/nouveau/dispnv50/core.c @@ -42,6 +42,7 @@ nv50_core_new(struct nouveau_drm *drm, struct nv50_core **pcore) int version; int (*new)(struct nouveau_drm *, s32, struct nv50_core **); } cores[] = { + { GA102_DISP_CORE_CHANNEL_DMA, 0, corec57d_new }, { TU102_DISP_CORE_CHANNEL_DMA, 0, corec57d_new }, { GV100_DISP_CORE_CHANNEL_DMA, 0, corec37d_new }, { GP102_DISP_CORE_CHANNEL_DMA, 0, core917d_new }, diff --git a/drivers/gpu/drm/nouveau/dispnv50/curs.c b/drivers/gpu/drm/nouveau/dispnv50/curs.c index 121c24a18f111..31d8b2e4791dd 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/curs.c +++ b/drivers/gpu/drm/nouveau/dispnv50/curs.c @@ -31,6 +31,7 @@ nv50_curs_new(struct nouveau_drm *drm, int head, struct nv50_wndw **pwndw) int version; int (*new)(struct nouveau_drm *, int, s32, struct nv50_wndw **); } curses[] = { + { GA102_DISP_CURSOR, 0, cursc37a_new }, { TU102_DISP_CURSOR, 0, cursc37a_new }, { GV100_DISP_CURSOR, 0, cursc37a_new }, { GK104_DISP_CURSOR, 0, curs907a_new }, diff --git a/drivers/gpu/drm/nouveau/dispnv50/wimm.c b/drivers/gpu/drm/nouveau/dispnv50/wimm.c index a1ac153d5e984..566fbddfc8d7f 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wimm.c +++ b/drivers/gpu/drm/nouveau/dispnv50/wimm.c @@ -31,6 +31,7 @@ nv50_wimm_init(struct nouveau_drm *drm, struct nv50_wndw *wndw) int version; int (*init)(struct nouveau_drm *, s32, struct nv50_wndw *); } wimms[] = { + { GA102_DISP_WINDOW_IMM_CHANNEL_DMA, 0, wimmc37b_init }, { TU102_DISP_WINDOW_IMM_CHANNEL_DMA, 0, wimmc37b_init }, { GV100_DISP_WINDOW_IMM_CHANNEL_DMA, 0, wimmc37b_init }, {} diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c index 0356474ad6f6a..ce451242f79eb 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c +++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c @@ -784,6 +784,7 @@ nv50_wndw_new(struct nouveau_drm *drm, enum drm_plane_type type, int index, int (*new)(struct nouveau_drm *, enum drm_plane_type, int, s32, struct nv50_wndw **); } wndws[] = { + { GA102_DISP_WINDOW_CHANNEL_DMA, 0, wndwc67e_new }, { TU102_DISP_WINDOW_CHANNEL_DMA, 0, wndwc57e_new }, { GV100_DISP_WINDOW_CHANNEL_DMA, 0, wndwc37e_new }, {} diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.h b/drivers/gpu/drm/nouveau/dispnv50/wndw.h index 3278e28800343..f4e0c50800344 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wndw.h +++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.h @@ -129,6 +129,14 @@ int wndwc37e_update(struct nv50_wndw *, u32 *); int wndwc57e_new(struct nouveau_drm *, enum drm_plane_type, int, s32, struct nv50_wndw **); +bool wndwc57e_ilut(struct nv50_wndw *, struct nv50_wndw_atom *, int); +int wndwc57e_ilut_set(struct nv50_wndw *, struct nv50_wndw_atom *); +int wndwc57e_ilut_clr(struct nv50_wndw *); +int wndwc57e_csc_set(struct nv50_wndw *, struct nv50_wndw_atom *); +int wndwc57e_csc_clr(struct nv50_wndw *); + +int wndwc67e_new(struct nouveau_drm *, enum drm_plane_type, int, s32, + struct nv50_wndw **); int nv50_wndw_new(struct nouveau_drm *, enum drm_plane_type, int index, struct nv50_wndw **); diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndwc57e.c b/drivers/gpu/drm/nouveau/dispnv50/wndwc57e.c index 429be0bb02220..abdd3bb658b38 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wndwc57e.c +++ b/drivers/gpu/drm/nouveau/dispnv50/wndwc57e.c @@ -80,7 +80,7 @@ wndwc57e_image_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) return 0; } -static int +int wndwc57e_csc_clr(struct nv50_wndw *wndw) { struct nvif_push *push = wndw->wndw.push; @@ -98,7 +98,7 @@ wndwc57e_csc_clr(struct nv50_wndw *wndw) return 0; } -static int +int wndwc57e_csc_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) { struct nvif_push *push = wndw->wndw.push; @@ -111,7 +111,7 @@ wndwc57e_csc_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) return 0; } -static int +int wndwc57e_ilut_clr(struct nv50_wndw *wndw) { struct nvif_push *push = wndw->wndw.push; @@ -124,7 +124,7 @@ wndwc57e_ilut_clr(struct nv50_wndw *wndw) return 0; } -static int +int wndwc57e_ilut_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) { struct nvif_push *push = wndw->wndw.push; @@ -179,7 +179,7 @@ wndwc57e_ilut_load(struct drm_color_lut *in, int size, void __iomem *mem) writew(readw(mem - 4), mem + 4); } -static bool +bool wndwc57e_ilut(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw, int size) { if (size = size ? size : 1024, size != 256 && size != 1024) diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndwc67e.c b/drivers/gpu/drm/nouveau/dispnv50/wndwc67e.c new file mode 100644 index 0000000000000..7a370fa1df20c --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/wndwc67e.c @@ -0,0 +1,106 @@ +/* + * Copyright 2021 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "wndw.h" +#include "atom.h" + +#include + +#include + +static int +wndwc67e_image_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) +{ + struct nvif_push *push = wndw->wndw.push; + int ret; + + if ((ret = PUSH_WAIT(push, 17))) + return ret; + + PUSH_MTHD(push, NVC57E, SET_PRESENT_CONTROL, + NVVAL(NVC57E, SET_PRESENT_CONTROL, MIN_PRESENT_INTERVAL, asyw->image.interval) | + NVVAL(NVC57E, SET_PRESENT_CONTROL, BEGIN_MODE, asyw->image.mode) | + NVDEF(NVC57E, SET_PRESENT_CONTROL, TIMESTAMP_MODE, DISABLE)); + + PUSH_MTHD(push, NVC57E, SET_SIZE, + NVVAL(NVC57E, SET_SIZE, WIDTH, asyw->image.w) | + NVVAL(NVC57E, SET_SIZE, HEIGHT, asyw->image.h), + + SET_STORAGE, + NVVAL(NVC57E, SET_STORAGE, BLOCK_HEIGHT, asyw->image.blockh), + + SET_PARAMS, + NVVAL(NVC57E, SET_PARAMS, FORMAT, asyw->image.format) | + NVDEF(NVC57E, SET_PARAMS, CLAMP_BEFORE_BLEND, DISABLE) | + NVDEF(NVC57E, SET_PARAMS, SWAP_UV, DISABLE) | + NVDEF(NVC57E, SET_PARAMS, FMT_ROUNDING_MODE, ROUND_TO_NEAREST), + + SET_PLANAR_STORAGE(0), + NVVAL(NVC57E, SET_PLANAR_STORAGE, PITCH, asyw->image.blocks[0]) | + NVVAL(NVC57E, SET_PLANAR_STORAGE, PITCH, asyw->image.pitch[0] >> 6)); + + PUSH_MTHD(push, NVC57E, SET_CONTEXT_DMA_ISO(0), asyw->image.handle, 1); + PUSH_MTHD(push, NVC57E, SET_OFFSET(0), asyw->image.offset[0] >> 8); + + PUSH_MTHD(push, NVC57E, SET_POINT_IN(0), + NVVAL(NVC57E, SET_POINT_IN, X, asyw->state.src_x >> 16) | + NVVAL(NVC57E, SET_POINT_IN, Y, asyw->state.src_y >> 16)); + + PUSH_MTHD(push, NVC57E, SET_SIZE_IN, + NVVAL(NVC57E, SET_SIZE_IN, WIDTH, asyw->state.src_w >> 16) | + NVVAL(NVC57E, SET_SIZE_IN, HEIGHT, asyw->state.src_h >> 16)); + + PUSH_MTHD(push, NVC57E, SET_SIZE_OUT, + NVVAL(NVC57E, SET_SIZE_OUT, WIDTH, asyw->state.crtc_w) | + NVVAL(NVC57E, SET_SIZE_OUT, HEIGHT, asyw->state.crtc_h)); + return 0; +} + +static const struct nv50_wndw_func +wndwc67e = { + .acquire = wndwc37e_acquire, + .release = wndwc37e_release, + .sema_set = wndwc37e_sema_set, + .sema_clr = wndwc37e_sema_clr, + .ntfy_set = wndwc37e_ntfy_set, + .ntfy_clr = wndwc37e_ntfy_clr, + .ntfy_reset = corec37d_ntfy_init, + .ntfy_wait_begun = base507c_ntfy_wait_begun, + .ilut = wndwc57e_ilut, + .ilut_identity = true, + .ilut_size = 1024, + .xlut_set = wndwc57e_ilut_set, + .xlut_clr = wndwc57e_ilut_clr, + .csc = base907c_csc, + .csc_set = wndwc57e_csc_set, + .csc_clr = wndwc57e_csc_clr, + .image_set = wndwc67e_image_set, + .image_clr = wndwc37e_image_clr, + .blend_set = wndwc37e_blend_set, + .update = wndwc37e_update, +}; + +int +wndwc67e_new(struct nouveau_drm *drm, enum drm_plane_type type, int index, + s32 oclass, struct nv50_wndw **pwndw) +{ + return wndwc37e_new_(&wndwc67e, drm, type, index, oclass, BIT(index >> 1), pwndw); +} diff --git a/drivers/gpu/drm/nouveau/include/nvif/class.h b/drivers/gpu/drm/nouveau/include/nvif/class.h index 2c79beb41126f..ba2c28ea43d20 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/class.h +++ b/drivers/gpu/drm/nouveau/include/nvif/class.h @@ -88,6 +88,7 @@ #define GP102_DISP /* cl5070.h */ 0x00009870 #define GV100_DISP /* cl5070.h */ 0x0000c370 #define TU102_DISP /* cl5070.h */ 0x0000c570 +#define GA102_DISP /* cl5070.h */ 0x0000c670 #define GV100_DISP_CAPS 0x0000c373 @@ -103,6 +104,7 @@ #define GK104_DISP_CURSOR /* cl507a.h */ 0x0000917a #define GV100_DISP_CURSOR /* cl507a.h */ 0x0000c37a #define TU102_DISP_CURSOR /* cl507a.h */ 0x0000c57a +#define GA102_DISP_CURSOR /* cl507a.h */ 0x0000c67a #define NV50_DISP_OVERLAY /* cl507b.h */ 0x0000507b #define G82_DISP_OVERLAY /* cl507b.h */ 0x0000827b @@ -112,6 +114,7 @@ #define GV100_DISP_WINDOW_IMM_CHANNEL_DMA /* clc37b.h */ 0x0000c37b #define TU102_DISP_WINDOW_IMM_CHANNEL_DMA /* clc37b.h */ 0x0000c57b +#define GA102_DISP_WINDOW_IMM_CHANNEL_DMA /* clc37b.h */ 0x0000c67b #define NV50_DISP_BASE_CHANNEL_DMA /* cl507c.h */ 0x0000507c #define G82_DISP_BASE_CHANNEL_DMA /* cl507c.h */ 0x0000827c @@ -135,6 +138,7 @@ #define GP102_DISP_CORE_CHANNEL_DMA /* cl507d.h */ 0x0000987d #define GV100_DISP_CORE_CHANNEL_DMA /* cl507d.h */ 0x0000c37d #define TU102_DISP_CORE_CHANNEL_DMA /* cl507d.h */ 0x0000c57d +#define GA102_DISP_CORE_CHANNEL_DMA /* cl507d.h */ 0x0000c67d #define NV50_DISP_OVERLAY_CHANNEL_DMA /* cl507e.h */ 0x0000507e #define G82_DISP_OVERLAY_CHANNEL_DMA /* cl507e.h */ 0x0000827e @@ -145,6 +149,7 @@ #define GV100_DISP_WINDOW_CHANNEL_DMA /* clc37e.h */ 0x0000c37e #define TU102_DISP_WINDOW_CHANNEL_DMA /* clc37e.h */ 0x0000c57e +#define GA102_DISP_WINDOW_CHANNEL_DMA /* clc37e.h */ 0x0000c67e #define NV50_TESLA 0x00005097 #define G82_TESLA 0x00008297 diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h index 5a96c942d912f..0f6fa6631a197 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h @@ -37,4 +37,5 @@ int gp100_disp_new(struct nvkm_device *, int, struct nvkm_disp **); int gp102_disp_new(struct nvkm_device *, int, struct nvkm_disp **); int gv100_disp_new(struct nvkm_device *, int, struct nvkm_disp **); int tu102_disp_new(struct nvkm_device *, int, struct nvkm_disp **); +int ga102_disp_new(struct nvkm_device *, int, struct nvkm_disp **); #endif diff --git a/drivers/gpu/drm/nouveau/nvif/disp.c b/drivers/gpu/drm/nouveau/nvif/disp.c index 8d0d30e08f57e..529cb60d5efb0 100644 --- a/drivers/gpu/drm/nouveau/nvif/disp.c +++ b/drivers/gpu/drm/nouveau/nvif/disp.c @@ -35,6 +35,7 @@ nvif_disp_ctor(struct nvif_device *device, const char *name, s32 oclass, struct nvif_disp *disp) { static const struct nvif_mclass disps[] = { + { GA102_DISP, -1 }, { TU102_DISP, -1 }, { GV100_DISP, -1 }, { GP102_DISP, -1 }, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index bea3daf085700..cdcc851e06f9b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2684,6 +2684,7 @@ nv172_chipset = { .mmu = tu102_mmu_new, .pci = gp100_pci_new, .timer = gk20a_timer_new, + .disp = ga102_disp_new, .dma = gv100_dma_new, }; @@ -2702,6 +2703,7 @@ nv174_chipset = { .mmu = tu102_mmu_new, .pci = gp100_pci_new, .timer = gk20a_timer_new, + .disp = ga102_disp_new, .dma = gv100_dma_new, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild index cf075311cdd27..b03f043efe261 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild @@ -17,6 +17,7 @@ nvkm-y += nvkm/engine/disp/gp100.o nvkm-y += nvkm/engine/disp/gp102.o nvkm-y += nvkm/engine/disp/gv100.o nvkm-y += nvkm/engine/disp/tu102.o +nvkm-y += nvkm/engine/disp/ga102.o nvkm-y += nvkm/engine/disp/vga.o nvkm-y += nvkm/engine/disp/head.o @@ -42,6 +43,7 @@ nvkm-y += nvkm/engine/disp/sorgm200.o nvkm-y += nvkm/engine/disp/sorgp100.o nvkm-y += nvkm/engine/disp/sorgv100.o nvkm-y += nvkm/engine/disp/sortu102.o +nvkm-y += nvkm/engine/disp/sorga102.o nvkm-y += nvkm/engine/disp/outp.o nvkm-y += nvkm/engine/disp/dp.o @@ -75,6 +77,7 @@ nvkm-y += nvkm/engine/disp/rootgp100.o nvkm-y += nvkm/engine/disp/rootgp102.o nvkm-y += nvkm/engine/disp/rootgv100.o nvkm-y += nvkm/engine/disp/roottu102.o +nvkm-y += nvkm/engine/disp/rootga102.o nvkm-y += nvkm/engine/disp/capsgv100.o diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c index 3800aeb507d01..55fbfe28c6dc1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c @@ -33,6 +33,12 @@ #include +/* IED scripts are no longer used by UEFI/RM from Ampere, but have been updated for + * the x86 option ROM. However, the relevant VBIOS table versions weren't modified, + * so we're unable to detect this in a nice way. + */ +#define AMPERE_IED_HACK(disp) ((disp)->engine.subdev.device->card_type >= GA100) + struct lt_state { struct nvkm_dp *dp; u8 stat[6]; @@ -238,6 +244,19 @@ nvkm_dp_train_links(struct nvkm_dp *dp) dp->dpcd[DPCD_RC02] &= ~DPCD_RC02_TPS3_SUPPORTED; lt.pc2 = dp->dpcd[DPCD_RC02] & DPCD_RC02_TPS3_SUPPORTED; + if (AMPERE_IED_HACK(disp) && (lnkcmp = lt.dp->info.script[0])) { + /* Execute BeforeLinkTraining script from DP Info table. */ + while (ior->dp.bw < nvbios_rd08(bios, lnkcmp)) + lnkcmp += 3; + lnkcmp = nvbios_rd16(bios, lnkcmp + 1); + + nvbios_init(&dp->outp.disp->engine.subdev, lnkcmp, + init.outp = &dp->outp.info; + init.or = ior->id; + init.link = ior->asy.link; + ); + } + /* Set desired link configuration on the source. */ if ((lnkcmp = lt.dp->info.lnkcmp)) { if (dp->version < 0x30) { @@ -316,12 +335,14 @@ nvkm_dp_train_init(struct nvkm_dp *dp) ); } - /* Execute BeforeLinkTraining script from DP Info table. */ - nvbios_init(&dp->outp.disp->engine.subdev, dp->info.script[0], - init.outp = &dp->outp.info; - init.or = dp->outp.ior->id; - init.link = dp->outp.ior->asy.link; - ); + if (!AMPERE_IED_HACK(dp->outp.disp)) { + /* Execute BeforeLinkTraining script from DP Info table. */ + nvbios_init(&dp->outp.disp->engine.subdev, dp->info.script[0], + init.outp = &dp->outp.info; + init.or = dp->outp.ior->id; + init.link = dp->outp.ior->asy.link; + ); + } } static const struct dp_rates { diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ga102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ga102.c new file mode 100644 index 0000000000000..aa2e5645fe365 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ga102.c @@ -0,0 +1,46 @@ +/* + * Copyright 2021 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "nv50.h" +#include "head.h" +#include "ior.h" +#include "channv50.h" +#include "rootnv50.h" + +static const struct nv50_disp_func +ga102_disp = { + .init = tu102_disp_init, + .fini = gv100_disp_fini, + .intr = gv100_disp_intr, + .uevent = &gv100_disp_chan_uevent, + .super = gv100_disp_super, + .root = &ga102_disp_root_oclass, + .wndw = { .cnt = gv100_disp_wndw_cnt }, + .head = { .cnt = gv100_head_cnt, .new = gv100_head_new }, + .sor = { .cnt = gv100_sor_cnt, .new = ga102_sor_new }, + .ramht_size = 0x2000, +}; + +int +ga102_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp) +{ + return nv50_disp_new_(&ga102_disp, device, index, pdisp); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h index 09f3038eff26f..9f0bb7c6b0100 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h @@ -150,6 +150,8 @@ void gv100_sor_dp_audio(struct nvkm_ior *, int, bool); void gv100_sor_dp_audio_sym(struct nvkm_ior *, int, u16, u32); void gv100_sor_dp_watermark(struct nvkm_ior *, int, u8); +void tu102_sor_dp_vcpi(struct nvkm_ior *, int, u8, u8, u16, u16); + void g84_hdmi_ctrl(struct nvkm_ior *, int, bool, u8, u8, u8 *, u8 , u8 *, u8); void gt215_hdmi_ctrl(struct nvkm_ior *, int, bool, u8, u8, u8 *, u8 , u8 *, u8); void gf119_hdmi_ctrl(struct nvkm_ior *, int, bool, u8, u8, u8 *, u8 , u8 *, u8); @@ -207,4 +209,6 @@ int gv100_sor_cnt(struct nvkm_disp *, unsigned long *); int gv100_sor_new(struct nvkm_disp *, int); int tu102_sor_new(struct nvkm_disp *, int); + +int ga102_sor_new(struct nvkm_disp *, int); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h index a677161c7f3a6..db31b37752a27 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h @@ -86,6 +86,8 @@ void gv100_disp_intr(struct nv50_disp *); void gv100_disp_super(struct work_struct *); int gv100_disp_wndw_cnt(struct nvkm_disp *, unsigned long *); +int tu102_disp_init(struct nv50_disp *); + void nv50_disp_dptmds_war_2(struct nv50_disp *, struct dcb_output *); void nv50_disp_dptmds_war_3(struct nv50_disp *, struct dcb_output *); void nv50_disp_update_sppll1(struct nv50_disp *); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootga102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootga102.c new file mode 100644 index 0000000000000..9af07c3cf9fc0 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootga102.c @@ -0,0 +1,52 @@ +/* + * Copyright 2021 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "rootnv50.h" +#include "channv50.h" + +#include + +static const struct nv50_disp_root_func +ga102_disp_root = { + .user = { + {{-1,-1,GV100_DISP_CAPS }, gv100_disp_caps_new }, + {{0,0,GA102_DISP_CURSOR }, gv100_disp_curs_new }, + {{0,0,GA102_DISP_WINDOW_IMM_CHANNEL_DMA}, gv100_disp_wimm_new }, + {{0,0,GA102_DISP_CORE_CHANNEL_DMA }, gv100_disp_core_new }, + {{0,0,GA102_DISP_WINDOW_CHANNEL_DMA }, gv100_disp_wndw_new }, + {} + }, +}; + +static int +ga102_disp_root_new(struct nvkm_disp *disp, const struct nvkm_oclass *oclass, + void *data, u32 size, struct nvkm_object **pobject) +{ + return nv50_disp_root_new_(&ga102_disp_root, disp, oclass, data, size, pobject); +} + +const struct nvkm_disp_oclass +ga102_disp_root_oclass = { + .base.oclass = GA102_DISP, + .base.minver = -1, + .base.maxver = -1, + .ctor = ga102_disp_root_new, +}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h index 7070f5408d92b..27bb170d02930 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h @@ -41,4 +41,5 @@ extern const struct nvkm_disp_oclass gp100_disp_root_oclass; extern const struct nvkm_disp_oclass gp102_disp_root_oclass; extern const struct nvkm_disp_oclass gv100_disp_root_oclass; extern const struct nvkm_disp_oclass tu102_disp_root_oclass; +extern const struct nvkm_disp_oclass ga102_disp_root_oclass; #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorga102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorga102.c new file mode 100644 index 0000000000000..033827de91162 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorga102.c @@ -0,0 +1,140 @@ +/* + * Copyright 2021 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ior.h" + +#include + +static int +ga102_sor_dp_links(struct nvkm_ior *sor, struct nvkm_i2c_aux *aux) +{ + struct nvkm_device *device = sor->disp->engine.subdev.device; + const u32 soff = nv50_ior_base(sor); + const u32 loff = nv50_sor_link(sor); + u32 dpctrl = 0x00000000; + u32 clksor = 0x00000000; + + switch (sor->dp.bw) { + case 0x06: clksor |= 0x00000000; break; + case 0x0a: clksor |= 0x00040000; break; + case 0x14: clksor |= 0x00080000; break; + case 0x1e: clksor |= 0x000c0000; break; + default: + WARN_ON(1); + return -EINVAL; + } + + dpctrl |= ((1 << sor->dp.nr) - 1) << 16; + if (sor->dp.mst) + dpctrl |= 0x40000000; + if (sor->dp.ef) + dpctrl |= 0x00004000; + + nvkm_mask(device, 0x612300 + soff, 0x007c0000, clksor); + + /*XXX*/ + nvkm_msec(device, 40, NVKM_DELAY); + nvkm_mask(device, 0x612300 + soff, 0x00030000, 0x00010000); + nvkm_mask(device, 0x61c10c + loff, 0x00000003, 0x00000001); + + nvkm_mask(device, 0x61c10c + loff, 0x401f4000, dpctrl); + return 0; +} + +static void +ga102_sor_clock(struct nvkm_ior *sor) +{ + struct nvkm_device *device = sor->disp->engine.subdev.device; + u32 div2 = 0; + if (sor->asy.proto == TMDS) { + if (sor->tmds.high_speed) + div2 = 1; + } + nvkm_wr32(device, 0x00ec08 + (sor->id * 0x10), 0x00000000); + nvkm_wr32(device, 0x00ec04 + (sor->id * 0x10), div2); +} + +static const struct nvkm_ior_func +ga102_sor_hda = { + .route = { + .get = gm200_sor_route_get, + .set = gm200_sor_route_set, + }, + .state = gv100_sor_state, + .power = nv50_sor_power, + .clock = ga102_sor_clock, + .hdmi = { + .ctrl = gv100_hdmi_ctrl, + .scdc = gm200_hdmi_scdc, + }, + .dp = { + .lanes = { 0, 1, 2, 3 }, + .links = ga102_sor_dp_links, + .power = g94_sor_dp_power, + .pattern = gm107_sor_dp_pattern, + .drive = gm200_sor_dp_drive, + .vcpi = tu102_sor_dp_vcpi, + .audio = gv100_sor_dp_audio, + .audio_sym = gv100_sor_dp_audio_sym, + .watermark = gv100_sor_dp_watermark, + }, + .hda = { + .hpd = gf119_hda_hpd, + .eld = gf119_hda_eld, + .device_entry = gv100_hda_device_entry, + }, +}; + +static const struct nvkm_ior_func +ga102_sor = { + .route = { + .get = gm200_sor_route_get, + .set = gm200_sor_route_set, + }, + .state = gv100_sor_state, + .power = nv50_sor_power, + .clock = ga102_sor_clock, + .hdmi = { + .ctrl = gv100_hdmi_ctrl, + .scdc = gm200_hdmi_scdc, + }, + .dp = { + .lanes = { 0, 1, 2, 3 }, + .links = ga102_sor_dp_links, + .power = g94_sor_dp_power, + .pattern = gm107_sor_dp_pattern, + .drive = gm200_sor_dp_drive, + .vcpi = tu102_sor_dp_vcpi, + .audio = gv100_sor_dp_audio, + .audio_sym = gv100_sor_dp_audio_sym, + .watermark = gv100_sor_dp_watermark, + }, +}; + +int +ga102_sor_new(struct nvkm_disp *disp, int id) +{ + struct nvkm_device *device = disp->engine.subdev.device; + u32 hda = nvkm_rd32(device, 0x08a15c); + if (hda & BIT(id)) + return nvkm_ior_new_(&ga102_sor_hda, disp, SOR, id); + return nvkm_ior_new_(&ga102_sor, disp, SOR, id); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sortu102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sortu102.c index 59865a934c4b9..0cf9e8752d258 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sortu102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sortu102.c @@ -23,7 +23,7 @@ #include -static void +void tu102_sor_dp_vcpi(struct nvkm_ior *sor, int head, u8 slot, u8 slot_nr, u16 pbn, u16 aligned) { diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/tu102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/tu102.c index 883ae4151ff88..4c85d1d4fbd42 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/tu102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/tu102.c @@ -28,7 +28,7 @@ #include #include -static int +int tu102_disp_init(struct nv50_disp *disp) { struct nvkm_device *device = disp->base.engine.subdev.device; -- GitLab From dec822771b0174a01e72d7641d08e44461b6a82f Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 14 Jan 2021 10:46:57 +0800 Subject: [PATCH 1830/1894] riscv: stacktrace: Move register keyword to beginning of declaration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using global sp_in_global directly to fix the following warning, arch/riscv/kernel/stacktrace.c:31:3: warning: ‘register’ is not at beginning of declaration [-Wold-style-declaration] 31 | const register unsigned long current_sp = sp_in_global; | ^~~~~ Signed-off-by: Kefeng Wang Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/stacktrace.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 48b870a685b30..df5d2da7c40be 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -14,7 +14,7 @@ #include -register unsigned long sp_in_global __asm__("sp"); +register const unsigned long sp_in_global __asm__("sp"); #ifdef CONFIG_FRAME_POINTER @@ -28,9 +28,8 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, sp = user_stack_pointer(regs); pc = instruction_pointer(regs); } else if (task == NULL || task == current) { - const register unsigned long current_sp = sp_in_global; fp = (unsigned long)__builtin_frame_address(0); - sp = current_sp; + sp = sp_in_global; pc = (unsigned long)walk_stackframe; } else { /* task blocked in __switch_to */ -- GitLab From dca5244d2f5b94f1809f0c02a549edf41ccd5493 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 12 Jan 2021 22:48:32 +0000 Subject: [PATCH 1831/1894] compiler.h: Raise minimum version of GCC to 5.1 for arm64 GCC versions >= 4.9 and < 5.1 have been shown to emit memory references beyond the stack pointer, resulting in memory corruption if an interrupt is taken after the stack pointer has been adjusted but before the reference has been executed. This leads to subtle, infrequent data corruption such as the EXT4 problems reported by Russell King at the link below. Life is too short for buggy compilers, so raise the minimum GCC version required by arm64 to 5.1. Reported-by: Russell King Suggested-by: Arnd Bergmann Signed-off-by: Will Deacon Tested-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Reviewed-by: Nathan Chancellor Acked-by: Linus Torvalds Cc: Cc: Theodore Ts'o Cc: Florian Weimer Cc: Peter Zijlstra Cc: Nick Desaulniers Link: https://lore.kernel.org/r/20210105154726.GD1551@shell.armlinux.org.uk Link: https://lore.kernel.org/r/20210112224832.10980-1-will@kernel.org Signed-off-by: Catalin Marinas --- include/linux/compiler-gcc.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 74c6c0486eed7..555ab0fddbef7 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -13,6 +13,12 @@ /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 */ #if GCC_VERSION < 40900 # error Sorry, your version of GCC is too old - please use 4.9 or newer. +#elif defined(CONFIG_ARM64) && GCC_VERSION < 50100 +/* + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63293 + * https://lore.kernel.org/r/20210107111841.GN1551@shell.armlinux.org.uk + */ +# error Sorry, your version of GCC is too old - please use 5.1 or newer. #endif /* -- GitLab From b6d8878d24e39f213df0f3ea7abebd15edc7be21 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 14 Jan 2021 12:48:12 +0000 Subject: [PATCH 1832/1894] arm64: syscall: include prototype for EL0 SVC functions The kbuild test robot reports that when building with W=1, GCC will warn for a couple of missing prototypes in syscall.c: | arch/arm64/kernel/syscall.c:157:6: warning: no previous prototype for 'do_el0_svc' [-Wmissing-prototypes] | 157 | void do_el0_svc(struct pt_regs *regs) | | ^~~~~~~~~~ | arch/arm64/kernel/syscall.c:164:6: warning: no previous prototype for 'do_el0_svc_compat' [-Wmissing-prototypes] | 164 | void do_el0_svc_compat(struct pt_regs *regs) | | ^~~~~~~~~~~~~~~~~ While this isn't a functional problem, as a general policy we should include the prototype for functions wherever possible to catch any accidental divergence between the prototype and implementation. Here we can easily include , so let's do so. While there are a number of warnings elsewhere and some warnings enabled under W=1 are of questionable benefit, this change helps to make the code more robust as it evolved and reduces the noise somewhat, so it seems worthwhile. Signed-off-by: Mark Rutland Reported-by: kernel test robot Cc: Will Deacon Link: https://lore.kernel.org/r/202101141046.n8iPO3mw-lkp@intel.com Link: https://lore.kernel.org/r/20210114124812.17754-1-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/syscall.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index 0bfac95fe4643..c2877c332f2dc 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include -- GitLab From 3a57a643a851dbb1c4a1819394ca009e3bfa4813 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 8 Jan 2021 18:31:44 +0000 Subject: [PATCH 1833/1894] arm64: selftests: Fix spelling of 'Mismatch' The SVE and FPSIMD stress tests have a spelling mistake in the output, fix it. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20210108183144.673-1-broonie@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/fp/fpsimd-test.S | 2 +- tools/testing/selftests/arm64/fp/sve-test.S | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/arm64/fp/fpsimd-test.S b/tools/testing/selftests/arm64/fp/fpsimd-test.S index 1c5556bdd11d1..0dbd594c2747c 100644 --- a/tools/testing/selftests/arm64/fp/fpsimd-test.S +++ b/tools/testing/selftests/arm64/fp/fpsimd-test.S @@ -457,7 +457,7 @@ function barf mov x11, x1 // actual data mov x12, x2 // data size - puts "Mistatch: PID=" + puts "Mismatch: PID=" mov x0, x20 bl putdec puts ", iteration=" diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S index f95074c9b48b7..9210691aa9985 100644 --- a/tools/testing/selftests/arm64/fp/sve-test.S +++ b/tools/testing/selftests/arm64/fp/sve-test.S @@ -625,7 +625,7 @@ function barf mov x11, x1 // actual data mov x12, x2 // data size - puts "Mistatch: PID=" + puts "Mismatch: PID=" mov x0, x20 bl putdec puts ", iteration=" -- GitLab From f010505b78a4fa8d5b6480752566e7313fb5ca6e Mon Sep 17 00:00:00 2001 From: Marcelo Diop-Gonzalez Date: Fri, 15 Jan 2021 11:54:40 -0500 Subject: [PATCH 1834/1894] io_uring: flush timeouts that should already have expired Right now io_flush_timeouts() checks if the current number of events is equal to ->timeout.target_seq, but this will miss some timeouts if there have been more than 1 event added since the last time they were flushed (possible in io_submit_flush_completions(), for example). Fix it by recording the last sequence at which timeouts were flushed so that the number of events seen can be compared to the number of events needed without overflow. Signed-off-by: Marcelo Diop-Gonzalez Reviewed-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 372be9caf3402..06cc79d39586d 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -354,6 +354,7 @@ struct io_ring_ctx { unsigned cq_entries; unsigned cq_mask; atomic_t cq_timeouts; + unsigned cq_last_tm_flush; unsigned long cq_check_overflow; struct wait_queue_head cq_wait; struct fasync_struct *cq_fasync; @@ -1639,19 +1640,38 @@ static void __io_queue_deferred(struct io_ring_ctx *ctx) static void io_flush_timeouts(struct io_ring_ctx *ctx) { - while (!list_empty(&ctx->timeout_list)) { + u32 seq; + + if (list_empty(&ctx->timeout_list)) + return; + + seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts); + + do { + u32 events_needed, events_got; struct io_kiocb *req = list_first_entry(&ctx->timeout_list, struct io_kiocb, timeout.list); if (io_is_timeout_noseq(req)) break; - if (req->timeout.target_seq != ctx->cached_cq_tail - - atomic_read(&ctx->cq_timeouts)) + + /* + * Since seq can easily wrap around over time, subtract + * the last seq at which timeouts were flushed before comparing. + * Assuming not more than 2^31-1 events have happened since, + * these subtractions won't have wrapped, so we can check if + * target is in [last_seq, current_seq] by comparing the two. + */ + events_needed = req->timeout.target_seq - ctx->cq_last_tm_flush; + events_got = seq - ctx->cq_last_tm_flush; + if (events_got < events_needed) break; list_del_init(&req->timeout.list); io_kill_timeout(req); - } + } while (!list_empty(&ctx->timeout_list)); + + ctx->cq_last_tm_flush = seq; } static void io_commit_cqring(struct io_ring_ctx *ctx) @@ -5837,6 +5857,12 @@ static int io_timeout(struct io_kiocb *req) tail = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts); req->timeout.target_seq = tail + off; + /* Update the last seq here in case io_flush_timeouts() hasn't. + * This is safe because ->completion_lock is held, and submissions + * and completions are never mixed in the same ->completion_lock section. + */ + ctx->cq_last_tm_flush = tail; + /* * Insertion sort, ensuring the first entry in the list is always * the one we need first. -- GitLab From 301f0203e04293c13372c032198665bd75adf81b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 29 Dec 2020 15:41:19 -0300 Subject: [PATCH 1835/1894] perf bpf examples: Fix bpf.h header include directive in 5sec.c example It was looking at bpf/bpf.h, which caused this problem: # perf trace -e tools/perf/examples/bpf/5sec.c /home/acme/git/perf/tools/perf/examples/bpf/5sec.c:42:10: fatal error: 'bpf/bpf.h' file not found #include ^~~~~~~~~~~ 1 error generated. ERROR: unable to compile tools/perf/examples/bpf/5sec.c Hint: Check error message shown above. Hint: You can also pre-compile it into .o using: clang -target bpf -O2 -c tools/perf/examples/bpf/5sec.c with proper -I and -D options. event syntax error: 'tools/perf/examples/bpf/5sec.c' \___ Failed to load tools/perf/examples/bpf/5sec.c from source: Error when compiling BPF scriptlet # Change that to plain bpf.h, to make it work again: # perf trace -e tools/perf/examples/bpf/5sec.c sleep 5s 0.000 perf_bpf_probe:hrtimer_nanosleep(__probe_ip: -1776891872, rqtp: 5000000000) # perf trace -e tools/perf/examples/bpf/5sec.c/max-stack=16/ sleep 5s 0.000 perf_bpf_probe:hrtimer_nanosleep(__probe_ip: -1776891872, rqtp: 5000000000) hrtimer_nanosleep ([kernel.kallsyms]) common_nsleep ([kernel.kallsyms]) __x64_sys_clock_nanosleep ([kernel.kallsyms]) do_syscall_64 ([kernel.kallsyms]) entry_SYSCALL_64_after_hwframe ([kernel.kallsyms]) __clock_nanosleep_2 (/usr/lib64/libc-2.32.so) # perf trace -e tools/perf/examples/bpf/5sec.c sleep 4s # Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/examples/bpf/5sec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/examples/bpf/5sec.c b/tools/perf/examples/bpf/5sec.c index 65c4ff6892d96..e6b6181c6dc63 100644 --- a/tools/perf/examples/bpf/5sec.c +++ b/tools/perf/examples/bpf/5sec.c @@ -39,7 +39,7 @@ Copyright (C) 2018 Red Hat, Inc., Arnaldo Carvalho de Melo */ -#include +#include #define NSEC_PER_SEC 1000000000L -- GitLab From 38c53947a7dcb6d295769830c9085b0409921ec9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 14 Jan 2021 12:26:08 -0300 Subject: [PATCH 1836/1894] tools headers UAPI: Sync kvm.h headers with the kernel sources To pick the changes in: 647daca25d24fb6e ("KVM: SVM: Add support for booting APs in an SEV-ES guest") That don't cause any tooling change, just silences this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/kvm.h' differs from latest version at 'include/uapi/linux/kvm.h' diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paolo Bonzini Cc: Tom Lendacky Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/kvm.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 886802b8ffba3..374c67875cdbd 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -251,6 +251,7 @@ struct kvm_hyperv_exit { #define KVM_EXIT_X86_RDMSR 29 #define KVM_EXIT_X86_WRMSR 30 #define KVM_EXIT_DIRTY_RING_FULL 31 +#define KVM_EXIT_AP_RESET_HOLD 32 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -573,6 +574,7 @@ struct kvm_vapic_addr { #define KVM_MP_STATE_CHECK_STOP 6 #define KVM_MP_STATE_OPERATING 7 #define KVM_MP_STATE_LOAD 8 +#define KVM_MP_STATE_AP_RESET_HOLD 9 struct kvm_mp_state { __u32 mp_state; -- GitLab From addbdff24293ef772a1b8e5d127b570e70f08cdc Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 14 Jan 2021 12:35:27 -0300 Subject: [PATCH 1837/1894] tools headers: Syncronize linux/build_bug.h with the kernel sources To pick up the changes in: 3a176b94609a18f5 ("Revert "kbuild: avoid static_assert for genksyms"") And silence this perf build warning: Warning: Kernel ABI header at 'tools/include/linux/build_bug.h' differs from latest version at 'include/linux/build_bug.h' diff -u tools/include/linux/build_bug.h include/linux/build_bug.h Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Masahiro Yamada Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/build_bug.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tools/include/linux/build_bug.h b/tools/include/linux/build_bug.h index ce365d2127682..cc7070c7439ba 100644 --- a/tools/include/linux/build_bug.h +++ b/tools/include/linux/build_bug.h @@ -79,9 +79,4 @@ #define __static_assert(expr, msg, ...) _Static_assert(expr, msg) #endif // static_assert -#ifdef __GENKSYMS__ -/* genksyms gets confused by _Static_assert */ -#define _Static_assert(expr, ...) -#endif - #endif /* _LINUX_BUILD_BUG_H */ -- GitLab From a042a82ddbb3434f523c0671f5301d1fe796b4eb Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 14 Jan 2021 14:06:09 +0900 Subject: [PATCH 1838/1894] perf test: Fix shadow stat test for non-bash shells It was using some bash-specific features and failed to parse when running with a different shell like below: root@kbl-ppc:~/kbl-ws/perf-dev/lck-9077/acme.tmp/tools/perf# ./perf test 83 -vv 83: perf stat metrics (shadow stat) test : --- start --- test child forked, pid 3922 ./tests/shell/stat+shadow_stat.sh: 19: ./tests/shell/stat+shadow_stat.sh: [[: not found ./tests/shell/stat+shadow_stat.sh: 24: ./tests/shell/stat+shadow_stat.sh: [[: not found ./tests/shell/stat+shadow_stat.sh: 30: ./tests/shell/stat+shadow_stat.sh: [[: not found (standard_in) 2: syntax error ./tests/shell/stat+shadow_stat.sh: 36: ./tests/shell/stat+shadow_stat.sh: [[: not found ./tests/shell/stat+shadow_stat.sh: 19: ./tests/shell/stat+shadow_stat.sh: [[: not found ./tests/shell/stat+shadow_stat.sh: 24: ./tests/shell/stat+shadow_stat.sh: [[: not found ./tests/shell/stat+shadow_stat.sh: 30: ./tests/shell/stat+shadow_stat.sh: [[: not found (standard_in) 2: syntax error ./tests/shell/stat+shadow_stat.sh: 36: ./tests/shell/stat+shadow_stat.sh: [[: not found ./tests/shell/stat+shadow_stat.sh: 45: ./tests/shell/stat+shadow_stat.sh: declare: not found test child finished with -1 ---- end ---- perf stat metrics (shadow stat) test: FAILED! Reported-by: Jin Yao Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Laight Cc: Ian Rogers Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210114050609.1258820-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/stat+shadow_stat.sh | 30 ++++++++++------------ 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/tools/perf/tests/shell/stat+shadow_stat.sh b/tools/perf/tests/shell/stat+shadow_stat.sh index 249dfe48cf6ad..ebebd3596cf99 100755 --- a/tools/perf/tests/shell/stat+shadow_stat.sh +++ b/tools/perf/tests/shell/stat+shadow_stat.sh @@ -9,31 +9,29 @@ perf stat -a true > /dev/null 2>&1 || exit 2 test_global_aggr() { - local cyc - perf stat -a --no-big-num -e cycles,instructions sleep 1 2>&1 | \ grep -e cycles -e instructions | \ while read num evt hash ipc rest do # skip not counted events - if [[ $num == "&1 | \ grep ^CPU | \ while read cpu num evt hash ipc rest do # skip not counted events - if [[ $num == " Date: Wed, 30 Dec 2020 11:38:27 +0800 Subject: [PATCH 1839/1894] ext4: use IS_ERR instead of IS_ERR_OR_NULL and set inode null when IS_ERR 1: ext4_iget/ext4_find_extent never returns NULL, use IS_ERR instead of IS_ERR_OR_NULL to fix this. 2: ext4_fc_replay_inode should set the inode to NULL when IS_ERR. and go to call iput properly. Fixes: 8016e29f4362 ("ext4: fast commit recovery path") Signed-off-by: Yi Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20201230033827.3996064-1-yili@winhong.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/fast_commit.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 4fcc21c25e793..6b5489273c859 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -1318,14 +1318,14 @@ static int ext4_fc_replay_unlink(struct super_block *sb, struct ext4_fc_tl *tl) entry.len = darg.dname_len; inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL); - if (IS_ERR_OR_NULL(inode)) { + if (IS_ERR(inode)) { jbd_debug(1, "Inode %d not found", darg.ino); return 0; } old_parent = ext4_iget(sb, darg.parent_ino, EXT4_IGET_NORMAL); - if (IS_ERR_OR_NULL(old_parent)) { + if (IS_ERR(old_parent)) { jbd_debug(1, "Dir with inode %d not found", darg.parent_ino); iput(inode); return 0; @@ -1410,7 +1410,7 @@ static int ext4_fc_replay_link(struct super_block *sb, struct ext4_fc_tl *tl) darg.parent_ino, darg.dname_len); inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL); - if (IS_ERR_OR_NULL(inode)) { + if (IS_ERR(inode)) { jbd_debug(1, "Inode not found."); return 0; } @@ -1466,10 +1466,11 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl) trace_ext4_fc_replay(sb, tag, ino, 0, 0); inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL); - if (!IS_ERR_OR_NULL(inode)) { + if (!IS_ERR(inode)) { ext4_ext_clear_bb(inode); iput(inode); } + inode = NULL; ext4_fc_record_modified_inode(sb, ino); @@ -1512,7 +1513,7 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl) /* Given that we just wrote the inode on disk, this SHOULD succeed. */ inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL); - if (IS_ERR_OR_NULL(inode)) { + if (IS_ERR(inode)) { jbd_debug(1, "Inode not found."); return -EFSCORRUPTED; } @@ -1564,7 +1565,7 @@ static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl) goto out; inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL); - if (IS_ERR_OR_NULL(inode)) { + if (IS_ERR(inode)) { jbd_debug(1, "inode %d not found.", darg.ino); inode = NULL; ret = -EINVAL; @@ -1577,7 +1578,7 @@ static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl) * dot and dot dot dirents are setup properly. */ dir = ext4_iget(sb, darg.parent_ino, EXT4_IGET_NORMAL); - if (IS_ERR_OR_NULL(dir)) { + if (IS_ERR(dir)) { jbd_debug(1, "Dir %d not found.", darg.ino); goto out; } @@ -1653,7 +1654,7 @@ static int ext4_fc_replay_add_range(struct super_block *sb, inode = ext4_iget(sb, le32_to_cpu(fc_add_ex->fc_ino), EXT4_IGET_NORMAL); - if (IS_ERR_OR_NULL(inode)) { + if (IS_ERR(inode)) { jbd_debug(1, "Inode not found."); return 0; } @@ -1777,7 +1778,7 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl) le32_to_cpu(lrange->fc_ino), cur, remaining); inode = ext4_iget(sb, le32_to_cpu(lrange->fc_ino), EXT4_IGET_NORMAL); - if (IS_ERR_OR_NULL(inode)) { + if (IS_ERR(inode)) { jbd_debug(1, "Inode %d not found", le32_to_cpu(lrange->fc_ino)); return 0; } @@ -1832,7 +1833,7 @@ static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb) for (i = 0; i < state->fc_modified_inodes_used; i++) { inode = ext4_iget(sb, state->fc_modified_inodes[i], EXT4_IGET_NORMAL); - if (IS_ERR_OR_NULL(inode)) { + if (IS_ERR(inode)) { jbd_debug(1, "Inode %d not found.", state->fc_modified_inodes[i]); continue; @@ -1849,7 +1850,7 @@ static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb) if (ret > 0) { path = ext4_find_extent(inode, map.m_lblk, NULL, 0); - if (!IS_ERR_OR_NULL(path)) { + if (!IS_ERR(path)) { for (j = 0; j < path->p_depth; j++) ext4_mb_mark_bb(inode->i_sb, path[j].p_block, 1, 1); -- GitLab From 31e203e09f036f48e7c567c2d32df0196bbd303f Mon Sep 17 00:00:00 2001 From: Daejun Park Date: Wed, 30 Dec 2020 18:48:51 +0900 Subject: [PATCH 1840/1894] ext4: fix wrong list_splice in ext4_fc_cleanup After full/fast commit, entries in staging queue are promoted to main queue. In ext4_fs_cleanup function, it splice to staging queue to staging queue. Fixes: aa75f4d3daaeb ("ext4: main fast-commit commit path") Signed-off-by: Daejun Park Reviewed-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201230094851epcms2p6eeead8cc984379b37b2efd21af90fd1a@epcms2p6 Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/fast_commit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 6b5489273c859..ec5316fbcd1d4 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -1268,7 +1268,7 @@ static void ext4_fc_cleanup(journal_t *journal, int full) list_splice_init(&sbi->s_fc_dentry_q[FC_Q_STAGING], &sbi->s_fc_dentry_q[FC_Q_MAIN]); list_splice_init(&sbi->s_fc_q[FC_Q_STAGING], - &sbi->s_fc_q[FC_Q_STAGING]); + &sbi->s_fc_q[FC_Q_MAIN]); ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING); ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); -- GitLab From 6b4b8e6b4ad8553660421d6360678b3811d5deb9 Mon Sep 17 00:00:00 2001 From: yangerkun Date: Tue, 5 Jan 2021 14:28:57 +0800 Subject: [PATCH 1841/1894] ext4: fix bug for rename with RENAME_WHITEOUT We got a "deleted inode referenced" warning cross our fsstress test. The bug can be reproduced easily with following steps: cd /dev/shm mkdir test/ fallocate -l 128M img mkfs.ext4 -b 1024 img mount img test/ dd if=/dev/zero of=test/foo bs=1M count=128 mkdir test/dir/ && cd test/dir/ for ((i=0;i<1000;i++)); do touch file$i; done # consume all block cd ~ && renameat2(AT_FDCWD, /dev/shm/test/dir/file1, AT_FDCWD, /dev/shm/test/dir/dst_file, RENAME_WHITEOUT) # ext4_add_entry in ext4_rename will return ENOSPC!! cd /dev/shm/ && umount test/ && mount img test/ && ls -li test/dir/file1 We will get the output: "ls: cannot access 'test/dir/file1': Structure needs cleaning" and the dmesg show: "EXT4-fs error (device loop0): ext4_lookup:1626: inode #2049: comm ls: deleted inode referenced: 139" ext4_rename will create a special inode for whiteout and use this 'ino' to replace the source file's dir entry 'ino'. Once error happens latter(the error above was the ENOSPC return from ext4_add_entry in ext4_rename since all space has been consumed), the cleanup do drop the nlink for whiteout, but forget to restore 'ino' with source file. This will trigger the bug describle as above. Signed-off-by: yangerkun Reviewed-by: Jan Kara Cc: stable@vger.kernel.org Fixes: cd808deced43 ("ext4: support RENAME_WHITEOUT") Link: https://lore.kernel.org/r/20210105062857.3566-1-yangerkun@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/namei.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index a3b28ef2455a8..fa625a247e9a3 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -3601,9 +3601,6 @@ static int ext4_setent(handle_t *handle, struct ext4_renament *ent, return retval2; } } - brelse(ent->bh); - ent->bh = NULL; - return retval; } @@ -3802,6 +3799,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, } } + old_file_type = old.de->file_type; if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir)) ext4_handle_sync(handle); @@ -3829,7 +3827,6 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, force_reread = (new.dir->i_ino == old.dir->i_ino && ext4_test_inode_flag(new.dir, EXT4_INODE_INLINE_DATA)); - old_file_type = old.de->file_type; if (whiteout) { /* * Do this before adding a new entry, so the old entry is sure @@ -3927,15 +3924,19 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, retval = 0; end_rename: - brelse(old.dir_bh); - brelse(old.bh); - brelse(new.bh); if (whiteout) { - if (retval) + if (retval) { + ext4_setent(handle, &old, + old.inode->i_ino, old_file_type); drop_nlink(whiteout); + } unlock_new_inode(whiteout); iput(whiteout); + } + brelse(old.dir_bh); + brelse(old.bh); + brelse(new.bh); if (handle) ext4_journal_stop(handle); return retval; -- GitLab From e9f53353e166a67dfe4f8295100f8ac39d6cf10b Mon Sep 17 00:00:00 2001 From: Daejun Park Date: Wed, 6 Jan 2021 10:32:42 +0900 Subject: [PATCH 1842/1894] ext4: remove expensive flush on fast commit In the fast commit, it adds REQ_FUA and REQ_PREFLUSH on each fast commit block when barrier is enabled. However, in recovery phase, ext4 compares CRC value in the tail. So it is sufficient to add REQ_FUA and REQ_PREFLUSH on the block that has tail. Signed-off-by: Daejun Park Reviewed-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20210106013242epcms2p5b6b4ed8ca86f29456fdf56aa580e74b4@epcms2p5 Signed-off-by: Theodore Ts'o --- fs/ext4/fast_commit.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index ec5316fbcd1d4..0a14a7c87bf82 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -604,13 +604,13 @@ void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t star trace_ext4_fc_track_range(inode, start, end, ret); } -static void ext4_fc_submit_bh(struct super_block *sb) +static void ext4_fc_submit_bh(struct super_block *sb, bool is_tail) { int write_flags = REQ_SYNC; struct buffer_head *bh = EXT4_SB(sb)->s_fc_bh; - /* TODO: REQ_FUA | REQ_PREFLUSH is unnecessarily expensive. */ - if (test_opt(sb, BARRIER)) + /* Add REQ_FUA | REQ_PREFLUSH only its tail */ + if (test_opt(sb, BARRIER) && is_tail) write_flags |= REQ_FUA | REQ_PREFLUSH; lock_buffer(bh); set_buffer_dirty(bh); @@ -684,7 +684,7 @@ static u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc) *crc = ext4_chksum(sbi, *crc, tl, sizeof(*tl)); if (pad_len > 0) ext4_fc_memzero(sb, tl + 1, pad_len, crc); - ext4_fc_submit_bh(sb); + ext4_fc_submit_bh(sb, false); ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh); if (ret) @@ -741,7 +741,7 @@ static int ext4_fc_write_tail(struct super_block *sb, u32 crc) tail.fc_crc = cpu_to_le32(crc); ext4_fc_memcpy(sb, dst, &tail.fc_crc, sizeof(tail.fc_crc), NULL); - ext4_fc_submit_bh(sb); + ext4_fc_submit_bh(sb, true); return 0; } -- GitLab From be82fddca81eefd1edbd9b290dfcb2177e24785b Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 14 Jan 2021 13:23:04 -0800 Subject: [PATCH 1843/1894] libperf tests: Avoid uninitialized variable warning The variable 'bf' is read (for a write call) without being initialized triggering a memory sanitizer warning. Use 'bf' in the read and switch the write to reading from a string. Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210114212304.4018119-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/tests/test-evlist.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c index 6d8ebe0c25042..1b225fe34a722 100644 --- a/tools/lib/perf/tests/test-evlist.c +++ b/tools/lib/perf/tests/test-evlist.c @@ -208,7 +208,6 @@ static int test_mmap_thread(void) char path[PATH_MAX]; int id, err, pid, go_pipe[2]; union perf_event *event; - char bf; int count = 0; snprintf(path, PATH_MAX, "%s/kernel/debug/tracing/events/syscalls/sys_enter_prctl/id", @@ -229,6 +228,7 @@ static int test_mmap_thread(void) pid = fork(); if (!pid) { int i; + char bf; read(go_pipe[0], &bf, 1); @@ -266,7 +266,7 @@ static int test_mmap_thread(void) perf_evlist__enable(evlist); /* kick the child and wait for it to finish */ - write(go_pipe[1], &bf, 1); + write(go_pipe[1], "A", 1); waitpid(pid, NULL, 0); /* -- GitLab From bba2ea17ef553aea0df80cb64399fe2f70f225dd Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 14 Jan 2021 10:02:49 -0800 Subject: [PATCH 1844/1894] libperf tests: If a test fails return non-zero If a test fails return -1 rather than 0. This is consistent with the return value in test-cpumap.c Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210114180250.3853825-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/tests/test-cpumap.c | 2 +- tools/lib/perf/tests/test-evlist.c | 2 +- tools/lib/perf/tests/test-evsel.c | 2 +- tools/lib/perf/tests/test-threadmap.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/lib/perf/tests/test-cpumap.c b/tools/lib/perf/tests/test-cpumap.c index c8d45091e7c26..c70e9e03af3e9 100644 --- a/tools/lib/perf/tests/test-cpumap.c +++ b/tools/lib/perf/tests/test-cpumap.c @@ -27,5 +27,5 @@ int main(int argc, char **argv) perf_cpu_map__put(cpus); __T_END; - return 0; + return tests_failed == 0 ? 0 : -1; } diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c index 1b225fe34a722..220a95be47c34 100644 --- a/tools/lib/perf/tests/test-evlist.c +++ b/tools/lib/perf/tests/test-evlist.c @@ -409,5 +409,5 @@ int main(int argc, char **argv) test_mmap_cpus(); __T_END; - return 0; + return tests_failed == 0 ? 0 : -1; } diff --git a/tools/lib/perf/tests/test-evsel.c b/tools/lib/perf/tests/test-evsel.c index 135722ac965bf..0ad82d7a2a51b 100644 --- a/tools/lib/perf/tests/test-evsel.c +++ b/tools/lib/perf/tests/test-evsel.c @@ -131,5 +131,5 @@ int main(int argc, char **argv) test_stat_thread_enable(); __T_END; - return 0; + return tests_failed == 0 ? 0 : -1; } diff --git a/tools/lib/perf/tests/test-threadmap.c b/tools/lib/perf/tests/test-threadmap.c index 7dc4d6fbeddee..384471441b484 100644 --- a/tools/lib/perf/tests/test-threadmap.c +++ b/tools/lib/perf/tests/test-threadmap.c @@ -27,5 +27,5 @@ int main(int argc, char **argv) perf_thread_map__put(threads); __T_END; - return 0; + return tests_failed == 0 ? 0 : -1; } -- GitLab From 66dd86b2a2bee129c70f7ff054d3a6a2e5f8eb20 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 14 Jan 2021 10:02:50 -0800 Subject: [PATCH 1845/1894] libperf tests: Fail when failing to get a tracepoint id Permissions are necessary to get a tracepoint id. Fail the test when the read fails. Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210114180250.3853825-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/tests/test-evlist.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c index 220a95be47c34..e2ac0b7f432ea 100644 --- a/tools/lib/perf/tests/test-evlist.c +++ b/tools/lib/perf/tests/test-evlist.c @@ -214,6 +214,7 @@ static int test_mmap_thread(void) sysfs__mountpoint()); if (filename__read_int(path, &id)) { + tests_failed++; fprintf(stderr, "error: failed to get tracepoint id: %s\n", path); return -1; } -- GitLab From 3ff1e7180abc7f6db413933c110df69157216715 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 15 Jan 2021 16:11:38 +0900 Subject: [PATCH 1846/1894] perf stat: Introduce struct runtime_stat_data To pass more info to the saved_value in the runtime_stat, add a new struct runtime_stat_data. Currently it only has 'ctx' field but later patch will add more. Note that we intentionally pass 0 as ctx to clock-related events for compatibility. It was already there in a few places. So move the code into the saved_value_lookup() explicitly and add a comment. Suggested-by: Andi Kleen Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Jin Yao Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210115071139.257042-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-shadow.c | 346 +++++++++++++++++----------------- 1 file changed, 173 insertions(+), 173 deletions(-) diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 901265127e369..a1565b6e38f27 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -114,6 +114,10 @@ static struct saved_value *saved_value_lookup(struct evsel *evsel, rblist = &st->value_list; + /* don't use context info for clock events */ + if (type == STAT_NSECS) + dm.ctx = 0; + nd = rblist__find(rblist, &dm); if (nd) return container_of(nd, struct saved_value, rb_node); @@ -191,12 +195,17 @@ void perf_stat__reset_shadow_per_stat(struct runtime_stat *st) reset_stat(st); } +struct runtime_stat_data { + int ctx; +}; + static void update_runtime_stat(struct runtime_stat *st, enum stat_type type, - int ctx, int cpu, u64 count) + int cpu, u64 count, + struct runtime_stat_data *rsd) { - struct saved_value *v = saved_value_lookup(NULL, cpu, true, - type, ctx, st); + struct saved_value *v = saved_value_lookup(NULL, cpu, true, type, + rsd->ctx, st); if (v) update_stats(&v->stats, count); @@ -210,73 +219,75 @@ static void update_runtime_stat(struct runtime_stat *st, void perf_stat__update_shadow_stats(struct evsel *counter, u64 count, int cpu, struct runtime_stat *st) { - int ctx = evsel_context(counter); u64 count_ns = count; struct saved_value *v; + struct runtime_stat_data rsd = { + .ctx = evsel_context(counter), + }; count *= counter->scale; if (evsel__is_clock(counter)) - update_runtime_stat(st, STAT_NSECS, 0, cpu, count_ns); + update_runtime_stat(st, STAT_NSECS, cpu, count_ns, &rsd); else if (evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) - update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count); + update_runtime_stat(st, STAT_CYCLES, cpu, count, &rsd); else if (perf_stat_evsel__is(counter, CYCLES_IN_TX)) - update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count); + update_runtime_stat(st, STAT_CYCLES_IN_TX, cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TRANSACTION_START)) - update_runtime_stat(st, STAT_TRANSACTION, ctx, cpu, count); + update_runtime_stat(st, STAT_TRANSACTION, cpu, count, &rsd); else if (perf_stat_evsel__is(counter, ELISION_START)) - update_runtime_stat(st, STAT_ELISION, ctx, cpu, count); + update_runtime_stat(st, STAT_ELISION, cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS)) update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS, - ctx, cpu, count); + cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED)) update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED, - ctx, cpu, count); + cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED)) update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED, - ctx, cpu, count); + cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES)) update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES, - ctx, cpu, count); + cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES)) update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES, - ctx, cpu, count); + cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_RETIRING)) update_runtime_stat(st, STAT_TOPDOWN_RETIRING, - ctx, cpu, count); + cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_BAD_SPEC)) update_runtime_stat(st, STAT_TOPDOWN_BAD_SPEC, - ctx, cpu, count); + cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_FE_BOUND)) update_runtime_stat(st, STAT_TOPDOWN_FE_BOUND, - ctx, cpu, count); + cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_BE_BOUND)) update_runtime_stat(st, STAT_TOPDOWN_BE_BOUND, - ctx, cpu, count); + cpu, count, &rsd); else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT, - ctx, cpu, count); + cpu, count, &rsd); else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) update_runtime_stat(st, STAT_STALLED_CYCLES_BACK, - ctx, cpu, count); + cpu, count, &rsd); else if (evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) - update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count); + update_runtime_stat(st, STAT_BRANCHES, cpu, count, &rsd); else if (evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) - update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count); + update_runtime_stat(st, STAT_CACHEREFS, cpu, count, &rsd); else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) - update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count); + update_runtime_stat(st, STAT_L1_DCACHE, cpu, count, &rsd); else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) - update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count); + update_runtime_stat(st, STAT_L1_ICACHE, cpu, count, &rsd); else if (evsel__match(counter, HW_CACHE, HW_CACHE_LL)) - update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count); + update_runtime_stat(st, STAT_LL_CACHE, cpu, count, &rsd); else if (evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) - update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count); + update_runtime_stat(st, STAT_DTLB_CACHE, cpu, count, &rsd); else if (evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) - update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count); + update_runtime_stat(st, STAT_ITLB_CACHE, cpu, count, &rsd); else if (perf_stat_evsel__is(counter, SMI_NUM)) - update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count); + update_runtime_stat(st, STAT_SMI_NUM, cpu, count, &rsd); else if (perf_stat_evsel__is(counter, APERF)) - update_runtime_stat(st, STAT_APERF, ctx, cpu, count); + update_runtime_stat(st, STAT_APERF, cpu, count, &rsd); if (counter->collect_stat) { v = saved_value_lookup(counter, cpu, true, STAT_NONE, 0, st); @@ -422,11 +433,12 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list) } static double runtime_stat_avg(struct runtime_stat *st, - enum stat_type type, int ctx, int cpu) + enum stat_type type, int cpu, + struct runtime_stat_data *rsd) { struct saved_value *v; - v = saved_value_lookup(NULL, cpu, false, type, ctx, st); + v = saved_value_lookup(NULL, cpu, false, type, rsd->ctx, st); if (!v) return 0.0; @@ -434,11 +446,12 @@ static double runtime_stat_avg(struct runtime_stat *st, } static double runtime_stat_n(struct runtime_stat *st, - enum stat_type type, int ctx, int cpu) + enum stat_type type, int cpu, + struct runtime_stat_data *rsd) { struct saved_value *v; - v = saved_value_lookup(NULL, cpu, false, type, ctx, st); + v = saved_value_lookup(NULL, cpu, false, type, rsd->ctx, st); if (!v) return 0.0; @@ -446,16 +459,15 @@ static double runtime_stat_n(struct runtime_stat *st, } static void print_stalled_cycles_frontend(struct perf_stat_config *config, - int cpu, - struct evsel *evsel, double avg, + int cpu, double avg, struct perf_stat_output_ctx *out, - struct runtime_stat *st) + struct runtime_stat *st, + struct runtime_stat_data *rsd) { double total, ratio = 0.0; const char *color; - int ctx = evsel_context(evsel); - total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); + total = runtime_stat_avg(st, STAT_CYCLES, cpu, rsd); if (total) ratio = avg / total * 100.0; @@ -470,16 +482,15 @@ static void print_stalled_cycles_frontend(struct perf_stat_config *config, } static void print_stalled_cycles_backend(struct perf_stat_config *config, - int cpu, - struct evsel *evsel, double avg, + int cpu, double avg, struct perf_stat_output_ctx *out, - struct runtime_stat *st) + struct runtime_stat *st, + struct runtime_stat_data *rsd) { double total, ratio = 0.0; const char *color; - int ctx = evsel_context(evsel); - total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); + total = runtime_stat_avg(st, STAT_CYCLES, cpu, rsd); if (total) ratio = avg / total * 100.0; @@ -490,17 +501,15 @@ static void print_stalled_cycles_backend(struct perf_stat_config *config, } static void print_branch_misses(struct perf_stat_config *config, - int cpu, - struct evsel *evsel, - double avg, + int cpu, double avg, struct perf_stat_output_ctx *out, - struct runtime_stat *st) + struct runtime_stat *st, + struct runtime_stat_data *rsd) { double total, ratio = 0.0; const char *color; - int ctx = evsel_context(evsel); - total = runtime_stat_avg(st, STAT_BRANCHES, ctx, cpu); + total = runtime_stat_avg(st, STAT_BRANCHES, cpu, rsd); if (total) ratio = avg / total * 100.0; @@ -511,18 +520,15 @@ static void print_branch_misses(struct perf_stat_config *config, } static void print_l1_dcache_misses(struct perf_stat_config *config, - int cpu, - struct evsel *evsel, - double avg, + int cpu, double avg, struct perf_stat_output_ctx *out, - struct runtime_stat *st) - + struct runtime_stat *st, + struct runtime_stat_data *rsd) { double total, ratio = 0.0; const char *color; - int ctx = evsel_context(evsel); - total = runtime_stat_avg(st, STAT_L1_DCACHE, ctx, cpu); + total = runtime_stat_avg(st, STAT_L1_DCACHE, cpu, rsd); if (total) ratio = avg / total * 100.0; @@ -533,18 +539,15 @@ static void print_l1_dcache_misses(struct perf_stat_config *config, } static void print_l1_icache_misses(struct perf_stat_config *config, - int cpu, - struct evsel *evsel, - double avg, + int cpu, double avg, struct perf_stat_output_ctx *out, - struct runtime_stat *st) - + struct runtime_stat *st, + struct runtime_stat_data *rsd) { double total, ratio = 0.0; const char *color; - int ctx = evsel_context(evsel); - total = runtime_stat_avg(st, STAT_L1_ICACHE, ctx, cpu); + total = runtime_stat_avg(st, STAT_L1_ICACHE, cpu, rsd); if (total) ratio = avg / total * 100.0; @@ -554,17 +557,15 @@ static void print_l1_icache_misses(struct perf_stat_config *config, } static void print_dtlb_cache_misses(struct perf_stat_config *config, - int cpu, - struct evsel *evsel, - double avg, + int cpu, double avg, struct perf_stat_output_ctx *out, - struct runtime_stat *st) + struct runtime_stat *st, + struct runtime_stat_data *rsd) { double total, ratio = 0.0; const char *color; - int ctx = evsel_context(evsel); - total = runtime_stat_avg(st, STAT_DTLB_CACHE, ctx, cpu); + total = runtime_stat_avg(st, STAT_DTLB_CACHE, cpu, rsd); if (total) ratio = avg / total * 100.0; @@ -574,17 +575,15 @@ static void print_dtlb_cache_misses(struct perf_stat_config *config, } static void print_itlb_cache_misses(struct perf_stat_config *config, - int cpu, - struct evsel *evsel, - double avg, + int cpu, double avg, struct perf_stat_output_ctx *out, - struct runtime_stat *st) + struct runtime_stat *st, + struct runtime_stat_data *rsd) { double total, ratio = 0.0; const char *color; - int ctx = evsel_context(evsel); - total = runtime_stat_avg(st, STAT_ITLB_CACHE, ctx, cpu); + total = runtime_stat_avg(st, STAT_ITLB_CACHE, cpu, rsd); if (total) ratio = avg / total * 100.0; @@ -594,17 +593,15 @@ static void print_itlb_cache_misses(struct perf_stat_config *config, } static void print_ll_cache_misses(struct perf_stat_config *config, - int cpu, - struct evsel *evsel, - double avg, + int cpu, double avg, struct perf_stat_output_ctx *out, - struct runtime_stat *st) + struct runtime_stat *st, + struct runtime_stat_data *rsd) { double total, ratio = 0.0; const char *color; - int ctx = evsel_context(evsel); - total = runtime_stat_avg(st, STAT_LL_CACHE, ctx, cpu); + total = runtime_stat_avg(st, STAT_LL_CACHE, cpu, rsd); if (total) ratio = avg / total * 100.0; @@ -662,56 +659,61 @@ static double sanitize_val(double x) return x; } -static double td_total_slots(int ctx, int cpu, struct runtime_stat *st) +static double td_total_slots(int cpu, struct runtime_stat *st, + struct runtime_stat_data *rsd) { - return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, ctx, cpu); + return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, cpu, rsd); } -static double td_bad_spec(int ctx, int cpu, struct runtime_stat *st) +static double td_bad_spec(int cpu, struct runtime_stat *st, + struct runtime_stat_data *rsd) { double bad_spec = 0; double total_slots; double total; - total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, ctx, cpu) - - runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, ctx, cpu) + - runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu); + total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, cpu, rsd) - + runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, cpu, rsd) + + runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, cpu, rsd); - total_slots = td_total_slots(ctx, cpu, st); + total_slots = td_total_slots(cpu, st, rsd); if (total_slots) bad_spec = total / total_slots; return sanitize_val(bad_spec); } -static double td_retiring(int ctx, int cpu, struct runtime_stat *st) +static double td_retiring(int cpu, struct runtime_stat *st, + struct runtime_stat_data *rsd) { double retiring = 0; - double total_slots = td_total_slots(ctx, cpu, st); + double total_slots = td_total_slots(cpu, st, rsd); double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, - ctx, cpu); + cpu, rsd); if (total_slots) retiring = ret_slots / total_slots; return retiring; } -static double td_fe_bound(int ctx, int cpu, struct runtime_stat *st) +static double td_fe_bound(int cpu, struct runtime_stat *st, + struct runtime_stat_data *rsd) { double fe_bound = 0; - double total_slots = td_total_slots(ctx, cpu, st); + double total_slots = td_total_slots(cpu, st, rsd); double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES, - ctx, cpu); + cpu, rsd); if (total_slots) fe_bound = fetch_bub / total_slots; return fe_bound; } -static double td_be_bound(int ctx, int cpu, struct runtime_stat *st) +static double td_be_bound(int cpu, struct runtime_stat *st, + struct runtime_stat_data *rsd) { - double sum = (td_fe_bound(ctx, cpu, st) + - td_bad_spec(ctx, cpu, st) + - td_retiring(ctx, cpu, st)); + double sum = (td_fe_bound(cpu, st, rsd) + + td_bad_spec(cpu, st, rsd) + + td_retiring(cpu, st, rsd)); if (sum == 0) return 0; return sanitize_val(1.0 - sum); @@ -722,15 +724,15 @@ static double td_be_bound(int ctx, int cpu, struct runtime_stat *st) * the ratios we need to recreate the sum. */ -static double td_metric_ratio(int ctx, int cpu, - enum stat_type type, - struct runtime_stat *stat) +static double td_metric_ratio(int cpu, enum stat_type type, + struct runtime_stat *stat, + struct runtime_stat_data *rsd) { - double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, ctx, cpu) + - runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, ctx, cpu) + - runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, ctx, cpu) + - runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, ctx, cpu); - double d = runtime_stat_avg(stat, type, ctx, cpu); + double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu, rsd) + + runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu, rsd) + + runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu, rsd) + + runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu, rsd); + double d = runtime_stat_avg(stat, type, cpu, rsd); if (sum) return d / sum; @@ -742,34 +744,33 @@ static double td_metric_ratio(int ctx, int cpu, * We allow two missing. */ -static bool full_td(int ctx, int cpu, - struct runtime_stat *stat) +static bool full_td(int cpu, struct runtime_stat *stat, + struct runtime_stat_data *rsd) { int c = 0; - if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, ctx, cpu) > 0) + if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu, rsd) > 0) c++; - if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, ctx, cpu) > 0) + if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu, rsd) > 0) c++; - if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, ctx, cpu) > 0) + if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu, rsd) > 0) c++; - if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, ctx, cpu) > 0) + if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu, rsd) > 0) c++; return c >= 2; } -static void print_smi_cost(struct perf_stat_config *config, - int cpu, struct evsel *evsel, +static void print_smi_cost(struct perf_stat_config *config, int cpu, struct perf_stat_output_ctx *out, - struct runtime_stat *st) + struct runtime_stat *st, + struct runtime_stat_data *rsd) { double smi_num, aperf, cycles, cost = 0.0; - int ctx = evsel_context(evsel); const char *color = NULL; - smi_num = runtime_stat_avg(st, STAT_SMI_NUM, ctx, cpu); - aperf = runtime_stat_avg(st, STAT_APERF, ctx, cpu); - cycles = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); + smi_num = runtime_stat_avg(st, STAT_SMI_NUM, cpu, rsd); + aperf = runtime_stat_avg(st, STAT_APERF, cpu, rsd); + cycles = runtime_stat_avg(st, STAT_CYCLES, cpu, rsd); if ((cycles == 0) || (aperf == 0)) return; @@ -930,12 +931,14 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric_t print_metric = out->print_metric; double total, ratio = 0.0, total2; const char *color = NULL; - int ctx = evsel_context(evsel); + struct runtime_stat_data rsd = { + .ctx = evsel_context(evsel), + }; struct metric_event *me; int num = 1; if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { - total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); + total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd); if (total) { ratio = avg / total; @@ -945,12 +948,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0); } - total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, - ctx, cpu); + total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, cpu, &rsd); total = max(total, runtime_stat_avg(st, STAT_STALLED_CYCLES_BACK, - ctx, cpu)); + cpu, &rsd)); if (total && avg) { out->new_line(config, ctxp); @@ -960,8 +962,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ratio); } } else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) { - if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0) - print_branch_misses(config, cpu, evsel, avg, out, st); + if (runtime_stat_n(st, STAT_BRANCHES, cpu, &rsd) != 0) + print_branch_misses(config, cpu, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all branches", 0); } else if ( @@ -970,8 +972,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0) - print_l1_dcache_misses(config, cpu, evsel, avg, out, st); + if (runtime_stat_n(st, STAT_L1_DCACHE, cpu, &rsd) != 0) + print_l1_dcache_misses(config, cpu, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all L1-dcache accesses", 0); } else if ( @@ -980,8 +982,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0) - print_l1_icache_misses(config, cpu, evsel, avg, out, st); + if (runtime_stat_n(st, STAT_L1_ICACHE, cpu, &rsd) != 0) + print_l1_icache_misses(config, cpu, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all L1-icache accesses", 0); } else if ( @@ -990,8 +992,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0) - print_dtlb_cache_misses(config, cpu, evsel, avg, out, st); + if (runtime_stat_n(st, STAT_DTLB_CACHE, cpu, &rsd) != 0) + print_dtlb_cache_misses(config, cpu, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all dTLB cache accesses", 0); } else if ( @@ -1000,8 +1002,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0) - print_itlb_cache_misses(config, cpu, evsel, avg, out, st); + if (runtime_stat_n(st, STAT_ITLB_CACHE, cpu, &rsd) != 0) + print_itlb_cache_misses(config, cpu, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all iTLB cache accesses", 0); } else if ( @@ -1010,27 +1012,27 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0) - print_ll_cache_misses(config, cpu, evsel, avg, out, st); + if (runtime_stat_n(st, STAT_LL_CACHE, cpu, &rsd) != 0) + print_ll_cache_misses(config, cpu, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all LL-cache accesses", 0); } else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) { - total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu); + total = runtime_stat_avg(st, STAT_CACHEREFS, cpu, &rsd); if (total) ratio = avg * 100 / total; - if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0) + if (runtime_stat_n(st, STAT_CACHEREFS, cpu, &rsd) != 0) print_metric(config, ctxp, NULL, "%8.3f %%", "of all cache refs", ratio); else print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0); } else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { - print_stalled_cycles_frontend(config, cpu, evsel, avg, out, st); + print_stalled_cycles_frontend(config, cpu, avg, out, st, &rsd); } else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { - print_stalled_cycles_backend(config, cpu, evsel, avg, out, st); + print_stalled_cycles_backend(config, cpu, avg, out, st, &rsd); } else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { - total = runtime_stat_avg(st, STAT_NSECS, 0, cpu); + total = runtime_stat_avg(st, STAT_NSECS, cpu, &rsd); if (total) { ratio = avg / total; @@ -1039,7 +1041,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, NULL, NULL, "Ghz", 0); } } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) { - total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); + total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd); if (total) print_metric(config, ctxp, NULL, @@ -1049,8 +1051,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, NULL, NULL, "transactional cycles", 0); } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) { - total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); - total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu); + total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd); + total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu, &rsd); if (total2 < avg) total2 = avg; @@ -1060,21 +1062,19 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, else print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0); } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) { - total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, - ctx, cpu); + total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu, &rsd); if (avg) ratio = total / avg; - if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0) + if (runtime_stat_n(st, STAT_CYCLES_IN_TX, cpu, &rsd) != 0) print_metric(config, ctxp, NULL, "%8.0f", "cycles / transaction", ratio); else print_metric(config, ctxp, NULL, NULL, "cycles / transaction", 0); } else if (perf_stat_evsel__is(evsel, ELISION_START)) { - total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, - ctx, cpu); + total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu, &rsd); if (avg) ratio = total / avg; @@ -1087,28 +1087,28 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, else print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0); } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) { - double fe_bound = td_fe_bound(ctx, cpu, st); + double fe_bound = td_fe_bound(cpu, st, &rsd); if (fe_bound > 0.2) color = PERF_COLOR_RED; print_metric(config, ctxp, color, "%8.1f%%", "frontend bound", fe_bound * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) { - double retiring = td_retiring(ctx, cpu, st); + double retiring = td_retiring(cpu, st, &rsd); if (retiring > 0.7) color = PERF_COLOR_GREEN; print_metric(config, ctxp, color, "%8.1f%%", "retiring", retiring * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) { - double bad_spec = td_bad_spec(ctx, cpu, st); + double bad_spec = td_bad_spec(cpu, st, &rsd); if (bad_spec > 0.1) color = PERF_COLOR_RED; print_metric(config, ctxp, color, "%8.1f%%", "bad speculation", bad_spec * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) { - double be_bound = td_be_bound(ctx, cpu, st); + double be_bound = td_be_bound(cpu, st, &rsd); const char *name = "backend bound"; static int have_recovery_bubbles = -1; @@ -1121,43 +1121,43 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, if (be_bound > 0.2) color = PERF_COLOR_RED; - if (td_total_slots(ctx, cpu, st) > 0) + if (td_total_slots(cpu, st, &rsd) > 0) print_metric(config, ctxp, color, "%8.1f%%", name, be_bound * 100.); else print_metric(config, ctxp, NULL, NULL, name, 0); } else if (perf_stat_evsel__is(evsel, TOPDOWN_RETIRING) && - full_td(ctx, cpu, st)) { - double retiring = td_metric_ratio(ctx, cpu, - STAT_TOPDOWN_RETIRING, st); - + full_td(cpu, st, &rsd)) { + double retiring = td_metric_ratio(cpu, + STAT_TOPDOWN_RETIRING, st, + &rsd); if (retiring > 0.7) color = PERF_COLOR_GREEN; print_metric(config, ctxp, color, "%8.1f%%", "retiring", retiring * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_FE_BOUND) && - full_td(ctx, cpu, st)) { - double fe_bound = td_metric_ratio(ctx, cpu, - STAT_TOPDOWN_FE_BOUND, st); - + full_td(cpu, st, &rsd)) { + double fe_bound = td_metric_ratio(cpu, + STAT_TOPDOWN_FE_BOUND, st, + &rsd); if (fe_bound > 0.2) color = PERF_COLOR_RED; print_metric(config, ctxp, color, "%8.1f%%", "frontend bound", fe_bound * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_BE_BOUND) && - full_td(ctx, cpu, st)) { - double be_bound = td_metric_ratio(ctx, cpu, - STAT_TOPDOWN_BE_BOUND, st); - + full_td(cpu, st, &rsd)) { + double be_bound = td_metric_ratio(cpu, + STAT_TOPDOWN_BE_BOUND, st, + &rsd); if (be_bound > 0.2) color = PERF_COLOR_RED; print_metric(config, ctxp, color, "%8.1f%%", "backend bound", be_bound * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_BAD_SPEC) && - full_td(ctx, cpu, st)) { - double bad_spec = td_metric_ratio(ctx, cpu, - STAT_TOPDOWN_BAD_SPEC, st); - + full_td(cpu, st, &rsd)) { + double bad_spec = td_metric_ratio(cpu, + STAT_TOPDOWN_BAD_SPEC, st, + &rsd); if (bad_spec > 0.1) color = PERF_COLOR_RED; print_metric(config, ctxp, color, "%8.1f%%", "bad speculation", @@ -1165,11 +1165,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, } else if (evsel->metric_expr) { generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL, evsel->name, evsel->metric_name, NULL, 1, cpu, out, st); - } else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) { + } else if (runtime_stat_n(st, STAT_NSECS, cpu, &rsd) != 0) { char unit = 'M'; char unit_buf[10]; - total = runtime_stat_avg(st, STAT_NSECS, 0, cpu); + total = runtime_stat_avg(st, STAT_NSECS, cpu, &rsd); if (total) ratio = 1000.0 * avg / total; @@ -1180,7 +1180,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio); } else if (perf_stat_evsel__is(evsel, SMI_NUM)) { - print_smi_cost(config, cpu, evsel, out, st); + print_smi_cost(config, cpu, out, st, &rsd); } else { num = 0; } -- GitLab From a1bf23052bdfe30ec3c693cf32feb2d79114ac16 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 15 Jan 2021 16:11:39 +0900 Subject: [PATCH 1847/1894] perf stat: Take cgroups into account for shadow stats As of now it doesn't consider cgroups when collecting shadow stats and metrics so counter values from different cgroups will be saved in a same slot. This resulted in incorrect numbers when those cgroups have different workloads. For example, let's look at the scenario below: cgroups A and C runs same workload which burns a cpu while cgroup B runs a light workload. $ perf stat -a -e cycles,instructions --for-each-cgroup A,B,C sleep 1 Performance counter stats for 'system wide': 3,958,116,522 cycles A 6,722,650,929 instructions A # 2.53 insn per cycle 1,132,741 cycles B 571,743 instructions B # 0.00 insn per cycle 4,007,799,935 cycles C 6,793,181,523 instructions C # 2.56 insn per cycle 1.001050869 seconds time elapsed When I run 'perf stat' with single workload, it usually shows IPC around 1.7. We can verify it (6,722,650,929.0 / 3,958,116,522 = 1.698) for cgroup A. But in this case, since cgroups are ignored, cycles are averaged so it used the lower value for IPC calculation and resulted in around 2.5. avg cycle: (3958116522 + 1132741 + 4007799935) / 3 = 2655683066 IPC (A) : 6722650929 / 2655683066 = 2.531 IPC (B) : 571743 / 2655683066 = 0.0002 IPC (C) : 6793181523 / 2655683066 = 2.557 We can simply compare cgroup pointers in the evsel and it'll be NULL when cgroups are not specified. With this patch, I can see correct numbers like below: $ perf stat -a -e cycles,instructions --for-each-cgroup A,B,C sleep 1 Performance counter stats for 'system wide': 4,171,051,687 cycles A 7,219,793,922 instructions A # 1.73 insn per cycle 1,051,189 cycles B 583,102 instructions B # 0.55 insn per cycle 4,171,124,710 cycles C 7,192,944,580 instructions C # 1.72 insn per cycle 1.007909814 seconds time elapsed Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Jin Yao Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210115071139.257042-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-shadow.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index a1565b6e38f27..12eafd12a693c 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -8,6 +8,7 @@ #include "evlist.h" #include "expr.h" #include "metricgroup.h" +#include "cgroup.h" #include /* @@ -28,6 +29,7 @@ struct saved_value { enum stat_type type; int ctx; int cpu; + struct cgroup *cgrp; struct runtime_stat *stat; struct stats stats; u64 metric_total; @@ -57,6 +59,9 @@ static int saved_value_cmp(struct rb_node *rb_node, const void *entry) if (a->ctx != b->ctx) return a->ctx - b->ctx; + if (a->cgrp != b->cgrp) + return (char *)a->cgrp < (char *)b->cgrp ? -1 : +1; + if (a->evsel == NULL && b->evsel == NULL) { if (a->stat == b->stat) return 0; @@ -100,7 +105,8 @@ static struct saved_value *saved_value_lookup(struct evsel *evsel, bool create, enum stat_type type, int ctx, - struct runtime_stat *st) + struct runtime_stat *st, + struct cgroup *cgrp) { struct rblist *rblist; struct rb_node *nd; @@ -110,6 +116,7 @@ static struct saved_value *saved_value_lookup(struct evsel *evsel, .type = type, .ctx = ctx, .stat = st, + .cgrp = cgrp, }; rblist = &st->value_list; @@ -197,6 +204,7 @@ void perf_stat__reset_shadow_per_stat(struct runtime_stat *st) struct runtime_stat_data { int ctx; + struct cgroup *cgrp; }; static void update_runtime_stat(struct runtime_stat *st, @@ -205,7 +213,7 @@ static void update_runtime_stat(struct runtime_stat *st, struct runtime_stat_data *rsd) { struct saved_value *v = saved_value_lookup(NULL, cpu, true, type, - rsd->ctx, st); + rsd->ctx, st, rsd->cgrp); if (v) update_stats(&v->stats, count); @@ -223,6 +231,7 @@ void perf_stat__update_shadow_stats(struct evsel *counter, u64 count, struct saved_value *v; struct runtime_stat_data rsd = { .ctx = evsel_context(counter), + .cgrp = counter->cgrp, }; count *= counter->scale; @@ -290,13 +299,14 @@ void perf_stat__update_shadow_stats(struct evsel *counter, u64 count, update_runtime_stat(st, STAT_APERF, cpu, count, &rsd); if (counter->collect_stat) { - v = saved_value_lookup(counter, cpu, true, STAT_NONE, 0, st); + v = saved_value_lookup(counter, cpu, true, STAT_NONE, 0, st, + rsd.cgrp); update_stats(&v->stats, count); if (counter->metric_leader) v->metric_total += count; } else if (counter->metric_leader) { v = saved_value_lookup(counter->metric_leader, - cpu, true, STAT_NONE, 0, st); + cpu, true, STAT_NONE, 0, st, rsd.cgrp); v->metric_total += count; v->metric_other++; } @@ -438,7 +448,7 @@ static double runtime_stat_avg(struct runtime_stat *st, { struct saved_value *v; - v = saved_value_lookup(NULL, cpu, false, type, rsd->ctx, st); + v = saved_value_lookup(NULL, cpu, false, type, rsd->ctx, st, rsd->cgrp); if (!v) return 0.0; @@ -451,7 +461,7 @@ static double runtime_stat_n(struct runtime_stat *st, { struct saved_value *v; - v = saved_value_lookup(NULL, cpu, false, type, rsd->ctx, st); + v = saved_value_lookup(NULL, cpu, false, type, rsd->ctx, st, rsd->cgrp); if (!v) return 0.0; @@ -805,7 +815,8 @@ static int prepare_metric(struct evsel **metric_events, scale = 1e-9; } else { v = saved_value_lookup(metric_events[i], cpu, false, - STAT_NONE, 0, st); + STAT_NONE, 0, st, + metric_events[i]->cgrp); if (!v) break; stats = &v->stats; @@ -933,6 +944,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, const char *color = NULL; struct runtime_stat_data rsd = { .ctx = evsel_context(evsel), + .cgrp = evsel->cgrp, }; struct metric_event *me; int num = 1; -- GitLab From 5501e9229a80d95a1ea68609f44c447a75d23ed5 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 7 Jan 2021 19:41:59 +0200 Subject: [PATCH 1848/1894] perf intel-pt: Fix 'CPU too large' error In some cases, the number of cpus (nr_cpus_online) is confused with the maximum cpu number (nr_cpus_avail), which results in the error in the example below: Example on system with 8 cpus: Before: # echo 0 > /sys/devices/system/cpu/cpu2/online # ./perf record --kcore -e intel_pt// taskset --cpu-list 7 uname Linux [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.147 MB perf.data ] # ./perf script --itrace=e Requested CPU 7 too large. Consider raising MAX_NR_CPUS 0x25908 [0x8]: failed to process type: 68 [Invalid argument] After: # ./perf script --itrace=e # Fixes: 8c7274691f0d ("perf machine: Replace MAX_NR_CPUS with perf_env::nr_cpus_online") Fixes: 7df4e36a4785 ("perf session: Replace MAX_NR_CPUS with perf_env::nr_cpus_online") Signed-off-by: Adrian Hunter Tested-by: Kan Liang Cc: Jiri Olsa Cc: stable@vger.kernel.org Link: http://lore.kernel.org/lkml/20210107174159.24897-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 4 ++-- tools/perf/util/session.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index f841f3503cae6..1e9d3f982b477 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2980,7 +2980,7 @@ int machines__for_each_thread(struct machines *machines, pid_t machine__get_current_tid(struct machine *machine, int cpu) { - int nr_cpus = min(machine->env->nr_cpus_online, MAX_NR_CPUS); + int nr_cpus = min(machine->env->nr_cpus_avail, MAX_NR_CPUS); if (cpu < 0 || cpu >= nr_cpus || !machine->current_tid) return -1; @@ -2992,7 +2992,7 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid, pid_t tid) { struct thread *thread; - int nr_cpus = min(machine->env->nr_cpus_online, MAX_NR_CPUS); + int nr_cpus = min(machine->env->nr_cpus_avail, MAX_NR_CPUS); if (cpu < 0) return -EINVAL; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 50ff9795a4f11..25adbcce02814 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -2404,7 +2404,7 @@ int perf_session__cpu_bitmap(struct perf_session *session, { int i, err = -1; struct perf_cpu_map *map; - int nr_cpus = min(session->header.env.nr_cpus_online, MAX_NR_CPUS); + int nr_cpus = min(session->header.env.nr_cpus_avail, MAX_NR_CPUS); for (i = 0; i < PERF_TYPE_MAX; ++i) { struct evsel *evsel; -- GitLab From 648b054a4647cd62e13ba79f398b8b97a7c82b19 Mon Sep 17 00:00:00 2001 From: Al Grant Date: Tue, 24 Nov 2020 19:58:17 +0000 Subject: [PATCH 1849/1894] perf inject: Correct event attribute sizes When 'perf inject' reads a perf.data file from an older version of perf, it writes event attributes into the output with the original size field, but lays them out as if they had the size currently used. Readers see a corrupt file. Update the size field to match the layout. Signed-off-by: Al Grant Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20201124195818.30603-1-al.grant@arm.com Signed-off-by: Denis Nikitin Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 062383e225a3e..c4ed3dc2c8f40 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -3323,6 +3323,14 @@ int perf_session__write_header(struct perf_session *session, attr_offset = lseek(ff.fd, 0, SEEK_CUR); evlist__for_each_entry(evlist, evsel) { + if (evsel->core.attr.size < sizeof(evsel->core.attr)) { + /* + * We are likely in "perf inject" and have read + * from an older file. Update attr size so that + * reader gets the right offset to the ids. + */ + evsel->core.attr.size = sizeof(evsel->core.attr); + } f_attr = (struct perf_file_attr){ .attr = evsel->core.attr, .ids = { -- GitLab From a8d13dbccb137c46fead2ec1a4f1fbc8cfc9ea91 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 15 Jan 2021 16:04:23 -0700 Subject: [PATCH 1850/1894] io_uring: ensure finish_wait() is always called in __io_uring_task_cancel() If we enter with requests pending and performm cancelations, we'll have a different inflight count before and after calling prepare_to_wait(). This causes the loop to restart. If we actually ended up canceling everything, or everything completed in-between, then we'll break out of the loop without calling finish_wait() on the waitqueue. This can trigger a warning on exit_signals(), as we leave the task state in TASK_UNINTERRUPTIBLE. Put a finish_wait() after the loop to catch that case. Cc: stable@vger.kernel.org # 5.9+ Signed-off-by: Jens Axboe --- fs/io_uring.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 06cc79d39586d..985a9e3f976d3 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -9101,6 +9101,7 @@ void __io_uring_task_cancel(void) finish_wait(&tctx->wait, &wait); } while (1); + finish_wait(&tctx->wait, &wait); atomic_dec(&tctx->in_idle); io_uring_remove_task_files(tctx); -- GitLab From a959a9782fa87669feeed095ced5d78181a7c02d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 11 Jan 2021 18:19:26 +0100 Subject: [PATCH 1851/1894] iov_iter: fix the uaccess area in copy_compat_iovec_from_user sizeof needs to be called on the compat pointer, not the native one. Fixes: 89cd35c58bc2 ("iov_iter: transparently handle compat iovecs in import_iovec") Reported-by: David Laight Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- lib/iov_iter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 1635111c5bd2a..a21e6a5792c5a 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1658,7 +1658,7 @@ static int copy_compat_iovec_from_user(struct iovec *iov, (const struct compat_iovec __user *)uvec; int ret = -EFAULT, i; - if (!user_access_begin(uvec, nr_segs * sizeof(*uvec))) + if (!user_access_begin(uiov, nr_segs * sizeof(*uiov))) return -EFAULT; for (i = 0; i < nr_segs; i++) { -- GitLab From 797f0375dd2ef5cdc68ac23450cbae9a5c67a74e Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Mon, 11 Jan 2021 15:45:01 -0800 Subject: [PATCH 1852/1894] RISC-V: Do not allocate memblock while iterating reserved memblocks Currently, resource tree allocates memory blocks while iterating on the list. It leads to following kernel warning because memblock allocation also invokes memory block reservation API. [ 0.000000] ------------[ cut here ]------------ [ 0.000000] WARNING: CPU: 0 PID: 0 at kernel/resource.c:795 __insert_resource+0x8e/0xd0 [ 0.000000] Modules linked in: [ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 5.10.0-00022-ge20097fb37e2-dirty #549 [ 0.000000] epc: c00125c2 ra : c001262c sp : c1c01f50 [ 0.000000] gp : c1d456e0 tp : c1c0a980 t0 : ffffcf20 [ 0.000000] t1 : 00000000 t2 : 00000000 s0 : c1c01f60 [ 0.000000] s1 : ffffcf00 a0 : ffffff00 a1 : c1c0c0c4 [ 0.000000] a2 : 80c12b15 a3 : 80402000 a4 : 80402000 [ 0.000000] a5 : c1c0c0c4 a6 : 80c12b15 a7 : f5faf600 [ 0.000000] s2 : c1c0c0c4 s3 : c1c0e000 s4 : c1009a80 [ 0.000000] s5 : c1c0c000 s6 : c1d48000 s7 : c1613b4c [ 0.000000] s8 : 00000fff s9 : 80000200 s10: c1613b40 [ 0.000000] s11: 00000000 t3 : c1d4a000 t4 : ffffffff This is also unnecessary as we can pre-compute the total memblocks required for each memory region and allocate it before the loop. It save precious boot time not going through memblock allocation code every time. Fixes: 00ab027a3b82 ("RISC-V: Add kernel image sections to the resource tree") Reviewed-by: Anup Patel Tested-by: Geert Uytterhoeven Signed-off-by: Atish Patra Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/setup.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 1d85e9bf783cf..3fa3f26dde856 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -127,7 +127,9 @@ static void __init init_resources(void) { struct memblock_region *region = NULL; struct resource *res = NULL; - int ret = 0; + struct resource *mem_res = NULL; + size_t mem_res_sz = 0; + int ret = 0, i = 0; code_res.start = __pa_symbol(_text); code_res.end = __pa_symbol(_etext) - 1; @@ -145,16 +147,17 @@ static void __init init_resources(void) bss_res.end = __pa_symbol(__bss_stop) - 1; bss_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; + mem_res_sz = (memblock.memory.cnt + memblock.reserved.cnt) * sizeof(*mem_res); + mem_res = memblock_alloc(mem_res_sz, SMP_CACHE_BYTES); + if (!mem_res) + panic("%s: Failed to allocate %zu bytes\n", __func__, mem_res_sz); /* * Start by adding the reserved regions, if they overlap * with /memory regions, insert_resource later on will take * care of it. */ for_each_reserved_mem_region(region) { - res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES); - if (!res) - panic("%s: Failed to allocate %zu bytes\n", __func__, - sizeof(struct resource)); + res = &mem_res[i++]; res->name = "Reserved"; res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; @@ -171,8 +174,10 @@ static void __init init_resources(void) * Ignore any other reserved regions within * system memory. */ - if (memblock_is_memory(res->start)) + if (memblock_is_memory(res->start)) { + memblock_free((phys_addr_t) res, sizeof(struct resource)); continue; + } ret = add_resource(&iomem_resource, res); if (ret < 0) @@ -181,10 +186,7 @@ static void __init init_resources(void) /* Add /memory regions to the resource tree */ for_each_mem_region(region) { - res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES); - if (!res) - panic("%s: Failed to allocate %zu bytes\n", __func__, - sizeof(struct resource)); + res = &mem_res[i++]; if (unlikely(memblock_is_nomap(region))) { res->name = "Reserved"; @@ -205,9 +207,9 @@ static void __init init_resources(void) return; error: - memblock_free((phys_addr_t) res, sizeof(struct resource)); /* Better an empty resource tree than an inconsistent one */ release_child_resources(&iomem_resource); + memblock_free((phys_addr_t) mem_res, mem_res_sz); } -- GitLab From abb8e86b269604e906a6a4af7a09f04b72dbb862 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Mon, 11 Jan 2021 15:45:02 -0800 Subject: [PATCH 1853/1894] RISC-V: Set current memblock limit Currently, linux kernel can not use last 4k bytes of addressable space because IS_ERR_VALUE macro treats those as an error. This will be an issue for RV32 as any memblock allocator potentially allocate chunk of memory from the end of DRAM (2GB) leading bad address error even though the address was technically valid. Fix this issue by limiting the memblock if available memory spans the entire address space. Reviewed-by: Anup Patel Signed-off-by: Atish Patra Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index bf5379135e39b..7cd4993f4ff21 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -157,9 +157,10 @@ disable: void __init setup_bootmem(void) { phys_addr_t mem_start = 0; - phys_addr_t start, end = 0; + phys_addr_t start, dram_end, end = 0; phys_addr_t vmlinux_end = __pa_symbol(&_end); phys_addr_t vmlinux_start = __pa_symbol(&_start); + phys_addr_t max_mapped_addr = __pa(~(ulong)0); u64 i; /* Find the memory region containing the kernel */ @@ -181,7 +182,18 @@ void __init setup_bootmem(void) /* Reserve from the start of the kernel to the end of the kernel */ memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start); - max_pfn = PFN_DOWN(memblock_end_of_DRAM()); + dram_end = memblock_end_of_DRAM(); + + /* + * memblock allocator is not aware of the fact that last 4K bytes of + * the addressable memory can not be mapped because of IS_ERR_VALUE + * macro. Make sure that last 4k bytes are not usable by memblock + * if end of dram is equal to maximum addressable memory. + */ + if (max_mapped_addr == (dram_end - 1)) + memblock_set_current_limit(max_mapped_addr - 4096); + + max_pfn = PFN_DOWN(dram_end); max_low_pfn = max_pfn; dma32_phys_limit = min(4UL * SZ_1G, (unsigned long)PFN_PHYS(max_low_pfn)); set_max_mapnr(max_low_pfn); -- GitLab From e557793799c5a8406afb08aa170509619f7eac36 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Mon, 11 Jan 2021 15:45:04 -0800 Subject: [PATCH 1854/1894] RISC-V: Fix maximum allowed phsyical memory for RV32 Linux kernel can only map 1GB of address space for RV32 as the page offset is set to 0xC0000000. The current description in the Kconfig is confusing as it indicates that RV32 can support 2GB of physical memory. That is simply not true for current kernel. In future, a 2GB split support can be added to allow 2GB physical address space. Reviewed-by: Anup Patel Signed-off-by: Atish Patra Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 81b76d44725d7..e9e2c1f0a6905 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -137,7 +137,7 @@ config PA_BITS config PAGE_OFFSET hex - default 0xC0000000 if 32BIT && MAXPHYSMEM_2GB + default 0xC0000000 if 32BIT && MAXPHYSMEM_1GB default 0x80000000 if 64BIT && !MMU default 0xffffffff80000000 if 64BIT && MAXPHYSMEM_2GB default 0xffffffe000000000 if 64BIT && MAXPHYSMEM_128GB @@ -247,10 +247,12 @@ config MODULE_SECTIONS choice prompt "Maximum Physical Memory" - default MAXPHYSMEM_2GB if 32BIT + default MAXPHYSMEM_1GB if 32BIT default MAXPHYSMEM_2GB if 64BIT && CMODEL_MEDLOW default MAXPHYSMEM_128GB if 64BIT && CMODEL_MEDANY + config MAXPHYSMEM_1GB + bool "1GiB" config MAXPHYSMEM_2GB bool "2GiB" config MAXPHYSMEM_128GB -- GitLab From 29a951dfb3c3263c3a0f3bd9f7f2c2cfde4baedb Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 8 Jan 2021 13:13:41 -0800 Subject: [PATCH 1855/1894] mm: fix clear_refs_write locking Turning page table entries read-only requires the mmap_sem held for writing. So stop doing the odd games with turning things from read locks to write locks and back. Just get the write lock. Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 32 +++++++++----------------------- 1 file changed, 9 insertions(+), 23 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index ee5a235b30562..ab7d700b2caa4 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1215,41 +1215,26 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, .type = type, }; + if (mmap_write_lock_killable(mm)) { + count = -EINTR; + goto out_mm; + } if (type == CLEAR_REFS_MM_HIWATER_RSS) { - if (mmap_write_lock_killable(mm)) { - count = -EINTR; - goto out_mm; - } - /* * Writing 5 to /proc/pid/clear_refs resets the peak * resident set size to this mm's current rss value. */ reset_mm_hiwater_rss(mm); - mmap_write_unlock(mm); - goto out_mm; + goto out_unlock; } - if (mmap_read_lock_killable(mm)) { - count = -EINTR; - goto out_mm; - } tlb_gather_mmu(&tlb, mm, 0, -1); if (type == CLEAR_REFS_SOFT_DIRTY) { for (vma = mm->mmap; vma; vma = vma->vm_next) { if (!(vma->vm_flags & VM_SOFTDIRTY)) continue; - mmap_read_unlock(mm); - if (mmap_write_lock_killable(mm)) { - count = -EINTR; - goto out_mm; - } - for (vma = mm->mmap; vma; vma = vma->vm_next) { - vma->vm_flags &= ~VM_SOFTDIRTY; - vma_set_page_prot(vma); - } - mmap_write_downgrade(mm); - break; + vma->vm_flags &= ~VM_SOFTDIRTY; + vma_set_page_prot(vma); } mmu_notifier_range_init(&range, MMU_NOTIFY_SOFT_DIRTY, @@ -1261,7 +1246,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, if (type == CLEAR_REFS_SOFT_DIRTY) mmu_notifier_invalidate_range_end(&range); tlb_finish_mmu(&tlb, 0, -1); - mmap_read_unlock(mm); +out_unlock: + mmap_write_unlock(mm); out_mm: mmput(mm); } -- GitLab From 9348b73c2e1bfea74ccd4a44fb4ccc7276ab9623 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 9 Jan 2021 17:09:10 -0800 Subject: [PATCH 1856/1894] mm: don't play games with pinned pages in clear_page_refs Turning a pinned page read-only breaks the pinning after COW. Don't do it. The whole "track page soft dirty" state doesn't work with pinned pages anyway, since the page might be dirtied by the pinning entity without ever being noticed in the page tables. Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index ab7d700b2caa4..602e3a52884d8 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1035,6 +1035,25 @@ struct clear_refs_private { }; #ifdef CONFIG_MEM_SOFT_DIRTY + +#define is_cow_mapping(flags) (((flags) & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE) + +static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr, pte_t pte) +{ + struct page *page; + + if (!pte_write(pte)) + return false; + if (!is_cow_mapping(vma->vm_flags)) + return false; + if (likely(!atomic_read(&vma->vm_mm->has_pinned))) + return false; + page = vm_normal_page(vma, addr, pte); + if (!page) + return false; + return page_maybe_dma_pinned(page); +} + static inline void clear_soft_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *pte) { @@ -1049,6 +1068,8 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma, if (pte_present(ptent)) { pte_t old_pte; + if (pte_is_pinned(vma, addr, ptent)) + return; old_pte = ptep_modify_prot_start(vma, addr, pte); ptent = pte_wrprotect(old_pte); ptent = pte_clear_soft_dirty(ptent); -- GitLab From d36a1dd9f77ae1e72da48f4123ed35627848507d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 5 Jan 2021 14:43:46 -0500 Subject: [PATCH 1857/1894] dump_common_audit_data(): fix racy accesses to ->d_name We are not guaranteed the locking environment that would prevent dentry getting renamed right under us. And it's possible for old long name to be freed after rename, leading to UAF here. Cc: stable@kernel.org # v2.6.2+ Signed-off-by: Al Viro --- security/lsm_audit.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/security/lsm_audit.c b/security/lsm_audit.c index 7d8026f3f3772..a0cd28cd31a85 100644 --- a/security/lsm_audit.c +++ b/security/lsm_audit.c @@ -275,7 +275,9 @@ static void dump_common_audit_data(struct audit_buffer *ab, struct inode *inode; audit_log_format(ab, " name="); + spin_lock(&a->u.dentry->d_lock); audit_log_untrustedstring(ab, a->u.dentry->d_name.name); + spin_unlock(&a->u.dentry->d_lock); inode = d_backing_inode(a->u.dentry); if (inode) { @@ -293,8 +295,9 @@ static void dump_common_audit_data(struct audit_buffer *ab, dentry = d_find_alias(inode); if (dentry) { audit_log_format(ab, " name="); - audit_log_untrustedstring(ab, - dentry->d_name.name); + spin_lock(&dentry->d_lock); + audit_log_untrustedstring(ab, dentry->d_name.name); + spin_unlock(&dentry->d_lock); dput(dentry); } audit_log_format(ab, " dev="); -- GitLab From feb889fb40fafc6933339cf1cca8f770126819fb Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 16 Jan 2021 15:34:57 -0800 Subject: [PATCH 1858/1894] mm: don't put pinned pages into the swap cache So technically there is nothing wrong with adding a pinned page to the swap cache, but the pinning obviously means that the page can't actually be free'd right now anyway, so it's a bit pointless. However, the real problem is not with it being a bit pointless: the real issue is that after we've added it to the swap cache, we'll try to unmap the page. That will succeed, because the code in mm/rmap.c doesn't know or care about pinned pages. Even the unmapping isn't fatal per se, since the page will stay around in memory due to the pinning, and we do hold the connection to it using the swap cache. But when we then touch it next and take a page fault, the logic in do_swap_page() will map it back into the process as a possibly read-only page, and we'll then break the page association on the next COW fault. Honestly, this issue could have been fixed in any of those other places: (a) we could refuse to unmap a pinned page (which makes conceptual sense), or (b) we could make sure to re-map a pinned page writably in do_swap_page(), or (c) we could just make do_wp_page() not COW the pinned page (which was what we historically did before that "mm: do_wp_page() simplification" commit). But while all of them are equally valid models for breaking this chain, not putting pinned pages into the swap cache in the first place is the simplest one by far. It's also the safest one: the reason why do_wp_page() was changed in the first place was that getting the "can I re-use this page" wrong is so fraught with errors. If you do it wrong, you end up with an incorrectly shared page. As a result, using "page_maybe_dma_pinned()" in either do_wp_page() or do_swap_page() would be a serious bug since it is only a (very good) heuristic. Re-using the page requires a hard black-and-white rule with no room for ambiguity. In contrast, saying "this page is very likely dma pinned, so let's not add it to the swap cache and try to unmap it" is an obviously safe thing to do, and if the heuristic might very rarely be a false positive, no harm is done. Fixes: 09854ba94c6a ("mm: do_wp_page() simplification") Reported-and-tested-by: Martin Raiber Cc: Pavel Begunkov Cc: Jens Axboe Cc: Peter Xu Signed-off-by: Linus Torvalds --- mm/vmscan.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/vmscan.c b/mm/vmscan.c index 257cba79a96dd..b1b574ad199d2 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1238,6 +1238,8 @@ static unsigned int shrink_page_list(struct list_head *page_list, if (!PageSwapCache(page)) { if (!(sc->gfp_mask & __GFP_IO)) goto keep_locked; + if (page_maybe_dma_pinned(page)) + goto keep_locked; if (PageTransHuge(page)) { /* cannot split THP, skip it */ if (!can_split_huge_page(page, NULL)) -- GitLab From 19c329f6808995b142b3966301f217c831e7cf31 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 17 Jan 2021 16:37:05 -0800 Subject: [PATCH 1859/1894] Linux 5.11-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9e73f82e0d863..b0e4767735dca 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 11 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Kleptomaniac Octopus # *DOCUMENTATION* -- GitLab From 0eacdd16adc425e61926876f15b58008aa5c4784 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 8 Oct 2020 00:48:22 +0200 Subject: [PATCH 1860/1894] gfs2: Turn gfs2_rbm_incr into gfs2_rbm_add Change gfs2_rbm_incr to advance an rbm by a given number of blocks. Use that in gfs2_reservation_check_and_update to save a gfs2_rbm_to_block -> gfs2_rbm_from_block round trip. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/rgrp.c | 55 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 33 insertions(+), 22 deletions(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 5e8eef9990e32..47d171745f83c 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -277,29 +277,38 @@ static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) } /** - * gfs2_rbm_incr - increment an rbm structure + * gfs2_rbm_add - add a number of blocks to an rbm * @rbm: The rbm with rgd already set correctly + * @blocks: The number of blocks to add to rpm * - * This function takes an existing rbm structure and increments it to the next - * viable block offset. - * - * Returns: If incrementing the offset would cause the rbm to go past the - * end of the rgrp, true is returned, otherwise false. + * This function takes an existing rbm structure and adds a number of blocks to + * it. * + * Returns: True if the new rbm would point past the end of the rgrp. */ -static bool gfs2_rbm_incr(struct gfs2_rbm *rbm) +static bool gfs2_rbm_add(struct gfs2_rbm *rbm, u32 blocks) { - if (rbm->offset + 1 < rbm_bi(rbm)->bi_blocks) { /* in the same bitmap */ - rbm->offset++; + struct gfs2_rgrpd *rgd = rbm->rgd; + struct gfs2_bitmap *bi = rgd->rd_bits + rbm->bii; + + if (rbm->offset + blocks < bi->bi_blocks) { + rbm->offset += blocks; return false; } - if (rbm->bii == rbm->rgd->rd_length - 1) /* at the last bitmap */ - return true; + blocks -= bi->bi_blocks - rbm->offset; - rbm->offset = 0; - rbm->bii++; - return false; + for(;;) { + bi++; + if (bi == rgd->rd_bits + rgd->rd_length) + return true; + if (blocks < bi->bi_blocks) { + rbm->offset = blocks; + rbm->bii = bi - rgd->rd_bits; + return false; + } + blocks -= bi->bi_blocks; + } } /** @@ -323,7 +332,7 @@ static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *le (*len)--; if (*len == 0) return true; - if (gfs2_rbm_incr(rbm)) + if (gfs2_rbm_add(rbm, 1)) return true; } @@ -1639,7 +1648,6 @@ static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, u64 block = gfs2_rbm_to_block(rbm); u32 extlen = 1; u64 nblock; - int ret; /* * If we have a minimum extent length, then skip over any extent @@ -1664,12 +1672,15 @@ static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, maxext->len = extlen; maxext->rbm = *rbm; } -fail: - nblock = block + extlen; + } else { + u64 len = nblock - block; + if (len >= (u64)1 << 32) + return -E2BIG; + extlen = len; } - ret = gfs2_rbm_from_block(rbm, nblock); - if (ret < 0) - return ret; +fail: + if (gfs2_rbm_add(rbm, extlen)) + return -E2BIG; return 1; } @@ -2205,7 +2216,7 @@ static void rgblk_free(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd, bi_prev = bi; } gfs2_setbit(&rbm, false, new_state); - gfs2_rbm_incr(&rbm); + gfs2_rbm_add(&rbm, 1); } } -- GitLab From c65b76b893032a9fa69056656c771f5e2fc9ef0c Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 11 Oct 2018 18:56:40 +0200 Subject: [PATCH 1861/1894] gfs2: Only use struct gfs2_rbm for bitmap manipulations GFS2 uses struct gfs2_rbm to represent a filesystem block number as a bit position within a resource group. This representation is used in the bitmap manipulation code to prevent excessive conversions between block numbers and bit positions, but also in struct gfs2_blkreserv which is part of struct gfs2_inode, to mark the start of a reservation. In the inode, the bit position representation makes less sense: first, the start position is used as a block number about as often as a bit position; second, the bit position representation makes the code unnecessarily complicated and difficult to read. Therefore, change struct gfs2_blkreserv to represent the start of a reservation as a block number instead of a bit position. (This requires keeping track of the resource group in gfs2_blkreserv separately.) With that change, various things can be slightly simplified, and struct gfs2_rbm can be moved to rgrp.c. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/bmap.c | 2 +- fs/gfs2/incore.h | 30 +-------- fs/gfs2/rgrp.c | 154 +++++++++++++++++++++++++------------------ fs/gfs2/trace_gfs2.h | 10 +-- fs/gfs2/trans.h | 2 +- 5 files changed, 101 insertions(+), 97 deletions(-) diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 62d9081d1e26e..e2ee3703f7df9 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1544,7 +1544,7 @@ more_rgrps: /* Must be done with the rgrp glock held: */ if (gfs2_rs_active(&ip->i_res) && - rgd == ip->i_res.rs_rbm.rgd) + rgd == ip->i_res.rs_rgd) gfs2_rs_deltree(&ip->i_res); } diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 8e1ab8ed4abc7..cca806ff611ce 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -125,31 +125,6 @@ struct gfs2_rgrpd { struct rb_root rd_rstree; /* multi-block reservation tree */ }; -struct gfs2_rbm { - struct gfs2_rgrpd *rgd; - u32 offset; /* The offset is bitmap relative */ - int bii; /* Bitmap index */ -}; - -static inline struct gfs2_bitmap *rbm_bi(const struct gfs2_rbm *rbm) -{ - return rbm->rgd->rd_bits + rbm->bii; -} - -static inline u64 gfs2_rbm_to_block(const struct gfs2_rbm *rbm) -{ - BUG_ON(rbm->offset >= rbm->rgd->rd_data); - return rbm->rgd->rd_data0 + (rbm_bi(rbm)->bi_start * GFS2_NBBY) + - rbm->offset; -} - -static inline bool gfs2_rbm_eq(const struct gfs2_rbm *rbm1, - const struct gfs2_rbm *rbm2) -{ - return (rbm1->rgd == rbm2->rgd) && (rbm1->bii == rbm2->bii) && - (rbm1->offset == rbm2->offset); -} - enum gfs2_state_bits { BH_Pinned = BH_PrivateStart, BH_Escaped = BH_PrivateStart + 1, @@ -313,8 +288,9 @@ struct gfs2_qadata { /* quota allocation data */ */ struct gfs2_blkreserv { - struct rb_node rs_node; /* link to other block reservations */ - struct gfs2_rbm rs_rbm; /* Start of reservation */ + struct rb_node rs_node; /* node within rd_rstree */ + struct gfs2_rgrpd *rs_rgd; + u64 rs_start; /* start of reservation */ u32 rs_free; /* how many blocks are still free */ }; diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 47d171745f83c..391186382ceed 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -36,6 +36,24 @@ #define BFITNOENT ((u32)~0) #define NO_BLOCK ((u64)~0) +struct gfs2_rbm { + struct gfs2_rgrpd *rgd; + u32 offset; /* The offset is bitmap relative */ + int bii; /* Bitmap index */ +}; + +static inline struct gfs2_bitmap *rbm_bi(const struct gfs2_rbm *rbm) +{ + return rbm->rgd->rd_bits + rbm->bii; +} + +static inline u64 gfs2_rbm_to_block(const struct gfs2_rbm *rbm) +{ + BUG_ON(rbm->offset >= rbm->rgd->rd_data); + return rbm->rgd->rd_data0 + (rbm_bi(rbm)->bi_start * GFS2_NBBY) + + rbm->offset; +} + /* * These routines are used by the resource group routines (rgrp.c) * to keep track of block allocation. Each block is represented by two @@ -175,7 +193,7 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state) /** * rs_cmp - multi-block reservation range compare - * @blk: absolute file system block number of the new reservation + * @start: start of the new reservation * @len: number of blocks in the new reservation * @rs: existing reservation to compare against * @@ -183,13 +201,11 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state) * -1 if the block range is before the start of the reservation * 0 if the block range overlaps with the reservation */ -static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs) +static inline int rs_cmp(u64 start, u32 len, struct gfs2_blkreserv *rs) { - u64 startblk = gfs2_rbm_to_block(&rs->rs_rbm); - - if (blk >= startblk + rs->rs_free) + if (start >= rs->rs_start + rs->rs_free) return 1; - if (blk + len - 1 < startblk) + if (rs->rs_start >= start + len) return -1; return 0; } @@ -311,13 +327,28 @@ static bool gfs2_rbm_add(struct gfs2_rbm *rbm, u32 blocks) } } +static struct gfs2_bitmap *gfs2_block_to_bitmap(struct gfs2_rgrpd *rgd, + u64 block) +{ + unsigned int delta = (sizeof(struct gfs2_rgrp) - + sizeof(struct gfs2_meta_header)) * GFS2_NBBY; + unsigned int rblock, bii; + + if (!rgrp_contains_block(rgd, block)) + return NULL; + rblock = block - rgd->rd_data0; + bii = (rblock + delta) / rgd->rd_sbd->sd_blocks_per_bitmap; + return rgd->rd_bits + bii; +} + /** * gfs2_unaligned_extlen - Look for free blocks which are not byte aligned * @rbm: Position to search (value/result) * @n_unaligned: Number of unaligned blocks to check * @len: Decremented for each block found (terminate on zero) * - * Returns: true if a non-free block is encountered + * Returns: true if a non-free block is encountered or the end of the resource + * group is reached. */ static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *len) @@ -604,10 +635,11 @@ static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs, { struct gfs2_inode *ip = container_of(rs, struct gfs2_inode, i_res); - gfs2_print_dbg(seq, "%s B: n:%llu s:%llu b:%u f:%u\n", fs_id_buf, + gfs2_print_dbg(seq, "%s B: n:%llu s:%llu f:%u\n", + fs_id_buf, (unsigned long long)ip->i_no_addr, - (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm), - rs->rs_rbm.offset, rs->rs_free); + (unsigned long long)rs->rs_start, + rs->rs_free); } /** @@ -622,30 +654,28 @@ static void __rs_deltree(struct gfs2_blkreserv *rs) if (!gfs2_rs_active(rs)) return; - rgd = rs->rs_rbm.rgd; + rgd = rs->rs_rgd; trace_gfs2_rs(rs, TRACE_RS_TREEDEL); rb_erase(&rs->rs_node, &rgd->rd_rstree); RB_CLEAR_NODE(&rs->rs_node); if (rs->rs_free) { - u64 last_block = gfs2_rbm_to_block(&rs->rs_rbm) + - rs->rs_free - 1; - struct gfs2_rbm last_rbm = { .rgd = rs->rs_rbm.rgd, }; + u64 last_block = rs->rs_start + rs->rs_free - 1; struct gfs2_bitmap *start, *last; /* return reserved blocks to the rgrp */ - BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free); - rs->rs_rbm.rgd->rd_reserved -= rs->rs_free; + BUG_ON(rs->rs_rgd->rd_reserved < rs->rs_free); + rs->rs_rgd->rd_reserved -= rs->rs_free; /* The rgrp extent failure point is likely not to increase; it will only do so if the freed blocks are somehow contiguous with a span of free blocks that follows. Still, it will force the number to be recalculated later. */ rgd->rd_extfail_pt += rs->rs_free; rs->rs_free = 0; - if (gfs2_rbm_from_block(&last_rbm, last_block)) + start = gfs2_block_to_bitmap(rgd, rs->rs_start); + last = gfs2_block_to_bitmap(rgd, last_block); + if (!start || !last) return; - start = rbm_bi(&rs->rs_rbm); - last = rbm_bi(&last_rbm); do clear_bit(GBF_FULL, &start->bi_flags); while (start++ != last); @@ -661,7 +691,7 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs) { struct gfs2_rgrpd *rgd; - rgd = rs->rs_rbm.rgd; + rgd = rs->rs_rgd; if (rgd) { spin_lock(&rgd->rd_rsspin); __rs_deltree(rs); @@ -1467,8 +1497,7 @@ static void rs_insert(struct gfs2_inode *ip) struct rb_node **newn, *parent = NULL; int rc; struct gfs2_blkreserv *rs = &ip->i_res; - struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd; - u64 fsblock = gfs2_rbm_to_block(&rs->rs_rbm); + struct gfs2_rgrpd *rgd = rs->rs_rgd; BUG_ON(gfs2_rs_active(rs)); @@ -1479,7 +1508,7 @@ static void rs_insert(struct gfs2_inode *ip) rb_entry(*newn, struct gfs2_blkreserv, rs_node); parent = *newn; - rc = rs_cmp(fsblock, rs->rs_free, cur); + rc = rs_cmp(rs->rs_start, rs->rs_free, cur); if (rc > 0) newn = &((*newn)->rb_right); else if (rc < 0) @@ -1567,7 +1596,7 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &extlen, ip, true); if (ret == 0) { - rs->rs_rbm = rbm; + rs->rs_start = gfs2_rbm_to_block(&rbm); rs->rs_free = extlen; rs_insert(ip); } else { @@ -1612,7 +1641,7 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, if (n) { while ((rs_cmp(block, length, rs) == 0) && (&ip->i_res != rs)) { - block = gfs2_rbm_to_block(&rs->rs_rbm) + rs->rs_free; + block = rs->rs_start + rs->rs_free; n = n->rb_right; if (n == NULL) break; @@ -1949,7 +1978,7 @@ static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs, u64 tdiff; tdiff = ktime_to_ns(ktime_sub(ktime_get_real(), - rs->rs_rbm.rgd->rd_gl->gl_dstamp)); + rs->rs_rgd->rd_gl->gl_dstamp)); return tdiff > (msecs * 1000 * 1000); } @@ -2027,45 +2056,45 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) if (gfs2_assert_warn(sdp, ap->target)) return -EINVAL; if (gfs2_rs_active(rs)) { - begin = rs->rs_rbm.rgd; - } else if (rs->rs_rbm.rgd && - rgrp_contains_block(rs->rs_rbm.rgd, ip->i_goal)) { - begin = rs->rs_rbm.rgd; + begin = rs->rs_rgd; + } else if (rs->rs_rgd && + rgrp_contains_block(rs->rs_rgd, ip->i_goal)) { + begin = rs->rs_rgd; } else { check_and_update_goal(ip); - rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); + rs->rs_rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); } if (S_ISDIR(ip->i_inode.i_mode) && (ap->aflags & GFS2_AF_ORLOV)) skip = gfs2_orlov_skip(ip); - if (rs->rs_rbm.rgd == NULL) + if (rs->rs_rgd == NULL) return -EBADSLT; while (loops < 3) { rg_locked = 1; - if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) { + if (!gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl)) { rg_locked = 0; if (skip && skip--) goto next_rgrp; if (!gfs2_rs_active(rs)) { if (loops == 0 && - !fast_to_acquire(rs->rs_rbm.rgd)) + !fast_to_acquire(rs->rs_rgd)) goto next_rgrp; if ((loops < 2) && gfs2_rgrp_used_recently(rs, 1000) && - gfs2_rgrp_congested(rs->rs_rbm.rgd, loops)) + gfs2_rgrp_congested(rs->rs_rgd, loops)) goto next_rgrp; } - error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl, + error = gfs2_glock_nq_init(rs->rs_rgd->rd_gl, LM_ST_EXCLUSIVE, flags, &ip->i_rgd_gh); if (unlikely(error)) return error; if (!gfs2_rs_active(rs) && (loops < 2) && - gfs2_rgrp_congested(rs->rs_rbm.rgd, loops)) + gfs2_rgrp_congested(rs->rs_rgd, loops)) goto skip_rgrp; if (sdp->sd_args.ar_rgrplvb) { - error = update_rgrp_lvb(rs->rs_rbm.rgd); + error = update_rgrp_lvb(rs->rs_rgd); if (unlikely(error)) { gfs2_glock_dq_uninit(&ip->i_rgd_gh); return error; @@ -2074,24 +2103,24 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) } /* Skip unusable resource groups */ - if ((rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC | + if ((rs->rs_rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) || - (loops == 0 && ap->target > rs->rs_rbm.rgd->rd_extfail_pt)) + (loops == 0 && ap->target > rs->rs_rgd->rd_extfail_pt)) goto skip_rgrp; if (sdp->sd_args.ar_rgrplvb) - gfs2_rgrp_bh_get(rs->rs_rbm.rgd); + gfs2_rgrp_bh_get(rs->rs_rgd); /* Get a reservation if we don't already have one */ if (!gfs2_rs_active(rs)) - rg_mblk_search(rs->rs_rbm.rgd, ip, ap); + rg_mblk_search(rs->rs_rgd, ip, ap); /* Skip rgrps when we can't get a reservation on first pass */ if (!gfs2_rs_active(rs) && (loops < 1)) goto check_rgrp; /* If rgrp has enough free space, use it */ - free_blocks = rgd_free(rs->rs_rbm.rgd, rs); + free_blocks = rgd_free(rs->rs_rgd, rs); if (free_blocks >= ap->target || (loops == 2 && ap->min_target && free_blocks >= ap->min_target)) { @@ -2100,8 +2129,8 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) } check_rgrp: /* Check for unlinked inodes which can be reclaimed */ - if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK) - try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked, + if (rs->rs_rgd->rd_flags & GFS2_RDF_CHECK) + try_rgrp_unlink(rs->rs_rgd, &last_unlinked, ip->i_no_addr); skip_rgrp: /* Drop reservation, if we couldn't use reserved rgrp */ @@ -2113,7 +2142,7 @@ skip_rgrp: gfs2_glock_dq_uninit(&ip->i_rgd_gh); next_rgrp: /* Find the next rgrp, and continue looking */ - if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin)) + if (gfs2_select_rgrp(&rs->rs_rgd, begin)) continue; if (skip) continue; @@ -2284,20 +2313,21 @@ static void gfs2_adjust_reservation(struct gfs2_inode *ip, { struct gfs2_blkreserv *rs = &ip->i_res; struct gfs2_rgrpd *rgd = rbm->rgd; - unsigned rlen; - u64 block; - int ret; spin_lock(&rgd->rd_rsspin); if (gfs2_rs_active(rs)) { - if (gfs2_rbm_eq(&rs->rs_rbm, rbm)) { - block = gfs2_rbm_to_block(rbm); - ret = gfs2_rbm_from_block(&rs->rs_rbm, block + len); + u64 start = gfs2_rbm_to_block(rbm); + + if (rs->rs_start == start) { + unsigned int rlen; + + rs->rs_start += len; rlen = min(rs->rs_free, len); rs->rs_free -= rlen; rgd->rd_reserved -= rlen; trace_gfs2_rs(rs, TRACE_RS_CLAIM); - if (rs->rs_free && !ret) + if (rs->rs_start < rgd->rd_data0 + rgd->rd_data && + rs->rs_free) goto out; /* We used up our block reservation, so we should reserve more blocks next time. */ @@ -2326,15 +2356,13 @@ static void gfs2_set_alloc_start(struct gfs2_rbm *rbm, u64 goal; if (gfs2_rs_active(&ip->i_res)) { - *rbm = ip->i_res.rs_rbm; - return; + goal = ip->i_res.rs_start; + } else { + if (!dinode && rgrp_contains_block(rbm->rgd, ip->i_goal)) + goal = ip->i_goal; + else + goal = rbm->rgd->rd_last_alloc + rbm->rgd->rd_data0; } - - if (!dinode && rgrp_contains_block(rbm->rgd, ip->i_goal)) - goal = ip->i_goal; - else - goal = rbm->rgd->rd_last_alloc + rbm->rgd->rd_data0; - if (WARN_ON_ONCE(gfs2_rbm_from_block(rbm, goal))) { rbm->bii = 0; rbm->offset = 0; @@ -2357,7 +2385,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct buffer_head *dibh; - struct gfs2_rbm rbm = { .rgd = ip->i_res.rs_rbm.rgd, }; + struct gfs2_rbm rbm = { .rgd = ip->i_res.rs_rgd, }; unsigned int ndata; u64 block; /* block, within the file system scope */ int error; @@ -2589,7 +2617,7 @@ void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist, return; rgd = gfs2_blk2rgrpd(sdp, block, 1); } else { - rgd = ip->i_res.rs_rbm.rgd; + rgd = ip->i_res.rs_rgd; if (!rgd || !rgrp_contains_block(rgd, block)) rgd = gfs2_blk2rgrpd(sdp, block, 1); } diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h index 0b2f858d9a8c5..e461a691e5e87 100644 --- a/fs/gfs2/trace_gfs2.h +++ b/fs/gfs2/trace_gfs2.h @@ -603,13 +603,13 @@ TRACE_EVENT(gfs2_rs, ), TP_fast_assign( - __entry->dev = rs->rs_rbm.rgd->rd_sbd->sd_vfs->s_dev; - __entry->rd_addr = rs->rs_rbm.rgd->rd_addr; - __entry->rd_free_clone = rs->rs_rbm.rgd->rd_free_clone; - __entry->rd_reserved = rs->rs_rbm.rgd->rd_reserved; + __entry->dev = rs->rs_rgd->rd_sbd->sd_vfs->s_dev; + __entry->rd_addr = rs->rs_rgd->rd_addr; + __entry->rd_free_clone = rs->rs_rgd->rd_free_clone; + __entry->rd_reserved = rs->rs_rgd->rd_reserved; __entry->inum = container_of(rs, struct gfs2_inode, i_res)->i_no_addr; - __entry->start = gfs2_rbm_to_block(&rs->rs_rbm); + __entry->start = rs->rs_start; __entry->free = rs->rs_free; __entry->func = func; ), diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h index 83199ce5a5c51..b812c4958dc4a 100644 --- a/fs/gfs2/trans.h +++ b/fs/gfs2/trans.h @@ -27,7 +27,7 @@ struct gfs2_glock; * block, or all of the blocks in the rg, whichever is smaller */ static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip, unsigned requested) { - struct gfs2_rgrpd *rgd = ip->i_res.rs_rbm.rgd; + struct gfs2_rgrpd *rgd = ip->i_res.rs_rgd; if (requested < rgd->rd_length) return requested + 1; -- GitLab From 3ed08befeb09e199c258fc4f3878935f57a2bd9a Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 16 Mar 2020 15:36:09 +0100 Subject: [PATCH 1862/1894] gfs2: Get rid of unnecessary variable in gfs2_alloc_blocks Variable ndata is only used inside "if (!dinode)", so it can be replaced entirely with *nblocks. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/rgrp.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 391186382ceed..4f3337fd6b23c 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -2386,7 +2386,6 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct buffer_head *dibh; struct gfs2_rbm rbm = { .rgd = ip->i_res.rs_rgd, }; - unsigned int ndata; u64 block; /* block, within the file system scope */ int error; @@ -2412,12 +2411,8 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0; if (gfs2_rs_active(&ip->i_res)) gfs2_adjust_reservation(ip, &rbm, *nblocks); - ndata = *nblocks; - if (dinode) - ndata--; - if (!dinode) { - ip->i_goal = block + ndata - 1; + ip->i_goal = block + *nblocks - 1; error = gfs2_meta_inode_buffer(ip, &dibh); if (error == 0) { struct gfs2_dinode *di = -- GitLab From 4272006d956f49e428f87c476959318301f2d4d2 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 20 Mar 2020 13:31:41 +0100 Subject: [PATCH 1863/1894] gfs2: Minor gfs2_inplace_reserve cleanup Clean up the reservation size computation logic in gfs2_inplace_reserve a little. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/rgrp.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 4f3337fd6b23c..f759f79d2af76 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -2048,12 +2048,13 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) struct gfs2_blkreserv *rs = &ip->i_res; int error = 0, rg_locked, flags = 0; u64 last_unlinked = NO_BLOCK; + u32 target = ap->target; int loops = 0; u32 free_blocks, skip = 0; if (sdp->sd_args.ar_rgrplvb) flags |= GL_SKIP; - if (gfs2_assert_warn(sdp, ap->target)) + if (gfs2_assert_warn(sdp, target)) return -EINVAL; if (gfs2_rs_active(rs)) { begin = rs->rs_rgd; @@ -2105,7 +2106,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) /* Skip unusable resource groups */ if ((rs->rs_rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) || - (loops == 0 && ap->target > rs->rs_rgd->rd_extfail_pt)) + (loops == 0 && target > rs->rs_rgd->rd_extfail_pt)) goto skip_rgrp; if (sdp->sd_args.ar_rgrplvb) @@ -2121,9 +2122,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) /* If rgrp has enough free space, use it */ free_blocks = rgd_free(rs->rs_rgd, rs); - if (free_blocks >= ap->target || - (loops == 2 && ap->min_target && - free_blocks >= ap->min_target)) { + if (free_blocks >= target) { ap->allowed = free_blocks; return 0; } @@ -2159,9 +2158,12 @@ next_rgrp: return error; } /* Flushing the log may release space */ - if (loops == 2) + if (loops == 2) { + if (ap->min_target) + target = ap->min_target; gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL | GFS2_LFC_INPLACE_RESERVE); + } } return -ENOSPC; -- GitLab From 2fdc2fa21bc72ec06c0c9f0e30b88fe1f2486b75 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 6 Oct 2020 14:29:04 +0200 Subject: [PATCH 1864/1894] Revert "gfs2: Don't reject a supposedly full bitmap if we have blocks reserved" This reverts commit e79e0e1428188b24c3b57309ffa54a33c4ae40c4. It turns out that we're only setting the GBF_FULL flag of a bitmap if we've been scanning from the beginning of the bitmap until the end and we haven't found a single free block, and we're not skipping reservations in that process, either. This means that in gfs2_rbm_find, we can always skip bitmaps with the GBF_FULL flag set. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/rgrp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index f759f79d2af76..faf9f21f3e98b 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1754,8 +1754,7 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext, while(1) { bi = rbm_bi(rbm); - if ((ip == NULL || !gfs2_rs_active(&ip->i_res)) && - test_bit(GBF_FULL, &bi->bi_flags) && + if (test_bit(GBF_FULL, &bi->bi_flags) && (state == GFS2_BLKST_FREE)) goto next_bitmap; -- GitLab From a12c6fa1de8472f0eb56f7e5a60de2ffa4f6c043 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 6 Oct 2020 15:16:16 +0200 Subject: [PATCH 1865/1894] gfs2: Don't clear GBF_FULL flags in rs_deltree Removing a reservation doesn't make any actual space available, so don't clear the GBF_FULL flags in that case. Otherwise, we'll only spend more time scanning the bitmaps unnecessarily. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/rgrp.c | 25 +------------------------ 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index faf9f21f3e98b..f585a13cebde0 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -327,20 +327,6 @@ static bool gfs2_rbm_add(struct gfs2_rbm *rbm, u32 blocks) } } -static struct gfs2_bitmap *gfs2_block_to_bitmap(struct gfs2_rgrpd *rgd, - u64 block) -{ - unsigned int delta = (sizeof(struct gfs2_rgrp) - - sizeof(struct gfs2_meta_header)) * GFS2_NBBY; - unsigned int rblock, bii; - - if (!rgrp_contains_block(rgd, block)) - return NULL; - rblock = block - rgd->rd_data0; - bii = (rblock + delta) / rgd->rd_sbd->sd_blocks_per_bitmap; - return rgd->rd_bits + bii; -} - /** * gfs2_unaligned_extlen - Look for free blocks which are not byte aligned * @rbm: Position to search (value/result) @@ -660,25 +646,16 @@ static void __rs_deltree(struct gfs2_blkreserv *rs) RB_CLEAR_NODE(&rs->rs_node); if (rs->rs_free) { - u64 last_block = rs->rs_start + rs->rs_free - 1; - struct gfs2_bitmap *start, *last; - /* return reserved blocks to the rgrp */ BUG_ON(rs->rs_rgd->rd_reserved < rs->rs_free); rs->rs_rgd->rd_reserved -= rs->rs_free; + /* The rgrp extent failure point is likely not to increase; it will only do so if the freed blocks are somehow contiguous with a span of free blocks that follows. Still, it will force the number to be recalculated later. */ rgd->rd_extfail_pt += rs->rs_free; rs->rs_free = 0; - start = gfs2_block_to_bitmap(rgd, rs->rs_start); - last = gfs2_block_to_bitmap(rgd, last_block); - if (!start || !last) - return; - do - clear_bit(GBF_FULL, &start->bi_flags); - while (start++ != last); } } -- GitLab From 560b8eba6bf685bb2a29fd24e7b3455e7b8f795a Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 6 Oct 2020 15:26:22 +0200 Subject: [PATCH 1866/1894] gfs2: Set GBF_FULL flags when reading resource group When reading a resource group from disk or when receiving the resource group statistics from a Lock Value Block (LVB), set/clear the GBF_FULL flags of all bitmaps in that resource group according to whether or not the resource group is full. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/rgrp.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index f585a13cebde0..f878d82750f7b 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1165,6 +1165,23 @@ static u32 count_unlinked(struct gfs2_rgrpd *rgd) return count; } +static void rgrp_set_bitmap_flags(struct gfs2_rgrpd *rgd) +{ + struct gfs2_bitmap *bi; + int x; + + if (rgd->rd_free) { + for (x = 0; x < rgd->rd_length; x++) { + bi = rgd->rd_bits + x; + clear_bit(GBF_FULL, &bi->bi_flags); + } + } else { + for (x = 0; x < rgd->rd_length; x++) { + bi = rgd->rd_bits + x; + set_bit(GBF_FULL, &bi->bi_flags); + } + } +} /** * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps @@ -1208,9 +1225,8 @@ static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) } if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) { - for (x = 0; x < length; x++) - clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags); gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data); + rgrp_set_bitmap_flags(rgd); rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); rgd->rd_free_clone = rgd->rd_free; /* max out the rgrp allocation failure point */ @@ -1260,6 +1276,7 @@ static int update_rgrp_lvb(struct gfs2_rgrpd *rgd) if (rgd->rd_rgl->rl_unlinked == 0) rgd->rd_flags &= ~GFS2_RDF_CHECK; rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free); + rgrp_set_bitmap_flags(rgd); rgd->rd_free_clone = rgd->rd_free; rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes); rgd->rd_igeneration = be64_to_cpu(rgd->rd_rgl->rl_igeneration); -- GitLab From 736b2f778f0a71ce54abe7e6d2c6f7591b95f7b8 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 7 Dec 2020 00:06:32 +0100 Subject: [PATCH 1867/1894] gfs2: Un-obfuscate function jdesc_find_i Clean up this function to show that it is trivial. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/super.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 2f56acc41c049..ed7a829e9ffe3 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -81,19 +81,12 @@ void gfs2_jindex_free(struct gfs2_sbd *sdp) static struct gfs2_jdesc *jdesc_find_i(struct list_head *head, unsigned int jid) { struct gfs2_jdesc *jd; - int found = 0; list_for_each_entry(jd, head, jd_list) { - if (jd->jd_jid == jid) { - found = 1; - break; - } + if (jd->jd_jid == jid) + return jd; } - - if (!found) - jd = NULL; - - return jd; + return NULL; } struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid) -- GitLab From 458094c2c6115cda721fd11a56f7e4f27cddd5b1 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sat, 5 Dec 2020 13:21:50 +0100 Subject: [PATCH 1868/1894] gfs2: Simplify the buf_limit and databuf_limit definitions The BUF_OFFSET and DATABUF_OFFSET definitions are only used in buf_limit and databuf_limit, respectively, and the rounding done in those definitions is immediately wiped out by dividing by the element size. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/lops.h | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h index fbdbb08dcec6a..3fca5bf239d33 100644 --- a/fs/gfs2/lops.h +++ b/fs/gfs2/lops.h @@ -10,13 +10,6 @@ #include #include "incore.h" -#define BUF_OFFSET \ - ((sizeof(struct gfs2_log_descriptor) + sizeof(__be64) - 1) & \ - ~(sizeof(__be64) - 1)) -#define DATABUF_OFFSET \ - ((sizeof(struct gfs2_log_descriptor) + (2 * sizeof(__be64) - 1)) & \ - ~(2 * sizeof(__be64) - 1)) - extern const struct gfs2_log_operations *gfs2_log_ops[]; extern void gfs2_log_incr_head(struct gfs2_sbd *sdp); extern u64 gfs2_log_bmap(struct gfs2_jdesc *jd, unsigned int lbn); @@ -29,18 +22,12 @@ extern int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head, bool keep_cache); static inline unsigned int buf_limit(struct gfs2_sbd *sdp) { - unsigned int limit; - - limit = (sdp->sd_sb.sb_bsize - BUF_OFFSET) / sizeof(__be64); - return limit; + return sdp->sd_ldptrs; } static inline unsigned int databuf_limit(struct gfs2_sbd *sdp) { - unsigned int limit; - - limit = (sdp->sd_sb.sb_bsize - DATABUF_OFFSET) / (2 * sizeof(__be64)); - return limit; + return sdp->sd_ldptrs / 2; } static inline void lops_before_commit(struct gfs2_sbd *sdp, -- GitLab From 5a4e9c607e03886815121fa975fcd8e0fa7252fe Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 6 Dec 2020 13:32:54 +0100 Subject: [PATCH 1869/1894] gfs2: Minor gfs2_write_revokes cleanups Clean up the computations in gfs2_write_revokes (no change in functionality). Signed-off-by: Andreas Gruenbacher --- fs/gfs2/log.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 2e9314091c81d..ccce17fe605fa 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -712,11 +712,13 @@ void gfs2_glock_remove_revoke(struct gfs2_glock *gl) void gfs2_write_revokes(struct gfs2_sbd *sdp) { /* number of revokes we still have room for */ - int max_revokes = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / sizeof(u64); + unsigned int max_revokes; gfs2_log_lock(sdp); - while (sdp->sd_log_num_revoke > max_revokes) - max_revokes += (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header)) / sizeof(u64); + max_revokes = sdp->sd_ldptrs; + if (sdp->sd_log_num_revoke > sdp->sd_ldptrs) + max_revokes += roundup(sdp->sd_log_num_revoke - sdp->sd_ldptrs, + sdp->sd_inptrs); max_revokes -= sdp->sd_log_num_revoke; if (!sdp->sd_log_num_revoke) { atomic_dec(&sdp->sd_log_blks_free); -- GitLab From 6188e8777de48c8404b0bcca74a70ac83c37d773 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 6 Dec 2020 13:30:14 +0100 Subject: [PATCH 1870/1894] gfs2: Some documentation updates The calc_reserved description claims that buf_limit is 502 (on 4k filesystems), but it is actually 503. Fix / clarify the entire description. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/log.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index ccce17fe605fa..12e8280f0806f 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -50,10 +50,12 @@ unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct) unsigned int blks; unsigned int first, second; + /* The initial struct gfs2_log_descriptor block */ blks = 1; first = sdp->sd_ldptrs; if (nstruct > first) { + /* Subsequent struct gfs2_meta_header blocks */ second = sdp->sd_inptrs; blks += DIV_ROUND_UP(nstruct - first, second); } @@ -507,24 +509,20 @@ static inline unsigned int log_distance(struct gfs2_sbd *sdp, unsigned int newer } /** - * calc_reserved - Calculate the number of blocks to reserve when - * refunding a transaction's unused buffers. + * calc_reserved - Calculate the number of blocks to keep reserved * @sdp: The GFS2 superblock * * This is complex. We need to reserve room for all our currently used - * metadata buffers (e.g. normal file I/O rewriting file time stamps) and - * all our journaled data buffers for journaled files (e.g. files in the + * metadata blocks (e.g. normal file I/O rewriting file time stamps) and + * all our journaled data blocks for journaled files (e.g. files in the * meta_fs like rindex, or files for which chattr +j was done.) - * If we don't reserve enough space, gfs2_log_refund and gfs2_log_flush - * will count it as free space (sd_log_blks_free) and corruption will follow. + * If we don't reserve enough space, corruption will follow. * - * We can have metadata bufs and jdata bufs in the same journal. So each - * type gets its own log header, for which we need to reserve a block. - * In fact, each type has the potential for needing more than one header - * in cases where we have more buffers than will fit on a journal page. + * We can have metadata blocks and jdata blocks in the same journal. Each + * type gets its own log descriptor, for which we need to reserve a block. + * In fact, each type has the potential for needing more than one log descriptor + * in cases where we have more blocks than will fit in a log descriptor. * Metadata journal entries take up half the space of journaled buffer entries. - * Thus, metadata entries have buf_limit (502) and journaled buffers have - * databuf_limit (251) before they cause a wrap around. * * Also, we need to reserve blocks for revoke journal entries and one for an * overall header for the lot. @@ -1008,7 +1006,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) if (sdp->sd_log_head != sdp->sd_log_flush_head) { log_flush_wait(sdp); log_write_header(sdp, flags); - } else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){ + } else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle) { atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */ trace_gfs2_log_blocks(sdp, -1); log_write_header(sdp, flags); -- GitLab From 625a8edd5e00e7c4c70a125a433ec7598d9f0c27 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 6 Dec 2020 20:10:51 +0100 Subject: [PATCH 1871/1894] gfs2: Minor debugging improvement Split the assert in gfs2_trans_end into two parts. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/trans.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index 6d4bf7ea7b3be..7705f04621f4e 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -109,8 +109,8 @@ void gfs2_trans_end(struct gfs2_sbd *sdp) nbuf -= tr->tr_num_buf_rm; nbuf -= tr->tr_num_databuf_rm; - if (gfs2_assert_withdraw(sdp, (nbuf <= tr->tr_blocks) && - (tr->tr_num_revoke <= tr->tr_revokes))) + if (gfs2_assert_withdraw(sdp, nbuf <= tr->tr_blocks) || + gfs2_assert_withdraw(sdp, tr->tr_num_revoke <= tr->tr_revokes)) gfs2_print_trans(sdp, tr); gfs2_log_commit(sdp, tr); -- GitLab From e7501bf88cd77ed3a1bc65c451600a847c80485b Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sat, 19 Dec 2020 03:11:51 +0100 Subject: [PATCH 1872/1894] gfs2: Rename gfs2_{write => flush}_revokes Function gfs2_write_revokes doesn't actually write any revokes; instead, it adds revokes to the system transaction during a flush. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/log.c | 4 ++-- fs/gfs2/log.h | 2 +- fs/gfs2/lops.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 12e8280f0806f..7375c007bde5c 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -696,7 +696,7 @@ void gfs2_glock_remove_revoke(struct gfs2_glock *gl) } /** - * gfs2_write_revokes - Add as many revokes to the system transaction as we can + * gfs2_flush_revokes - Add as many revokes to the system transaction as we can * @sdp: The GFS2 superblock * * Our usual strategy is to defer writing revokes as much as we can in the hope @@ -707,7 +707,7 @@ void gfs2_glock_remove_revoke(struct gfs2_glock *gl) * been written back. This will basically come at no cost now, and will save * us from having to keep track of those blocks on the AIL2 list later. */ -void gfs2_write_revokes(struct gfs2_sbd *sdp) +void gfs2_flush_revokes(struct gfs2_sbd *sdp) { /* number of revokes we still have room for */ unsigned int max_revokes; diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index 79f97290146ef..a9cdbc990edf1 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h @@ -78,6 +78,6 @@ extern void log_flush_wait(struct gfs2_sbd *sdp); extern int gfs2_logd(void *data); extern void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); extern void gfs2_glock_remove_revoke(struct gfs2_glock *gl); -extern void gfs2_write_revokes(struct gfs2_sbd *sdp); +extern void gfs2_flush_revokes(struct gfs2_sbd *sdp); #endif /* __LOG_DOT_H__ */ diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 3922b26264f5a..8658ebbcb4a90 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -845,7 +845,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) struct page *page; unsigned int length; - gfs2_write_revokes(sdp); + gfs2_flush_revokes(sdp); if (!sdp->sd_log_num_revoke) return; -- GitLab From 6e80674af06c0dca6e0153d492d437f3f07fe3e9 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 11 Dec 2020 14:45:23 +0100 Subject: [PATCH 1873/1894] gfs2: Clean up ail2_empty Clean up the logic in ail2_empty (no functional change). Signed-off-by: Andreas Gruenbacher --- fs/gfs2/log.c | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 7375c007bde5c..e4dc23a245694 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -367,29 +367,33 @@ static void gfs2_ail_empty_tr(struct gfs2_sbd *sdp, struct gfs2_trans *tr, } } +static void __ail2_empty(struct gfs2_sbd *sdp, struct gfs2_trans *tr) +{ + gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail2_list); + list_del(&tr->tr_list); + gfs2_assert_warn(sdp, list_empty(&tr->tr_ail1_list)); + gfs2_assert_warn(sdp, list_empty(&tr->tr_ail2_list)); + gfs2_trans_free(sdp, tr); +} + static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail) { - struct gfs2_trans *tr, *safe; + struct list_head *ail2_list = &sdp->sd_ail2_list; unsigned int old_tail = sdp->sd_log_tail; - int wrap = (new_tail < old_tail); - int a, b, rm; + struct gfs2_trans *tr, *safe; spin_lock(&sdp->sd_ail_lock); - - list_for_each_entry_safe(tr, safe, &sdp->sd_ail2_list, tr_list) { - a = (old_tail <= tr->tr_first); - b = (tr->tr_first < new_tail); - rm = (wrap) ? (a || b) : (a && b); - if (!rm) - continue; - - gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail2_list); - list_del(&tr->tr_list); - gfs2_assert_warn(sdp, list_empty(&tr->tr_ail1_list)); - gfs2_assert_warn(sdp, list_empty(&tr->tr_ail2_list)); - gfs2_trans_free(sdp, tr); + if (old_tail <= new_tail) { + list_for_each_entry_safe(tr, safe, ail2_list, tr_list) { + if (old_tail <= tr->tr_first && tr->tr_first < new_tail) + __ail2_empty(sdp, tr); + } + } else { + list_for_each_entry_safe(tr, safe, ail2_list, tr_list) { + if (old_tail <= tr->tr_first || tr->tr_first < new_tail) + __ail2_empty(sdp, tr); + } } - spin_unlock(&sdp->sd_ail_lock); } -- GitLab From 15e20a301ab06575482c7ab3b442a6830cec928e Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 3 Feb 2021 16:15:27 +0100 Subject: [PATCH 1874/1894] gfs2: Use sb_start_intwrite in gfs2_ail_empty_gl Commit 2e60d7683c8d ("GFS2: update freeze code to use freeze/thaw_super on all nodes") optimized away the sb_start_intwrite ... sb_end_intwrite protection for the on-stack transactions in gfs2_ail_empty_gl with no explanation. I can't think of a valid reason for doing that, so revert that change. This simplifies the next commit. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glops.c | 5 ++++- fs/gfs2/trans.c | 8 +++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 3faa421568b0a..fd1f52fff1709 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -127,9 +127,12 @@ static int gfs2_ail_empty_gl(struct gfs2_glock *gl) * on the stack */ tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes); tr.tr_ip = _RET_IP_; + sb_start_intwrite(sdp->sd_vfs); ret = gfs2_log_reserve(sdp, tr.tr_reserved); - if (ret < 0) + if (ret < 0) { + sb_end_intwrite(sdp->sd_vfs); return ret; + } WARN_ON_ONCE(current->journal_info); current->journal_info = &tr; diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index 7705f04621f4e..ae040b5708680 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -98,10 +98,9 @@ void gfs2_trans_end(struct gfs2_sbd *sdp) if (!test_bit(TR_TOUCHED, &tr->tr_flags)) { gfs2_log_release(sdp, tr->tr_reserved); - if (alloced) { + if (alloced) gfs2_trans_free(sdp, tr); - sb_end_intwrite(sdp->sd_vfs); - } + sb_end_intwrite(sdp->sd_vfs); return; } @@ -121,8 +120,7 @@ void gfs2_trans_end(struct gfs2_sbd *sdp) if (sdp->sd_vfs->s_flags & SB_SYNCHRONOUS) gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL | GFS2_LFC_TRANS_END); - if (alloced) - sb_end_intwrite(sdp->sd_vfs); + sb_end_intwrite(sdp->sd_vfs); } static struct gfs2_bufdata *gfs2_alloc_bufdata(struct gfs2_glock *gl, -- GitLab From c968f5788bc91fe4f86df1a68f0d6471396b4d78 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 29 Jan 2021 16:45:33 +0100 Subject: [PATCH 1875/1894] gfs2: Clean up on-stack transactions Replace the TR_ALLOCED flag by its inverse, TR_ONSTACK: that way, the flag only needs to be set in the exceptional case of on-stack transactions. Split off __gfs2_trans_begin from gfs2_trans_begin and use it to replace the open-coded version in gfs2_ail_empty_gl. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glops.c | 33 ++++++++++----------------------- fs/gfs2/incore.h | 2 +- fs/gfs2/log.c | 2 +- fs/gfs2/trans.c | 40 +++++++++++++++++++++++----------------- fs/gfs2/trans.h | 3 +++ 5 files changed, 38 insertions(+), 42 deletions(-) diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index fd1f52fff1709..a067924341e3f 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -86,16 +86,12 @@ static int gfs2_ail_empty_gl(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct gfs2_trans tr; + unsigned int revokes; int ret; - memset(&tr, 0, sizeof(tr)); - INIT_LIST_HEAD(&tr.tr_buf); - INIT_LIST_HEAD(&tr.tr_databuf); - INIT_LIST_HEAD(&tr.tr_ail1_list); - INIT_LIST_HEAD(&tr.tr_ail2_list); - tr.tr_revokes = atomic_read(&gl->gl_ail_count); + revokes = atomic_read(&gl->gl_ail_count); - if (!tr.tr_revokes) { + if (!revokes) { bool have_revokes; bool log_in_flight; @@ -122,23 +118,14 @@ static int gfs2_ail_empty_gl(struct gfs2_glock *gl) return 0; } - /* A shortened, inline version of gfs2_trans_begin() - * tr->alloced is not set since the transaction structure is - * on the stack */ - tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes); - tr.tr_ip = _RET_IP_; - sb_start_intwrite(sdp->sd_vfs); - ret = gfs2_log_reserve(sdp, tr.tr_reserved); - if (ret < 0) { - sb_end_intwrite(sdp->sd_vfs); - return ret; - } - WARN_ON_ONCE(current->journal_info); - current->journal_info = &tr; - - __gfs2_ail_flush(gl, 0, tr.tr_revokes); - + memset(&tr, 0, sizeof(tr)); + set_bit(TR_ONSTACK, &tr.tr_flags); + ret = __gfs2_trans_begin(&tr, sdp, 0, revokes, _RET_IP_); + if (ret) + goto flush; + __gfs2_ail_flush(gl, 0, revokes); gfs2_trans_end(sdp); + flush: gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL | GFS2_LFC_AIL_EMPTY_GL); diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 8e1ab8ed4abc7..c3f6dd378b10a 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -490,7 +490,7 @@ struct gfs2_quota_data { enum { TR_TOUCHED = 1, TR_ATTACHED = 2, - TR_ALLOCED = 3, + TR_ONSTACK = 3, }; struct gfs2_trans { diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index e4dc23a245694..0fceb60907a2b 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -1114,7 +1114,7 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) if (sdp->sd_log_tr) { gfs2_merge_trans(sdp, tr); } else if (tr->tr_num_buf_new || tr->tr_num_databuf_new) { - gfs2_assert_withdraw(sdp, test_bit(TR_ALLOCED, &tr->tr_flags)); + gfs2_assert_withdraw(sdp, !test_bit(TR_ONSTACK, &tr->tr_flags)); sdp->sd_log_tr = tr; set_bit(TR_ATTACHED, &tr->tr_flags); } diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index ae040b5708680..db29ca2538537 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -37,10 +37,10 @@ static void gfs2_print_trans(struct gfs2_sbd *sdp, const struct gfs2_trans *tr) tr->tr_num_revoke, tr->tr_num_revoke_rm); } -int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, - unsigned int revokes) +int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp, + unsigned int blocks, unsigned int revokes, + unsigned long ip) { - struct gfs2_trans *tr; int error; if (current->journal_info) { @@ -52,15 +52,10 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) return -EROFS; - tr = kmem_cache_zalloc(gfs2_trans_cachep, GFP_NOFS); - if (!tr) - return -ENOMEM; - - tr->tr_ip = _RET_IP_; + tr->tr_ip = ip; tr->tr_blocks = blocks; tr->tr_revokes = revokes; tr->tr_reserved = 1; - set_bit(TR_ALLOCED, &tr->tr_flags); if (blocks) tr->tr_reserved += 6 + blocks; if (revokes) @@ -74,17 +69,28 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, sb_start_intwrite(sdp->sd_vfs); error = gfs2_log_reserve(sdp, tr->tr_reserved); - if (error) - goto fail; + if (error) { + sb_end_intwrite(sdp->sd_vfs); + return error; + } current->journal_info = tr; return 0; +} -fail: - sb_end_intwrite(sdp->sd_vfs); - kmem_cache_free(gfs2_trans_cachep, tr); +int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, + unsigned int revokes) +{ + struct gfs2_trans *tr; + int error; + tr = kmem_cache_zalloc(gfs2_trans_cachep, GFP_NOFS); + if (!tr) + return -ENOMEM; + error = __gfs2_trans_begin(tr, sdp, blocks, revokes, _RET_IP_); + if (error) + kmem_cache_free(gfs2_trans_cachep, tr); return error; } @@ -92,13 +98,12 @@ void gfs2_trans_end(struct gfs2_sbd *sdp) { struct gfs2_trans *tr = current->journal_info; s64 nbuf; - int alloced = test_bit(TR_ALLOCED, &tr->tr_flags); current->journal_info = NULL; if (!test_bit(TR_TOUCHED, &tr->tr_flags)) { gfs2_log_release(sdp, tr->tr_reserved); - if (alloced) + if (!test_bit(TR_ONSTACK, &tr->tr_flags)) gfs2_trans_free(sdp, tr); sb_end_intwrite(sdp->sd_vfs); return; @@ -113,7 +118,8 @@ void gfs2_trans_end(struct gfs2_sbd *sdp) gfs2_print_trans(sdp, tr); gfs2_log_commit(sdp, tr); - if (alloced && !test_bit(TR_ATTACHED, &tr->tr_flags)) + if (!test_bit(TR_ONSTACK, &tr->tr_flags) && + !test_bit(TR_ATTACHED, &tr->tr_flags)) gfs2_trans_free(sdp, tr); up_read(&sdp->sd_log_flush_lock); diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h index 83199ce5a5c51..55f253015cf8a 100644 --- a/fs/gfs2/trans.h +++ b/fs/gfs2/trans.h @@ -34,6 +34,9 @@ static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip, unsigned return rgd->rd_length; } +extern int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp, + unsigned int blocks, unsigned int revokes, + unsigned long ip); extern int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, unsigned int revokes); -- GitLab From f3708fb59f6c2498e8ec4f29010375f600b68642 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 13 Dec 2020 09:21:34 +0100 Subject: [PATCH 1876/1894] gfs2: Get rid of sd_reserving_log This counter and the associated wait queue are only used so that gfs2_make_fs_ro can efficiently wait for all pending log space allocations to fail after setting the filesystem to read-only. This comes at the cost of waking up that wait queue very frequently. Instead, when gfs2_log_reserve fails because the filesystem has become read-only, Wake up sd_log_waitq. In gfs2_make_fs_ro, set the file system read-only and then wait until all the log space has been released. Give up and report the problem after a while. With that, sd_reserving_log and sd_reserving_log_wait can be removed. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/incore.h | 3 --- fs/gfs2/log.c | 17 ++++++++++------- fs/gfs2/log.h | 1 + fs/gfs2/ops_fstype.c | 2 -- fs/gfs2/super.c | 12 ++++++------ fs/gfs2/trans.c | 2 ++ 6 files changed, 19 insertions(+), 18 deletions(-) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index c3f6dd378b10a..8f8676cf72edc 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -849,9 +849,6 @@ struct gfs2_sbd { int sd_log_error; /* First log error */ wait_queue_head_t sd_withdraw_wait; - atomic_t sd_reserving_log; - wait_queue_head_t sd_reserving_log_wait; - unsigned int sd_log_flush_head; spinlock_t sd_ail_lock; diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 0fceb60907a2b..0da05492e8b85 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -397,6 +397,15 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail) spin_unlock(&sdp->sd_ail_lock); } +/** + * gfs2_log_is_empty - Check if the log is empty + * @sdp: The GFS2 superblock + */ + +bool gfs2_log_is_empty(struct gfs2_sbd *sdp) { + return atomic_read(&sdp->sd_log_blks_free) == sdp->sd_jdesc->jd_blocks; +} + /** * gfs2_log_release - Release a given number of log blocks * @sdp: The GFS2 superblock @@ -461,13 +470,9 @@ retry: } while(free_blocks <= wanted); finish_wait(&sdp->sd_log_waitq, &wait); } - atomic_inc(&sdp->sd_reserving_log); if (atomic_cmpxchg(&sdp->sd_log_blks_free, free_blocks, - free_blocks - blks) != free_blocks) { - if (atomic_dec_and_test(&sdp->sd_reserving_log)) - wake_up(&sdp->sd_reserving_log_wait); + free_blocks - blks) != free_blocks) goto retry; - } atomic_sub(blks, &sdp->sd_log_blks_needed); trace_gfs2_log_blocks(sdp, -blks); @@ -483,8 +488,6 @@ retry: gfs2_log_release(sdp, blks); ret = -EROFS; } - if (atomic_dec_and_test(&sdp->sd_reserving_log)) - wake_up(&sdp->sd_reserving_log_wait); return ret; } diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index a9cdbc990edf1..16efbe6142799 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h @@ -64,6 +64,7 @@ static inline void gfs2_ordered_add_inode(struct gfs2_inode *ip) extern void gfs2_ordered_del_inode(struct gfs2_inode *ip); extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct); extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd); +extern bool gfs2_log_is_empty(struct gfs2_sbd *sdp); extern void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks); extern int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks); extern void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 61fce59cb4d38..986dc2ebebf0d 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -136,8 +136,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) init_rwsem(&sdp->sd_log_flush_lock); atomic_set(&sdp->sd_log_in_flight, 0); - atomic_set(&sdp->sd_reserving_log, 0); - init_waitqueue_head(&sdp->sd_reserving_log_wait); init_waitqueue_head(&sdp->sd_log_flush_wait); atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN); mutex_init(&sdp->sd_freeze_mutex); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index ed7a829e9ffe3..f188277f7d481 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -645,13 +645,13 @@ int gfs2_make_fs_ro(struct gfs2_sbd *sdp) gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_SHUTDOWN | GFS2_LFC_MAKE_FS_RO); - wait_event(sdp->sd_reserving_log_wait, - atomic_read(&sdp->sd_reserving_log) == 0); - gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_blks_free) == - sdp->sd_jdesc->jd_blocks); + wait_event_timeout(sdp->sd_log_waitq, + gfs2_log_is_empty(sdp), + HZ * 5); + gfs2_assert_warn(sdp, gfs2_log_is_empty(sdp)); } else { - wait_event_timeout(sdp->sd_reserving_log_wait, - atomic_read(&sdp->sd_reserving_log) == 0, + wait_event_timeout(sdp->sd_log_waitq, + gfs2_log_is_empty(sdp), HZ * 5); } if (gfs2_holder_initialized(&freeze_gh)) diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index db29ca2538537..aefe450e009ed 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -71,6 +71,8 @@ int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp, error = gfs2_log_reserve(sdp, tr->tr_reserved); if (error) { sb_end_intwrite(sdp->sd_vfs); + if (error == -EROFS) + wake_up(&sdp->sd_log_waitq); return error; } -- GitLab From c1eba1b0bca59316f34aa6f70fe5004abba8082d Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sat, 12 Dec 2020 23:30:22 +0100 Subject: [PATCH 1877/1894] gfs2: Move lock flush locking to gfs2_trans_{begin,end} Move the read locking of sd_log_flush_lock from gfs2_log_reserve to gfs2_trans_begin, and its unlocking from gfs2_log_release to gfs2_trans_end. Use gfs2_log_release in two places in which it was open coded before. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/log.c | 28 +++------------------------- fs/gfs2/log.h | 2 +- fs/gfs2/trans.c | 19 ++++++++++++------- 3 files changed, 16 insertions(+), 33 deletions(-) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 0da05492e8b85..e7183c84ffc0e 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -420,7 +420,6 @@ void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks) trace_gfs2_log_blocks(sdp, blks); gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks); - up_read(&sdp->sd_log_flush_lock); } /** @@ -439,22 +438,16 @@ void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks) * with queued waiters, we use an exclusive wait. This means that when we * get woken with enough journal space to get our reservation, we need to * wake the next waiter on the list. - * - * Returns: errno */ -int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks) +void gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks) { - int ret = 0; unsigned reserved_blks = 7 * (4096 / sdp->sd_vfs->s_blocksize); unsigned wanted = blks + reserved_blks; DEFINE_WAIT(wait); int did_wait = 0; unsigned int free_blocks; - if (gfs2_assert_warn(sdp, blks) || - gfs2_assert_warn(sdp, blks <= sdp->sd_jdesc->jd_blocks)) - return -EINVAL; atomic_add(blks, &sdp->sd_log_blks_needed); retry: free_blocks = atomic_read(&sdp->sd_log_blks_free); @@ -482,13 +475,6 @@ retry: */ if (unlikely(did_wait)) wake_up(&sdp->sd_log_waitq); - - down_read(&sdp->sd_log_flush_lock); - if (unlikely(!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))) { - gfs2_log_release(sdp, blks); - ret = -EROFS; - } - return ret; } /** @@ -585,12 +571,7 @@ static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail) unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail); ail2_empty(sdp, new_tail); - - atomic_add(dist, &sdp->sd_log_blks_free); - trace_gfs2_log_blocks(sdp, dist); - gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= - sdp->sd_jdesc->jd_blocks); - + gfs2_log_release(sdp, dist); sdp->sd_log_tail = new_tail; } @@ -1127,10 +1108,7 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) maxres = sdp->sd_log_blks_reserved + tr->tr_reserved; gfs2_assert_withdraw(sdp, maxres >= reserved); unused = maxres - reserved; - atomic_add(unused, &sdp->sd_log_blks_free); - trace_gfs2_log_blocks(sdp, unused); - gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= - sdp->sd_jdesc->jd_blocks); + gfs2_log_release(sdp, unused); sdp->sd_log_blks_reserved = reserved; gfs2_log_unlock(sdp); diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index 16efbe6142799..cbc097ca92441 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h @@ -66,7 +66,7 @@ extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct); extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd); extern bool gfs2_log_is_empty(struct gfs2_sbd *sdp); extern void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks); -extern int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks); +extern void gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks); extern void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, u64 seq, u32 tail, u32 lblock, u32 flags, int op_flags); diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index aefe450e009ed..2269aa7ad69d0 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -41,8 +41,6 @@ int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp, unsigned int blocks, unsigned int revokes, unsigned long ip) { - int error; - if (current->journal_info) { gfs2_print_trans(sdp, current->journal_info); BUG(); @@ -66,14 +64,20 @@ int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp, INIT_LIST_HEAD(&tr->tr_ail1_list); INIT_LIST_HEAD(&tr->tr_ail2_list); + if (gfs2_assert_warn(sdp, tr->tr_reserved <= sdp->sd_jdesc->jd_blocks)) + return -EINVAL; + sb_start_intwrite(sdp->sd_vfs); - error = gfs2_log_reserve(sdp, tr->tr_reserved); - if (error) { + gfs2_log_reserve(sdp, tr->tr_reserved); + + down_read(&sdp->sd_log_flush_lock); + if (unlikely(!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))) { + gfs2_log_release(sdp, tr->tr_reserved); + up_read(&sdp->sd_log_flush_lock); sb_end_intwrite(sdp->sd_vfs); - if (error == -EROFS) - wake_up(&sdp->sd_log_waitq); - return error; + wake_up(&sdp->sd_log_waitq); + return -EROFS; } current->journal_info = tr; @@ -105,6 +109,7 @@ void gfs2_trans_end(struct gfs2_sbd *sdp) if (!test_bit(TR_TOUCHED, &tr->tr_flags)) { gfs2_log_release(sdp, tr->tr_reserved); + up_read(&sdp->sd_log_flush_lock); if (!test_bit(TR_ONSTACK, &tr->tr_flags)) gfs2_trans_free(sdp, tr); sb_end_intwrite(sdp->sd_vfs); -- GitLab From 4a3d049db42b42a36ae84eb8b59d2f5119737253 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 11 Dec 2020 01:36:25 +0100 Subject: [PATCH 1878/1894] gfs2: Don't wait for journal flush in clean_journal Commit 588bff95c94e added gfs2_write_log_header() and started using it in clean_journal(), with an additional call to log_flush_wait() at the end of gfs2_write_log_header() which is unnecessary for clean_journal(). Move that call out of gfs2_write_log_header() to restore the previous behavior. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/log.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index e7183c84ffc0e..cff95889b6f4b 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -757,7 +757,7 @@ void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, u64 dblock; if (gfs2_withdrawn(sdp)) - goto out; + return; page = mempool_alloc(gfs2_page_pool, GFP_NOIO); lh = page_address(page); @@ -812,8 +812,6 @@ void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, gfs2_log_write(sdp, page, sb->s_blocksize, 0, dblock); gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE | op_flags); -out: - log_flush_wait(sdp); } /** @@ -842,6 +840,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags) gfs2_write_log_header(sdp, sdp->sd_jdesc, sdp->sd_log_sequence++, tail, sdp->sd_log_flush_head, flags, op_flags); gfs2_log_incr_head(sdp); + log_flush_wait(sdp); if (sdp->sd_log_tail != tail) log_pull_tail(sdp, tail); -- GitLab From 5ae8fff8d031b5728f4c0e36e971bba42bb78bea Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 13 Dec 2020 11:37:17 +0100 Subject: [PATCH 1879/1894] gfs2: Clean up gfs2_log_reserve Wake up log waiters in gfs2_log_release when log space has actually become available. This is a much better place for the wakeup than gfs2_logd. Check if enough log space is immeditely available before anything else. If there isn't, use io_wait_event to wait instead of open-coding it. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/log.c | 58 +++++++++++++++++++++---------------------------- fs/gfs2/trans.c | 3 +-- 2 files changed, 26 insertions(+), 35 deletions(-) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index cff95889b6f4b..ca9fa481913d1 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -415,11 +415,12 @@ bool gfs2_log_is_empty(struct gfs2_sbd *sdp) { void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks) { - atomic_add(blks, &sdp->sd_log_blks_free); trace_gfs2_log_blocks(sdp, blks); gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks); + if (atomic_read(&sdp->sd_log_blks_needed)) + wake_up(&sdp->sd_log_waitq); } /** @@ -444,36 +445,33 @@ void gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks) { unsigned reserved_blks = 7 * (4096 / sdp->sd_vfs->s_blocksize); unsigned wanted = blks + reserved_blks; - DEFINE_WAIT(wait); - int did_wait = 0; unsigned int free_blocks; - atomic_add(blks, &sdp->sd_log_blks_needed); -retry: free_blocks = atomic_read(&sdp->sd_log_blks_free); - if (unlikely(free_blocks <= wanted)) { - do { - prepare_to_wait_exclusive(&sdp->sd_log_waitq, &wait, - TASK_UNINTERRUPTIBLE); + while (free_blocks >= wanted) { + if (atomic_try_cmpxchg(&sdp->sd_log_blks_free, &free_blocks, + free_blocks - blks)) + return; + } + + atomic_add(blks, &sdp->sd_log_blks_needed); + for (;;) { + if (current != sdp->sd_logd_process) wake_up(&sdp->sd_logd_waitq); - did_wait = 1; - if (atomic_read(&sdp->sd_log_blks_free) <= wanted) - io_schedule(); - free_blocks = atomic_read(&sdp->sd_log_blks_free); - } while(free_blocks <= wanted); - finish_wait(&sdp->sd_log_waitq, &wait); + io_wait_event(sdp->sd_log_waitq, + (free_blocks = atomic_read(&sdp->sd_log_blks_free), + free_blocks >= wanted)); + do { + if (atomic_try_cmpxchg(&sdp->sd_log_blks_free, + &free_blocks, + free_blocks - blks)) + goto reserved; + } while (free_blocks >= wanted); } - if (atomic_cmpxchg(&sdp->sd_log_blks_free, free_blocks, - free_blocks - blks) != free_blocks) - goto retry; - atomic_sub(blks, &sdp->sd_log_blks_needed); - trace_gfs2_log_blocks(sdp, -blks); - /* - * If we waited, then so might others, wake them up _after_ we get - * our share of the log. - */ - if (unlikely(did_wait)) +reserved: + trace_gfs2_log_blocks(sdp, -blks); + if (atomic_sub_return(blks, &sdp->sd_log_blks_needed)) wake_up(&sdp->sd_log_waitq); } @@ -1107,7 +1105,8 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) maxres = sdp->sd_log_blks_reserved + tr->tr_reserved; gfs2_assert_withdraw(sdp, maxres >= reserved); unused = maxres - reserved; - gfs2_log_release(sdp, unused); + if (unused) + gfs2_log_release(sdp, unused); sdp->sd_log_blks_reserved = reserved; gfs2_log_unlock(sdp); @@ -1192,7 +1191,6 @@ int gfs2_logd(void *data) struct gfs2_sbd *sdp = data; unsigned long t = 1; DEFINE_WAIT(wait); - bool did_flush; while (!kthread_should_stop()) { @@ -1211,12 +1209,10 @@ int gfs2_logd(void *data) continue; } - did_flush = false; if (gfs2_jrnl_flush_reqd(sdp) || t == 0) { gfs2_ail1_empty(sdp, 0); gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL | GFS2_LFC_LOGD_JFLUSH_REQD); - did_flush = true; } if (gfs2_ail_flush_reqd(sdp)) { @@ -1225,12 +1221,8 @@ int gfs2_logd(void *data) gfs2_ail1_empty(sdp, 0); gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL | GFS2_LFC_LOGD_AIL_FLUSH_REQD); - did_flush = true; } - if (!gfs2_ail_flush_reqd(sdp) || did_flush) - wake_up(&sdp->sd_log_waitq); - t = gfs2_tune_get(sdp, gt_logd_secs) * HZ; try_to_freeze(); diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index 2269aa7ad69d0..cac93b2004cf0 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -73,10 +73,9 @@ int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp, down_read(&sdp->sd_log_flush_lock); if (unlikely(!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))) { - gfs2_log_release(sdp, tr->tr_reserved); up_read(&sdp->sd_log_flush_lock); + gfs2_log_release(sdp, tr->tr_reserved); sb_end_intwrite(sdp->sd_vfs); - wake_up(&sdp->sd_log_waitq); return -EROFS; } -- GitLab From 297de3180dd7ecbb3798f32e58691168587a8f85 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 6 Dec 2020 14:04:36 +0100 Subject: [PATCH 1880/1894] gfs2: Use a tighter bound in gfs2_trans_begin Use a tighter bound for the number of blocks required by transactions in gfs2_trans_begin: in the worst case, we'll have mixed data and metadata, so we'll need a log desciptor for each type. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/trans.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index cac93b2004cf0..f73d6b8f3b53b 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -54,8 +54,14 @@ int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp, tr->tr_blocks = blocks; tr->tr_revokes = revokes; tr->tr_reserved = 1; - if (blocks) - tr->tr_reserved += 6 + blocks; + if (blocks) { + /* + * The reserved blocks are either used for data or metadata. + * We can have mixed data and metadata, each with its own log + * descriptor block; see calc_reserved(). + */ + tr->tr_reserved += blocks + 1 + DIV_ROUND_UP(blocks - 1, databuf_limit(sdp)); + } if (revokes) tr->tr_reserved += gfs2_struct2blk(sdp, revokes); INIT_LIST_HEAD(&tr->tr_databuf); -- GitLab From 5cb738b5fbd2f3ebe9dec0e428577a4f2128adbe Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sat, 19 Dec 2020 07:15:17 +0100 Subject: [PATCH 1881/1894] gfs2: Get rid of current_tail() Keep the current value of the updated log tail in the super block as sb_log_flush_tail instead of computing it on the fly. This avoids unnecessary sd_ail_lock taking and cleans up the code. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/incore.h | 5 ++-- fs/gfs2/log.c | 72 +++++++++++++++++++++++++----------------------- fs/gfs2/log.h | 4 ++- 3 files changed, 44 insertions(+), 37 deletions(-) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 8f8676cf72edc..51656b0531706 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -838,8 +838,6 @@ struct gfs2_sbd { wait_queue_head_t sd_logd_waitq; u64 sd_log_sequence; - unsigned int sd_log_head; - unsigned int sd_log_tail; int sd_log_idle; struct rw_semaphore sd_log_flush_lock; @@ -849,6 +847,9 @@ struct gfs2_sbd { int sd_log_error; /* First log error */ wait_queue_head_t sd_withdraw_wait; + unsigned int sd_log_tail; + unsigned int sd_log_flush_tail; + unsigned int sd_log_head; unsigned int sd_log_flush_head; spinlock_t sd_ail_lock; diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index ca9fa481913d1..6b5584fd8ff7f 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -242,6 +242,28 @@ static void gfs2_ail1_start(struct gfs2_sbd *sdp) return gfs2_ail1_flush(sdp, &wbc); } +static void gfs2_log_update_flush_tail(struct gfs2_sbd *sdp) +{ + unsigned int new_flush_tail = sdp->sd_log_head; + struct gfs2_trans *tr; + + if (!list_empty(&sdp->sd_ail1_list)) { + tr = list_last_entry(&sdp->sd_ail1_list, + struct gfs2_trans, tr_list); + new_flush_tail = tr->tr_first; + } + sdp->sd_log_flush_tail = new_flush_tail; +} + +static void gfs2_log_update_head(struct gfs2_sbd *sdp) +{ + unsigned int new_head = sdp->sd_log_flush_head; + + if (sdp->sd_log_flush_tail == sdp->sd_log_head) + sdp->sd_log_flush_tail = new_head; + sdp->sd_log_head = new_head; +} + /** * gfs2_ail1_empty_one - Check whether or not a trans in the AIL has been synced * @sdp: the filesystem @@ -317,6 +339,7 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int max_revokes) else oldest_tr = 0; } + gfs2_log_update_flush_tail(sdp); ret = list_empty(&sdp->sd_ail1_list); spin_unlock(&sdp->sd_ail_lock); @@ -544,30 +567,14 @@ static unsigned int calc_reserved(struct gfs2_sbd *sdp) return reserved; } -static unsigned int current_tail(struct gfs2_sbd *sdp) -{ - struct gfs2_trans *tr; - unsigned int tail; - - spin_lock(&sdp->sd_ail_lock); - - if (list_empty(&sdp->sd_ail1_list)) { - tail = sdp->sd_log_head; - } else { - tr = list_last_entry(&sdp->sd_ail1_list, struct gfs2_trans, - tr_list); - tail = tr->tr_first; - } - - spin_unlock(&sdp->sd_ail_lock); - - return tail; -} - -static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail) +static void log_pull_tail(struct gfs2_sbd *sdp) { - unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail); + unsigned int new_tail = sdp->sd_log_flush_tail; + unsigned int dist; + if (new_tail == sdp->sd_log_tail) + return; + dist = log_distance(sdp, new_tail, sdp->sd_log_tail); ail2_empty(sdp, new_tail); gfs2_log_release(sdp, dist); sdp->sd_log_tail = new_tail; @@ -822,26 +829,23 @@ void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, static void log_write_header(struct gfs2_sbd *sdp, u32 flags) { - unsigned int tail; int op_flags = REQ_PREFLUSH | REQ_FUA | REQ_META | REQ_SYNC; enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state); gfs2_assert_withdraw(sdp, (state != SFS_FROZEN)); - tail = current_tail(sdp); if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) { gfs2_ordered_wait(sdp); log_flush_wait(sdp); op_flags = REQ_SYNC | REQ_META | REQ_PRIO; } - sdp->sd_log_idle = (tail == sdp->sd_log_flush_head); - gfs2_write_log_header(sdp, sdp->sd_jdesc, sdp->sd_log_sequence++, tail, - sdp->sd_log_flush_head, flags, op_flags); + sdp->sd_log_idle = (sdp->sd_log_flush_tail == sdp->sd_log_flush_head); + gfs2_write_log_header(sdp, sdp->sd_jdesc, sdp->sd_log_sequence++, + sdp->sd_log_flush_tail, sdp->sd_log_flush_head, + flags, op_flags); gfs2_log_incr_head(sdp); log_flush_wait(sdp); - - if (sdp->sd_log_tail != tail) - log_pull_tail(sdp, tail); + log_pull_tail(sdp); } /** @@ -991,7 +995,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) if (sdp->sd_log_head != sdp->sd_log_flush_head) { log_flush_wait(sdp); log_write_header(sdp, flags); - } else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle) { + } else if (sdp->sd_log_tail != sdp->sd_log_flush_tail && !sdp->sd_log_idle) { atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */ trace_gfs2_log_blocks(sdp, -1); log_write_header(sdp, flags); @@ -1001,7 +1005,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) lops_after_commit(sdp, tr); gfs2_log_lock(sdp); - sdp->sd_log_head = sdp->sd_log_flush_head; + gfs2_log_update_head(sdp); sdp->sd_log_blks_reserved = 0; sdp->sd_log_committed_revoke = 0; @@ -1021,7 +1025,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */ trace_gfs2_log_blocks(sdp, -1); log_write_header(sdp, flags); - sdp->sd_log_head = sdp->sd_log_flush_head; + gfs2_log_update_head(sdp); } if (flags & (GFS2_LOG_HEAD_FLUSH_SHUTDOWN | GFS2_LOG_HEAD_FLUSH_FREEZE)) @@ -1156,7 +1160,7 @@ static void gfs2_log_shutdown(struct gfs2_sbd *sdp) gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail); gfs2_assert_warn(sdp, list_empty(&sdp->sd_ail2_list)); - sdp->sd_log_head = sdp->sd_log_flush_head; + gfs2_log_update_head(sdp); sdp->sd_log_tail = sdp->sd_log_head; } diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index cbc097ca92441..b36a3539f3524 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h @@ -43,7 +43,9 @@ static inline void gfs2_log_pointers_init(struct gfs2_sbd *sdp, if (++value == sdp->sd_jdesc->jd_blocks) { value = 0; } - sdp->sd_log_head = sdp->sd_log_tail = value; + sdp->sd_log_tail = value; + sdp->sd_log_flush_tail = value; + sdp->sd_log_head = value; } static inline void gfs2_ordered_add_inode(struct gfs2_inode *ip) -- GitLab From 76fce6548961a0c6246c4796e71800cdc63d5851 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sat, 19 Dec 2020 10:54:51 +0100 Subject: [PATCH 1882/1894] gfs2: Move function gfs2_ail_empty_tr Move this function further up in log.c so that we can use it in the next patch. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/log.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 6b5584fd8ff7f..6fd4ded1e357c 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -264,6 +264,23 @@ static void gfs2_log_update_head(struct gfs2_sbd *sdp) sdp->sd_log_head = new_head; } +/** + * gfs2_ail_empty_tr - empty one of the ail lists of a transaction + */ + +static void gfs2_ail_empty_tr(struct gfs2_sbd *sdp, struct gfs2_trans *tr, + struct list_head *head) +{ + struct gfs2_bufdata *bd; + + while (!list_empty(head)) { + bd = list_first_entry(head, struct gfs2_bufdata, + bd_ail_st_list); + gfs2_assert(sdp, bd->bd_tr == tr); + gfs2_remove_from_ail(bd); + } +} + /** * gfs2_ail1_empty_one - Check whether or not a trans in the AIL has been synced * @sdp: the filesystem @@ -373,23 +390,6 @@ static void gfs2_ail1_wait(struct gfs2_sbd *sdp) spin_unlock(&sdp->sd_ail_lock); } -/** - * gfs2_ail_empty_tr - empty one of the ail lists for a transaction - */ - -static void gfs2_ail_empty_tr(struct gfs2_sbd *sdp, struct gfs2_trans *tr, - struct list_head *head) -{ - struct gfs2_bufdata *bd; - - while (!list_empty(head)) { - bd = list_first_entry(head, struct gfs2_bufdata, - bd_ail_st_list); - gfs2_assert(sdp, bd->bd_tr == tr); - gfs2_remove_from_ail(bd); - } -} - static void __ail2_empty(struct gfs2_sbd *sdp, struct gfs2_trans *tr) { gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail2_list); -- GitLab From f38e998fbbb5da6a097ecd4b2700ba95eabab0c9 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 5 Oct 2020 19:39:18 +0200 Subject: [PATCH 1883/1894] gfs2: Also reflect single-block allocations in rgd->rd_extfail_pt Pass a non-NULL minext to gfs2_rbm_find even for single-block allocations. In gfs2_rbm_find, also set rgd->rd_extfail_pt when a single-block allocation fails in a resource group: there is no reason for treating that case differently. In gfs2_reservation_check_and_update, only check how many free blocks we have if more than one block is requested; we already know there's at least one free block. In addition, when allocating N blocks fails in gfs2_rbm_find, we need to set rd_extfail_pt to N - 1 rather than N: rd_extfail_pt defines the biggest allocation that might still succeed. Finally, reset rd_extfail_pt when updating the resource group statistics in update_rgrp_lvb, as we already do in gfs2_rgrp_bh_get. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/rgrp.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index f878d82750f7b..ad14a920f3210 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1278,6 +1278,8 @@ static int update_rgrp_lvb(struct gfs2_rgrpd *rgd) rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free); rgrp_set_bitmap_flags(rgd); rgd->rd_free_clone = rgd->rd_free; + /* max out the rgrp allocation failure point */ + rgd->rd_extfail_pt = rgd->rd_free; rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes); rgd->rd_igeneration = be64_to_cpu(rgd->rd_rgl->rl_igeneration); return 0; @@ -1676,7 +1678,7 @@ static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, * If we have a minimum extent length, then skip over any extent * which is less than the min extent length in size. */ - if (minext) { + if (minext > 1) { extlen = gfs2_free_extlen(rbm, minext); if (extlen <= maxext->len) goto fail; @@ -1711,7 +1713,7 @@ fail: * gfs2_rbm_find - Look for blocks of a particular state * @rbm: Value/result starting position and final position * @state: The state which we want to find - * @minext: Pointer to the requested extent length (NULL for a single block) + * @minext: Pointer to the requested extent length * This is updated to be the actual reservation size. * @ip: If set, check for reservations * @nowrap: Stop looking at the end of the rgrp, rather than wrapping @@ -1767,8 +1769,7 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext, if (ip == NULL) return 0; - ret = gfs2_reservation_check_and_update(rbm, ip, - minext ? *minext : 0, + ret = gfs2_reservation_check_and_update(rbm, ip, *minext, &maxext); if (ret == 0) return 0; @@ -1800,7 +1801,7 @@ next_iter: break; } - if (minext == NULL || state != GFS2_BLKST_FREE) + if (state != GFS2_BLKST_FREE) return -ENOSPC; /* If the extent was too small, and it's smaller than the smallest @@ -1808,7 +1809,7 @@ next_iter: useless to search this rgrp again for this amount or more. */ if (wrapped && (scan_from_start || rbm->bii > last_bii) && *minext < rbm->rgd->rd_extfail_pt) - rbm->rgd->rd_extfail_pt = *minext; + rbm->rgd->rd_extfail_pt = *minext - 1; /* If the maximum extent we found is big enough to fulfill the minimum requirements, use it anyway. */ @@ -2382,14 +2383,15 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, struct buffer_head *dibh; struct gfs2_rbm rbm = { .rgd = ip->i_res.rs_rgd, }; u64 block; /* block, within the file system scope */ + u32 minext = 1; int error; gfs2_set_alloc_start(&rbm, ip, dinode); - error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, ip, false); + error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &minext, ip, false); if (error == -ENOSPC) { gfs2_set_alloc_start(&rbm, ip, dinode); - error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, NULL, false); + error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &minext, NULL, false); } /* Since all blocks are reserved in advance, this shouldn't happen */ -- GitLab From 3d39fcd16d885ebb0d9a2e345accb0e5ae13fed9 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 9 Oct 2020 19:30:16 +0200 Subject: [PATCH 1884/1894] gfs2: Only pass reservation down to gfs2_rbm_find Only pass the current reservation down to gfs2_rbm_find rather than the entire inode; we don't need any of the other information. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/rgrp.c | 26 +++++++++++++------------- fs/gfs2/rgrp.h | 2 +- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index ad14a920f3210..deb553624b3c9 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -79,7 +79,7 @@ static const char valid_change[16] = { }; static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext, - const struct gfs2_inode *ip, bool nowrap); + struct gfs2_blkreserv *rs, bool nowrap); /** @@ -1590,7 +1590,7 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, if (WARN_ON(gfs2_rbm_from_block(&rbm, goal))) return; - ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &extlen, ip, true); + ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &extlen, &ip->i_res, true); if (ret == 0) { rs->rs_start = gfs2_rbm_to_block(&rbm); rs->rs_free = extlen; @@ -1606,7 +1606,7 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, * @rgd: The resource group * @block: The starting block * @length: The required length - * @ip: Ignore any reservations for this inode + * @ignore_rs: Reservation to ignore * * If the block does not appear in any reservation, then return the * block number unchanged. If it does appear in the reservation, then @@ -1616,7 +1616,7 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, u32 length, - const struct gfs2_inode *ip) + struct gfs2_blkreserv *ignore_rs) { struct gfs2_blkreserv *rs; struct rb_node *n; @@ -1636,7 +1636,7 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, } if (n) { - while ((rs_cmp(block, length, rs) == 0) && (&ip->i_res != rs)) { + while (rs_cmp(block, length, rs) == 0 && rs != ignore_rs) { block = rs->rs_start + rs->rs_free; n = n->rb_right; if (n == NULL) @@ -1652,7 +1652,7 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, /** * gfs2_reservation_check_and_update - Check for reservations during block alloc * @rbm: The current position in the resource group - * @ip: The inode for which we are searching for blocks + * @rs: Our own reservation * @minext: The minimum extent length * @maxext: A pointer to the maximum extent structure * @@ -1666,7 +1666,7 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, */ static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, - const struct gfs2_inode *ip, + struct gfs2_blkreserv *rs, u32 minext, struct gfs2_extent *maxext) { @@ -1688,7 +1688,7 @@ static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, * Check the extent which has been found against the reservations * and skip if parts of it are already reserved */ - nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, ip); + nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, rs); if (nblock == block) { if (!minext || extlen >= minext) return 0; @@ -1715,7 +1715,7 @@ fail: * @state: The state which we want to find * @minext: Pointer to the requested extent length * This is updated to be the actual reservation size. - * @ip: If set, check for reservations + * @rs: Our own reservation (NULL to skip checking for reservations) * @nowrap: Stop looking at the end of the rgrp, rather than wrapping * around until we've reached the starting point. * @@ -1729,7 +1729,7 @@ fail: */ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext, - const struct gfs2_inode *ip, bool nowrap) + struct gfs2_blkreserv *rs, bool nowrap) { bool scan_from_start = rbm->bii == 0 && rbm->offset == 0; struct buffer_head *bh; @@ -1766,10 +1766,10 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext, goto next_bitmap; } rbm->offset = offset; - if (ip == NULL) + if (!rs) return 0; - ret = gfs2_reservation_check_and_update(rbm, ip, *minext, + ret = gfs2_reservation_check_and_update(rbm, rs, *minext, &maxext); if (ret == 0) return 0; @@ -2387,7 +2387,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, int error; gfs2_set_alloc_start(&rbm, ip, dinode); - error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &minext, ip, false); + error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &minext, &ip->i_res, false); if (error == -ENOSPC) { gfs2_set_alloc_start(&rbm, ip, dinode); diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index 9a587ada51eda..be1b2034f5ee4 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -77,7 +77,7 @@ extern int gfs2_fitrim(struct file *filp, void __user *argp); /* This is how to tell if a reservation is in the rgrp tree: */ static inline bool gfs2_rs_active(const struct gfs2_blkreserv *rs) { - return rs && !RB_EMPTY_NODE(&rs->rs_node); + return !RB_EMPTY_NODE(&rs->rs_node); } static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block) -- GitLab From b2598965dc84bbcf8dd54accc80f60820e5d4965 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 12 Oct 2020 17:23:50 +0200 Subject: [PATCH 1885/1894] gfs2: Don't search for unreserved space twice If gfs2_inplace_reserve has chosen a resource group but it couldn't make a reservation there, there are too many other reservations in that resource group. In that case, don't even try to respect existing reservations in gfs2_alloc_blocks. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/rgrp.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index deb553624b3c9..95ff4a3c89de9 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -2384,11 +2384,12 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, struct gfs2_rbm rbm = { .rgd = ip->i_res.rs_rgd, }; u64 block; /* block, within the file system scope */ u32 minext = 1; - int error; - - gfs2_set_alloc_start(&rbm, ip, dinode); - error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &minext, &ip->i_res, false); + int error = -ENOSPC; + if (gfs2_rs_active(&ip->i_res)) { + gfs2_set_alloc_start(&rbm, ip, dinode); + error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &minext, &ip->i_res, false); + } if (error == -ENOSPC) { gfs2_set_alloc_start(&rbm, ip, dinode); error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &minext, NULL, false); -- GitLab From 0ec9b9ea4f83303bfd8f052a3d8b2bd179b002e1 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 21 Oct 2020 16:37:54 +0200 Subject: [PATCH 1886/1894] gfs2: Check for active reservation in gfs2_release In gfs2_release, check if the inode has an active reservation to avoid unnecessary lock taking. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index b39b339feddc9..177c4d74ca303 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -716,10 +716,10 @@ static int gfs2_release(struct inode *inode, struct file *file) kfree(file->private_data); file->private_data = NULL; - if (file->f_mode & FMODE_WRITE) { + if (gfs2_rs_active(&ip->i_res)) gfs2_rs_delete(ip, &inode->i_writecount); + if (file->f_mode & FMODE_WRITE) gfs2_qa_put(ip); - } return 0; } -- GitLab From 07974d2a2a98bc3ce15f3411ebe5175c0af8f1c3 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 22 Oct 2020 20:34:29 +0200 Subject: [PATCH 1887/1894] gfs2: Rename rs_{free -> requested} and rd_{reserved -> requested} We keep track of what we've so far been referring to as reservations in rd_rstree: the nodes in that tree indicate where in a resource group we'd like to allocate the next couple of blocks for a particular inode. Local processes take those as hints, but they may still "steal" blocks from those extents, so when actually allocating a block, we must double check in the bitmap whether that block is actually still free. Likewise, other cluster nodes may "steal" such blocks as well. One of the following patches introduces resource group glock sharing, i.e., sharing of an exclusively locked resource group glock among local processes to speed up allocations. To make that work, we'll need to keep track of how many blocks we've actually reserved for each inode, so we end up with two different kinds of reservations. Distinguish these two kinds by referring to blocks which are reserved but may still be "stolen" as "requested". This rename also makes it more obvious that rs_requested and rd_requested are strongly related. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/incore.h | 6 +++--- fs/gfs2/rgrp.c | 42 +++++++++++++++++++++--------------------- fs/gfs2/trace_gfs2.h | 18 +++++++++--------- 3 files changed, 33 insertions(+), 33 deletions(-) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index cca806ff611ce..0640d0c70a75e 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -106,7 +106,7 @@ struct gfs2_rgrpd { u32 rd_data; /* num of data blocks in rgrp */ u32 rd_bitbytes; /* number of bytes in data bitmaps */ u32 rd_free; - u32 rd_reserved; /* number of blocks reserved */ + u32 rd_requested; /* number of blocks in rd_rstree */ u32 rd_free_clone; u32 rd_dinodes; u64 rd_igeneration; @@ -290,8 +290,8 @@ struct gfs2_qadata { /* quota allocation data */ struct gfs2_blkreserv { struct rb_node rs_node; /* node within rd_rstree */ struct gfs2_rgrpd *rs_rgd; - u64 rs_start; /* start of reservation */ - u32 rs_free; /* how many blocks are still free */ + u64 rs_start; + u32 rs_requested; }; /* diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 95ff4a3c89de9..bc8d1ab9e07f1 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -203,7 +203,7 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state) */ static inline int rs_cmp(u64 start, u32 len, struct gfs2_blkreserv *rs) { - if (start >= rs->rs_start + rs->rs_free) + if (start >= rs->rs_start + rs->rs_requested) return 1; if (rs->rs_start >= start + len) return -1; @@ -625,7 +625,7 @@ static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs, fs_id_buf, (unsigned long long)ip->i_no_addr, (unsigned long long)rs->rs_start, - rs->rs_free); + rs->rs_requested); } /** @@ -645,17 +645,17 @@ static void __rs_deltree(struct gfs2_blkreserv *rs) rb_erase(&rs->rs_node, &rgd->rd_rstree); RB_CLEAR_NODE(&rs->rs_node); - if (rs->rs_free) { - /* return reserved blocks to the rgrp */ - BUG_ON(rs->rs_rgd->rd_reserved < rs->rs_free); - rs->rs_rgd->rd_reserved -= rs->rs_free; + if (rs->rs_requested) { + /* return requested blocks to the rgrp */ + BUG_ON(rs->rs_rgd->rd_requested < rs->rs_requested); + rs->rs_rgd->rd_requested -= rs->rs_requested; /* The rgrp extent failure point is likely not to increase; it will only do so if the freed blocks are somehow contiguous with a span of free blocks that follows. Still, it will force the number to be recalculated later. */ - rgd->rd_extfail_pt += rs->rs_free; - rs->rs_free = 0; + rgd->rd_extfail_pt += rs->rs_requested; + rs->rs_requested = 0; } } @@ -672,7 +672,7 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs) if (rgd) { spin_lock(&rgd->rd_rsspin); __rs_deltree(rs); - BUG_ON(rs->rs_free); + BUG_ON(rs->rs_requested); spin_unlock(&rgd->rd_rsspin); } } @@ -1504,7 +1504,7 @@ static void rs_insert(struct gfs2_inode *ip) rb_entry(*newn, struct gfs2_blkreserv, rs_node); parent = *newn; - rc = rs_cmp(rs->rs_start, rs->rs_free, cur); + rc = rs_cmp(rs->rs_start, rs->rs_requested, cur); if (rc > 0) newn = &((*newn)->rb_right); else if (rc < 0) @@ -1520,7 +1520,7 @@ static void rs_insert(struct gfs2_inode *ip) rb_insert_color(&rs->rs_node, &rgd->rd_rstree); /* Do our rgrp accounting for the reservation */ - rgd->rd_reserved += rs->rs_free; /* blocks reserved */ + rgd->rd_requested += rs->rs_requested; /* blocks requested */ spin_unlock(&rgd->rd_rsspin); trace_gfs2_rs(rs, TRACE_RS_INSERT); } @@ -1541,9 +1541,9 @@ static inline u32 rgd_free(struct gfs2_rgrpd *rgd, struct gfs2_blkreserv *rs) { u32 tot_reserved, tot_free; - if (WARN_ON_ONCE(rgd->rd_reserved < rs->rs_free)) + if (WARN_ON_ONCE(rgd->rd_requested < rs->rs_requested)) return 0; - tot_reserved = rgd->rd_reserved - rs->rs_free; + tot_reserved = rgd->rd_requested - rs->rs_requested; if (rgd->rd_free_clone < tot_reserved) tot_reserved = 0; @@ -1578,7 +1578,7 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, extlen = max_t(u32, atomic_read(&ip->i_sizehint), ap->target); extlen = clamp(extlen, (u32)RGRP_RSRV_MINBLKS, free_blocks); } - if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen)) + if ((rgd->rd_free_clone < rgd->rd_requested) || (free_blocks < extlen)) return; /* Find bitmap block that contains bits for goal block */ @@ -1593,7 +1593,7 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &extlen, &ip->i_res, true); if (ret == 0) { rs->rs_start = gfs2_rbm_to_block(&rbm); - rs->rs_free = extlen; + rs->rs_requested = extlen; rs_insert(ip); } else { if (goal == rgd->rd_last_alloc + rgd->rd_data0) @@ -1637,7 +1637,7 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, if (n) { while (rs_cmp(block, length, rs) == 0 && rs != ignore_rs) { - block = rs->rs_start + rs->rs_free; + block = rs->rs_start + rs->rs_requested; n = n->rb_right; if (n == NULL) break; @@ -2263,7 +2263,7 @@ void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_rgrpd *rgd, fs_id_buf, (unsigned long long)rgd->rd_addr, rgd->rd_flags, rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes, - rgd->rd_reserved, rgd->rd_extfail_pt); + rgd->rd_requested, rgd->rd_extfail_pt); if (rgd->rd_sbd->sd_args.ar_rgrplvb) { struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl; @@ -2318,12 +2318,12 @@ static void gfs2_adjust_reservation(struct gfs2_inode *ip, unsigned int rlen; rs->rs_start += len; - rlen = min(rs->rs_free, len); - rs->rs_free -= rlen; - rgd->rd_reserved -= rlen; + rlen = min(rs->rs_requested, len); + rs->rs_requested -= rlen; + rgd->rd_requested -= rlen; trace_gfs2_rs(rs, TRACE_RS_CLAIM); if (rs->rs_start < rgd->rd_data0 + rgd->rd_data && - rs->rs_free) + rs->rs_requested) goto out; /* We used up our block reservation, so we should reserve more blocks next time. */ diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h index e461a691e5e87..d24bdcdd42e50 100644 --- a/fs/gfs2/trace_gfs2.h +++ b/fs/gfs2/trace_gfs2.h @@ -560,7 +560,7 @@ TRACE_EVENT(gfs2_block_alloc, __field( u8, block_state ) __field( u64, rd_addr ) __field( u32, rd_free_clone ) - __field( u32, rd_reserved ) + __field( u32, rd_requested ) ), TP_fast_assign( @@ -571,7 +571,7 @@ TRACE_EVENT(gfs2_block_alloc, __entry->block_state = block_state; __entry->rd_addr = rgd->rd_addr; __entry->rd_free_clone = rgd->rd_free_clone; - __entry->rd_reserved = rgd->rd_reserved; + __entry->rd_requested = rgd->rd_requested; ), TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u rr:%lu", @@ -581,7 +581,7 @@ TRACE_EVENT(gfs2_block_alloc, (unsigned long)__entry->len, block_state_name(__entry->block_state), (unsigned long long)__entry->rd_addr, - __entry->rd_free_clone, (unsigned long)__entry->rd_reserved) + __entry->rd_free_clone, (unsigned long)__entry->rd_requested) ); /* Keep track of multi-block reservations as they are allocated/freed */ @@ -595,10 +595,10 @@ TRACE_EVENT(gfs2_rs, __field( dev_t, dev ) __field( u64, rd_addr ) __field( u32, rd_free_clone ) - __field( u32, rd_reserved ) + __field( u32, rd_requested ) __field( u64, inum ) __field( u64, start ) - __field( u32, free ) + __field( u32, requested ) __field( u8, func ) ), @@ -606,11 +606,11 @@ TRACE_EVENT(gfs2_rs, __entry->dev = rs->rs_rgd->rd_sbd->sd_vfs->s_dev; __entry->rd_addr = rs->rs_rgd->rd_addr; __entry->rd_free_clone = rs->rs_rgd->rd_free_clone; - __entry->rd_reserved = rs->rs_rgd->rd_reserved; + __entry->rd_requested = rs->rs_rgd->rd_requested; __entry->inum = container_of(rs, struct gfs2_inode, i_res)->i_no_addr; __entry->start = rs->rs_start; - __entry->free = rs->rs_free; + __entry->requested = rs->rs_requested; __entry->func = func; ), @@ -620,8 +620,8 @@ TRACE_EVENT(gfs2_rs, (unsigned long long)__entry->start, (unsigned long long)__entry->rd_addr, (unsigned long)__entry->rd_free_clone, - (unsigned long)__entry->rd_reserved, - rs_func_name(__entry->func), (unsigned long)__entry->free) + (unsigned long)__entry->rd_requested, + rs_func_name(__entry->func), (unsigned long)__entry->requested) ); #endif /* _TRACE_GFS2_H */ -- GitLab From 725d0e9d464d567cd9290e29879d8bffc92013f8 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 2 Oct 2018 14:59:54 +0100 Subject: [PATCH 1888/1894] gfs2: Add per-reservation reserved block accounting Add a rs_reserved field to struct gfs2_blkreserv to keep track of the number of blocks reserved by this particular reservation, and a rd_reserved field to struct gfs2_rgrpd to keep track of the total number of reserved blocks in the resource group. Those blocks are exclusively reserved, as opposed to the rs_requested / rd_requested blocks which are tracked in the reservation tree (rd_rstree) and which can be stolen if necessary. When making a reservation with gfs2_inplace_reserve, rs_reserved is set to somewhere between ap->min_target and ap->target depending on the number of free blocks in the resource group. When allocating blocks with gfs2_alloc_blocks, rs_reserved is decremented accordingly. Eventually, any reserved but not consumed blocks are returned to the resource group by gfs2_inplace_release. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/file.c | 4 +-- fs/gfs2/incore.h | 2 ++ fs/gfs2/lops.c | 1 + fs/gfs2/rgrp.c | 80 +++++++++++++++++++++++++++++++++----------- fs/gfs2/trace_gfs2.h | 23 +++++++++---- 5 files changed, 82 insertions(+), 28 deletions(-) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 177c4d74ca303..294087516ce0e 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -1115,8 +1115,8 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t goto out_qunlock; /* check if the selected rgrp limits our max_blks further */ - if (ap.allowed && ap.allowed < max_blks) - max_blks = ap.allowed; + if (ip->i_res.rs_reserved < max_blks) + max_blks = ip->i_res.rs_reserved; /* Almost done. Calculate bytes that can be written using * max_blks. We also recompute max_bytes, data_blocks and diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 0640d0c70a75e..2679ba54798c8 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -107,6 +107,7 @@ struct gfs2_rgrpd { u32 rd_bitbytes; /* number of bytes in data bitmaps */ u32 rd_free; u32 rd_requested; /* number of blocks in rd_rstree */ + u32 rd_reserved; /* number of reserved blocks */ u32 rd_free_clone; u32 rd_dinodes; u64 rd_igeneration; @@ -292,6 +293,7 @@ struct gfs2_blkreserv { struct gfs2_rgrpd *rs_rgd; u64 rs_start; u32 rs_requested; + u32 rs_reserved; /* number of reserved blocks */ }; /* diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 3922b26264f5a..802bc15f9f11a 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -84,6 +84,7 @@ static void maybe_release_space(struct gfs2_bufdata *bd) bd->bd_bh->b_data + bi->bi_offset, bi->bi_bytes); clear_bit(GBF_FULL, &bi->bi_flags); rgd->rd_free_clone = rgd->rd_free; + BUG_ON(rgd->rd_free_clone < rgd->rd_reserved); rgd->rd_extfail_pt = rgd->rd_free; } diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index bc8d1ab9e07f1..f1df5e75364ab 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1229,6 +1229,7 @@ static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) rgrp_set_bitmap_flags(rgd); rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); rgd->rd_free_clone = rgd->rd_free; + BUG_ON(rgd->rd_reserved); /* max out the rgrp allocation failure point */ rgd->rd_extfail_pt = rgd->rd_free; } @@ -1278,6 +1279,7 @@ static int update_rgrp_lvb(struct gfs2_rgrpd *rgd) rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free); rgrp_set_bitmap_flags(rgd); rgd->rd_free_clone = rgd->rd_free; + BUG_ON(rgd->rd_reserved); /* max out the rgrp allocation failure point */ rgd->rd_extfail_pt = rgd->rd_free; rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes); @@ -1568,17 +1570,26 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, u64 goal; struct gfs2_blkreserv *rs = &ip->i_res; u32 extlen; - u32 free_blocks = rgd_free(rgd, rs); + u32 free_blocks, blocks_available; int ret; struct inode *inode = &ip->i_inode; + spin_lock(&rgd->rd_rsspin); + free_blocks = rgd_free(rgd, rs); + if (rgd->rd_free_clone < rgd->rd_requested) + free_blocks = 0; + blocks_available = rgd->rd_free_clone - rgd->rd_reserved; + if (rgd == rs->rs_rgd) + blocks_available += rs->rs_reserved; + spin_unlock(&rgd->rd_rsspin); + if (S_ISDIR(inode->i_mode)) extlen = 1; else { extlen = max_t(u32, atomic_read(&ip->i_sizehint), ap->target); extlen = clamp(extlen, (u32)RGRP_RSRV_MINBLKS, free_blocks); } - if ((rgd->rd_free_clone < rgd->rd_requested) || (free_blocks < extlen)) + if (free_blocks < extlen || blocks_available < extlen) return; /* Find bitmap block that contains bits for goal block */ @@ -2027,8 +2038,7 @@ static inline int fast_to_acquire(struct gfs2_rgrpd *rgd) * We try our best to find an rgrp that has at least ap->target blocks * available. After a couple of passes (loops == 2), the prospects of finding * such an rgrp diminish. At this stage, we return the first rgrp that has - * at least ap->min_target blocks available. Either way, we set ap->allowed to - * the number of blocks available in the chosen rgrp. + * at least ap->min_target blocks available. * * Returns: 0 on success, * -ENOMEM if a suitable rgrp can't be found @@ -2044,7 +2054,9 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) u64 last_unlinked = NO_BLOCK; u32 target = ap->target; int loops = 0; - u32 free_blocks, skip = 0; + u32 free_blocks, blocks_available, skip = 0; + + BUG_ON(rs->rs_reserved); if (sdp->sd_args.ar_rgrplvb) flags |= GL_SKIP; @@ -2065,6 +2077,8 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) return -EBADSLT; while (loops < 3) { + struct gfs2_rgrpd *rgd; + rg_locked = 1; if (!gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl)) { @@ -2115,11 +2129,20 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) goto check_rgrp; /* If rgrp has enough free space, use it */ - free_blocks = rgd_free(rs->rs_rgd, rs); - if (free_blocks >= target) { - ap->allowed = free_blocks; - return 0; + rgd = rs->rs_rgd; + spin_lock(&rgd->rd_rsspin); + free_blocks = rgd_free(rgd, rs); + blocks_available = rgd->rd_free_clone - rgd->rd_reserved; + if (free_blocks < target || blocks_available < target) { + spin_unlock(&rgd->rd_rsspin); + goto check_rgrp; } + rs->rs_reserved = ap->target; + if (rs->rs_reserved > blocks_available) + rs->rs_reserved = blocks_available; + rgd->rd_reserved += rs->rs_reserved; + spin_unlock(&rgd->rd_rsspin); + return 0; check_rgrp: /* Check for unlinked inodes which can be reclaimed */ if (rs->rs_rgd->rd_flags & GFS2_RDF_CHECK) @@ -2172,6 +2195,17 @@ next_rgrp: void gfs2_inplace_release(struct gfs2_inode *ip) { + struct gfs2_blkreserv *rs = &ip->i_res; + + if (rs->rs_reserved) { + struct gfs2_rgrpd *rgd = rs->rs_rgd; + + spin_lock(&rgd->rd_rsspin); + BUG_ON(rgd->rd_reserved < rs->rs_reserved); + rgd->rd_reserved -= rs->rs_reserved; + spin_unlock(&rgd->rd_rsspin); + rs->rs_reserved = 0; + } if (gfs2_holder_initialized(&ip->i_rgd_gh)) gfs2_glock_dq_uninit(&ip->i_rgd_gh); } @@ -2259,11 +2293,11 @@ void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_rgrpd *rgd, struct gfs2_blkreserv *trs; const struct rb_node *n; - gfs2_print_dbg(seq, "%s R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n", + gfs2_print_dbg(seq, "%s R: n:%llu f:%02x b:%u/%u i:%u q:%u r:%u e:%u\n", fs_id_buf, (unsigned long long)rgd->rd_addr, rgd->rd_flags, rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes, - rgd->rd_requested, rgd->rd_extfail_pt); + rgd->rd_requested, rgd->rd_reserved, rgd->rd_extfail_pt); if (rgd->rd_sbd->sd_args.ar_rgrplvb) { struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl; @@ -2310,7 +2344,8 @@ static void gfs2_adjust_reservation(struct gfs2_inode *ip, struct gfs2_blkreserv *rs = &ip->i_res; struct gfs2_rgrpd *rgd = rbm->rgd; - spin_lock(&rgd->rd_rsspin); + BUG_ON(rs->rs_reserved < len); + rs->rs_reserved -= len; if (gfs2_rs_active(rs)) { u64 start = gfs2_rbm_to_block(rbm); @@ -2324,15 +2359,13 @@ static void gfs2_adjust_reservation(struct gfs2_inode *ip, trace_gfs2_rs(rs, TRACE_RS_CLAIM); if (rs->rs_start < rgd->rd_data0 + rgd->rd_data && rs->rs_requested) - goto out; + return; /* We used up our block reservation, so we should reserve more blocks next time. */ atomic_add(RGRP_RSRV_ADDBLKS, &ip->i_sizehint); } __rs_deltree(rs); } -out: - spin_unlock(&rgd->rd_rsspin); } /** @@ -2386,6 +2419,8 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, u32 minext = 1; int error = -ENOSPC; + BUG_ON(ip->i_res.rs_reserved < *nblocks); + if (gfs2_rs_active(&ip->i_res)) { gfs2_set_alloc_start(&rbm, ip, dinode); error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &minext, &ip->i_res, false); @@ -2407,8 +2442,6 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, gfs2_alloc_extent(&rbm, dinode, nblocks); block = gfs2_rbm_to_block(&rbm); rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0; - if (gfs2_rs_active(&ip->i_res)) - gfs2_adjust_reservation(ip, &rbm, *nblocks); if (!dinode) { ip->i_goal = block + *nblocks - 1; error = gfs2_meta_inode_buffer(ip, &dibh); @@ -2421,12 +2454,20 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, brelse(dibh); } } - if (rbm.rgd->rd_free < *nblocks) { + spin_lock(&rbm.rgd->rd_rsspin); + gfs2_adjust_reservation(ip, &rbm, *nblocks); + if (rbm.rgd->rd_free < *nblocks || rbm.rgd->rd_reserved < *nblocks) { fs_warn(sdp, "nblocks=%u\n", *nblocks); + spin_unlock(&rbm.rgd->rd_rsspin); goto rgrp_error; } - + BUG_ON(rbm.rgd->rd_reserved < *nblocks); + BUG_ON(rbm.rgd->rd_free_clone < *nblocks); + BUG_ON(rbm.rgd->rd_free < *nblocks); + rbm.rgd->rd_reserved -= *nblocks; + rbm.rgd->rd_free_clone -= *nblocks; rbm.rgd->rd_free -= *nblocks; + spin_unlock(&rbm.rgd->rd_rsspin); if (dinode) { rbm.rgd->rd_dinodes++; *generation = rbm.rgd->rd_igeneration++; @@ -2443,7 +2484,6 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, gfs2_quota_change(ip, *nblocks, ip->i_inode.i_uid, ip->i_inode.i_gid); - rbm.rgd->rd_free_clone -= *nblocks; trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); *bn = block; diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h index d24bdcdd42e50..bd6c8e9e49db0 100644 --- a/fs/gfs2/trace_gfs2.h +++ b/fs/gfs2/trace_gfs2.h @@ -561,6 +561,7 @@ TRACE_EVENT(gfs2_block_alloc, __field( u64, rd_addr ) __field( u32, rd_free_clone ) __field( u32, rd_requested ) + __field( u32, rd_reserved ) ), TP_fast_assign( @@ -572,16 +573,19 @@ TRACE_EVENT(gfs2_block_alloc, __entry->rd_addr = rgd->rd_addr; __entry->rd_free_clone = rgd->rd_free_clone; __entry->rd_requested = rgd->rd_requested; + __entry->rd_reserved = rgd->rd_reserved; ), - TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u rr:%lu", + TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u rq:%u rr:%u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->inum, (unsigned long long)__entry->start, (unsigned long)__entry->len, block_state_name(__entry->block_state), (unsigned long long)__entry->rd_addr, - __entry->rd_free_clone, (unsigned long)__entry->rd_requested) + __entry->rd_free_clone, + __entry->rd_requested, + __entry->rd_reserved) ); /* Keep track of multi-block reservations as they are allocated/freed */ @@ -596,9 +600,11 @@ TRACE_EVENT(gfs2_rs, __field( u64, rd_addr ) __field( u32, rd_free_clone ) __field( u32, rd_requested ) + __field( u32, rd_reserved ) __field( u64, inum ) __field( u64, start ) __field( u32, requested ) + __field( u32, reserved ) __field( u8, func ) ), @@ -607,21 +613,26 @@ TRACE_EVENT(gfs2_rs, __entry->rd_addr = rs->rs_rgd->rd_addr; __entry->rd_free_clone = rs->rs_rgd->rd_free_clone; __entry->rd_requested = rs->rs_rgd->rd_requested; + __entry->rd_reserved = rs->rs_rgd->rd_reserved; __entry->inum = container_of(rs, struct gfs2_inode, i_res)->i_no_addr; __entry->start = rs->rs_start; __entry->requested = rs->rs_requested; + __entry->reserved = rs->rs_reserved; __entry->func = func; ), - TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%lu rr:%lu %s f:%lu", + TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%u rq:%u rr:%u %s q:%u r:%u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->inum, (unsigned long long)__entry->start, (unsigned long long)__entry->rd_addr, - (unsigned long)__entry->rd_free_clone, - (unsigned long)__entry->rd_requested, - rs_func_name(__entry->func), (unsigned long)__entry->requested) + __entry->rd_free_clone, + __entry->rd_requested, + __entry->rd_reserved, + rs_func_name(__entry->func), + __entry->requested, + __entry->reserved) ); #endif /* _TRACE_GFS2_H */ -- GitLab From 9e514605c77451745ea9fca5a26fc3153893686a Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 8 Feb 2021 20:44:26 +0100 Subject: [PATCH 1889/1894] gfs2: Add local resource group locking Prepare for treating resource group glocks as exclusive among nodes but shared among all tasks running on a node: introduce another layer of node-specific locking that the local tasks can use to coordinate their accesses. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/incore.h | 2 ++ fs/gfs2/lops.c | 6 +++++- fs/gfs2/rgrp.c | 54 ++++++++++++++++++++++++++++++++++++++++++------ fs/gfs2/rgrp.h | 4 ++++ 4 files changed, 59 insertions(+), 7 deletions(-) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 2679ba54798c8..92f5a920ce61d 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -20,6 +20,7 @@ #include #include #include +#include #define DIO_WAIT 0x00000010 #define DIO_METADATA 0x00000020 @@ -123,6 +124,7 @@ struct gfs2_rgrpd { #define GFS2_RDF_PREFERRED 0x80000000 /* This rgrp is preferred */ #define GFS2_RDF_MASK 0xf0000000 /* mask for internal flags */ spinlock_t rd_rsspin; /* protects reservation related vars */ + struct mutex rd_mutex; struct rb_root rd_rstree; /* multi-block reservation tree */ }; diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 802bc15f9f11a..dffa6cc8b5b83 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -76,8 +76,9 @@ static void maybe_release_space(struct gfs2_bufdata *bd) unsigned int index = bd->bd_bh->b_blocknr - gl->gl_name.ln_number; struct gfs2_bitmap *bi = rgd->rd_bits + index; + rgrp_lock_local(rgd); if (bi->bi_clone == NULL) - return; + goto out; if (sdp->sd_args.ar_discard) gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bd->bd_bh, bi, 1, NULL); memcpy(bi->bi_clone + bi->bi_offset, @@ -86,6 +87,9 @@ static void maybe_release_space(struct gfs2_bufdata *bd) rgd->rd_free_clone = rgd->rd_free; BUG_ON(rgd->rd_free_clone < rgd->rd_reserved); rgd->rd_extfail_pt = rgd->rd_free; + +out: + rgrp_unlock_local(rgd); } /** diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index f1df5e75364ab..f45b787307d02 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -920,6 +920,7 @@ static int read_rindex_entry(struct gfs2_inode *ip) rgd->rd_data = be32_to_cpu(buf.ri_data); rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes); spin_lock_init(&rgd->rd_rsspin); + mutex_init(&rgd->rd_mutex); error = compute_bitstructs(rgd); if (error) @@ -1449,9 +1450,11 @@ int gfs2_fitrim(struct file *filp, void __user *argp) /* Trim each bitmap in the rgrp */ for (x = 0; x < rgd->rd_length; x++) { struct gfs2_bitmap *bi = rgd->rd_bits + x; + rgrp_lock_local(rgd); ret = gfs2_rgrp_send_discards(sdp, rgd->rd_data0, NULL, bi, minlen, &amt); + rgrp_unlock_local(rgd); if (ret) { gfs2_glock_dq_uninit(&gh); goto out; @@ -1463,9 +1466,11 @@ int gfs2_fitrim(struct file *filp, void __user *argp) ret = gfs2_trans_begin(sdp, RES_RG_HDR, 0); if (ret == 0) { bh = rgd->rd_bits[0].bi_bh; + rgrp_lock_local(rgd); rgd->rd_flags |= GFS2_RGF_TRIMMED; gfs2_trans_add_meta(rgd->rd_gl, bh); gfs2_rgrp_out(rgd, bh->b_data); + rgrp_unlock_local(rgd); gfs2_trans_end(sdp); } } @@ -2050,7 +2055,8 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_rgrpd *begin = NULL; struct gfs2_blkreserv *rs = &ip->i_res; - int error = 0, rg_locked, flags = 0; + int error = 0, flags = 0; + bool rg_locked; u64 last_unlinked = NO_BLOCK; u32 target = ap->target; int loops = 0; @@ -2079,10 +2085,10 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) while (loops < 3) { struct gfs2_rgrpd *rgd; - rg_locked = 1; - - if (!gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl)) { - rg_locked = 0; + rg_locked = gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl); + if (rg_locked) { + rgrp_lock_local(rs->rs_rgd); + } else { if (skip && skip--) goto next_rgrp; if (!gfs2_rs_active(rs)) { @@ -2099,12 +2105,14 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) &ip->i_rgd_gh); if (unlikely(error)) return error; + rgrp_lock_local(rs->rs_rgd); if (!gfs2_rs_active(rs) && (loops < 2) && gfs2_rgrp_congested(rs->rs_rgd, loops)) goto skip_rgrp; if (sdp->sd_args.ar_rgrplvb) { error = update_rgrp_lvb(rs->rs_rgd); if (unlikely(error)) { + rgrp_unlock_local(rs->rs_rgd); gfs2_glock_dq_uninit(&ip->i_rgd_gh); return error; } @@ -2142,6 +2150,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) rs->rs_reserved = blocks_available; rgd->rd_reserved += rs->rs_reserved; spin_unlock(&rgd->rd_rsspin); + rgrp_unlock_local(rs->rs_rgd); return 0; check_rgrp: /* Check for unlinked inodes which can be reclaimed */ @@ -2149,6 +2158,8 @@ check_rgrp: try_rgrp_unlink(rs->rs_rgd, &last_unlinked, ip->i_no_addr); skip_rgrp: + rgrp_unlock_local(rs->rs_rgd); + /* Drop reservation, if we couldn't use reserved rgrp */ if (gfs2_rs_active(rs)) gfs2_rs_deltree(rs); @@ -2293,6 +2304,7 @@ void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_rgrpd *rgd, struct gfs2_blkreserv *trs; const struct rb_node *n; + spin_lock(&rgd->rd_rsspin); gfs2_print_dbg(seq, "%s R: n:%llu f:%02x b:%u/%u i:%u q:%u r:%u e:%u\n", fs_id_buf, (unsigned long long)rgd->rd_addr, rgd->rd_flags, @@ -2306,7 +2318,6 @@ void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_rgrpd *rgd, be32_to_cpu(rgl->rl_free), be32_to_cpu(rgl->rl_dinodes)); } - spin_lock(&rgd->rd_rsspin); for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) { trs = rb_entry(n, struct gfs2_blkreserv, rs_node); dump_rs(seq, trs, fs_id_buf); @@ -2421,6 +2432,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, BUG_ON(ip->i_res.rs_reserved < *nblocks); + rgrp_lock_local(rbm.rgd); if (gfs2_rs_active(&ip->i_res)) { gfs2_set_alloc_start(&rbm, ip, dinode); error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &minext, &ip->i_res, false); @@ -2477,6 +2489,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh); gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data); + rgrp_unlock_local(rbm.rgd); gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0); if (dinode) @@ -2490,6 +2503,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, return 0; rgrp_error: + rgrp_unlock_local(rbm.rgd); gfs2_rgrp_error(rbm.rgd); return -EIO; } @@ -2509,12 +2523,14 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd, { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + rgrp_lock_local(rgd); rgblk_free(sdp, rgd, bstart, blen, GFS2_BLKST_FREE); trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE); rgd->rd_free += blen; rgd->rd_flags &= ~GFS2_RGF_TRIMMED; gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); + rgrp_unlock_local(rgd); /* Directories keep their data in the metadata address space */ if (meta || ip->i_depth || gfs2_is_jdata(ip)) @@ -2550,17 +2566,20 @@ void gfs2_unlink_di(struct inode *inode) rgd = gfs2_blk2rgrpd(sdp, blkno, true); if (!rgd) return; + rgrp_lock_local(rgd); rgblk_free(sdp, rgd, blkno, 1, GFS2_BLKST_UNLINKED); trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED); gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); be32_add_cpu(&rgd->rd_rgl->rl_unlinked, 1); + rgrp_unlock_local(rgd); } void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) { struct gfs2_sbd *sdp = rgd->rd_sbd; + rgrp_lock_local(rgd); rgblk_free(sdp, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE); if (!rgd->rd_dinodes) gfs2_consist_rgrpd(rgd); @@ -2569,6 +2588,7 @@ void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); + rgrp_unlock_local(rgd); be32_add_cpu(&rgd->rd_rgl->rl_unlinked, -1); gfs2_statfs_change(sdp, 0, +1, -1); @@ -2583,6 +2603,10 @@ void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) * @no_addr: The block number to check * @type: The block type we are looking for * + * The inode glock of @no_addr must be held. The @type to check for is either + * GFS2_BLKST_DINODE or GFS2_BLKST_UNLINKED; checking for type GFS2_BLKST_FREE + * or GFS2_BLKST_USED would make no sense. + * * Returns: 0 if the block type matches the expected type * -ESTALE if it doesn't match * or -ve errno if something went wrong while checking @@ -2606,6 +2630,13 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type) rbm.rgd = rgd; error = gfs2_rbm_from_block(&rbm, no_addr); if (!WARN_ON_ONCE(error)) { + /* + * No need to take the local resource group lock here; the + * inode glock of @no_addr provides the necessary + * synchronization in case the block is an inode. (In case + * the block is not an inode, the block type will not match + * the @type we are looking for.) + */ if (gfs2_testbit(&rbm, false) != type) error = -ESTALE; } @@ -2730,3 +2761,14 @@ void gfs2_rlist_free(struct gfs2_rgrp_list *rlist) } } +void rgrp_lock_local(struct gfs2_rgrpd *rgd) +{ + BUG_ON(!gfs2_glock_is_held_excl(rgd->rd_gl) && + !test_bit(SDF_NORECOVERY, &rgd->rd_sbd->sd_flags)); + mutex_lock(&rgd->rd_mutex); +} + +void rgrp_unlock_local(struct gfs2_rgrpd *rgd) +{ + mutex_unlock(&rgd->rd_mutex); +} diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index be1b2034f5ee4..a6855fd796e03 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -88,4 +88,8 @@ static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block) } extern void check_and_update_goal(struct gfs2_inode *ip); + +extern void rgrp_lock_local(struct gfs2_rgrpd *rgd); +extern void rgrp_unlock_local(struct gfs2_rgrpd *rgd); + #endif /* __RGRP_DOT_H__ */ -- GitLab From 06e908cd9ead1f215cc30132aac5ce132a352fbe Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 18 Apr 2018 13:58:19 -0700 Subject: [PATCH 1890/1894] gfs2: Allow node-wide exclusive glock sharing Introduce a new LM_FLAG_NODE_SCOPE glock holder flag: when taking a glock in LM_ST_EXCLUSIVE (EX) mode and with the LM_FLAG_NODE_SCOPE flag set, the exclusive lock is shared among all local processes who are holding the glock in EX mode and have the LM_FLAG_NODE_SCOPE flag set. From the point of view of other nodes, the lock is still held exclusively. A future patch will start using this flag to improve performance with rgrp sharing. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 22 +++++++++++++++++++--- fs/gfs2/glock.h | 6 ++++++ 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index d87a5bc3607b7..9567520d79f79 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -313,9 +313,23 @@ void gfs2_glock_put(struct gfs2_glock *gl) static inline int may_grant(const struct gfs2_glock *gl, const struct gfs2_holder *gh) { const struct gfs2_holder *gh_head = list_first_entry(&gl->gl_holders, const struct gfs2_holder, gh_list); - if ((gh->gh_state == LM_ST_EXCLUSIVE || - gh_head->gh_state == LM_ST_EXCLUSIVE) && gh != gh_head) - return 0; + + if (gh != gh_head) { + /** + * Here we make a special exception to grant holders who agree + * to share the EX lock with other holders who also have the + * bit set. If the original holder has the LM_FLAG_NODE_SCOPE bit + * is set, we grant more holders with the bit set. + */ + if (gh_head->gh_state == LM_ST_EXCLUSIVE && + (gh_head->gh_flags & LM_FLAG_NODE_SCOPE) && + gh->gh_state == LM_ST_EXCLUSIVE && + (gh->gh_flags & LM_FLAG_NODE_SCOPE)) + return 1; + if ((gh->gh_state == LM_ST_EXCLUSIVE || + gh_head->gh_state == LM_ST_EXCLUSIVE)) + return 0; + } if (gl->gl_state == gh->gh_state) return 1; if (gh->gh_flags & GL_EXACT) @@ -2030,6 +2044,8 @@ static const char *hflags2str(char *buf, u16 flags, unsigned long iflags) *p++ = 'A'; if (flags & LM_FLAG_PRIORITY) *p++ = 'p'; + if (flags & LM_FLAG_NODE_SCOPE) + *p++ = 'n'; if (flags & GL_ASYNC) *p++ = 'a'; if (flags & GL_EXACT) diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 53813364517bd..31a8f2f649b52 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -75,6 +75,11 @@ enum { * request and directly join the other shared lock. A shared lock request * without the priority flag might be forced to wait until the deferred * requested had acquired and released the lock. + * + * LM_FLAG_NODE_SCOPE + * This holder agrees to share the lock within this node. In other words, + * the glock is held in EX mode according to DLM, but local holders on the + * same node can share it. */ #define LM_FLAG_TRY 0x0001 @@ -82,6 +87,7 @@ enum { #define LM_FLAG_NOEXP 0x0004 #define LM_FLAG_ANY 0x0008 #define LM_FLAG_PRIORITY 0x0010 +#define LM_FLAG_NODE_SCOPE 0x0020 #define GL_ASYNC 0x0040 #define GL_EXACT 0x0080 #define GL_SKIP 0x0100 -- GitLab From 4fc7ec31c3c44031e8a8872bb8432cf4f6cb6ddd Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 24 Apr 2018 10:35:02 -0700 Subject: [PATCH 1891/1894] gfs2: Use resource group glock sharing This patch takes advantage of the new glock holder sharing feature for resource groups. We have already introduced local resource group locking in a previous patch, so competing accesses of local processes are already under control. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/bmap.c | 2 +- fs/gfs2/inode.c | 6 +++--- fs/gfs2/rgrp.c | 10 +++++----- fs/gfs2/super.c | 3 ++- fs/gfs2/xattr.c | 6 ++++-- 5 files changed, 15 insertions(+), 12 deletions(-) diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index e2ee3703f7df9..cf6ccdd005871 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1538,7 +1538,7 @@ more_rgrps: goto out; } ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, - 0, rd_gh); + LM_FLAG_NODE_SCOPE, rd_gh); if (ret) goto out; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index c1b77e8d6b1c3..4d3d8624a95be 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1145,7 +1145,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) if (!rgd) goto out_inodes; - gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); + gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, LM_FLAG_NODE_SCOPE, ghs + 2); error = gfs2_glock_nq(ghs); /* parent */ @@ -1450,8 +1450,8 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, error = -ENOENT; goto out_gunlock; } - error = gfs2_glock_nq_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, - &rd_gh); + error = gfs2_glock_nq_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, + LM_FLAG_NODE_SCOPE, &rd_gh); if (error) goto out_gunlock; } diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index f45b787307d02..89c37a845e649 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1442,7 +1442,8 @@ int gfs2_fitrim(struct file *filp, void __user *argp) while (1) { - ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh); + ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, + LM_FLAG_NODE_SCOPE, &gh); if (ret) goto out; @@ -2055,7 +2056,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_rgrpd *begin = NULL; struct gfs2_blkreserv *rs = &ip->i_res; - int error = 0, flags = 0; + int error = 0, flags = LM_FLAG_NODE_SCOPE; bool rg_locked; u64 last_unlinked = NO_BLOCK; u32 target = ap->target; @@ -2736,9 +2737,8 @@ void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist) sizeof(struct gfs2_holder), GFP_NOFS | __GFP_NOFAIL); for (x = 0; x < rlist->rl_rgrps; x++) - gfs2_holder_init(rlist->rl_rgd[x]->rd_gl, - LM_ST_EXCLUSIVE, 0, - &rlist->rl_ghs[x]); + gfs2_holder_init(rlist->rl_rgd[x]->rd_gl, LM_ST_EXCLUSIVE, + LM_FLAG_NODE_SCOPE, &rlist->rl_ghs[x]); } /** diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 2f56acc41c049..e138a2d15c776 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1229,7 +1229,8 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip) goto out_qs; } - error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh); + error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, + LM_FLAG_NODE_SCOPE, &gh); if (error) goto out_qs; diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 9d7667bc42927..d332bf31f0aee 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -259,7 +259,8 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh, return -EIO; } - error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rg_gh); + error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, + LM_FLAG_NODE_SCOPE, &rg_gh); if (error) return error; @@ -1385,7 +1386,8 @@ static int ea_dealloc_block(struct gfs2_inode *ip) return -EIO; } - error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh); + error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, + LM_FLAG_NODE_SCOPE, &gh); if (error) return error; -- GitLab From 71b219f4e50b12efffbc8107408e17904f9c47e6 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 29 Jan 2021 19:56:37 +0100 Subject: [PATCH 1892/1894] gfs2: Minor calc_reserved cleanup No functional change. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/log.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 6fd4ded1e357c..0c28ddd0e4103 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -546,17 +546,14 @@ static inline unsigned int log_distance(struct gfs2_sbd *sdp, unsigned int newer static unsigned int calc_reserved(struct gfs2_sbd *sdp) { unsigned int reserved = 0; - unsigned int mbuf; - unsigned int dbuf; + unsigned int blocks; struct gfs2_trans *tr = sdp->sd_log_tr; if (tr) { - mbuf = tr->tr_num_buf_new - tr->tr_num_buf_rm; - dbuf = tr->tr_num_databuf_new - tr->tr_num_databuf_rm; - reserved = mbuf + dbuf; - /* Account for header blocks */ - reserved += DIV_ROUND_UP(mbuf, buf_limit(sdp)); - reserved += DIV_ROUND_UP(dbuf, databuf_limit(sdp)); + blocks = tr->tr_num_buf_new - tr->tr_num_buf_rm; + reserved += blocks + DIV_ROUND_UP(blocks, buf_limit(sdp)); + blocks = tr->tr_num_databuf_new - tr->tr_num_databuf_rm; + reserved += blocks + DIV_ROUND_UP(blocks, databuf_limit(sdp)); } if (sdp->sd_log_committed_revoke > 0) -- GitLab From fe3e397668775e20ad0962459733158838b926af Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 10 Dec 2020 12:49:56 +0100 Subject: [PATCH 1893/1894] gfs2: Rework the log space allocation logic The current log space allocation logic is hard to understand or extend. The principle it that when the log is flushed, we may or may not have a transaction active that has space allocated in the log. To deal with that, we set aside a magical number of blocks to be used in case we don't have an active transaction. It isn't clear that the pool will always be big enough. In addition, we can't return unused log space at the end of a transaction, so the number of blocks allocated must exactly match the number of blocks used. Simplify this as follows: * When transactions are allocated or merged, always reserve enough blocks to flush the transaction (err on the safe side). * In gfs2_log_flush, return any allocated blocks that haven't been used. * Maintain a pool of spare blocks big enough to do one log flush, as before. * In gfs2_log_flush, when we have no active transaction, allocate a suitable number of blocks. For that, use the spare pool when called from logd, and leave the pool alone otherwise. This means that when the log is almost full, logd will still be able to do one more log flush, which will result in more log space becoming available. This will make the log space allocator code easier to work with in the future. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/log.c | 162 ++++++++++++++++++++++++++++-------------------- fs/gfs2/log.h | 7 +++ fs/gfs2/trans.c | 4 +- 3 files changed, 104 insertions(+), 69 deletions(-) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 0c28ddd0e4103..913150e60f525 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -447,15 +447,42 @@ void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks) } /** - * gfs2_log_reserve - Make a log reservation + * __gfs2_log_try_reserve - Try to make a log reservation + * @sdp: The GFS2 superblock + * @blks: The number of blocks to reserve + * @taboo_blks: The number of blocks to leave free + * + * Try to do the same as __gfs2_log_reserve(), but fail if no more log + * space is immediately available. + */ +static bool __gfs2_log_try_reserve(struct gfs2_sbd *sdp, unsigned int blks, + unsigned int taboo_blks) +{ + unsigned wanted = blks + taboo_blks; + unsigned int free_blocks; + + free_blocks = atomic_read(&sdp->sd_log_blks_free); + while (free_blocks >= wanted) { + if (atomic_try_cmpxchg(&sdp->sd_log_blks_free, &free_blocks, + free_blocks - blks)) { + trace_gfs2_log_blocks(sdp, -blks); + return true; + } + } + return false; +} + +/** + * __gfs2_log_reserve - Make a log reservation * @sdp: The GFS2 superblock * @blks: The number of blocks to reserve + * @taboo_blks: The number of blocks to leave free * - * Note that we never give out the last few blocks of the journal. Thats - * due to the fact that there is a small number of header blocks - * associated with each log flush. The exact number can't be known until - * flush time, so we ensure that we have just enough free blocks at all - * times to avoid running out during a log flush. + * @taboo_blks is set to 0 for logd, and to GFS2_LOG_FLUSH_MIN_BLOCKS + * for all other processes. This ensures that when the log is almost full, + * logd will still be able to call gfs2_log_flush one more time without + * blocking, which will advance the tail and make some more log space + * available. * * We no longer flush the log here, instead we wake up logd to do that * for us. To avoid the thundering herd and to ensure that we deal fairly @@ -464,19 +491,12 @@ void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks) * wake the next waiter on the list. */ -void gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks) +static void __gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks, + unsigned int taboo_blks) { - unsigned reserved_blks = 7 * (4096 / sdp->sd_vfs->s_blocksize); - unsigned wanted = blks + reserved_blks; + unsigned wanted = blks + taboo_blks; unsigned int free_blocks; - free_blocks = atomic_read(&sdp->sd_log_blks_free); - while (free_blocks >= wanted) { - if (atomic_try_cmpxchg(&sdp->sd_log_blks_free, &free_blocks, - free_blocks - blks)) - return; - } - atomic_add(blks, &sdp->sd_log_blks_needed); for (;;) { if (current != sdp->sd_logd_process) @@ -498,6 +518,19 @@ reserved: wake_up(&sdp->sd_log_waitq); } +/** + * gfs2_log_reserve - Make a log reservation + * @sdp: The GFS2 superblock + * @blks: The number of blocks to reserve + */ + +void gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks) +{ + if (__gfs2_log_try_reserve(sdp, blks, GFS2_LOG_FLUSH_MIN_BLOCKS)) + return; + __gfs2_log_reserve(sdp, blks, GFS2_LOG_FLUSH_MIN_BLOCKS); +} + /** * log_distance - Compute distance between two journal blocks * @sdp: The GFS2 superblock @@ -545,7 +578,7 @@ static inline unsigned int log_distance(struct gfs2_sbd *sdp, unsigned int newer */ static unsigned int calc_reserved(struct gfs2_sbd *sdp) { - unsigned int reserved = 0; + unsigned int reserved = GFS2_LOG_FLUSH_MIN_BLOCKS; unsigned int blocks; struct gfs2_trans *tr = sdp->sd_log_tr; @@ -557,10 +590,7 @@ static unsigned int calc_reserved(struct gfs2_sbd *sdp) } if (sdp->sd_log_committed_revoke > 0) - reserved += gfs2_struct2blk(sdp, sdp->sd_log_committed_revoke); - /* One for the overall header */ - if (reserved) - reserved++; + reserved += gfs2_struct2blk(sdp, sdp->sd_log_committed_revoke) - 1; return reserved; } @@ -708,29 +738,8 @@ void gfs2_flush_revokes(struct gfs2_sbd *sdp) max_revokes += roundup(sdp->sd_log_num_revoke - sdp->sd_ldptrs, sdp->sd_inptrs); max_revokes -= sdp->sd_log_num_revoke; - if (!sdp->sd_log_num_revoke) { - atomic_dec(&sdp->sd_log_blks_free); - /* If no blocks have been reserved, we need to also - * reserve a block for the header */ - if (!sdp->sd_log_blks_reserved) { - atomic_dec(&sdp->sd_log_blks_free); - trace_gfs2_log_blocks(sdp, -2); - } else { - trace_gfs2_log_blocks(sdp, -1); - } - } gfs2_ail1_empty(sdp, max_revokes); gfs2_log_unlock(sdp); - - if (!sdp->sd_log_num_revoke) { - atomic_inc(&sdp->sd_log_blks_free); - if (!sdp->sd_log_blks_reserved) { - atomic_inc(&sdp->sd_log_blks_free); - trace_gfs2_log_blocks(sdp, 2); - } else { - trace_gfs2_log_blocks(sdp, 1); - } - } } /** @@ -843,6 +852,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags) gfs2_log_incr_head(sdp); log_flush_wait(sdp); log_pull_tail(sdp); + gfs2_log_update_head(sdp); } /** @@ -942,10 +952,14 @@ static void trans_drain(struct gfs2_trans *tr) void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) { struct gfs2_trans *tr = NULL; + unsigned int reserved_blocks = 0, used_blocks = 0; enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state); + unsigned int first_log_head; down_write(&sdp->sd_log_flush_lock); + trace_gfs2_log_flush(sdp, 1, flags); +repeat: /* * Do this check while holding the log_flush_lock to prevent new * buffers from being added to the ail via gfs2_pin() @@ -956,22 +970,41 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) /* Log might have been flushed while we waited for the flush lock */ if (gl && !test_bit(GLF_LFLUSH, &gl->gl_flags)) goto out; - trace_gfs2_log_flush(sdp, 1, flags); - if (flags & GFS2_LOG_HEAD_FLUSH_SHUTDOWN) - clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); + first_log_head = sdp->sd_log_head; + sdp->sd_log_flush_head = first_log_head; - sdp->sd_log_flush_head = sdp->sd_log_head; tr = sdp->sd_log_tr; - if (tr) { - sdp->sd_log_tr = NULL; - tr->tr_first = sdp->sd_log_flush_head; - if (unlikely (state == SFS_FROZEN)) - if (gfs2_assert_withdraw_delayed(sdp, - !tr->tr_num_buf_new && !tr->tr_num_databuf_new)) - goto out_withdraw; + if (tr || sdp->sd_log_num_revoke) { + if (reserved_blocks) + gfs2_log_release(sdp, reserved_blocks); + reserved_blocks = sdp->sd_log_blks_reserved; + if (tr) { + sdp->sd_log_tr = NULL; + tr->tr_first = first_log_head; + if (unlikely (state == SFS_FROZEN)) + if (gfs2_assert_withdraw_delayed(sdp, + !tr->tr_num_buf_new && !tr->tr_num_databuf_new)) + goto out_withdraw; + } + } else if (!reserved_blocks) { + unsigned int taboo_blocks = GFS2_LOG_FLUSH_MIN_BLOCKS; + + reserved_blocks = GFS2_LOG_FLUSH_MIN_BLOCKS; + if (current == sdp->sd_logd_process) + taboo_blocks = 0; + + if (!__gfs2_log_try_reserve(sdp, reserved_blocks, taboo_blocks)) { + up_write(&sdp->sd_log_flush_lock); + __gfs2_log_reserve(sdp, reserved_blocks, taboo_blocks); + down_write(&sdp->sd_log_flush_lock); + goto repeat; + } } + if (flags & GFS2_LOG_HEAD_FLUSH_SHUTDOWN) + clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); + if (unlikely(state == SFS_FROZEN)) if (gfs2_assert_withdraw_delayed(sdp, !sdp->sd_log_num_revoke)) goto out_withdraw; @@ -993,8 +1026,6 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) log_flush_wait(sdp); log_write_header(sdp, flags); } else if (sdp->sd_log_tail != sdp->sd_log_flush_tail && !sdp->sd_log_idle) { - atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */ - trace_gfs2_log_blocks(sdp, -1); log_write_header(sdp, flags); } if (gfs2_withdrawn(sdp)) @@ -1002,7 +1033,6 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) lops_after_commit(sdp, tr); gfs2_log_lock(sdp); - gfs2_log_update_head(sdp); sdp->sd_log_blks_reserved = 0; sdp->sd_log_committed_revoke = 0; @@ -1019,10 +1049,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) empty_ail1_list(sdp); if (gfs2_withdrawn(sdp)) goto out_withdraw; - atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */ - trace_gfs2_log_blocks(sdp, -1); log_write_header(sdp, flags); - gfs2_log_update_head(sdp); } if (flags & (GFS2_LOG_HEAD_FLUSH_SHUTDOWN | GFS2_LOG_HEAD_FLUSH_FREEZE)) @@ -1032,12 +1059,17 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) } out_end: - trace_gfs2_log_flush(sdp, 0, flags); + used_blocks = log_distance(sdp, sdp->sd_log_flush_head, first_log_head); + if (gfs2_assert_withdraw_delayed(sdp, used_blocks <= reserved_blocks)) + goto out; out: + if (used_blocks != reserved_blocks) + gfs2_log_release(sdp, reserved_blocks - used_blocks); up_write(&sdp->sd_log_flush_lock); gfs2_trans_free(sdp, tr); if (gfs2_withdrawing(sdp)) gfs2_withdraw(sdp); + trace_gfs2_log_flush(sdp, 0, flags); return; out_withdraw: @@ -1150,15 +1182,11 @@ static void gfs2_log_shutdown(struct gfs2_sbd *sdp) gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail1_list)); - sdp->sd_log_flush_head = sdp->sd_log_head; - log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT | GFS2_LFC_SHUTDOWN); + log_pull_tail(sdp); gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail); gfs2_assert_warn(sdp, list_empty(&sdp->sd_ail2_list)); - - gfs2_log_update_head(sdp); - sdp->sd_log_tail = sdp->sd_log_head; } static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp) @@ -1213,7 +1241,7 @@ int gfs2_logd(void *data) if (gfs2_jrnl_flush_reqd(sdp) || t == 0) { gfs2_ail1_empty(sdp, 0); gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL | - GFS2_LFC_LOGD_JFLUSH_REQD); + GFS2_LFC_LOGD_JFLUSH_REQD); } if (gfs2_ail_flush_reqd(sdp)) { @@ -1221,7 +1249,7 @@ int gfs2_logd(void *data) gfs2_ail1_wait(sdp); gfs2_ail1_empty(sdp, 0); gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL | - GFS2_LFC_LOGD_AIL_FLUSH_REQD); + GFS2_LFC_LOGD_AIL_FLUSH_REQD); } t = gfs2_tune_get(sdp, gt_logd_secs) * HZ; diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index b36a3539f3524..e7f4a8d6be643 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h @@ -13,6 +13,13 @@ #include "incore.h" #include "inode.h" +/* + * The minimum amount of log space required for a log flush is one block for + * revokes and one block for the log header. Log flushes other than + * GFS2_LOG_HEAD_FLUSH_NORMAL may write one or two more log headers. + */ +#define GFS2_LOG_FLUSH_MIN_BLOCKS 4 + /** * gfs2_log_lock - acquire the right to mess with the log manager * @sdp: the filesystem diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index f73d6b8f3b53b..231ca1a41a736 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -53,7 +53,7 @@ int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp, tr->tr_ip = ip; tr->tr_blocks = blocks; tr->tr_revokes = revokes; - tr->tr_reserved = 1; + tr->tr_reserved = GFS2_LOG_FLUSH_MIN_BLOCKS; if (blocks) { /* * The reserved blocks are either used for data or metadata. @@ -63,7 +63,7 @@ int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp, tr->tr_reserved += blocks + 1 + DIV_ROUND_UP(blocks - 1, databuf_limit(sdp)); } if (revokes) - tr->tr_reserved += gfs2_struct2blk(sdp, revokes); + tr->tr_reserved += gfs2_struct2blk(sdp, revokes) - 1; INIT_LIST_HEAD(&tr->tr_databuf); INIT_LIST_HEAD(&tr->tr_buf); INIT_LIST_HEAD(&tr->tr_list); -- GitLab From 2129b4288852cf872c42870c7f6e813ce0611199 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 17 Dec 2020 16:14:30 +0100 Subject: [PATCH 1894/1894] gfs2: Per-revoke accounting in transactions In the log, revokes are stored as a revoke descriptor (struct gfs2_log_descriptor), followed by zero or more additional revoke blocks (struct gfs2_meta_header). On filesystems with a blocksize of 4k, the revoke descriptor contains up to 503 revokes, and the metadata blocks contain up to 509 revokes each. We've so far been reserving space for revokes in transactions in block granularity, so a lot more space than necessary was being allocated and then released again. This patch switches to assigning revokes to transactions individually instead. Initially, space for the revoke descriptor is reserved and handed out to transactions. When more revokes than that are reserved, additional revoke blocks are added. When the log is flushed, the space for the additional revoke blocks is released, but we keep the space for the revoke descriptor block allocated. Transactions may still reserve more revokes than they will actually need in the end, but now we won't overshoot the target as much, and by only returning the space for excess revokes at log flush time, we further reduce the amount of contention between processes. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glops.c | 8 +-- fs/gfs2/incore.h | 3 +- fs/gfs2/log.c | 114 ++++++++++++++++++++++++++++++++++--------- fs/gfs2/log.h | 6 ++- fs/gfs2/lops.c | 1 - fs/gfs2/ops_fstype.c | 7 +++ fs/gfs2/trans.c | 34 ++++++++++--- 7 files changed, 131 insertions(+), 42 deletions(-) diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index a067924341e3f..8e32d569c8bf0 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -136,19 +136,15 @@ void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; unsigned int revokes = atomic_read(&gl->gl_ail_count); - unsigned int max_revokes = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / sizeof(u64); int ret; if (!revokes) return; - while (revokes > max_revokes) - max_revokes += (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header)) / sizeof(u64); - - ret = gfs2_trans_begin(sdp, 0, max_revokes); + ret = gfs2_trans_begin(sdp, 0, revokes); if (ret) return; - __gfs2_ail_flush(gl, fsync, max_revokes); + __gfs2_ail_flush(gl, fsync, revokes); gfs2_trans_end(sdp); gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL | GFS2_LFC_AIL_FLUSH); diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 51656b0531706..30f67c3c87b04 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -506,7 +506,6 @@ struct gfs2_trans { unsigned int tr_num_buf_rm; unsigned int tr_num_databuf_rm; unsigned int tr_num_revoke; - unsigned int tr_num_revoke_rm; struct list_head tr_list; struct list_head tr_databuf; @@ -821,7 +820,6 @@ struct gfs2_sbd { struct gfs2_trans *sd_log_tr; unsigned int sd_log_blks_reserved; - int sd_log_committed_revoke; atomic_t sd_log_pinned; unsigned int sd_log_num_revoke; @@ -834,6 +832,7 @@ struct gfs2_sbd { atomic_t sd_log_thresh2; atomic_t sd_log_blks_free; atomic_t sd_log_blks_needed; + atomic_t sd_log_revokes_available; wait_queue_head_t sd_log_waitq; wait_queue_head_t sd_logd_waitq; diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 913150e60f525..68e7afdd19a87 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -429,6 +429,32 @@ bool gfs2_log_is_empty(struct gfs2_sbd *sdp) { return atomic_read(&sdp->sd_log_blks_free) == sdp->sd_jdesc->jd_blocks; } +static bool __gfs2_log_try_reserve_revokes(struct gfs2_sbd *sdp, unsigned int revokes) +{ + unsigned int available; + + available = atomic_read(&sdp->sd_log_revokes_available); + while (available >= revokes) { + if (atomic_try_cmpxchg(&sdp->sd_log_revokes_available, + &available, available - revokes)) + return true; + } + return false; +} + +/** + * gfs2_log_release_revokes - Release a given number of revokes + * @sdp: The GFS2 superblock + * @revokes: The number of revokes to release + * + * sdp->sd_log_flush_lock must be held. + */ +void gfs2_log_release_revokes(struct gfs2_sbd *sdp, unsigned int revokes) +{ + if (revokes) + atomic_add(revokes, &sdp->sd_log_revokes_available); +} + /** * gfs2_log_release - Release a given number of log blocks * @sdp: The GFS2 superblock @@ -519,15 +545,59 @@ reserved: } /** - * gfs2_log_reserve - Make a log reservation + * gfs2_log_try_reserve - Try to make a log reservation * @sdp: The GFS2 superblock - * @blks: The number of blocks to reserve + * @tr: The transaction + * @extra_revokes: The number of additional revokes reserved (output) + * + * This is similar to gfs2_log_reserve, but sdp->sd_log_flush_lock must be + * held for correct revoke accounting. */ -void gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks) +bool gfs2_log_try_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr, + unsigned int *extra_revokes) { + unsigned int blks = tr->tr_reserved; + unsigned int revokes = tr->tr_revokes; + unsigned int revoke_blks = 0; + + *extra_revokes = 0; + if (revokes && !__gfs2_log_try_reserve_revokes(sdp, revokes)) { + revoke_blks = DIV_ROUND_UP(revokes, sdp->sd_inptrs); + *extra_revokes = revoke_blks * sdp->sd_inptrs - revokes; + blks += revoke_blks; + } + if (!blks) + return true; if (__gfs2_log_try_reserve(sdp, blks, GFS2_LOG_FLUSH_MIN_BLOCKS)) - return; + return true; + if (!revoke_blks) + gfs2_log_release_revokes(sdp, revokes); + return false; +} + +/** + * gfs2_log_reserve - Make a log reservation + * @sdp: The GFS2 superblock + * @tr: The transaction + * @extra_revokes: The number of additional revokes reserved (output) + * + * sdp->sd_log_flush_lock must not be held. + */ + +void gfs2_log_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr, + unsigned int *extra_revokes) +{ + unsigned int blks = tr->tr_reserved; + unsigned int revokes = tr->tr_revokes; + unsigned int revoke_blks = 0; + + *extra_revokes = 0; + if (revokes) { + revoke_blks = DIV_ROUND_UP(revokes, sdp->sd_inptrs); + *extra_revokes = revoke_blks * sdp->sd_inptrs - revokes; + blks += revoke_blks; + } __gfs2_log_reserve(sdp, blks, GFS2_LOG_FLUSH_MIN_BLOCKS); } @@ -588,9 +658,6 @@ static unsigned int calc_reserved(struct gfs2_sbd *sdp) blocks = tr->tr_num_databuf_new - tr->tr_num_databuf_rm; reserved += blocks + DIV_ROUND_UP(blocks, databuf_limit(sdp)); } - - if (sdp->sd_log_committed_revoke > 0) - reserved += gfs2_struct2blk(sdp, sdp->sd_log_committed_revoke) - 1; return reserved; } @@ -730,14 +797,9 @@ void gfs2_glock_remove_revoke(struct gfs2_glock *gl) void gfs2_flush_revokes(struct gfs2_sbd *sdp) { /* number of revokes we still have room for */ - unsigned int max_revokes; + unsigned int max_revokes = atomic_read(&sdp->sd_log_revokes_available); gfs2_log_lock(sdp); - max_revokes = sdp->sd_ldptrs; - if (sdp->sd_log_num_revoke > sdp->sd_ldptrs) - max_revokes += roundup(sdp->sd_log_num_revoke - sdp->sd_ldptrs, - sdp->sd_inptrs); - max_revokes -= sdp->sd_log_num_revoke; gfs2_ail1_empty(sdp, max_revokes); gfs2_log_unlock(sdp); } @@ -955,6 +1017,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) unsigned int reserved_blocks = 0, used_blocks = 0; enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state); unsigned int first_log_head; + unsigned int reserved_revokes = 0; down_write(&sdp->sd_log_flush_lock); trace_gfs2_log_flush(sdp, 1, flags); @@ -979,13 +1042,15 @@ repeat: if (reserved_blocks) gfs2_log_release(sdp, reserved_blocks); reserved_blocks = sdp->sd_log_blks_reserved; + reserved_revokes = sdp->sd_log_num_revoke; if (tr) { sdp->sd_log_tr = NULL; tr->tr_first = first_log_head; - if (unlikely (state == SFS_FROZEN)) + if (unlikely (state == SFS_FROZEN)) { if (gfs2_assert_withdraw_delayed(sdp, !tr->tr_num_buf_new && !tr->tr_num_databuf_new)) goto out_withdraw; + } } } else if (!reserved_blocks) { unsigned int taboo_blocks = GFS2_LOG_FLUSH_MIN_BLOCKS; @@ -1000,17 +1065,15 @@ repeat: down_write(&sdp->sd_log_flush_lock); goto repeat; } + BUG_ON(sdp->sd_log_num_revoke); } if (flags & GFS2_LOG_HEAD_FLUSH_SHUTDOWN) clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); if (unlikely(state == SFS_FROZEN)) - if (gfs2_assert_withdraw_delayed(sdp, !sdp->sd_log_num_revoke)) + if (gfs2_assert_withdraw_delayed(sdp, !reserved_revokes)) goto out_withdraw; - if (gfs2_assert_withdraw_delayed(sdp, - sdp->sd_log_num_revoke == sdp->sd_log_committed_revoke)) - goto out_withdraw; gfs2_ordered_write(sdp); if (gfs2_withdrawn(sdp)) @@ -1034,7 +1097,6 @@ repeat: gfs2_log_lock(sdp); sdp->sd_log_blks_reserved = 0; - sdp->sd_log_committed_revoke = 0; spin_lock(&sdp->sd_ail_lock); if (tr && !list_empty(&tr->tr_ail1_list)) { @@ -1060,11 +1122,16 @@ repeat: out_end: used_blocks = log_distance(sdp, sdp->sd_log_flush_head, first_log_head); - if (gfs2_assert_withdraw_delayed(sdp, used_blocks <= reserved_blocks)) - goto out; + reserved_revokes += atomic_read(&sdp->sd_log_revokes_available); + atomic_set(&sdp->sd_log_revokes_available, sdp->sd_ldptrs); + gfs2_assert_withdraw(sdp, reserved_revokes % sdp->sd_inptrs == sdp->sd_ldptrs); + if (reserved_revokes > sdp->sd_ldptrs) + reserved_blocks += (reserved_revokes - sdp->sd_ldptrs) / sdp->sd_inptrs; out: - if (used_blocks != reserved_blocks) + if (used_blocks != reserved_blocks) { + gfs2_assert_withdraw_delayed(sdp, used_blocks < reserved_blocks); gfs2_log_release(sdp, reserved_blocks - used_blocks); + } up_write(&sdp->sd_log_flush_lock); gfs2_trans_free(sdp, tr); if (gfs2_withdrawing(sdp)) @@ -1105,8 +1172,8 @@ static void gfs2_merge_trans(struct gfs2_sbd *sdp, struct gfs2_trans *new) old->tr_num_databuf_new += new->tr_num_databuf_new; old->tr_num_buf_rm += new->tr_num_buf_rm; old->tr_num_databuf_rm += new->tr_num_databuf_rm; + old->tr_revokes += new->tr_revokes; old->tr_num_revoke += new->tr_num_revoke; - old->tr_num_revoke_rm += new->tr_num_revoke_rm; list_splice_tail_init(&new->tr_databuf, &old->tr_databuf); list_splice_tail_init(&new->tr_buf, &old->tr_buf); @@ -1133,7 +1200,6 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) set_bit(TR_ATTACHED, &tr->tr_flags); } - sdp->sd_log_committed_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm; reserved = calc_reserved(sdp); maxres = sdp->sd_log_blks_reserved + tr->tr_reserved; gfs2_assert_withdraw(sdp, maxres >= reserved); diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index e7f4a8d6be643..eea58015710e7 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h @@ -74,8 +74,12 @@ extern void gfs2_ordered_del_inode(struct gfs2_inode *ip); extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct); extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd); extern bool gfs2_log_is_empty(struct gfs2_sbd *sdp); +extern void gfs2_log_release_revokes(struct gfs2_sbd *sdp, unsigned int revokes); extern void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks); -extern void gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks); +extern bool gfs2_log_try_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr, + unsigned int *extra_revokes); +extern void gfs2_log_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr, + unsigned int *extra_revokes); extern void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, u64 seq, u32 tail, u32 lblock, u32 flags, int op_flags); diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 8658ebbcb4a90..dfe8537cb88eb 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -857,7 +857,6 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) sdp->sd_log_num_revoke--; if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) { - gfs2_log_write_page(sdp, page); page = mempool_alloc(gfs2_page_pool, GFP_NOIO); mh = page_address(page); diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 986dc2ebebf0d..316ca5fc99e89 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -315,6 +315,13 @@ static int gfs2_read_sb(struct gfs2_sbd *sdp, int silent) sizeof(struct gfs2_meta_header)) * GFS2_NBBY; /* not the rgrp bitmap, subsequent bitmaps only */ + /* + * We always keep at least one block reserved for revokes in + * transactions. This greatly simplifies allocating additional + * revoke blocks. + */ + atomic_set(&sdp->sd_log_revokes_available, sdp->sd_ldptrs); + /* Compute maximum reservation required to add a entry to a directory */ hash_blocks = DIV_ROUND_UP(sizeof(u64) * BIT(GFS2_DIR_MAX_DEPTH), diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index 231ca1a41a736..ab96cf0bf26be 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -31,16 +31,18 @@ static void gfs2_print_trans(struct gfs2_sbd *sdp, const struct gfs2_trans *tr) fs_warn(sdp, "blocks=%u revokes=%u reserved=%u touched=%u\n", tr->tr_blocks, tr->tr_revokes, tr->tr_reserved, test_bit(TR_TOUCHED, &tr->tr_flags)); - fs_warn(sdp, "Buf %u/%u Databuf %u/%u Revoke %u/%u\n", + fs_warn(sdp, "Buf %u/%u Databuf %u/%u Revoke %u\n", tr->tr_num_buf_new, tr->tr_num_buf_rm, tr->tr_num_databuf_new, tr->tr_num_databuf_rm, - tr->tr_num_revoke, tr->tr_num_revoke_rm); + tr->tr_num_revoke); } int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp, unsigned int blocks, unsigned int revokes, unsigned long ip) { + unsigned int extra_revokes; + if (current->journal_info) { gfs2_print_trans(sdp, current->journal_info); BUG(); @@ -62,8 +64,6 @@ int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp, */ tr->tr_reserved += blocks + 1 + DIV_ROUND_UP(blocks - 1, databuf_limit(sdp)); } - if (revokes) - tr->tr_reserved += gfs2_struct2blk(sdp, revokes) - 1; INIT_LIST_HEAD(&tr->tr_databuf); INIT_LIST_HEAD(&tr->tr_buf); INIT_LIST_HEAD(&tr->tr_list); @@ -75,10 +75,26 @@ int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp, sb_start_intwrite(sdp->sd_vfs); - gfs2_log_reserve(sdp, tr->tr_reserved); + /* + * Try the reservations under sd_log_flush_lock to prevent log flushes + * from creating inconsistencies between the number of allocated and + * reserved revokes. If that fails, do a full-block allocation outside + * of the lock to avoid stalling log flushes. Then, allot the + * appropriate number of blocks to revokes, use as many revokes locally + * as needed, and "release" the surplus into the revokes pool. + */ down_read(&sdp->sd_log_flush_lock); + if (gfs2_log_try_reserve(sdp, tr, &extra_revokes)) + goto reserved; + up_read(&sdp->sd_log_flush_lock); + gfs2_log_reserve(sdp, tr, &extra_revokes); + down_read(&sdp->sd_log_flush_lock); + +reserved: + gfs2_log_release_revokes(sdp, extra_revokes); if (unlikely(!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))) { + gfs2_log_release_revokes(sdp, tr->tr_revokes); up_read(&sdp->sd_log_flush_lock); gfs2_log_release(sdp, tr->tr_reserved); sb_end_intwrite(sdp->sd_vfs); @@ -113,14 +129,17 @@ void gfs2_trans_end(struct gfs2_sbd *sdp) current->journal_info = NULL; if (!test_bit(TR_TOUCHED, &tr->tr_flags)) { - gfs2_log_release(sdp, tr->tr_reserved); + gfs2_log_release_revokes(sdp, tr->tr_revokes); up_read(&sdp->sd_log_flush_lock); + gfs2_log_release(sdp, tr->tr_reserved); if (!test_bit(TR_ONSTACK, &tr->tr_flags)) gfs2_trans_free(sdp, tr); sb_end_intwrite(sdp->sd_vfs); return; } + gfs2_log_release_revokes(sdp, tr->tr_revokes - tr->tr_num_revoke); + nbuf = tr->tr_num_buf_new + tr->tr_num_databuf_new; nbuf -= tr->tr_num_buf_rm; nbuf -= tr->tr_num_databuf_rm; @@ -278,7 +297,6 @@ void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) void gfs2_trans_remove_revoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len) { struct gfs2_bufdata *bd, *tmp; - struct gfs2_trans *tr = current->journal_info; unsigned int n = len; gfs2_log_lock(sdp); @@ -290,7 +308,7 @@ void gfs2_trans_remove_revoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len) if (bd->bd_gl) gfs2_glock_remove_revoke(bd->bd_gl); kmem_cache_free(gfs2_bufdata_cachep, bd); - tr->tr_num_revoke_rm++; + gfs2_log_release_revokes(sdp, 1); if (--n == 0) break; } -- GitLab