diff --git a/.mailmap b/.mailmap index 825fae8e6b7bd56ce75561ef54a66110c7f1b8f7..13e4f504e17fbb7f91d6f87ae5589b989157ee73 100644 --- a/.mailmap +++ b/.mailmap @@ -10,6 +10,8 @@ # Please keep this list dictionary sorted. # Aaron Durbin +Abel Vesa +Abel Vesa Abhinav Kumar Adam Oldham Adam Radford @@ -62,6 +64,9 @@ Bart Van Assche Bart Van Assche Ben Gardner Ben M Cahill +Ben Widawsky +Ben Widawsky +Ben Widawsky Björn Steinbrink Björn Töpel Björn Töpel @@ -85,6 +90,7 @@ Christian Borntraeger Christian Brauner Christian Brauner Christian Brauner +Christian Marangi Christophe Ricard Christoph Hellwig Colin Ian King @@ -165,6 +171,7 @@ Jan Glauber Jan Glauber Jan Glauber Jarkko Sakkinen +Jarkko Sakkinen Jason Gunthorpe Jason Gunthorpe Jason Gunthorpe diff --git a/CREDITS b/CREDITS index 7e85a53b6a880942e9379125bae8ad4c9a08f13b..40d3c655b567a0b914fd9809dabcb8cdf5cc0f3c 100644 --- a/CREDITS +++ b/CREDITS @@ -627,6 +627,10 @@ S: 48287 Sawleaf S: Fremont, California 94539 S: USA +N: Tomas Cech +E: sleep_walker@suse.com +D: arm/palm treo support + N: Florent Chabaud E: florent.chabaud@polytechnique.org D: software suspend diff --git a/Documentation/ABI/testing/sysfs-bus-iio-vf610 b/Documentation/ABI/testing/sysfs-bus-iio-vf610 index 308a6756d3bf3fab4812b2edb735b5978ccc4e5a..491ead804488814b493d423949ea642006b50ffd 100644 --- a/Documentation/ABI/testing/sysfs-bus-iio-vf610 +++ b/Documentation/ABI/testing/sysfs-bus-iio-vf610 @@ -1,4 +1,4 @@ -What: /sys/bus/iio/devices/iio:deviceX/conversion_mode +What: /sys/bus/iio/devices/iio:deviceX/in_conversion_mode KernelVersion: 4.2 Contact: linux-iio@vger.kernel.org Description: diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 2ad01cad7f1c8286530a7a959aaf655b990a6c98..bcc974d276dc4521367b3a12038526f1cc49e166 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -526,6 +526,7 @@ What: /sys/devices/system/cpu/vulnerabilities /sys/devices/system/cpu/vulnerabilities/srbds /sys/devices/system/cpu/vulnerabilities/tsx_async_abort /sys/devices/system/cpu/vulnerabilities/itlb_multihit + /sys/devices/system/cpu/vulnerabilities/mmio_stale_data Date: January 2018 Contact: Linux kernel mailing list Description: Information about CPU vulnerabilities diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst index 8cbc711cda9359a9e99cc62f51a02e46fb65b0d0..4df436e7c4177996cb5a7f02b0dfc136c7d41d21 100644 --- a/Documentation/admin-guide/hw-vuln/index.rst +++ b/Documentation/admin-guide/hw-vuln/index.rst @@ -17,3 +17,4 @@ are configurable at compile, boot or run time. special-register-buffer-data-sampling.rst core-scheduling.rst l1d_flush.rst + processor_mmio_stale_data.rst diff --git a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst new file mode 100644 index 0000000000000000000000000000000000000000..9393c50b5afc9c9fe8b9ac90ed9fe4774e1d1550 --- /dev/null +++ b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst @@ -0,0 +1,246 @@ +========================================= +Processor MMIO Stale Data Vulnerabilities +========================================= + +Processor MMIO Stale Data Vulnerabilities are a class of memory-mapped I/O +(MMIO) vulnerabilities that can expose data. The sequences of operations for +exposing data range from simple to very complex. Because most of the +vulnerabilities require the attacker to have access to MMIO, many environments +are not affected. System environments using virtualization where MMIO access is +provided to untrusted guests may need mitigation. These vulnerabilities are +not transient execution attacks. However, these vulnerabilities may propagate +stale data into core fill buffers where the data can subsequently be inferred +by an unmitigated transient execution attack. Mitigation for these +vulnerabilities includes a combination of microcode update and software +changes, depending on the platform and usage model. Some of these mitigations +are similar to those used to mitigate Microarchitectural Data Sampling (MDS) or +those used to mitigate Special Register Buffer Data Sampling (SRBDS). + +Data Propagators +================ +Propagators are operations that result in stale data being copied or moved from +one microarchitectural buffer or register to another. Processor MMIO Stale Data +Vulnerabilities are operations that may result in stale data being directly +read into an architectural, software-visible state or sampled from a buffer or +register. + +Fill Buffer Stale Data Propagator (FBSDP) +----------------------------------------- +Stale data may propagate from fill buffers (FB) into the non-coherent portion +of the uncore on some non-coherent writes. Fill buffer propagation by itself +does not make stale data architecturally visible. Stale data must be propagated +to a location where it is subject to reading or sampling. + +Sideband Stale Data Propagator (SSDP) +------------------------------------- +The sideband stale data propagator (SSDP) is limited to the client (including +Intel Xeon server E3) uncore implementation. The sideband response buffer is +shared by all client cores. For non-coherent reads that go to sideband +destinations, the uncore logic returns 64 bytes of data to the core, including +both requested data and unrequested stale data, from a transaction buffer and +the sideband response buffer. As a result, stale data from the sideband +response and transaction buffers may now reside in a core fill buffer. + +Primary Stale Data Propagator (PSDP) +------------------------------------ +The primary stale data propagator (PSDP) is limited to the client (including +Intel Xeon server E3) uncore implementation. Similar to the sideband response +buffer, the primary response buffer is shared by all client cores. For some +processors, MMIO primary reads will return 64 bytes of data to the core fill +buffer including both requested data and unrequested stale data. This is +similar to the sideband stale data propagator. + +Vulnerabilities +=============== +Device Register Partial Write (DRPW) (CVE-2022-21166) +----------------------------------------------------- +Some endpoint MMIO registers incorrectly handle writes that are smaller than +the register size. Instead of aborting the write or only copying the correct +subset of bytes (for example, 2 bytes for a 2-byte write), more bytes than +specified by the write transaction may be written to the register. On +processors affected by FBSDP, this may expose stale data from the fill buffers +of the core that created the write transaction. + +Shared Buffers Data Sampling (SBDS) (CVE-2022-21125) +---------------------------------------------------- +After propagators may have moved data around the uncore and copied stale data +into client core fill buffers, processors affected by MFBDS can leak data from +the fill buffer. It is limited to the client (including Intel Xeon server E3) +uncore implementation. + +Shared Buffers Data Read (SBDR) (CVE-2022-21123) +------------------------------------------------ +It is similar to Shared Buffer Data Sampling (SBDS) except that the data is +directly read into the architectural software-visible state. It is limited to +the client (including Intel Xeon server E3) uncore implementation. + +Affected Processors +=================== +Not all the CPUs are affected by all the variants. For instance, most +processors for the server market (excluding Intel Xeon E3 processors) are +impacted by only Device Register Partial Write (DRPW). + +Below is the list of affected Intel processors [#f1]_: + + =================== ============ ========= + Common name Family_Model Steppings + =================== ============ ========= + HASWELL_X 06_3FH 2,4 + SKYLAKE_L 06_4EH 3 + BROADWELL_X 06_4FH All + SKYLAKE_X 06_55H 3,4,6,7,11 + BROADWELL_D 06_56H 3,4,5 + SKYLAKE 06_5EH 3 + ICELAKE_X 06_6AH 4,5,6 + ICELAKE_D 06_6CH 1 + ICELAKE_L 06_7EH 5 + ATOM_TREMONT_D 06_86H All + LAKEFIELD 06_8AH 1 + KABYLAKE_L 06_8EH 9 to 12 + ATOM_TREMONT 06_96H 1 + ATOM_TREMONT_L 06_9CH 0 + KABYLAKE 06_9EH 9 to 13 + COMETLAKE 06_A5H 2,3,5 + COMETLAKE_L 06_A6H 0,1 + ROCKETLAKE 06_A7H 1 + =================== ============ ========= + +If a CPU is in the affected processor list, but not affected by a variant, it +is indicated by new bits in MSR IA32_ARCH_CAPABILITIES. As described in a later +section, mitigation largely remains the same for all the variants, i.e. to +clear the CPU fill buffers via VERW instruction. + +New bits in MSRs +================ +Newer processors and microcode update on existing affected processors added new +bits to IA32_ARCH_CAPABILITIES MSR. These bits can be used to enumerate +specific variants of Processor MMIO Stale Data vulnerabilities and mitigation +capability. + +MSR IA32_ARCH_CAPABILITIES +-------------------------- +Bit 13 - SBDR_SSDP_NO - When set, processor is not affected by either the + Shared Buffers Data Read (SBDR) vulnerability or the sideband stale + data propagator (SSDP). +Bit 14 - FBSDP_NO - When set, processor is not affected by the Fill Buffer + Stale Data Propagator (FBSDP). +Bit 15 - PSDP_NO - When set, processor is not affected by Primary Stale Data + Propagator (PSDP). +Bit 17 - FB_CLEAR - When set, VERW instruction will overwrite CPU fill buffer + values as part of MD_CLEAR operations. Processors that do not + enumerate MDS_NO (meaning they are affected by MDS) but that do + enumerate support for both L1D_FLUSH and MD_CLEAR implicitly enumerate + FB_CLEAR as part of their MD_CLEAR support. +Bit 18 - FB_CLEAR_CTRL - Processor supports read and write to MSR + IA32_MCU_OPT_CTRL[FB_CLEAR_DIS]. On such processors, the FB_CLEAR_DIS + bit can be set to cause the VERW instruction to not perform the + FB_CLEAR action. Not all processors that support FB_CLEAR will support + FB_CLEAR_CTRL. + +MSR IA32_MCU_OPT_CTRL +--------------------- +Bit 3 - FB_CLEAR_DIS - When set, VERW instruction does not perform the FB_CLEAR +action. This may be useful to reduce the performance impact of FB_CLEAR in +cases where system software deems it warranted (for example, when performance +is more critical, or the untrusted software has no MMIO access). Note that +FB_CLEAR_DIS has no impact on enumeration (for example, it does not change +FB_CLEAR or MD_CLEAR enumeration) and it may not be supported on all processors +that enumerate FB_CLEAR. + +Mitigation +========== +Like MDS, all variants of Processor MMIO Stale Data vulnerabilities have the +same mitigation strategy to force the CPU to clear the affected buffers before +an attacker can extract the secrets. + +This is achieved by using the otherwise unused and obsolete VERW instruction in +combination with a microcode update. The microcode clears the affected CPU +buffers when the VERW instruction is executed. + +Kernel reuses the MDS function to invoke the buffer clearing: + + mds_clear_cpu_buffers() + +On MDS affected CPUs, the kernel already invokes CPU buffer clear on +kernel/userspace, hypervisor/guest and C-state (idle) transitions. No +additional mitigation is needed on such CPUs. + +For CPUs not affected by MDS or TAA, mitigation is needed only for the attacker +with MMIO capability. Therefore, VERW is not required for kernel/userspace. For +virtualization case, VERW is only needed at VMENTER for a guest with MMIO +capability. + +Mitigation points +----------------- +Return to user space +^^^^^^^^^^^^^^^^^^^^ +Same mitigation as MDS when affected by MDS/TAA, otherwise no mitigation +needed. + +C-State transition +^^^^^^^^^^^^^^^^^^ +Control register writes by CPU during C-state transition can propagate data +from fill buffer to uncore buffers. Execute VERW before C-state transition to +clear CPU fill buffers. + +Guest entry point +^^^^^^^^^^^^^^^^^ +Same mitigation as MDS when processor is also affected by MDS/TAA, otherwise +execute VERW at VMENTER only for MMIO capable guests. On CPUs not affected by +MDS/TAA, guest without MMIO access cannot extract secrets using Processor MMIO +Stale Data vulnerabilities, so there is no need to execute VERW for such guests. + +Mitigation control on the kernel command line +--------------------------------------------- +The kernel command line allows to control the Processor MMIO Stale Data +mitigations at boot time with the option "mmio_stale_data=". The valid +arguments for this option are: + + ========== ================================================================= + full If the CPU is vulnerable, enable mitigation; CPU buffer clearing + on exit to userspace and when entering a VM. Idle transitions are + protected as well. It does not automatically disable SMT. + full,nosmt Same as full, with SMT disabled on vulnerable CPUs. This is the + complete mitigation. + off Disables mitigation completely. + ========== ================================================================= + +If the CPU is affected and mmio_stale_data=off is not supplied on the kernel +command line, then the kernel selects the appropriate mitigation. + +Mitigation status information +----------------------------- +The Linux kernel provides a sysfs interface to enumerate the current +vulnerability status of the system: whether the system is vulnerable, and +which mitigations are active. The relevant sysfs file is: + + /sys/devices/system/cpu/vulnerabilities/mmio_stale_data + +The possible values in this file are: + + .. list-table:: + + * - 'Not affected' + - The processor is not vulnerable + * - 'Vulnerable' + - The processor is vulnerable, but no mitigation enabled + * - 'Vulnerable: Clear CPU buffers attempted, no microcode' + - The processor is vulnerable, but microcode is not updated. The + mitigation is enabled on a best effort basis. + * - 'Mitigation: Clear CPU buffers' + - The processor is vulnerable and the CPU buffer clearing mitigation is + enabled. + +If the processor is vulnerable then the following information is appended to +the above information: + + ======================== =========================================== + 'SMT vulnerable' SMT is enabled + 'SMT disabled' SMT is disabled + 'SMT Host state unknown' Kernel runs in a VM, Host SMT state unknown + ======================== =========================================== + +References +---------- +.. [#f1] Affected Processors + https://www.intel.com/content/www/us/en/developer/topic-technology/software-security-guidance/processors-affected-consolidated-product-cpu-model.html diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 8090130b544b0701237a7b657a29c83c000a60f4..f2d26cb7e85391028376424b9daac448df69c015 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2469,7 +2469,6 @@ protected: nVHE-based mode with support for guests whose state is kept private from the host. - Not valid if the kernel is running in EL2. Defaults to VHE/nVHE based on hardware support. Setting mode to "protected" will disable kexec and hibernation @@ -3176,6 +3175,7 @@ srbds=off [X86,INTEL] no_entry_flush [PPC] no_uaccess_flush [PPC] + mmio_stale_data=off [X86] Exceptions: This does not have any effect on @@ -3197,6 +3197,7 @@ Equivalent to: l1tf=flush,nosmt [X86] mds=full,nosmt [X86] tsx_async_abort=full,nosmt [X86] + mmio_stale_data=full,nosmt [X86] mminit_loglevel= [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this @@ -3206,6 +3207,40 @@ log everything. Information is printed at KERN_DEBUG so loglevel=8 may also need to be specified. + mmio_stale_data= + [X86,INTEL] Control mitigation for the Processor + MMIO Stale Data vulnerabilities. + + Processor MMIO Stale Data is a class of + vulnerabilities that may expose data after an MMIO + operation. Exposed data could originate or end in + the same CPU buffers as affected by MDS and TAA. + Therefore, similar to MDS and TAA, the mitigation + is to clear the affected CPU buffers. + + This parameter controls the mitigation. The + options are: + + full - Enable mitigation on vulnerable CPUs + + full,nosmt - Enable mitigation and disable SMT on + vulnerable CPUs. + + off - Unconditionally disable mitigation + + On MDS or TAA affected machines, + mmio_stale_data=off can be prevented by an active + MDS or TAA mitigation as these vulnerabilities are + mitigated with the same mechanism so in order to + disable this mitigation, you need to specify + mds=off and tsx_async_abort=off too. + + Not specifying this option is equivalent to + mmio_stale_data=full. + + For details see: + Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst + module.sig_enforce [KNL] When CONFIG_MODULE_SIG is set, this means that modules without (valid) signatures will fail to load. @@ -5162,6 +5197,30 @@ retain_initrd [RAM] Keep initrd memory after extraction + retbleed= [X86] Control mitigation of RETBleed (Arbitrary + Speculative Code Execution with Return Instructions) + vulnerability. + + off - no mitigation + auto - automatically select a migitation + auto,nosmt - automatically select a mitigation, + disabling SMT if necessary for + the full mitigation (only on Zen1 + and older without STIBP). + ibpb - mitigate short speculation windows on + basic block boundaries too. Safe, highest + perf impact. + unret - force enable untrained return thunks, + only effective on AMD f15h-f17h + based systems. + unret,nosmt - like unret, will disable SMT when STIBP + is not available. + + Selecting 'auto' will choose a mitigation method at run + time according to the CPU. + + Not specifying this option is equivalent to retbleed=auto. + rfkill.default_state= 0 "airplane mode". All wifi, bluetooth, wimax, gps, fm, etc. communication is blocked by default. @@ -5533,6 +5592,7 @@ eibrs - enhanced IBRS eibrs,retpoline - enhanced IBRS + Retpolines eibrs,lfence - enhanced IBRS + LFENCE + ibrs - use IBRS to protect kernel Not specifying this option is equivalent to spectre_v2=auto. diff --git a/Documentation/core-api/kernel-api.rst b/Documentation/core-api/kernel-api.rst index d6b3f94b9f1fb5959f042fe28fbf16068bd67261..0793c400d4b0594af47b5e56f07387ac123e2692 100644 --- a/Documentation/core-api/kernel-api.rst +++ b/Documentation/core-api/kernel-api.rst @@ -223,7 +223,7 @@ Module Loading Inter Module support -------------------- -Refer to the file kernel/module.c for more information. +Refer to the files in kernel/module/ for more information. Hardware Interfaces =================== diff --git a/Documentation/core-api/symbol-namespaces.rst b/Documentation/core-api/symbol-namespaces.rst index 5ad9e0abe42cf3debc2bf88f407c5aa27d9bf410..12e4aecdae94522c30929c427410505bef6f3c4b 100644 --- a/Documentation/core-api/symbol-namespaces.rst +++ b/Documentation/core-api/symbol-namespaces.rst @@ -51,8 +51,8 @@ namespace ``USB_STORAGE``, use:: The corresponding ksymtab entry struct ``kernel_symbol`` will have the member ``namespace`` set accordingly. A symbol that is exported without a namespace will refer to ``NULL``. There is no default namespace if none is defined. ``modpost`` -and kernel/module.c make use the namespace at build time or module load time, -respectively. +and kernel/module/main.c make use the namespace at build time or module load +time, respectively. 2.2 Using the DEFAULT_SYMBOL_NAMESPACE define ============================================= diff --git a/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-display-engine.yaml b/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-display-engine.yaml index c388ae5da1e429303944429f790896d1634adf1d..c9c346e6228e9a9bdf9e9ae835f088d136182ac2 100644 --- a/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-display-engine.yaml +++ b/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-display-engine.yaml @@ -94,6 +94,7 @@ if: - allwinner,sun8i-a83t-display-engine - allwinner,sun8i-r40-display-engine - allwinner,sun9i-a80-display-engine + - allwinner,sun20i-d1-display-engine - allwinner,sun50i-a64-display-engine then: diff --git a/Documentation/devicetree/bindings/dma/allwinner,sun50i-a64-dma.yaml b/Documentation/devicetree/bindings/dma/allwinner,sun50i-a64-dma.yaml index ff0a5c58d78ccd6537d98dc20a9d07effc3636aa..e712444abff17776e9138908560551bedec4dd11 100644 --- a/Documentation/devicetree/bindings/dma/allwinner,sun50i-a64-dma.yaml +++ b/Documentation/devicetree/bindings/dma/allwinner,sun50i-a64-dma.yaml @@ -67,7 +67,7 @@ if: then: properties: clocks: - maxItems: 2 + minItems: 2 required: - clock-names diff --git a/Documentation/devicetree/bindings/hwmon/ti,tmp401.yaml b/Documentation/devicetree/bindings/hwmon/ti,tmp401.yaml index fe0ac08faa1a7065762616b7babd90318327eb1d..0e8ddf0ad7890dce61b1e14c9417b7aad85869d1 100644 --- a/Documentation/devicetree/bindings/hwmon/ti,tmp401.yaml +++ b/Documentation/devicetree/bindings/hwmon/ti,tmp401.yaml @@ -40,9 +40,8 @@ properties: value to be used for converting remote channel measurements to temperature. $ref: /schemas/types.yaml#/definitions/int32 - items: - minimum: -128 - maximum: 127 + minimum: -128 + maximum: 127 ti,beta-compensation: description: diff --git a/Documentation/devicetree/bindings/interrupt-controller/socionext,uniphier-aidet.yaml b/Documentation/devicetree/bindings/interrupt-controller/socionext,uniphier-aidet.yaml index f89ebde76dab3b4e855c1528da88670370b9a08c..de7c5e59bae14652d3c2b8b679ab15c316ccddda 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/socionext,uniphier-aidet.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/socionext,uniphier-aidet.yaml @@ -30,6 +30,7 @@ properties: - socionext,uniphier-ld11-aidet - socionext,uniphier-ld20-aidet - socionext,uniphier-pxs3-aidet + - socionext,uniphier-nx1-aidet reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/net/wireless/qca,ath9k.yaml b/Documentation/devicetree/bindings/net/wireless/qca,ath9k.yaml index 8cd0adbf7021f88fd62243e41f92a6aa9b064cbb..7029cb1f38ffb8a79f9dece86cf35b97bc2687b3 100644 --- a/Documentation/devicetree/bindings/net/wireless/qca,ath9k.yaml +++ b/Documentation/devicetree/bindings/net/wireless/qca,ath9k.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Atheros ath9k wireless devices Generic Binding maintainers: - - Kalle Valo + - Toke Høiland-Jørgensen description: | This node provides properties for configuring the ath9k wireless device. diff --git a/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml b/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml index 8c01fdba134b922c4e0dea54f32135b1090c1d38..a677b056f11206f8140972a6f9ab02cd92422399 100644 --- a/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml +++ b/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml @@ -9,7 +9,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Technologies ath11k wireless devices Generic Binding maintainers: - - Kalle Valo + - Kalle Valo description: | These are dt entries for Qualcomm Technologies, Inc. IEEE 802.11ax diff --git a/Documentation/devicetree/bindings/sound/qcom,lpass-cpu.yaml b/Documentation/devicetree/bindings/sound/qcom,lpass-cpu.yaml index e9a533080b32d530b8a5cf7d31a7005526520f20..ef18a572a1ff385d7ca91726c2e5f2972d1bfb70 100644 --- a/Documentation/devicetree/bindings/sound/qcom,lpass-cpu.yaml +++ b/Documentation/devicetree/bindings/sound/qcom,lpass-cpu.yaml @@ -25,12 +25,12 @@ properties: - qcom,sc7280-lpass-cpu reg: - minItems: 2 + minItems: 1 maxItems: 6 description: LPAIF core registers reg-names: - minItems: 2 + minItems: 1 maxItems: 6 clocks: @@ -42,12 +42,12 @@ properties: maxItems: 10 interrupts: - minItems: 2 + minItems: 1 maxItems: 4 description: LPAIF DMA buffer interrupt interrupt-names: - minItems: 2 + minItems: 1 maxItems: 4 qcom,adsp: diff --git a/Documentation/devicetree/bindings/spi/microchip,mpfs-spi.yaml b/Documentation/devicetree/bindings/spi/microchip,mpfs-spi.yaml index ece261b8e963f0297582ff96329dddd96b7998fd..7326c0a28d160965407efa36357e9c7e291a0ddb 100644 --- a/Documentation/devicetree/bindings/spi/microchip,mpfs-spi.yaml +++ b/Documentation/devicetree/bindings/spi/microchip,mpfs-spi.yaml @@ -47,6 +47,5 @@ examples: clocks = <&clkcfg CLK_SPI0>; interrupt-parent = <&plic>; interrupts = <54>; - spi-max-frequency = <25000000>; }; ... diff --git a/Documentation/devicetree/bindings/spi/qcom,spi-geni-qcom.yaml b/Documentation/devicetree/bindings/spi/qcom,spi-geni-qcom.yaml index e2c7b934c50d5479adc78a88cdff6cc9724c130c..78ceb9d67754f405635c0f8cf936732cbf92a4b3 100644 --- a/Documentation/devicetree/bindings/spi/qcom,spi-geni-qcom.yaml +++ b/Documentation/devicetree/bindings/spi/qcom,spi-geni-qcom.yaml @@ -110,7 +110,6 @@ examples: pinctrl-names = "default"; pinctrl-0 = <&qup_spi1_default>; interrupts = ; - spi-max-frequency = <50000000>; #address-cells = <1>; #size-cells = <0>; }; diff --git a/Documentation/devicetree/bindings/usb/generic-ehci.yaml b/Documentation/devicetree/bindings/usb/generic-ehci.yaml index 0b4524b6409ec4ec40bd422e1487e0488e7a274c..1e84e1b7ab271fcfc795779f86fa80fe7dd473a8 100644 --- a/Documentation/devicetree/bindings/usb/generic-ehci.yaml +++ b/Documentation/devicetree/bindings/usb/generic-ehci.yaml @@ -136,7 +136,8 @@ properties: Phandle of a companion. phys: - maxItems: 1 + minItems: 1 + maxItems: 3 phy-names: const: usb diff --git a/Documentation/devicetree/bindings/usb/generic-ohci.yaml b/Documentation/devicetree/bindings/usb/generic-ohci.yaml index e2ac84665316117093b7390e6984cc6113e22769..bb6bbd5f129d44a62863b56acd8331da6f03ae90 100644 --- a/Documentation/devicetree/bindings/usb/generic-ohci.yaml +++ b/Documentation/devicetree/bindings/usb/generic-ohci.yaml @@ -103,7 +103,8 @@ properties: Overrides the detected port count phys: - maxItems: 1 + minItems: 1 + maxItems: 3 phy-names: const: usb diff --git a/Documentation/driver-api/firmware/other_interfaces.rst b/Documentation/driver-api/firmware/other_interfaces.rst index b81794e0cfbb937031d948ff4979d7462ab1c9c9..06ac89adaafba2c6c06718cd314c295ad8d61b82 100644 --- a/Documentation/driver-api/firmware/other_interfaces.rst +++ b/Documentation/driver-api/firmware/other_interfaces.rst @@ -13,6 +13,12 @@ EDD Interfaces .. kernel-doc:: drivers/firmware/edd.c :internal: +Generic System Framebuffers Interface +------------------------------------- + +.. kernel-doc:: drivers/firmware/sysfb.c + :export: + Intel Stratix10 SoC Service Layer --------------------------------- Some features of the Intel Stratix10 SoC require a level of privilege diff --git a/Documentation/driver-api/gpio/board.rst b/Documentation/driver-api/gpio/board.rst index 4e3adf31c8d1da7f83fcbae62944c39567660674..b33aa04f213fea2544456495b27d3e9fc6f2eba5 100644 --- a/Documentation/driver-api/gpio/board.rst +++ b/Documentation/driver-api/gpio/board.rst @@ -6,7 +6,7 @@ This document explains how GPIOs can be assigned to given devices and functions. Note that it only applies to the new descriptor-based interface. For a description of the deprecated integer-based GPIO interface please refer to -gpio-legacy.txt (actually, there is no real mapping possible with the old +legacy.rst (actually, there is no real mapping possible with the old interface; you just fetch an integer from somewhere and request the corresponding GPIO). diff --git a/Documentation/driver-api/gpio/consumer.rst b/Documentation/driver-api/gpio/consumer.rst index 47869ca8ccf0320c2443819852755e295f4769a7..72bcf5f5e3a21715d17e8cbeef0f15e3d19ab148 100644 --- a/Documentation/driver-api/gpio/consumer.rst +++ b/Documentation/driver-api/gpio/consumer.rst @@ -4,7 +4,7 @@ GPIO Descriptor Consumer Interface This document describes the consumer interface of the GPIO framework. Note that it describes the new descriptor-based interface. For a description of the -deprecated integer-based GPIO interface please refer to gpio-legacy.txt. +deprecated integer-based GPIO interface please refer to legacy.rst. Guidelines for GPIOs consumers @@ -78,7 +78,7 @@ whether the line is configured active high or active low (see The two last flags are used for use cases where open drain is mandatory, such as I2C: if the line is not already configured as open drain in the mappings -(see board.txt), then open drain will be enforced anyway and a warning will be +(see board.rst), then open drain will be enforced anyway and a warning will be printed that the board configuration needs to be updated to match the use case. Both functions return either a valid GPIO descriptor, or an error code checkable @@ -270,7 +270,7 @@ driven. The same is applicable for open drain or open source output lines: those do not actively drive their output high (open drain) or low (open source), they just switch their output to a high impedance value. The consumer should not need to -care. (For details read about open drain in driver.txt.) +care. (For details read about open drain in driver.rst.) With this, all the gpiod_set_(array)_value_xxx() functions interpret the parameter "value" as "asserted" ("1") or "de-asserted" ("0"). The physical line diff --git a/Documentation/driver-api/gpio/intro.rst b/Documentation/driver-api/gpio/intro.rst index 2e924fb5b3d536d5543ac4a92e01e2b5dd11e808..c9c19243b97f8ef346c1c97e4a9475d8a0172462 100644 --- a/Documentation/driver-api/gpio/intro.rst +++ b/Documentation/driver-api/gpio/intro.rst @@ -14,12 +14,12 @@ Due to the history of GPIO interfaces in the kernel, there are two different ways to obtain and use GPIOs: - The descriptor-based interface is the preferred way to manipulate GPIOs, - and is described by all the files in this directory excepted gpio-legacy.txt. + and is described by all the files in this directory excepted legacy.rst. - The legacy integer-based interface which is considered deprecated (but still - usable for compatibility reasons) is documented in gpio-legacy.txt. + usable for compatibility reasons) is documented in legacy.rst. The remainder of this document applies to the new descriptor-based interface. -gpio-legacy.txt contains the same information applied to the legacy +legacy.rst contains the same information applied to the legacy integer-based interface. diff --git a/Documentation/filesystems/btrfs.rst b/Documentation/filesystems/btrfs.rst index d0904f6028197074e0083e4a3b633a6499f656e9..992eddb0e11b8751c125acc45fdd7fbba2cf3962 100644 --- a/Documentation/filesystems/btrfs.rst +++ b/Documentation/filesystems/btrfs.rst @@ -19,13 +19,23 @@ The main Btrfs features include: * Subvolumes (separate internal filesystem roots) * Object level mirroring and striping * Checksums on data and metadata (multiple algorithms available) - * Compression + * Compression (multiple algorithms available) + * Reflink, deduplication + * Scrub (on-line checksum verification) + * Hierarchical quota groups (subvolume and snapshot support) * Integrated multiple device support, with several raid algorithms * Offline filesystem check - * Efficient incremental backup and FS mirroring + * Efficient incremental backup and FS mirroring (send/receive) + * Trim/discard * Online filesystem defragmentation + * Swapfile support + * Zoned mode + * Read/write metadata verification + * Online resize (shrink, grow) -For more information please refer to the wiki +For more information please refer to the documentation site or wiki + + https://btrfs.readthedocs.io https://btrfs.wiki.kernel.org diff --git a/Documentation/filesystems/ext4/attributes.rst b/Documentation/filesystems/ext4/attributes.rst index 871d2da7a0a91e73f0f791f5627a57855741f20a..87814696a65b59f4d55297c5df7893aef4031f9b 100644 --- a/Documentation/filesystems/ext4/attributes.rst +++ b/Documentation/filesystems/ext4/attributes.rst @@ -13,8 +13,8 @@ disappeared as of Linux 3.0. There are two places where extended attributes can be found. The first place is between the end of each inode entry and the beginning of the -next inode entry. For example, if inode.i\_extra\_isize = 28 and -sb.inode\_size = 256, then there are 256 - (128 + 28) = 100 bytes +next inode entry. For example, if inode.i_extra_isize = 28 and +sb.inode_size = 256, then there are 256 - (128 + 28) = 100 bytes available for in-inode extended attribute storage. The second place where extended attributes can be found is in the block pointed to by ``inode.i_file_acl``. As of Linux 3.11, it is not possible for this @@ -38,8 +38,8 @@ Extended attributes, when stored after the inode, have a header - Name - Description * - 0x0 - - \_\_le32 - - h\_magic + - __le32 + - h_magic - Magic number for identification, 0xEA020000. This value is set by the Linux driver, though e2fsprogs doesn't seem to check it(?) @@ -55,28 +55,28 @@ The beginning of an extended attribute block is in - Name - Description * - 0x0 - - \_\_le32 - - h\_magic + - __le32 + - h_magic - Magic number for identification, 0xEA020000. * - 0x4 - - \_\_le32 - - h\_refcount + - __le32 + - h_refcount - Reference count. * - 0x8 - - \_\_le32 - - h\_blocks + - __le32 + - h_blocks - Number of disk blocks used. * - 0xC - - \_\_le32 - - h\_hash + - __le32 + - h_hash - Hash value of all attributes. * - 0x10 - - \_\_le32 - - h\_checksum + - __le32 + - h_checksum - Checksum of the extended attribute block. * - 0x14 - - \_\_u32 - - h\_reserved[3] + - __u32 + - h_reserved[3] - Zero. The checksum is calculated against the FS UUID, the 64-bit block number @@ -100,46 +100,46 @@ Attributes stored inside an inode do not need be stored in sorted order. - Name - Description * - 0x0 - - \_\_u8 - - e\_name\_len + - __u8 + - e_name_len - Length of name. * - 0x1 - - \_\_u8 - - e\_name\_index + - __u8 + - e_name_index - Attribute name index. There is a discussion of this below. * - 0x2 - - \_\_le16 - - e\_value\_offs + - __le16 + - e_value_offs - Location of this attribute's value on the disk block where it is stored. Multiple attributes can share the same value. For an inode attribute this value is relative to the start of the first entry; for a block this value is relative to the start of the block (i.e. the header). * - 0x4 - - \_\_le32 - - e\_value\_inum + - __le32 + - e_value_inum - The inode where the value is stored. Zero indicates the value is in the same block as this entry. This field is only used if the - INCOMPAT\_EA\_INODE feature is enabled. + INCOMPAT_EA_INODE feature is enabled. * - 0x8 - - \_\_le32 - - e\_value\_size + - __le32 + - e_value_size - Length of attribute value. * - 0xC - - \_\_le32 - - e\_hash + - __le32 + - e_hash - Hash value of attribute name and attribute value. The kernel doesn't update the hash for in-inode attributes, so for that case this value must be zero, because e2fsck validates any non-zero hash regardless of where the xattr lives. * - 0x10 - char - - e\_name[e\_name\_len] + - e_name[e_name_len] - Attribute name. Does not include trailing NULL. Attribute values can follow the end of the entry table. There appears to be a requirement that they be aligned to 4-byte boundaries. The values are stored starting at the end of the block and grow towards the -xattr\_header/xattr\_entry table. When the two collide, the overflow is +xattr_header/xattr_entry table. When the two collide, the overflow is put into a separate disk block. If the disk block fills up, the filesystem returns -ENOSPC. @@ -167,15 +167,15 @@ the key name. Here is a map of name index values to key prefixes: * - 1 - “user.” * - 2 - - “system.posix\_acl\_access” + - “system.posix_acl_access” * - 3 - - “system.posix\_acl\_default” + - “system.posix_acl_default” * - 4 - “trusted.” * - 6 - “security.” * - 7 - - “system.” (inline\_data only?) + - “system.” (inline_data only?) * - 8 - “system.richacl” (SuSE kernels only?) diff --git a/Documentation/filesystems/ext4/bigalloc.rst b/Documentation/filesystems/ext4/bigalloc.rst index 72075aa608e4dd3d00a0f57b7b4ced17aa467cd9..976a180b209c2a3dc9f3a6afeb194fd100bd3931 100644 --- a/Documentation/filesystems/ext4/bigalloc.rst +++ b/Documentation/filesystems/ext4/bigalloc.rst @@ -23,7 +23,7 @@ means that a block group addresses 32 gigabytes instead of 128 megabytes, also shrinking the amount of file system overhead for metadata. The administrator can set a block cluster size at mkfs time (which is -stored in the s\_log\_cluster\_size field in the superblock); from then +stored in the s_log_cluster_size field in the superblock); from then on, the block bitmaps track clusters, not individual blocks. This means that block groups can be several gigabytes in size (instead of just 128MiB); however, the minimum allocation unit becomes a cluster, not a diff --git a/Documentation/filesystems/ext4/bitmaps.rst b/Documentation/filesystems/ext4/bitmaps.rst index c7546dbc197ae4a83862e259c4e70dbdd605a1a1..91c45d86e9bb56b6e3e478037f4c1ab7b57cdcad 100644 --- a/Documentation/filesystems/ext4/bitmaps.rst +++ b/Documentation/filesystems/ext4/bitmaps.rst @@ -9,15 +9,15 @@ group. The inode bitmap records which entries in the inode table are in use. As with most bitmaps, one bit represents the usage status of one data -block or inode table entry. This implies a block group size of 8 \* -number\_of\_bytes\_in\_a\_logical\_block. +block or inode table entry. This implies a block group size of 8 * +number_of_bytes_in_a_logical_block. NOTE: If ``BLOCK_UNINIT`` is set for a given block group, various parts of the kernel and e2fsprogs code pretends that the block bitmap contains zeros (i.e. all blocks in the group are free). However, it is not necessarily the case that no blocks are in use -- if ``meta_bg`` is set, the bitmaps and group descriptor live inside the group. Unfortunately, -ext2fs\_test\_block\_bitmap2() will return '0' for those locations, +ext2fs_test_block_bitmap2() will return '0' for those locations, which produces confusing debugfs output. Inode Table diff --git a/Documentation/filesystems/ext4/blockgroup.rst b/Documentation/filesystems/ext4/blockgroup.rst index d5d652addce5ea62a362a4ecdfcf419443eef110..46d78f860623f146915d518456d7a99991172a42 100644 --- a/Documentation/filesystems/ext4/blockgroup.rst +++ b/Documentation/filesystems/ext4/blockgroup.rst @@ -56,39 +56,39 @@ established that the super block and the group descriptor table, if present, will be at the beginning of the block group. The bitmaps and the inode table can be anywhere, and it is quite possible for the bitmaps to come after the inode table, or for both to be in different -groups (flex\_bg). Leftover space is used for file data blocks, indirect +groups (flex_bg). Leftover space is used for file data blocks, indirect block maps, extent tree blocks, and extended attributes. Flexible Block Groups --------------------- Starting in ext4, there is a new feature called flexible block groups -(flex\_bg). In a flex\_bg, several block groups are tied together as one +(flex_bg). In a flex_bg, several block groups are tied together as one logical block group; the bitmap spaces and the inode table space in the -first block group of the flex\_bg are expanded to include the bitmaps -and inode tables of all other block groups in the flex\_bg. For example, -if the flex\_bg size is 4, then group 0 will contain (in order) the +first block group of the flex_bg are expanded to include the bitmaps +and inode tables of all other block groups in the flex_bg. For example, +if the flex_bg size is 4, then group 0 will contain (in order) the superblock, group descriptors, data block bitmaps for groups 0-3, inode bitmaps for groups 0-3, inode tables for groups 0-3, and the remaining space in group 0 is for file data. The effect of this is to group the block group metadata close together for faster loading, and to enable large files to be continuous on disk. Backup copies of the superblock and group descriptors are always at the beginning of block groups, even -if flex\_bg is enabled. The number of block groups that make up a -flex\_bg is given by 2 ^ ``sb.s_log_groups_per_flex``. +if flex_bg is enabled. The number of block groups that make up a +flex_bg is given by 2 ^ ``sb.s_log_groups_per_flex``. Meta Block Groups ----------------- -Without the option META\_BG, for safety concerns, all block group +Without the option META_BG, for safety concerns, all block group descriptors copies are kept in the first block group. Given the default 128MiB(2^27 bytes) block group size and 64-byte group descriptors, ext4 can have at most 2^27/64 = 2^21 block groups. This limits the entire filesystem size to 2^21 * 2^27 = 2^48bytes or 256TiB. The solution to this problem is to use the metablock group feature -(META\_BG), which is already in ext3 for all 2.6 releases. With the -META\_BG feature, ext4 filesystems are partitioned into many metablock +(META_BG), which is already in ext3 for all 2.6 releases. With the +META_BG feature, ext4 filesystems are partitioned into many metablock groups. Each metablock group is a cluster of block groups whose group descriptor structures can be stored in a single disk block. For ext4 filesystems with 4 KB block size, a single metablock group partition @@ -110,7 +110,7 @@ bytes, a meta-block group contains 32 block groups for filesystems with a 1KB block size, and 128 block groups for filesystems with a 4KB blocksize. Filesystems can either be created using this new block group descriptor layout, or existing filesystems can be resized on-line, and -the field s\_first\_meta\_bg in the superblock will indicate the first +the field s_first_meta_bg in the superblock will indicate the first block group using this new layout. Please see an important note about ``BLOCK_UNINIT`` in the section about @@ -121,15 +121,15 @@ Lazy Block Group Initialization A new feature for ext4 are three block group descriptor flags that enable mkfs to skip initializing other parts of the block group -metadata. Specifically, the INODE\_UNINIT and BLOCK\_UNINIT flags mean +metadata. Specifically, the INODE_UNINIT and BLOCK_UNINIT flags mean that the inode and block bitmaps for that group can be calculated and therefore the on-disk bitmap blocks are not initialized. This is generally the case for an empty block group or a block group containing -only fixed-location block group metadata. The INODE\_ZEROED flag means +only fixed-location block group metadata. The INODE_ZEROED flag means that the inode table has been initialized; mkfs will unset this flag and rely on the kernel to initialize the inode tables in the background. By not writing zeroes to the bitmaps and inode table, mkfs time is -reduced considerably. Note the feature flag is RO\_COMPAT\_GDT\_CSUM, -but the dumpe2fs output prints this as “uninit\_bg”. They are the same +reduced considerably. Note the feature flag is RO_COMPAT_GDT_CSUM, +but the dumpe2fs output prints this as “uninit_bg”. They are the same thing. diff --git a/Documentation/filesystems/ext4/blockmap.rst b/Documentation/filesystems/ext4/blockmap.rst index 30e25750d88a4fd302b65d0d43970a0671e98f98..2bd990402a5c49cc59621142068a9bca9d9fd2d1 100644 --- a/Documentation/filesystems/ext4/blockmap.rst +++ b/Documentation/filesystems/ext4/blockmap.rst @@ -1,7 +1,7 @@ .. SPDX-License-Identifier: GPL-2.0 +---------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| i.i\_block Offset | Where It Points | +| i.i_block Offset | Where It Points | +=====================+==============================================================================================================================================================================================================================+ | 0 to 11 | Direct map to file blocks 0 to 11. | +---------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/Documentation/filesystems/ext4/checksums.rst b/Documentation/filesystems/ext4/checksums.rst index 5519e253810d6c3488546020713d7ddadc83fbd8..e232749daf5f3046ca4311406f9fb78e1ea8c7aa 100644 --- a/Documentation/filesystems/ext4/checksums.rst +++ b/Documentation/filesystems/ext4/checksums.rst @@ -4,7 +4,7 @@ Checksums --------- Starting in early 2012, metadata checksums were added to all major ext4 -and jbd2 data structures. The associated feature flag is metadata\_csum. +and jbd2 data structures. The associated feature flag is metadata_csum. The desired checksum algorithm is indicated in the superblock, though as of October 2012 the only supported algorithm is crc32c. Some data structures did not have space to fit a full 32-bit checksum, so only the @@ -20,7 +20,7 @@ encounters directory blocks that lack sufficient empty space to add a checksum, it will request that you run ``e2fsck -D`` to have the directories rebuilt with checksums. This has the added benefit of removing slack space from the directory files and rebalancing the htree -indexes. If you \_ignore\_ this step, your directories will not be +indexes. If you _ignore_ this step, your directories will not be protected by a checksum! The following table describes the data elements that go into each type @@ -35,39 +35,39 @@ of checksum. The checksum function is whatever the superblock describes - Length - Ingredients * - Superblock - - \_\_le32 + - __le32 - The entire superblock up to the checksum field. The UUID lives inside the superblock. * - MMP - - \_\_le32 + - __le32 - UUID + the entire MMP block up to the checksum field. * - Extended Attributes - - \_\_le32 + - __le32 - UUID + the entire extended attribute block. The checksum field is set to zero. * - Directory Entries - - \_\_le32 + - __le32 - UUID + inode number + inode generation + the directory block up to the fake entry enclosing the checksum field. * - HTREE Nodes - - \_\_le32 + - __le32 - UUID + inode number + inode generation + all valid extents + HTREE tail. The checksum field is set to zero. * - Extents - - \_\_le32 + - __le32 - UUID + inode number + inode generation + the entire extent block up to the checksum field. * - Bitmaps - - \_\_le32 or \_\_le16 + - __le32 or __le16 - UUID + the entire bitmap. Checksums are stored in the group descriptor, and truncated if the group descriptor size is 32 bytes (i.e. ^64bit) * - Inodes - - \_\_le32 + - __le32 - UUID + inode number + inode generation + the entire inode. The checksum field is set to zero. Each inode has its own checksum. * - Group Descriptors - - \_\_le16 - - If metadata\_csum, then UUID + group number + the entire descriptor; - else if gdt\_csum, then crc16(UUID + group number + the entire + - __le16 + - If metadata_csum, then UUID + group number + the entire descriptor; + else if gdt_csum, then crc16(UUID + group number + the entire descriptor). In all cases, only the lower 16 bits are stored. diff --git a/Documentation/filesystems/ext4/directory.rst b/Documentation/filesystems/ext4/directory.rst index 55f618b371445ac4f8acd81eafe499743364a0e2..6eece8e31df8b734a5e9b41e49f1a27a8bac49c2 100644 --- a/Documentation/filesystems/ext4/directory.rst +++ b/Documentation/filesystems/ext4/directory.rst @@ -42,24 +42,24 @@ is at most 263 bytes long, though on disk you'll need to reference - Name - Description * - 0x0 - - \_\_le32 + - __le32 - inode - Number of the inode that this directory entry points to. * - 0x4 - - \_\_le16 - - rec\_len + - __le16 + - rec_len - Length of this directory entry. Must be a multiple of 4. * - 0x6 - - \_\_le16 - - name\_len + - __le16 + - name_len - Length of the file name. * - 0x8 - char - - name[EXT4\_NAME\_LEN] + - name[EXT4_NAME_LEN] - File name. Since file names cannot be longer than 255 bytes, the new directory -entry format shortens the name\_len field and uses the space for a file +entry format shortens the name_len field and uses the space for a file type flag, probably to avoid having to load every inode during directory tree traversal. This format is ``ext4_dir_entry_2``, which is at most 263 bytes long, though on disk you'll need to reference @@ -74,24 +74,24 @@ tree traversal. This format is ``ext4_dir_entry_2``, which is at most - Name - Description * - 0x0 - - \_\_le32 + - __le32 - inode - Number of the inode that this directory entry points to. * - 0x4 - - \_\_le16 - - rec\_len + - __le16 + - rec_len - Length of this directory entry. * - 0x6 - - \_\_u8 - - name\_len + - __u8 + - name_len - Length of the file name. * - 0x7 - - \_\_u8 - - file\_type + - __u8 + - file_type - File type code, see ftype_ table below. * - 0x8 - char - - name[EXT4\_NAME\_LEN] + - name[EXT4_NAME_LEN] - File name. .. _ftype: @@ -137,19 +137,19 @@ entry uses this extension, it may be up to 271 bytes. - Name - Description * - 0x0 - - \_\_le32 + - __le32 - hash - The hash of the directory name * - 0x4 - - \_\_le32 - - minor\_hash + - __le32 + - minor_hash - The minor hash of the directory name In order to add checksums to these classic directory blocks, a phony ``struct ext4_dir_entry`` is placed at the end of each leaf block to hold the checksum. The directory entry is 12 bytes long. The inode -number and name\_len fields are set to zero to fool old software into +number and name_len fields are set to zero to fool old software into ignoring an apparently empty directory entry, and the checksum is stored in the place where the name normally goes. The structure is ``struct ext4_dir_entry_tail``: @@ -163,24 +163,24 @@ in the place where the name normally goes. The structure is - Name - Description * - 0x0 - - \_\_le32 - - det\_reserved\_zero1 + - __le32 + - det_reserved_zero1 - Inode number, which must be zero. * - 0x4 - - \_\_le16 - - det\_rec\_len + - __le16 + - det_rec_len - Length of this directory entry, which must be 12. * - 0x6 - - \_\_u8 - - det\_reserved\_zero2 + - __u8 + - det_reserved_zero2 - Length of the file name, which must be zero. * - 0x7 - - \_\_u8 - - det\_reserved\_ft + - __u8 + - det_reserved_ft - File type, which must be 0xDE. * - 0x8 - - \_\_le32 - - det\_checksum + - __le32 + - det_checksum - Directory leaf block checksum. The leaf directory block checksum is calculated against the FS UUID, the @@ -194,7 +194,7 @@ Hash Tree Directories A linear array of directory entries isn't great for performance, so a new feature was added to ext3 to provide a faster (but peculiar) balanced tree keyed off a hash of the directory entry name. If the -EXT4\_INDEX\_FL (0x1000) flag is set in the inode, this directory uses a +EXT4_INDEX_FL (0x1000) flag is set in the inode, this directory uses a hashed btree (htree) to organize and find directory entries. For backwards read-only compatibility with ext2, this tree is actually hidden inside the directory file, masquerading as “empty” directory data @@ -206,14 +206,14 @@ rest of the directory block is empty so that it moves on. The root of the tree always lives in the first data block of the directory. By ext2 custom, the '.' and '..' entries must appear at the beginning of this first block, so they are put here as two -``struct ext4_dir_entry_2``\ s and not stored in the tree. The rest of +``struct ext4_dir_entry_2`` s and not stored in the tree. The rest of the root node contains metadata about the tree and finally a hash->block map to find nodes that are lower in the htree. If ``dx_root.info.indirect_levels`` is non-zero then the htree has two levels; the data block pointed to by the root node's map is an interior node, which is indexed by a minor hash. Interior nodes in this tree contains a zeroed out ``struct ext4_dir_entry_2`` followed by a -minor\_hash->block map to find leafe nodes. Leaf nodes contain a linear +minor_hash->block map to find leafe nodes. Leaf nodes contain a linear array of all ``struct ext4_dir_entry_2``; all of these entries (presumably) hash to the same value. If there is an overflow, the entries simply overflow into the next leaf node, and the @@ -245,83 +245,83 @@ of a data block: - Name - Description * - 0x0 - - \_\_le32 + - __le32 - dot.inode - inode number of this directory. * - 0x4 - - \_\_le16 - - dot.rec\_len + - __le16 + - dot.rec_len - Length of this record, 12. * - 0x6 - u8 - - dot.name\_len + - dot.name_len - Length of the name, 1. * - 0x7 - u8 - - dot.file\_type + - dot.file_type - File type of this entry, 0x2 (directory) (if the feature flag is set). * - 0x8 - char - dot.name[4] - - “.\\0\\0\\0” + - “.\0\0\0” * - 0xC - - \_\_le32 + - __le32 - dotdot.inode - inode number of parent directory. * - 0x10 - - \_\_le16 - - dotdot.rec\_len - - block\_size - 12. The record length is long enough to cover all htree + - __le16 + - dotdot.rec_len + - block_size - 12. The record length is long enough to cover all htree data. * - 0x12 - u8 - - dotdot.name\_len + - dotdot.name_len - Length of the name, 2. * - 0x13 - u8 - - dotdot.file\_type + - dotdot.file_type - File type of this entry, 0x2 (directory) (if the feature flag is set). * - 0x14 - char - - dotdot\_name[4] - - “..\\0\\0” + - dotdot_name[4] + - “..\0\0” * - 0x18 - - \_\_le32 - - struct dx\_root\_info.reserved\_zero + - __le32 + - struct dx_root_info.reserved_zero - Zero. * - 0x1C - u8 - - struct dx\_root\_info.hash\_version + - struct dx_root_info.hash_version - Hash type, see dirhash_ table below. * - 0x1D - u8 - - struct dx\_root\_info.info\_length + - struct dx_root_info.info_length - Length of the tree information, 0x8. * - 0x1E - u8 - - struct dx\_root\_info.indirect\_levels - - Depth of the htree. Cannot be larger than 3 if the INCOMPAT\_LARGEDIR + - struct dx_root_info.indirect_levels + - Depth of the htree. Cannot be larger than 3 if the INCOMPAT_LARGEDIR feature is set; cannot be larger than 2 otherwise. * - 0x1F - u8 - - struct dx\_root\_info.unused\_flags + - struct dx_root_info.unused_flags - * - 0x20 - - \_\_le16 + - __le16 - limit - - Maximum number of dx\_entries that can follow this header, plus 1 for + - Maximum number of dx_entries that can follow this header, plus 1 for the header itself. * - 0x22 - - \_\_le16 + - __le16 - count - - Actual number of dx\_entries that follow this header, plus 1 for the + - Actual number of dx_entries that follow this header, plus 1 for the header itself. * - 0x24 - - \_\_le32 + - __le32 - block - The block number (within the directory file) that goes with hash=0. * - 0x28 - - struct dx\_entry + - struct dx_entry - entries[0] - As many 8-byte ``struct dx_entry`` as fits in the rest of the data block. @@ -362,38 +362,38 @@ also the full length of a data block: - Name - Description * - 0x0 - - \_\_le32 + - __le32 - fake.inode - Zero, to make it look like this entry is not in use. * - 0x4 - - \_\_le16 - - fake.rec\_len - - The size of the block, in order to hide all of the dx\_node data. + - __le16 + - fake.rec_len + - The size of the block, in order to hide all of the dx_node data. * - 0x6 - u8 - - name\_len + - name_len - Zero. There is no name for this “unused” directory entry. * - 0x7 - u8 - - file\_type + - file_type - Zero. There is no file type for this “unused” directory entry. * - 0x8 - - \_\_le16 + - __le16 - limit - - Maximum number of dx\_entries that can follow this header, plus 1 for + - Maximum number of dx_entries that can follow this header, plus 1 for the header itself. * - 0xA - - \_\_le16 + - __le16 - count - - Actual number of dx\_entries that follow this header, plus 1 for the + - Actual number of dx_entries that follow this header, plus 1 for the header itself. * - 0xE - - \_\_le32 + - __le32 - block - The block number (within the directory file) that goes with the lowest hash value of this block. This value is stored in the parent block. * - 0x12 - - struct dx\_entry + - struct dx_entry - entries[0] - As many 8-byte ``struct dx_entry`` as fits in the rest of the data block. @@ -410,11 +410,11 @@ long: - Name - Description * - 0x0 - - \_\_le32 + - __le32 - hash - Hash code. * - 0x4 - - \_\_le32 + - __le32 - block - Block number (within the directory file, not filesystem blocks) of the next node in the htree. @@ -423,13 +423,13 @@ long: author.) If metadata checksums are enabled, the last 8 bytes of the directory -block (precisely the length of one dx\_entry) are used to store a +block (precisely the length of one dx_entry) are used to store a ``struct dx_tail``, which contains the checksum. The ``limit`` and -``count`` entries in the dx\_root/dx\_node structures are adjusted as -necessary to fit the dx\_tail into the block. If there is no space for -the dx\_tail, the user is notified to run e2fsck -D to rebuild the +``count`` entries in the dx_root/dx_node structures are adjusted as +necessary to fit the dx_tail into the block. If there is no space for +the dx_tail, the user is notified to run e2fsck -D to rebuild the directory index (which will ensure that there's space for the checksum. -The dx\_tail structure is 8 bytes long and looks like this: +The dx_tail structure is 8 bytes long and looks like this: .. list-table:: :widths: 8 8 24 40 @@ -441,13 +441,13 @@ The dx\_tail structure is 8 bytes long and looks like this: - Description * - 0x0 - u32 - - dt\_reserved + - dt_reserved - Zero. * - 0x4 - - \_\_le32 - - dt\_checksum + - __le32 + - dt_checksum - Checksum of the htree directory block. The checksum is calculated against the FS UUID, the htree index header -(dx\_root or dx\_node), all of the htree indices (dx\_entry) that are in -use, and the tail block (dx\_tail). +(dx_root or dx_node), all of the htree indices (dx_entry) that are in +use, and the tail block (dx_tail). diff --git a/Documentation/filesystems/ext4/eainode.rst b/Documentation/filesystems/ext4/eainode.rst index ecc0d01a0a72cea90bf9d00af363f39069dba18b..7a2ef26b064ac055e24066cec26ac0938072038c 100644 --- a/Documentation/filesystems/ext4/eainode.rst +++ b/Documentation/filesystems/ext4/eainode.rst @@ -5,14 +5,14 @@ Large Extended Attribute Values To enable ext4 to store extended attribute values that do not fit in the inode or in the single extended attribute block attached to an inode, -the EA\_INODE feature allows us to store the value in the data blocks of +the EA_INODE feature allows us to store the value in the data blocks of a regular file inode. This “EA inode” is linked only from the extended attribute name index and must not appear in a directory entry. The -inode's i\_atime field is used to store a checksum of the xattr value; -and i\_ctime/i\_version store a 64-bit reference count, which enables +inode's i_atime field is used to store a checksum of the xattr value; +and i_ctime/i_version store a 64-bit reference count, which enables sharing of large xattr values between multiple owning inodes. For backward compatibility with older versions of this feature, the -i\_mtime/i\_generation *may* store a back-reference to the inode number -and i\_generation of the **one** owning inode (in cases where the EA +i_mtime/i_generation *may* store a back-reference to the inode number +and i_generation of the **one** owning inode (in cases where the EA inode is not referenced by multiple inodes) to verify that the EA inode is the correct one being accessed. diff --git a/Documentation/filesystems/ext4/group_descr.rst b/Documentation/filesystems/ext4/group_descr.rst index 7ba6114e7f5c2a84e00f3353e63820958f56c4c5..392ec44f8fb00d4fb5a21fee9907b3fbffd3ca39 100644 --- a/Documentation/filesystems/ext4/group_descr.rst +++ b/Documentation/filesystems/ext4/group_descr.rst @@ -7,34 +7,34 @@ Each block group on the filesystem has one of these descriptors associated with it. As noted in the Layout section above, the group descriptors (if present) are the second item in the block group. The standard configuration is for each block group to contain a full copy of -the block group descriptor table unless the sparse\_super feature flag +the block group descriptor table unless the sparse_super feature flag is set. Notice how the group descriptor records the location of both bitmaps and the inode table (i.e. they can float). This means that within a block group, the only data structures with fixed locations are the superblock -and the group descriptor table. The flex\_bg mechanism uses this +and the group descriptor table. The flex_bg mechanism uses this property to group several block groups into a flex group and lay out all of the groups' bitmaps and inode tables into one long run in the first group of the flex group. -If the meta\_bg feature flag is set, then several block groups are -grouped together into a meta group. Note that in the meta\_bg case, +If the meta_bg feature flag is set, then several block groups are +grouped together into a meta group. Note that in the meta_bg case, however, the first and last two block groups within the larger meta group contain only group descriptors for the groups inside the meta group. -flex\_bg and meta\_bg do not appear to be mutually exclusive features. +flex_bg and meta_bg do not appear to be mutually exclusive features. In ext2, ext3, and ext4 (when the 64bit feature is not enabled), the block group descriptor was only 32 bytes long and therefore ends at -bg\_checksum. On an ext4 filesystem with the 64bit feature enabled, the +bg_checksum. On an ext4 filesystem with the 64bit feature enabled, the block group descriptor expands to at least the 64 bytes described below; the size is stored in the superblock. -If gdt\_csum is set and metadata\_csum is not set, the block group +If gdt_csum is set and metadata_csum is not set, the block group checksum is the crc16 of the FS UUID, the group number, and the group -descriptor structure. If metadata\_csum is set, then the block group +descriptor structure. If metadata_csum is set, then the block group checksum is the lower 16 bits of the checksum of the FS UUID, the group number, and the group descriptor structure. Both block and inode bitmap checksums are calculated against the FS UUID, the group number, and the @@ -51,59 +51,59 @@ The block group descriptor is laid out in ``struct ext4_group_desc``. - Name - Description * - 0x0 - - \_\_le32 - - bg\_block\_bitmap\_lo + - __le32 + - bg_block_bitmap_lo - Lower 32-bits of location of block bitmap. * - 0x4 - - \_\_le32 - - bg\_inode\_bitmap\_lo + - __le32 + - bg_inode_bitmap_lo - Lower 32-bits of location of inode bitmap. * - 0x8 - - \_\_le32 - - bg\_inode\_table\_lo + - __le32 + - bg_inode_table_lo - Lower 32-bits of location of inode table. * - 0xC - - \_\_le16 - - bg\_free\_blocks\_count\_lo + - __le16 + - bg_free_blocks_count_lo - Lower 16-bits of free block count. * - 0xE - - \_\_le16 - - bg\_free\_inodes\_count\_lo + - __le16 + - bg_free_inodes_count_lo - Lower 16-bits of free inode count. * - 0x10 - - \_\_le16 - - bg\_used\_dirs\_count\_lo + - __le16 + - bg_used_dirs_count_lo - Lower 16-bits of directory count. * - 0x12 - - \_\_le16 - - bg\_flags + - __le16 + - bg_flags - Block group flags. See the bgflags_ table below. * - 0x14 - - \_\_le32 - - bg\_exclude\_bitmap\_lo + - __le32 + - bg_exclude_bitmap_lo - Lower 32-bits of location of snapshot exclusion bitmap. * - 0x18 - - \_\_le16 - - bg\_block\_bitmap\_csum\_lo + - __le16 + - bg_block_bitmap_csum_lo - Lower 16-bits of the block bitmap checksum. * - 0x1A - - \_\_le16 - - bg\_inode\_bitmap\_csum\_lo + - __le16 + - bg_inode_bitmap_csum_lo - Lower 16-bits of the inode bitmap checksum. * - 0x1C - - \_\_le16 - - bg\_itable\_unused\_lo + - __le16 + - bg_itable_unused_lo - Lower 16-bits of unused inode count. If set, we needn't scan past the - ``(sb.s_inodes_per_group - gdt.bg_itable_unused)``\ th entry in the + ``(sb.s_inodes_per_group - gdt.bg_itable_unused)`` th entry in the inode table for this group. * - 0x1E - - \_\_le16 - - bg\_checksum - - Group descriptor checksum; crc16(sb\_uuid+group\_num+bg\_desc) if the - RO\_COMPAT\_GDT\_CSUM feature is set, or - crc32c(sb\_uuid+group\_num+bg\_desc) & 0xFFFF if the - RO\_COMPAT\_METADATA\_CSUM feature is set. The bg\_checksum - field in bg\_desc is skipped when calculating crc16 checksum, + - __le16 + - bg_checksum + - Group descriptor checksum; crc16(sb_uuid+group_num+bg_desc) if the + RO_COMPAT_GDT_CSUM feature is set, or + crc32c(sb_uuid+group_num+bg_desc) & 0xFFFF if the + RO_COMPAT_METADATA_CSUM feature is set. The bg_checksum + field in bg_desc is skipped when calculating crc16 checksum, and set to zero if crc32c checksum is used. * - - @@ -111,48 +111,48 @@ The block group descriptor is laid out in ``struct ext4_group_desc``. - These fields only exist if the 64bit feature is enabled and s_desc_size > 32. * - 0x20 - - \_\_le32 - - bg\_block\_bitmap\_hi + - __le32 + - bg_block_bitmap_hi - Upper 32-bits of location of block bitmap. * - 0x24 - - \_\_le32 - - bg\_inode\_bitmap\_hi + - __le32 + - bg_inode_bitmap_hi - Upper 32-bits of location of inodes bitmap. * - 0x28 - - \_\_le32 - - bg\_inode\_table\_hi + - __le32 + - bg_inode_table_hi - Upper 32-bits of location of inodes table. * - 0x2C - - \_\_le16 - - bg\_free\_blocks\_count\_hi + - __le16 + - bg_free_blocks_count_hi - Upper 16-bits of free block count. * - 0x2E - - \_\_le16 - - bg\_free\_inodes\_count\_hi + - __le16 + - bg_free_inodes_count_hi - Upper 16-bits of free inode count. * - 0x30 - - \_\_le16 - - bg\_used\_dirs\_count\_hi + - __le16 + - bg_used_dirs_count_hi - Upper 16-bits of directory count. * - 0x32 - - \_\_le16 - - bg\_itable\_unused\_hi + - __le16 + - bg_itable_unused_hi - Upper 16-bits of unused inode count. * - 0x34 - - \_\_le32 - - bg\_exclude\_bitmap\_hi + - __le32 + - bg_exclude_bitmap_hi - Upper 32-bits of location of snapshot exclusion bitmap. * - 0x38 - - \_\_le16 - - bg\_block\_bitmap\_csum\_hi + - __le16 + - bg_block_bitmap_csum_hi - Upper 16-bits of the block bitmap checksum. * - 0x3A - - \_\_le16 - - bg\_inode\_bitmap\_csum\_hi + - __le16 + - bg_inode_bitmap_csum_hi - Upper 16-bits of the inode bitmap checksum. * - 0x3C - - \_\_u32 - - bg\_reserved + - __u32 + - bg_reserved - Padding to 64 bytes. .. _bgflags: @@ -166,8 +166,8 @@ Block group flags can be any combination of the following: * - Value - Description * - 0x1 - - inode table and bitmap are not initialized (EXT4\_BG\_INODE\_UNINIT). + - inode table and bitmap are not initialized (EXT4_BG_INODE_UNINIT). * - 0x2 - - block bitmap is not initialized (EXT4\_BG\_BLOCK\_UNINIT). + - block bitmap is not initialized (EXT4_BG_BLOCK_UNINIT). * - 0x4 - - inode table is zeroed (EXT4\_BG\_INODE\_ZEROED). + - inode table is zeroed (EXT4_BG_INODE_ZEROED). diff --git a/Documentation/filesystems/ext4/ifork.rst b/Documentation/filesystems/ext4/ifork.rst index b9816d5a896b79fdac0c1c44abc768706fd0edc3..dc31f505e6c835bf590998a88676e9fa48d4c0f9 100644 --- a/Documentation/filesystems/ext4/ifork.rst +++ b/Documentation/filesystems/ext4/ifork.rst @@ -1,6 +1,6 @@ .. SPDX-License-Identifier: GPL-2.0 -The Contents of inode.i\_block +The Contents of inode.i_block ------------------------------ Depending on the type of file an inode describes, the 60 bytes of @@ -47,7 +47,7 @@ In ext4, the file to logical block map has been replaced with an extent tree. Under the old scheme, allocating a contiguous run of 1,000 blocks requires an indirect block to map all 1,000 entries; with extents, the mapping is reduced to a single ``struct ext4_extent`` with -``ee_len = 1000``. If flex\_bg is enabled, it is possible to allocate +``ee_len = 1000``. If flex_bg is enabled, it is possible to allocate very large files with a single extent, at a considerable reduction in metadata block use, and some improvement in disk efficiency. The inode must have the extents flag (0x80000) flag set for this feature to be in @@ -76,28 +76,28 @@ which is 12 bytes long: - Name - Description * - 0x0 - - \_\_le16 - - eh\_magic + - __le16 + - eh_magic - Magic number, 0xF30A. * - 0x2 - - \_\_le16 - - eh\_entries + - __le16 + - eh_entries - Number of valid entries following the header. * - 0x4 - - \_\_le16 - - eh\_max + - __le16 + - eh_max - Maximum number of entries that could follow the header. * - 0x6 - - \_\_le16 - - eh\_depth + - __le16 + - eh_depth - Depth of this extent node in the extent tree. 0 = this extent node points to data blocks; otherwise, this extent node points to other extent nodes. The extent tree can be at most 5 levels deep: a logical block number can be at most ``2^32``, and the smallest ``n`` that satisfies ``4*(((blocksize - 12)/12)^n) >= 2^32`` is 5. * - 0x8 - - \_\_le32 - - eh\_generation + - __le32 + - eh_generation - Generation of the tree. (Used by Lustre, but not standard ext4). Internal nodes of the extent tree, also known as index nodes, are @@ -112,22 +112,22 @@ recorded as ``struct ext4_extent_idx``, and are 12 bytes long: - Name - Description * - 0x0 - - \_\_le32 - - ei\_block + - __le32 + - ei_block - This index node covers file blocks from 'block' onward. * - 0x4 - - \_\_le32 - - ei\_leaf\_lo + - __le32 + - ei_leaf_lo - Lower 32-bits of the block number of the extent node that is the next level lower in the tree. The tree node pointed to can be either another internal node or a leaf node, described below. * - 0x8 - - \_\_le16 - - ei\_leaf\_hi + - __le16 + - ei_leaf_hi - Upper 16-bits of the previous field. * - 0xA - - \_\_u16 - - ei\_unused + - __u16 + - ei_unused - Leaf nodes of the extent tree are recorded as ``struct ext4_extent``, @@ -142,24 +142,24 @@ and are also 12 bytes long: - Name - Description * - 0x0 - - \_\_le32 - - ee\_block + - __le32 + - ee_block - First file block number that this extent covers. * - 0x4 - - \_\_le16 - - ee\_len + - __le16 + - ee_len - Number of blocks covered by extent. If the value of this field is <= 32768, the extent is initialized. If the value of the field is > 32768, the extent is uninitialized and the actual extent length is ``ee_len`` - 32768. Therefore, the maximum length of a initialized extent is 32768 blocks, and the maximum length of an uninitialized extent is 32767. * - 0x6 - - \_\_le16 - - ee\_start\_hi + - __le16 + - ee_start_hi - Upper 16-bits of the block number to which this extent points. * - 0x8 - - \_\_le32 - - ee\_start\_lo + - __le32 + - ee_start_lo - Lower 32-bits of the block number to which this extent points. Prior to the introduction of metadata checksums, the extent header + @@ -182,8 +182,8 @@ including) the checksum itself. - Name - Description * - 0x0 - - \_\_le32 - - eb\_checksum + - __le32 + - eb_checksum - Checksum of the extent block, crc32c(uuid+inum+igeneration+extentblock) Inline Data diff --git a/Documentation/filesystems/ext4/inlinedata.rst b/Documentation/filesystems/ext4/inlinedata.rst index d1075178ce0b2fe691f66ea04ab80c16a6c6a0f9..a728af0d2fd0c5810be515d14a36f11932616db7 100644 --- a/Documentation/filesystems/ext4/inlinedata.rst +++ b/Documentation/filesystems/ext4/inlinedata.rst @@ -11,12 +11,12 @@ file is smaller than 60 bytes, then the data are stored inline in attribute space, then it might be found as an extended attribute “system.data” within the inode body (“ibody EA”). This of course constrains the amount of extended attributes one can attach to an inode. -If the data size increases beyond i\_block + ibody EA, a regular block +If the data size increases beyond i_block + ibody EA, a regular block is allocated and the contents moved to that block. Pending a change to compact the extended attribute key used to store inline data, one ought to be able to store 160 bytes of data in a -256-byte inode (as of June 2015, when i\_extra\_isize is 28). Prior to +256-byte inode (as of June 2015, when i_extra_isize is 28). Prior to that, the limit was 156 bytes due to inefficient use of inode space. The inline data feature requires the presence of an extended attribute @@ -25,12 +25,12 @@ for “system.data”, even if the attribute value is zero length. Inline Directories ~~~~~~~~~~~~~~~~~~ -The first four bytes of i\_block are the inode number of the parent +The first four bytes of i_block are the inode number of the parent directory. Following that is a 56-byte space for an array of directory entries; see ``struct ext4_dir_entry``. If there is a “system.data” attribute in the inode body, the EA value is an array of ``struct ext4_dir_entry`` as well. Note that for inline directories, the -i\_block and EA space are treated as separate dirent blocks; directory +i_block and EA space are treated as separate dirent blocks; directory entries cannot span the two. Inline directory entries are not checksummed, as the inode checksum diff --git a/Documentation/filesystems/ext4/inodes.rst b/Documentation/filesystems/ext4/inodes.rst index 6c5ce666e63f3de5854bab4e1f2d565d19899fc1..cfc6c16599312ab3dc9be7f1ef904286b9b44248 100644 --- a/Documentation/filesystems/ext4/inodes.rst +++ b/Documentation/filesystems/ext4/inodes.rst @@ -38,138 +38,138 @@ The inode table entry is laid out in ``struct ext4_inode``. - Name - Description * - 0x0 - - \_\_le16 - - i\_mode + - __le16 + - i_mode - File mode. See the table i_mode_ below. * - 0x2 - - \_\_le16 - - i\_uid + - __le16 + - i_uid - Lower 16-bits of Owner UID. * - 0x4 - - \_\_le32 - - i\_size\_lo + - __le32 + - i_size_lo - Lower 32-bits of size in bytes. * - 0x8 - - \_\_le32 - - i\_atime - - Last access time, in seconds since the epoch. However, if the EA\_INODE + - __le32 + - i_atime + - Last access time, in seconds since the epoch. However, if the EA_INODE inode flag is set, this inode stores an extended attribute value and this field contains the checksum of the value. * - 0xC - - \_\_le32 - - i\_ctime + - __le32 + - i_ctime - Last inode change time, in seconds since the epoch. However, if the - EA\_INODE inode flag is set, this inode stores an extended attribute + EA_INODE inode flag is set, this inode stores an extended attribute value and this field contains the lower 32 bits of the attribute value's reference count. * - 0x10 - - \_\_le32 - - i\_mtime + - __le32 + - i_mtime - Last data modification time, in seconds since the epoch. However, if the - EA\_INODE inode flag is set, this inode stores an extended attribute + EA_INODE inode flag is set, this inode stores an extended attribute value and this field contains the number of the inode that owns the extended attribute. * - 0x14 - - \_\_le32 - - i\_dtime + - __le32 + - i_dtime - Deletion Time, in seconds since the epoch. * - 0x18 - - \_\_le16 - - i\_gid + - __le16 + - i_gid - Lower 16-bits of GID. * - 0x1A - - \_\_le16 - - i\_links\_count + - __le16 + - i_links_count - Hard link count. Normally, ext4 does not permit an inode to have more than 65,000 hard links. This applies to files as well as directories, which means that there cannot be more than 64,998 subdirectories in a directory (each subdirectory's '..' entry counts as a hard link, as does - the '.' entry in the directory itself). With the DIR\_NLINK feature + the '.' entry in the directory itself). With the DIR_NLINK feature enabled, ext4 supports more than 64,998 subdirectories by setting this field to 1 to indicate that the number of hard links is not known. * - 0x1C - - \_\_le32 - - i\_blocks\_lo - - Lower 32-bits of “block” count. If the huge\_file feature flag is not + - __le32 + - i_blocks_lo + - Lower 32-bits of “block” count. If the huge_file feature flag is not set on the filesystem, the file consumes ``i_blocks_lo`` 512-byte blocks - on disk. If huge\_file is set and EXT4\_HUGE\_FILE\_FL is NOT set in + on disk. If huge_file is set and EXT4_HUGE_FILE_FL is NOT set in ``inode.i_flags``, then the file consumes ``i_blocks_lo + (i_blocks_hi - << 32)`` 512-byte blocks on disk. If huge\_file is set and - EXT4\_HUGE\_FILE\_FL IS set in ``inode.i_flags``, then this file + << 32)`` 512-byte blocks on disk. If huge_file is set and + EXT4_HUGE_FILE_FL IS set in ``inode.i_flags``, then this file consumes (``i_blocks_lo + i_blocks_hi`` << 32) filesystem blocks on disk. * - 0x20 - - \_\_le32 - - i\_flags + - __le32 + - i_flags - Inode flags. See the table i_flags_ below. * - 0x24 - 4 bytes - - i\_osd1 + - i_osd1 - See the table i_osd1_ for more details. * - 0x28 - 60 bytes - - i\_block[EXT4\_N\_BLOCKS=15] - - Block map or extent tree. See the section “The Contents of inode.i\_block”. + - i_block[EXT4_N_BLOCKS=15] + - Block map or extent tree. See the section “The Contents of inode.i_block”. * - 0x64 - - \_\_le32 - - i\_generation + - __le32 + - i_generation - File version (for NFS). * - 0x68 - - \_\_le32 - - i\_file\_acl\_lo + - __le32 + - i_file_acl_lo - Lower 32-bits of extended attribute block. ACLs are of course one of many possible extended attributes; I think the name of this field is a result of the first use of extended attributes being for ACLs. * - 0x6C - - \_\_le32 - - i\_size\_high / i\_dir\_acl + - __le32 + - i_size_high / i_dir_acl - Upper 32-bits of file/directory size. In ext2/3 this field was named - i\_dir\_acl, though it was usually set to zero and never used. + i_dir_acl, though it was usually set to zero and never used. * - 0x70 - - \_\_le32 - - i\_obso\_faddr + - __le32 + - i_obso_faddr - (Obsolete) fragment address. * - 0x74 - 12 bytes - - i\_osd2 + - i_osd2 - See the table i_osd2_ for more details. * - 0x80 - - \_\_le16 - - i\_extra\_isize + - __le16 + - i_extra_isize - Size of this inode - 128. Alternately, the size of the extended inode fields beyond the original ext2 inode, including this field. * - 0x82 - - \_\_le16 - - i\_checksum\_hi + - __le16 + - i_checksum_hi - Upper 16-bits of the inode checksum. * - 0x84 - - \_\_le32 - - i\_ctime\_extra + - __le32 + - i_ctime_extra - Extra change time bits. This provides sub-second precision. See Inode Timestamps section. * - 0x88 - - \_\_le32 - - i\_mtime\_extra + - __le32 + - i_mtime_extra - Extra modification time bits. This provides sub-second precision. * - 0x8C - - \_\_le32 - - i\_atime\_extra + - __le32 + - i_atime_extra - Extra access time bits. This provides sub-second precision. * - 0x90 - - \_\_le32 - - i\_crtime + - __le32 + - i_crtime - File creation time, in seconds since the epoch. * - 0x94 - - \_\_le32 - - i\_crtime\_extra + - __le32 + - i_crtime_extra - Extra file creation time bits. This provides sub-second precision. * - 0x98 - - \_\_le32 - - i\_version\_hi + - __le32 + - i_version_hi - Upper 32-bits for version number. * - 0x9C - - \_\_le32 - - i\_projid + - __le32 + - i_projid - Project ID. .. _i_mode: @@ -183,45 +183,45 @@ The ``i_mode`` value is a combination of the following flags: * - Value - Description * - 0x1 - - S\_IXOTH (Others may execute) + - S_IXOTH (Others may execute) * - 0x2 - - S\_IWOTH (Others may write) + - S_IWOTH (Others may write) * - 0x4 - - S\_IROTH (Others may read) + - S_IROTH (Others may read) * - 0x8 - - S\_IXGRP (Group members may execute) + - S_IXGRP (Group members may execute) * - 0x10 - - S\_IWGRP (Group members may write) + - S_IWGRP (Group members may write) * - 0x20 - - S\_IRGRP (Group members may read) + - S_IRGRP (Group members may read) * - 0x40 - - S\_IXUSR (Owner may execute) + - S_IXUSR (Owner may execute) * - 0x80 - - S\_IWUSR (Owner may write) + - S_IWUSR (Owner may write) * - 0x100 - - S\_IRUSR (Owner may read) + - S_IRUSR (Owner may read) * - 0x200 - - S\_ISVTX (Sticky bit) + - S_ISVTX (Sticky bit) * - 0x400 - - S\_ISGID (Set GID) + - S_ISGID (Set GID) * - 0x800 - - S\_ISUID (Set UID) + - S_ISUID (Set UID) * - - These are mutually-exclusive file types: * - 0x1000 - - S\_IFIFO (FIFO) + - S_IFIFO (FIFO) * - 0x2000 - - S\_IFCHR (Character device) + - S_IFCHR (Character device) * - 0x4000 - - S\_IFDIR (Directory) + - S_IFDIR (Directory) * - 0x6000 - - S\_IFBLK (Block device) + - S_IFBLK (Block device) * - 0x8000 - - S\_IFREG (Regular file) + - S_IFREG (Regular file) * - 0xA000 - - S\_IFLNK (Symbolic link) + - S_IFLNK (Symbolic link) * - 0xC000 - - S\_IFSOCK (Socket) + - S_IFSOCK (Socket) .. _i_flags: @@ -234,56 +234,56 @@ The ``i_flags`` field is a combination of these values: * - Value - Description * - 0x1 - - This file requires secure deletion (EXT4\_SECRM\_FL). (not implemented) + - This file requires secure deletion (EXT4_SECRM_FL). (not implemented) * - 0x2 - This file should be preserved, should undeletion be desired - (EXT4\_UNRM\_FL). (not implemented) + (EXT4_UNRM_FL). (not implemented) * - 0x4 - - File is compressed (EXT4\_COMPR\_FL). (not really implemented) + - File is compressed (EXT4_COMPR_FL). (not really implemented) * - 0x8 - - All writes to the file must be synchronous (EXT4\_SYNC\_FL). + - All writes to the file must be synchronous (EXT4_SYNC_FL). * - 0x10 - - File is immutable (EXT4\_IMMUTABLE\_FL). + - File is immutable (EXT4_IMMUTABLE_FL). * - 0x20 - - File can only be appended (EXT4\_APPEND\_FL). + - File can only be appended (EXT4_APPEND_FL). * - 0x40 - - The dump(1) utility should not dump this file (EXT4\_NODUMP\_FL). + - The dump(1) utility should not dump this file (EXT4_NODUMP_FL). * - 0x80 - - Do not update access time (EXT4\_NOATIME\_FL). + - Do not update access time (EXT4_NOATIME_FL). * - 0x100 - - Dirty compressed file (EXT4\_DIRTY\_FL). (not used) + - Dirty compressed file (EXT4_DIRTY_FL). (not used) * - 0x200 - - File has one or more compressed clusters (EXT4\_COMPRBLK\_FL). (not used) + - File has one or more compressed clusters (EXT4_COMPRBLK_FL). (not used) * - 0x400 - - Do not compress file (EXT4\_NOCOMPR\_FL). (not used) + - Do not compress file (EXT4_NOCOMPR_FL). (not used) * - 0x800 - - Encrypted inode (EXT4\_ENCRYPT\_FL). This bit value previously was - EXT4\_ECOMPR\_FL (compression error), which was never used. + - Encrypted inode (EXT4_ENCRYPT_FL). This bit value previously was + EXT4_ECOMPR_FL (compression error), which was never used. * - 0x1000 - - Directory has hashed indexes (EXT4\_INDEX\_FL). + - Directory has hashed indexes (EXT4_INDEX_FL). * - 0x2000 - - AFS magic directory (EXT4\_IMAGIC\_FL). + - AFS magic directory (EXT4_IMAGIC_FL). * - 0x4000 - File data must always be written through the journal - (EXT4\_JOURNAL\_DATA\_FL). + (EXT4_JOURNAL_DATA_FL). * - 0x8000 - - File tail should not be merged (EXT4\_NOTAIL\_FL). (not used by ext4) + - File tail should not be merged (EXT4_NOTAIL_FL). (not used by ext4) * - 0x10000 - All directory entry data should be written synchronously (see - ``dirsync``) (EXT4\_DIRSYNC\_FL). + ``dirsync``) (EXT4_DIRSYNC_FL). * - 0x20000 - - Top of directory hierarchy (EXT4\_TOPDIR\_FL). + - Top of directory hierarchy (EXT4_TOPDIR_FL). * - 0x40000 - - This is a huge file (EXT4\_HUGE\_FILE\_FL). + - This is a huge file (EXT4_HUGE_FILE_FL). * - 0x80000 - - Inode uses extents (EXT4\_EXTENTS\_FL). + - Inode uses extents (EXT4_EXTENTS_FL). * - 0x100000 - - Verity protected file (EXT4\_VERITY\_FL). + - Verity protected file (EXT4_VERITY_FL). * - 0x200000 - Inode stores a large extended attribute value in its data blocks - (EXT4\_EA\_INODE\_FL). + (EXT4_EA_INODE_FL). * - 0x400000 - - This file has blocks allocated past EOF (EXT4\_EOFBLOCKS\_FL). + - This file has blocks allocated past EOF (EXT4_EOFBLOCKS_FL). (deprecated) * - 0x01000000 - Inode is a snapshot (``EXT4_SNAPFILE_FL``). (not in mainline) @@ -294,21 +294,21 @@ The ``i_flags`` field is a combination of these values: - Snapshot shrink has completed (``EXT4_SNAPFILE_SHRUNK_FL``). (not in mainline) * - 0x10000000 - - Inode has inline data (EXT4\_INLINE\_DATA\_FL). + - Inode has inline data (EXT4_INLINE_DATA_FL). * - 0x20000000 - - Create children with the same project ID (EXT4\_PROJINHERIT\_FL). + - Create children with the same project ID (EXT4_PROJINHERIT_FL). * - 0x80000000 - - Reserved for ext4 library (EXT4\_RESERVED\_FL). + - Reserved for ext4 library (EXT4_RESERVED_FL). * - - Aggregate flags: * - 0x705BDFFF - User-visible flags. * - 0x604BC0FF - - User-modifiable flags. Note that while EXT4\_JOURNAL\_DATA\_FL and - EXT4\_EXTENTS\_FL can be set with setattr, they are not in the kernel's - EXT4\_FL\_USER\_MODIFIABLE mask, since it needs to handle the setting of + - User-modifiable flags. Note that while EXT4_JOURNAL_DATA_FL and + EXT4_EXTENTS_FL can be set with setattr, they are not in the kernel's + EXT4_FL_USER_MODIFIABLE mask, since it needs to handle the setting of these flags in a special manner and they are masked out of the set of - flags that are saved directly to i\_flags. + flags that are saved directly to i_flags. .. _i_osd1: @@ -325,9 +325,9 @@ Linux: - Name - Description * - 0x0 - - \_\_le32 - - l\_i\_version - - Inode version. However, if the EA\_INODE inode flag is set, this inode + - __le32 + - l_i_version + - Inode version. However, if the EA_INODE inode flag is set, this inode stores an extended attribute value and this field contains the upper 32 bits of the attribute value's reference count. @@ -342,8 +342,8 @@ Hurd: - Name - Description * - 0x0 - - \_\_le32 - - h\_i\_translator + - __le32 + - h_i_translator - ?? Masix: @@ -357,8 +357,8 @@ Masix: - Name - Description * - 0x0 - - \_\_le32 - - m\_i\_reserved + - __le32 + - m_i_reserved - ?? .. _i_osd2: @@ -376,30 +376,30 @@ Linux: - Name - Description * - 0x0 - - \_\_le16 - - l\_i\_blocks\_high + - __le16 + - l_i_blocks_high - Upper 16-bits of the block count. Please see the note attached to - i\_blocks\_lo. + i_blocks_lo. * - 0x2 - - \_\_le16 - - l\_i\_file\_acl\_high + - __le16 + - l_i_file_acl_high - Upper 16-bits of the extended attribute block (historically, the file ACL location). See the Extended Attributes section below. * - 0x4 - - \_\_le16 - - l\_i\_uid\_high + - __le16 + - l_i_uid_high - Upper 16-bits of the Owner UID. * - 0x6 - - \_\_le16 - - l\_i\_gid\_high + - __le16 + - l_i_gid_high - Upper 16-bits of the GID. * - 0x8 - - \_\_le16 - - l\_i\_checksum\_lo + - __le16 + - l_i_checksum_lo - Lower 16-bits of the inode checksum. * - 0xA - - \_\_le16 - - l\_i\_reserved + - __le16 + - l_i_reserved - Unused. Hurd: @@ -413,24 +413,24 @@ Hurd: - Name - Description * - 0x0 - - \_\_le16 - - h\_i\_reserved1 + - __le16 + - h_i_reserved1 - ?? * - 0x2 - - \_\_u16 - - h\_i\_mode\_high + - __u16 + - h_i_mode_high - Upper 16-bits of the file mode. * - 0x4 - - \_\_le16 - - h\_i\_uid\_high + - __le16 + - h_i_uid_high - Upper 16-bits of the Owner UID. * - 0x6 - - \_\_le16 - - h\_i\_gid\_high + - __le16 + - h_i_gid_high - Upper 16-bits of the GID. * - 0x8 - - \_\_u32 - - h\_i\_author + - __u32 + - h_i_author - Author code? Masix: @@ -444,17 +444,17 @@ Masix: - Name - Description * - 0x0 - - \_\_le16 - - h\_i\_reserved1 + - __le16 + - h_i_reserved1 - ?? * - 0x2 - - \_\_u16 - - m\_i\_file\_acl\_high + - __u16 + - m_i_file_acl_high - Upper 16-bits of the extended attribute block (historically, the file ACL location). * - 0x4 - - \_\_u32 - - m\_i\_reserved2[2] + - __u32 + - m_i_reserved2[2] - ?? Inode Size @@ -466,11 +466,11 @@ In ext2 and ext3, the inode structure size was fixed at 128 bytes on-disk inode at format time for all inodes in the filesystem to provide space beyond the end of the original ext2 inode. The on-disk inode record size is recorded in the superblock as ``s_inode_size``. The -number of bytes actually used by struct ext4\_inode beyond the original +number of bytes actually used by struct ext4_inode beyond the original 128-byte ext2 inode is recorded in the ``i_extra_isize`` field for each -inode, which allows struct ext4\_inode to grow for a new kernel without +inode, which allows struct ext4_inode to grow for a new kernel without having to upgrade all of the on-disk inodes. Access to fields beyond -EXT2\_GOOD\_OLD\_INODE\_SIZE should be verified to be within +EXT2_GOOD_OLD_INODE_SIZE should be verified to be within ``i_extra_isize``. By default, ext4 inode records are 256 bytes, and (as of August 2019) the inode structure is 160 bytes (``i_extra_isize = 32``). The extra space between the end of the inode @@ -516,7 +516,7 @@ creation time (crtime); this field is 64-bits wide and decoded in the same manner as 64-bit [cma]time. Neither crtime nor dtime are accessible through the regular stat() interface, though debugfs will report them. -We use the 32-bit signed time value plus (2^32 \* (extra epoch bits)). +We use the 32-bit signed time value plus (2^32 * (extra epoch bits)). In other words: .. list-table:: @@ -525,8 +525,8 @@ In other words: * - Extra epoch bits - MSB of 32-bit time - - Adjustment for signed 32-bit to 64-bit tv\_sec - - Decoded 64-bit tv\_sec + - Adjustment for signed 32-bit to 64-bit tv_sec + - Decoded 64-bit tv_sec - valid time range * - 0 0 - 1 diff --git a/Documentation/filesystems/ext4/journal.rst b/Documentation/filesystems/ext4/journal.rst index 5fad38860f17432b537bf33aa6c5df7d2c54d085..a6bef5293a6005117fe8c34eeb1b7591a6223e2d 100644 --- a/Documentation/filesystems/ext4/journal.rst +++ b/Documentation/filesystems/ext4/journal.rst @@ -63,8 +63,8 @@ Generally speaking, the journal has this format: :header-rows: 1 * - Superblock - - descriptor\_block (data\_blocks or revocation\_block) [more data or - revocations] commmit\_block + - descriptor_block (data_blocks or revocation_block) [more data or + revocations] commmit_block - [more transactions...] * - - One transaction @@ -93,8 +93,8 @@ superblock. * - 1024 bytes of padding - ext4 Superblock - Journal Superblock - - descriptor\_block (data\_blocks or revocation\_block) [more data or - revocations] commmit\_block + - descriptor_block (data_blocks or revocation_block) [more data or + revocations] commmit_block - [more transactions...] * - - @@ -117,17 +117,17 @@ Every block in the journal starts with a common 12-byte header - Name - Description * - 0x0 - - \_\_be32 - - h\_magic + - __be32 + - h_magic - jbd2 magic number, 0xC03B3998. * - 0x4 - - \_\_be32 - - h\_blocktype + - __be32 + - h_blocktype - Description of what this block contains. See the jbd2_blocktype_ table below. * - 0x8 - - \_\_be32 - - h\_sequence + - __be32 + - h_sequence - The transaction ID that goes with this block. .. _jbd2_blocktype: @@ -177,99 +177,99 @@ which is 1024 bytes long: - - Static information describing the journal. * - 0x0 - - journal\_header\_t (12 bytes) - - s\_header + - journal_header_t (12 bytes) + - s_header - Common header identifying this as a superblock. * - 0xC - - \_\_be32 - - s\_blocksize + - __be32 + - s_blocksize - Journal device block size. * - 0x10 - - \_\_be32 - - s\_maxlen + - __be32 + - s_maxlen - Total number of blocks in this journal. * - 0x14 - - \_\_be32 - - s\_first + - __be32 + - s_first - First block of log information. * - - - - Dynamic information describing the current state of the log. * - 0x18 - - \_\_be32 - - s\_sequence + - __be32 + - s_sequence - First commit ID expected in log. * - 0x1C - - \_\_be32 - - s\_start + - __be32 + - s_start - Block number of the start of log. Contrary to the comments, this field being zero does not imply that the journal is clean! * - 0x20 - - \_\_be32 - - s\_errno - - Error value, as set by jbd2\_journal\_abort(). + - __be32 + - s_errno + - Error value, as set by jbd2_journal_abort(). * - - - - The remaining fields are only valid in a v2 superblock. * - 0x24 - - \_\_be32 - - s\_feature\_compat; + - __be32 + - s_feature_compat; - Compatible feature set. See the table jbd2_compat_ below. * - 0x28 - - \_\_be32 - - s\_feature\_incompat + - __be32 + - s_feature_incompat - Incompatible feature set. See the table jbd2_incompat_ below. * - 0x2C - - \_\_be32 - - s\_feature\_ro\_compat + - __be32 + - s_feature_ro_compat - Read-only compatible feature set. There aren't any of these currently. * - 0x30 - - \_\_u8 - - s\_uuid[16] + - __u8 + - s_uuid[16] - 128-bit uuid for journal. This is compared against the copy in the ext4 super block at mount time. * - 0x40 - - \_\_be32 - - s\_nr\_users + - __be32 + - s_nr_users - Number of file systems sharing this journal. * - 0x44 - - \_\_be32 - - s\_dynsuper + - __be32 + - s_dynsuper - Location of dynamic super block copy. (Not used?) * - 0x48 - - \_\_be32 - - s\_max\_transaction + - __be32 + - s_max_transaction - Limit of journal blocks per transaction. (Not used?) * - 0x4C - - \_\_be32 - - s\_max\_trans\_data + - __be32 + - s_max_trans_data - Limit of data blocks per transaction. (Not used?) * - 0x50 - - \_\_u8 - - s\_checksum\_type + - __u8 + - s_checksum_type - Checksum algorithm used for the journal. See jbd2_checksum_type_ for more info. * - 0x51 - - \_\_u8[3] - - s\_padding2 + - __u8[3] + - s_padding2 - * - 0x54 - - \_\_be32 - - s\_num\_fc\_blocks + - __be32 + - s_num_fc_blocks - Number of fast commit blocks in the journal. * - 0x58 - - \_\_u32 - - s\_padding[42] + - __u32 + - s_padding[42] - * - 0xFC - - \_\_be32 - - s\_checksum + - __be32 + - s_checksum - Checksum of the entire superblock, with this field set to zero. * - 0x100 - - \_\_u8 - - s\_users[16\*48] + - __u8 + - s_users[16*48] - ids of all file systems sharing the log. e2fsprogs/Linux don't allow shared external journals, but I imagine Lustre (or ocfs2?), which use the jbd2 code, might. @@ -286,7 +286,7 @@ The journal compat features are any combination of the following: - Description * - 0x1 - Journal maintains checksums on the data blocks. - (JBD2\_FEATURE\_COMPAT\_CHECKSUM) + (JBD2_FEATURE_COMPAT_CHECKSUM) .. _jbd2_incompat: @@ -299,23 +299,23 @@ The journal incompat features are any combination of the following: * - Value - Description * - 0x1 - - Journal has block revocation records. (JBD2\_FEATURE\_INCOMPAT\_REVOKE) + - Journal has block revocation records. (JBD2_FEATURE_INCOMPAT_REVOKE) * - 0x2 - Journal can deal with 64-bit block numbers. - (JBD2\_FEATURE\_INCOMPAT\_64BIT) + (JBD2_FEATURE_INCOMPAT_64BIT) * - 0x4 - - Journal commits asynchronously. (JBD2\_FEATURE\_INCOMPAT\_ASYNC\_COMMIT) + - Journal commits asynchronously. (JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT) * - 0x8 - This journal uses v2 of the checksum on-disk format. Each journal metadata block gets its own checksum, and the block tags in the descriptor table contain checksums for each of the data blocks in the - journal. (JBD2\_FEATURE\_INCOMPAT\_CSUM\_V2) + journal. (JBD2_FEATURE_INCOMPAT_CSUM_V2) * - 0x10 - This journal uses v3 of the checksum on-disk format. This is the same as v2, but the journal block tag size is fixed regardless of the size of - block numbers. (JBD2\_FEATURE\_INCOMPAT\_CSUM\_V3) + block numbers. (JBD2_FEATURE_INCOMPAT_CSUM_V3) * - 0x20 - - Journal has fast commit blocks. (JBD2\_FEATURE\_INCOMPAT\_FAST\_COMMIT) + - Journal has fast commit blocks. (JBD2_FEATURE_INCOMPAT_FAST_COMMIT) .. _jbd2_checksum_type: @@ -355,11 +355,11 @@ Descriptor blocks consume at least 36 bytes, but use a full block: - Name - Descriptor * - 0x0 - - journal\_header\_t + - journal_header_t - (open coded) - Common block header. * - 0xC - - struct journal\_block\_tag\_s + - struct journal_block_tag_s - open coded array[] - Enough tags either to fill up the block or to describe all the data blocks that follow this descriptor block. @@ -367,7 +367,7 @@ Descriptor blocks consume at least 36 bytes, but use a full block: Journal block tags have any of the following formats, depending on which journal feature and block tag flags are set. -If JBD2\_FEATURE\_INCOMPAT\_CSUM\_V3 is set, the journal block tag is +If JBD2_FEATURE_INCOMPAT_CSUM_V3 is set, the journal block tag is defined as ``struct journal_block_tag3_s``, which looks like the following. The size is 16 or 32 bytes. @@ -380,24 +380,24 @@ following. The size is 16 or 32 bytes. - Name - Descriptor * - 0x0 - - \_\_be32 - - t\_blocknr + - __be32 + - t_blocknr - Lower 32-bits of the location of where the corresponding data block should end up on disk. * - 0x4 - - \_\_be32 - - t\_flags + - __be32 + - t_flags - Flags that go with the descriptor. See the table jbd2_tag_flags_ for more info. * - 0x8 - - \_\_be32 - - t\_blocknr\_high + - __be32 + - t_blocknr_high - Upper 32-bits of the location of where the corresponding data block - should end up on disk. This is zero if JBD2\_FEATURE\_INCOMPAT\_64BIT is + should end up on disk. This is zero if JBD2_FEATURE_INCOMPAT_64BIT is not enabled. * - 0xC - - \_\_be32 - - t\_checksum + - __be32 + - t_checksum - Checksum of the journal UUID, the sequence number, and the data block. * - - @@ -433,7 +433,7 @@ The journal tag flags are any combination of the following: * - 0x8 - This is the last tag in this descriptor block. -If JBD2\_FEATURE\_INCOMPAT\_CSUM\_V3 is NOT set, the journal block tag +If JBD2_FEATURE_INCOMPAT_CSUM_V3 is NOT set, the journal block tag is defined as ``struct journal_block_tag_s``, which looks like the following. The size is 8, 12, 24, or 28 bytes: @@ -446,18 +446,18 @@ following. The size is 8, 12, 24, or 28 bytes: - Name - Descriptor * - 0x0 - - \_\_be32 - - t\_blocknr + - __be32 + - t_blocknr - Lower 32-bits of the location of where the corresponding data block should end up on disk. * - 0x4 - - \_\_be16 - - t\_checksum + - __be16 + - t_checksum - Checksum of the journal UUID, the sequence number, and the data block. Note that only the lower 16 bits are stored. * - 0x6 - - \_\_be16 - - t\_flags + - __be16 + - t_flags - Flags that go with the descriptor. See the table jbd2_tag_flags_ for more info. * - @@ -466,8 +466,8 @@ following. The size is 8, 12, 24, or 28 bytes: - This next field is only present if the super block indicates support for 64-bit block numbers. * - 0x8 - - \_\_be32 - - t\_blocknr\_high + - __be32 + - t_blocknr_high - Upper 32-bits of the location of where the corresponding data block should end up on disk. * - @@ -483,8 +483,8 @@ following. The size is 8, 12, 24, or 28 bytes: ``j_uuid`` field in ``struct journal_s``, but only tune2fs touches that field. -If JBD2\_FEATURE\_INCOMPAT\_CSUM\_V2 or -JBD2\_FEATURE\_INCOMPAT\_CSUM\_V3 are set, the end of the block is a +If JBD2_FEATURE_INCOMPAT_CSUM_V2 or +JBD2_FEATURE_INCOMPAT_CSUM_V3 are set, the end of the block is a ``struct jbd2_journal_block_tail``, which looks like this: .. list-table:: @@ -496,8 +496,8 @@ JBD2\_FEATURE\_INCOMPAT\_CSUM\_V3 are set, the end of the block is a - Name - Descriptor * - 0x0 - - \_\_be32 - - t\_checksum + - __be32 + - t_checksum - Checksum of the journal UUID + the descriptor block, with this field set to zero. @@ -538,25 +538,25 @@ length, but use a full block: - Name - Description * - 0x0 - - journal\_header\_t - - r\_header + - journal_header_t + - r_header - Common block header. * - 0xC - - \_\_be32 - - r\_count + - __be32 + - r_count - Number of bytes used in this block. * - 0x10 - - \_\_be32 or \_\_be64 + - __be32 or __be64 - blocks[0] - Blocks to revoke. -After r\_count is a linear array of block numbers that are effectively +After r_count is a linear array of block numbers that are effectively revoked by this transaction. The size of each block number is 8 bytes if the superblock advertises 64-bit block number support, or 4 bytes otherwise. -If JBD2\_FEATURE\_INCOMPAT\_CSUM\_V2 or -JBD2\_FEATURE\_INCOMPAT\_CSUM\_V3 are set, the end of the revocation +If JBD2_FEATURE_INCOMPAT_CSUM_V2 or +JBD2_FEATURE_INCOMPAT_CSUM_V3 are set, the end of the revocation block is a ``struct jbd2_journal_revoke_tail``, which has this format: .. list-table:: @@ -568,8 +568,8 @@ block is a ``struct jbd2_journal_revoke_tail``, which has this format: - Name - Description * - 0x0 - - \_\_be32 - - r\_checksum + - __be32 + - r_checksum - Checksum of the journal UUID + revocation block Commit Block @@ -592,38 +592,38 @@ bytes long (but uses a full block): - Name - Descriptor * - 0x0 - - journal\_header\_s + - journal_header_s - (open coded) - Common block header. * - 0xC - unsigned char - - h\_chksum\_type + - h_chksum_type - The type of checksum to use to verify the integrity of the data blocks in the transaction. See jbd2_checksum_type_ for more info. * - 0xD - unsigned char - - h\_chksum\_size + - h_chksum_size - The number of bytes used by the checksum. Most likely 4. * - 0xE - unsigned char - - h\_padding[2] + - h_padding[2] - * - 0x10 - - \_\_be32 - - h\_chksum[JBD2\_CHECKSUM\_BYTES] + - __be32 + - h_chksum[JBD2_CHECKSUM_BYTES] - 32 bytes of space to store checksums. If - JBD2\_FEATURE\_INCOMPAT\_CSUM\_V2 or JBD2\_FEATURE\_INCOMPAT\_CSUM\_V3 + JBD2_FEATURE_INCOMPAT_CSUM_V2 or JBD2_FEATURE_INCOMPAT_CSUM_V3 are set, the first ``__be32`` is the checksum of the journal UUID and the entire commit block, with this field zeroed. If - JBD2\_FEATURE\_COMPAT\_CHECKSUM is set, the first ``__be32`` is the + JBD2_FEATURE_COMPAT_CHECKSUM is set, the first ``__be32`` is the crc32 of all the blocks already written to the transaction. * - 0x30 - - \_\_be64 - - h\_commit\_sec + - __be64 + - h_commit_sec - The time that the transaction was committed, in seconds since the epoch. * - 0x38 - - \_\_be32 - - h\_commit\_nsec + - __be32 + - h_commit_nsec - Nanoseconds component of the above timestamp. Fast commits diff --git a/Documentation/filesystems/ext4/mmp.rst b/Documentation/filesystems/ext4/mmp.rst index 25660981d93c2d417855129298733a454b9765bd..174dd6538737d8f343a823b56fae3bbaa2c55cb9 100644 --- a/Documentation/filesystems/ext4/mmp.rst +++ b/Documentation/filesystems/ext4/mmp.rst @@ -7,8 +7,8 @@ Multiple mount protection (MMP) is a feature that protects the filesystem against multiple hosts trying to use the filesystem simultaneously. When a filesystem is opened (for mounting, or fsck, etc.), the MMP code running on the node (call it node A) checks a -sequence number. If the sequence number is EXT4\_MMP\_SEQ\_CLEAN, the -open continues. If the sequence number is EXT4\_MMP\_SEQ\_FSCK, then +sequence number. If the sequence number is EXT4_MMP_SEQ_CLEAN, the +open continues. If the sequence number is EXT4_MMP_SEQ_FSCK, then fsck is (hopefully) running, and open fails immediately. Otherwise, the open code will wait for twice the specified MMP check interval and check the sequence number again. If the sequence number has changed, then the @@ -40,38 +40,38 @@ The MMP structure (``struct mmp_struct``) is as follows: - Name - Description * - 0x0 - - \_\_le32 - - mmp\_magic + - __le32 + - mmp_magic - Magic number for MMP, 0x004D4D50 (“MMP”). * - 0x4 - - \_\_le32 - - mmp\_seq + - __le32 + - mmp_seq - Sequence number, updated periodically. * - 0x8 - - \_\_le64 - - mmp\_time + - __le64 + - mmp_time - Time that the MMP block was last updated. * - 0x10 - char[64] - - mmp\_nodename + - mmp_nodename - Hostname of the node that opened the filesystem. * - 0x50 - char[32] - - mmp\_bdevname + - mmp_bdevname - Block device name of the filesystem. * - 0x70 - - \_\_le16 - - mmp\_check\_interval + - __le16 + - mmp_check_interval - The MMP re-check interval, in seconds. * - 0x72 - - \_\_le16 - - mmp\_pad1 + - __le16 + - mmp_pad1 - Zero. * - 0x74 - - \_\_le32[226] - - mmp\_pad2 + - __le32[226] + - mmp_pad2 - Zero. * - 0x3FC - - \_\_le32 - - mmp\_checksum + - __le32 + - mmp_checksum - Checksum of the MMP block. diff --git a/Documentation/filesystems/ext4/overview.rst b/Documentation/filesystems/ext4/overview.rst index 123ebfde47ee1d815d2a01c600466bcb1f11057c..0fad6eda6e15d1ec029154fa9aaec5a387604551 100644 --- a/Documentation/filesystems/ext4/overview.rst +++ b/Documentation/filesystems/ext4/overview.rst @@ -7,7 +7,7 @@ An ext4 file system is split into a series of block groups. To reduce performance difficulties due to fragmentation, the block allocator tries very hard to keep each file's blocks within the same group, thereby reducing seek times. The size of a block group is specified in -``sb.s_blocks_per_group`` blocks, though it can also calculated as 8 \* +``sb.s_blocks_per_group`` blocks, though it can also calculated as 8 * ``block_size_in_bytes``. With the default block size of 4KiB, each group will contain 32,768 blocks, for a length of 128MiB. The number of block groups is the size of the device divided by the size of a block group. diff --git a/Documentation/filesystems/ext4/special_inodes.rst b/Documentation/filesystems/ext4/special_inodes.rst index 94f304e3a0a7b69a80a69a6c4cd286871dddf613..fc0636901fa0e1ebe643a8db8d19b9c2a045cf1b 100644 --- a/Documentation/filesystems/ext4/special_inodes.rst +++ b/Documentation/filesystems/ext4/special_inodes.rst @@ -34,7 +34,7 @@ ext4 reserves some inode for special features, as follows: * - 10 - Replica inode, used for some non-upstream feature? * - 11 - - Traditional first non-reserved inode. Usually this is the lost+found directory. See s\_first\_ino in the superblock. + - Traditional first non-reserved inode. Usually this is the lost+found directory. See s_first_ino in the superblock. Note that there are also some inodes allocated from non-reserved inode numbers for other filesystem features which are not referenced from standard directory @@ -47,9 +47,9 @@ hierarchy. These are generally reference from the superblock. They are: * - Superblock field - Description - * - s\_lpf\_ino + * - s_lpf_ino - Inode number of lost+found directory. - * - s\_prj\_quota\_inum + * - s_prj_quota_inum - Inode number of quota file tracking project quotas - * - s\_orphan\_file\_inum + * - s_orphan_file_inum - Inode number of file tracking orphan inodes. diff --git a/Documentation/filesystems/ext4/super.rst b/Documentation/filesystems/ext4/super.rst index f6a548e957bb9283ae97ad52f0a268e5255cea54..268888522e35cbc5e39c4eecb7923d96c0ca02e0 100644 --- a/Documentation/filesystems/ext4/super.rst +++ b/Documentation/filesystems/ext4/super.rst @@ -7,7 +7,7 @@ The superblock records various information about the enclosing filesystem, such as block counts, inode counts, supported features, maintenance information, and more. -If the sparse\_super feature flag is set, redundant copies of the +If the sparse_super feature flag is set, redundant copies of the superblock and group descriptors are kept only in the groups whose group number is either 0 or a power of 3, 5, or 7. If the flag is not set, redundant copies are kept in all groups. @@ -27,107 +27,107 @@ The ext4 superblock is laid out as follows in - Name - Description * - 0x0 - - \_\_le32 - - s\_inodes\_count + - __le32 + - s_inodes_count - Total inode count. * - 0x4 - - \_\_le32 - - s\_blocks\_count\_lo + - __le32 + - s_blocks_count_lo - Total block count. * - 0x8 - - \_\_le32 - - s\_r\_blocks\_count\_lo + - __le32 + - s_r_blocks_count_lo - This number of blocks can only be allocated by the super-user. * - 0xC - - \_\_le32 - - s\_free\_blocks\_count\_lo + - __le32 + - s_free_blocks_count_lo - Free block count. * - 0x10 - - \_\_le32 - - s\_free\_inodes\_count + - __le32 + - s_free_inodes_count - Free inode count. * - 0x14 - - \_\_le32 - - s\_first\_data\_block + - __le32 + - s_first_data_block - First data block. This must be at least 1 for 1k-block filesystems and is typically 0 for all other block sizes. * - 0x18 - - \_\_le32 - - s\_log\_block\_size - - Block size is 2 ^ (10 + s\_log\_block\_size). + - __le32 + - s_log_block_size + - Block size is 2 ^ (10 + s_log_block_size). * - 0x1C - - \_\_le32 - - s\_log\_cluster\_size - - Cluster size is 2 ^ (10 + s\_log\_cluster\_size) blocks if bigalloc is - enabled. Otherwise s\_log\_cluster\_size must equal s\_log\_block\_size. + - __le32 + - s_log_cluster_size + - Cluster size is 2 ^ (10 + s_log_cluster_size) blocks if bigalloc is + enabled. Otherwise s_log_cluster_size must equal s_log_block_size. * - 0x20 - - \_\_le32 - - s\_blocks\_per\_group + - __le32 + - s_blocks_per_group - Blocks per group. * - 0x24 - - \_\_le32 - - s\_clusters\_per\_group + - __le32 + - s_clusters_per_group - Clusters per group, if bigalloc is enabled. Otherwise - s\_clusters\_per\_group must equal s\_blocks\_per\_group. + s_clusters_per_group must equal s_blocks_per_group. * - 0x28 - - \_\_le32 - - s\_inodes\_per\_group + - __le32 + - s_inodes_per_group - Inodes per group. * - 0x2C - - \_\_le32 - - s\_mtime + - __le32 + - s_mtime - Mount time, in seconds since the epoch. * - 0x30 - - \_\_le32 - - s\_wtime + - __le32 + - s_wtime - Write time, in seconds since the epoch. * - 0x34 - - \_\_le16 - - s\_mnt\_count + - __le16 + - s_mnt_count - Number of mounts since the last fsck. * - 0x36 - - \_\_le16 - - s\_max\_mnt\_count + - __le16 + - s_max_mnt_count - Number of mounts beyond which a fsck is needed. * - 0x38 - - \_\_le16 - - s\_magic + - __le16 + - s_magic - Magic signature, 0xEF53 * - 0x3A - - \_\_le16 - - s\_state + - __le16 + - s_state - File system state. See super_state_ for more info. * - 0x3C - - \_\_le16 - - s\_errors + - __le16 + - s_errors - Behaviour when detecting errors. See super_errors_ for more info. * - 0x3E - - \_\_le16 - - s\_minor\_rev\_level + - __le16 + - s_minor_rev_level - Minor revision level. * - 0x40 - - \_\_le32 - - s\_lastcheck + - __le32 + - s_lastcheck - Time of last check, in seconds since the epoch. * - 0x44 - - \_\_le32 - - s\_checkinterval + - __le32 + - s_checkinterval - Maximum time between checks, in seconds. * - 0x48 - - \_\_le32 - - s\_creator\_os + - __le32 + - s_creator_os - Creator OS. See the table super_creator_ for more info. * - 0x4C - - \_\_le32 - - s\_rev\_level + - __le32 + - s_rev_level - Revision level. See the table super_revision_ for more info. * - 0x50 - - \_\_le16 - - s\_def\_resuid + - __le16 + - s_def_resuid - Default uid for reserved blocks. * - 0x52 - - \_\_le16 - - s\_def\_resgid + - __le16 + - s_def_resgid - Default gid for reserved blocks. * - - @@ -143,50 +143,50 @@ The ext4 superblock is laid out as follows in about a feature in either the compatible or incompatible feature set, it must abort and not try to meddle with things it doesn't understand... * - 0x54 - - \_\_le32 - - s\_first\_ino + - __le32 + - s_first_ino - First non-reserved inode. * - 0x58 - - \_\_le16 - - s\_inode\_size + - __le16 + - s_inode_size - Size of inode structure, in bytes. * - 0x5A - - \_\_le16 - - s\_block\_group\_nr + - __le16 + - s_block_group_nr - Block group # of this superblock. * - 0x5C - - \_\_le32 - - s\_feature\_compat + - __le32 + - s_feature_compat - Compatible feature set flags. Kernel can still read/write this fs even if it doesn't understand a flag; fsck should not do that. See the super_compat_ table for more info. * - 0x60 - - \_\_le32 - - s\_feature\_incompat + - __le32 + - s_feature_incompat - Incompatible feature set. If the kernel or fsck doesn't understand one of these bits, it should stop. See the super_incompat_ table for more info. * - 0x64 - - \_\_le32 - - s\_feature\_ro\_compat + - __le32 + - s_feature_ro_compat - Readonly-compatible feature set. If the kernel doesn't understand one of these bits, it can still mount read-only. See the super_rocompat_ table for more info. * - 0x68 - - \_\_u8 - - s\_uuid[16] + - __u8 + - s_uuid[16] - 128-bit UUID for volume. * - 0x78 - char - - s\_volume\_name[16] + - s_volume_name[16] - Volume label. * - 0x88 - char - - s\_last\_mounted[64] + - s_last_mounted[64] - Directory where filesystem was last mounted. * - 0xC8 - - \_\_le32 - - s\_algorithm\_usage\_bitmap + - __le32 + - s_algorithm_usage_bitmap - For compression (Not used in e2fsprogs/Linux) * - - @@ -194,18 +194,18 @@ The ext4 superblock is laid out as follows in - Performance hints. Directory preallocation should only happen if the EXT4_FEATURE_COMPAT_DIR_PREALLOC flag is on. * - 0xCC - - \_\_u8 - - s\_prealloc\_blocks + - __u8 + - s_prealloc_blocks - #. of blocks to try to preallocate for ... files? (Not used in e2fsprogs/Linux) * - 0xCD - - \_\_u8 - - s\_prealloc\_dir\_blocks + - __u8 + - s_prealloc_dir_blocks - #. of blocks to preallocate for directories. (Not used in e2fsprogs/Linux) * - 0xCE - - \_\_le16 - - s\_reserved\_gdt\_blocks + - __le16 + - s_reserved_gdt_blocks - Number of reserved GDT entries for future filesystem expansion. * - - @@ -213,281 +213,281 @@ The ext4 superblock is laid out as follows in - Journalling support is valid only if EXT4_FEATURE_COMPAT_HAS_JOURNAL is set. * - 0xD0 - - \_\_u8 - - s\_journal\_uuid[16] + - __u8 + - s_journal_uuid[16] - UUID of journal superblock * - 0xE0 - - \_\_le32 - - s\_journal\_inum + - __le32 + - s_journal_inum - inode number of journal file. * - 0xE4 - - \_\_le32 - - s\_journal\_dev + - __le32 + - s_journal_dev - Device number of journal file, if the external journal feature flag is set. * - 0xE8 - - \_\_le32 - - s\_last\_orphan + - __le32 + - s_last_orphan - Start of list of orphaned inodes to delete. * - 0xEC - - \_\_le32 - - s\_hash\_seed[4] + - __le32 + - s_hash_seed[4] - HTREE hash seed. * - 0xFC - - \_\_u8 - - s\_def\_hash\_version + - __u8 + - s_def_hash_version - Default hash algorithm to use for directory hashes. See super_def_hash_ for more info. * - 0xFD - - \_\_u8 - - s\_jnl\_backup\_type - - If this value is 0 or EXT3\_JNL\_BACKUP\_BLOCKS (1), then the + - __u8 + - s_jnl_backup_type + - If this value is 0 or EXT3_JNL_BACKUP_BLOCKS (1), then the ``s_jnl_blocks`` field contains a duplicate copy of the inode's ``i_block[]`` array and ``i_size``. * - 0xFE - - \_\_le16 - - s\_desc\_size + - __le16 + - s_desc_size - Size of group descriptors, in bytes, if the 64bit incompat feature flag is set. * - 0x100 - - \_\_le32 - - s\_default\_mount\_opts + - __le32 + - s_default_mount_opts - Default mount options. See the super_mountopts_ table for more info. * - 0x104 - - \_\_le32 - - s\_first\_meta\_bg - - First metablock block group, if the meta\_bg feature is enabled. + - __le32 + - s_first_meta_bg + - First metablock block group, if the meta_bg feature is enabled. * - 0x108 - - \_\_le32 - - s\_mkfs\_time + - __le32 + - s_mkfs_time - When the filesystem was created, in seconds since the epoch. * - 0x10C - - \_\_le32 - - s\_jnl\_blocks[17] + - __le32 + - s_jnl_blocks[17] - Backup copy of the journal inode's ``i_block[]`` array in the first 15 - elements and i\_size\_high and i\_size in the 16th and 17th elements, + elements and i_size_high and i_size in the 16th and 17th elements, respectively. * - - - - 64bit support is valid only if EXT4_FEATURE_COMPAT_64BIT is set. * - 0x150 - - \_\_le32 - - s\_blocks\_count\_hi + - __le32 + - s_blocks_count_hi - High 32-bits of the block count. * - 0x154 - - \_\_le32 - - s\_r\_blocks\_count\_hi + - __le32 + - s_r_blocks_count_hi - High 32-bits of the reserved block count. * - 0x158 - - \_\_le32 - - s\_free\_blocks\_count\_hi + - __le32 + - s_free_blocks_count_hi - High 32-bits of the free block count. * - 0x15C - - \_\_le16 - - s\_min\_extra\_isize + - __le16 + - s_min_extra_isize - All inodes have at least # bytes. * - 0x15E - - \_\_le16 - - s\_want\_extra\_isize + - __le16 + - s_want_extra_isize - New inodes should reserve # bytes. * - 0x160 - - \_\_le32 - - s\_flags + - __le32 + - s_flags - Miscellaneous flags. See the super_flags_ table for more info. * - 0x164 - - \_\_le16 - - s\_raid\_stride + - __le16 + - s_raid_stride - RAID stride. This is the number of logical blocks read from or written to the disk before moving to the next disk. This affects the placement of filesystem metadata, which will hopefully make RAID storage faster. * - 0x166 - - \_\_le16 - - s\_mmp\_interval + - __le16 + - s_mmp_interval - #. seconds to wait in multi-mount prevention (MMP) checking. In theory, MMP is a mechanism to record in the superblock which host and device have mounted the filesystem, in order to prevent multiple mounts. This feature does not seem to be implemented... * - 0x168 - - \_\_le64 - - s\_mmp\_block + - __le64 + - s_mmp_block - Block # for multi-mount protection data. * - 0x170 - - \_\_le32 - - s\_raid\_stripe\_width + - __le32 + - s_raid_stripe_width - RAID stripe width. This is the number of logical blocks read from or written to the disk before coming back to the current disk. This is used by the block allocator to try to reduce the number of read-modify-write operations in a RAID5/6. * - 0x174 - - \_\_u8 - - s\_log\_groups\_per\_flex + - __u8 + - s_log_groups_per_flex - Size of a flexible block group is 2 ^ ``s_log_groups_per_flex``. * - 0x175 - - \_\_u8 - - s\_checksum\_type + - __u8 + - s_checksum_type - Metadata checksum algorithm type. The only valid value is 1 (crc32c). * - 0x176 - - \_\_le16 - - s\_reserved\_pad + - __le16 + - s_reserved_pad - * - 0x178 - - \_\_le64 - - s\_kbytes\_written + - __le64 + - s_kbytes_written - Number of KiB written to this filesystem over its lifetime. * - 0x180 - - \_\_le32 - - s\_snapshot\_inum + - __le32 + - s_snapshot_inum - inode number of active snapshot. (Not used in e2fsprogs/Linux.) * - 0x184 - - \_\_le32 - - s\_snapshot\_id + - __le32 + - s_snapshot_id - Sequential ID of active snapshot. (Not used in e2fsprogs/Linux.) * - 0x188 - - \_\_le64 - - s\_snapshot\_r\_blocks\_count + - __le64 + - s_snapshot_r_blocks_count - Number of blocks reserved for active snapshot's future use. (Not used in e2fsprogs/Linux.) * - 0x190 - - \_\_le32 - - s\_snapshot\_list + - __le32 + - s_snapshot_list - inode number of the head of the on-disk snapshot list. (Not used in e2fsprogs/Linux.) * - 0x194 - - \_\_le32 - - s\_error\_count + - __le32 + - s_error_count - Number of errors seen. * - 0x198 - - \_\_le32 - - s\_first\_error\_time + - __le32 + - s_first_error_time - First time an error happened, in seconds since the epoch. * - 0x19C - - \_\_le32 - - s\_first\_error\_ino + - __le32 + - s_first_error_ino - inode involved in first error. * - 0x1A0 - - \_\_le64 - - s\_first\_error\_block + - __le64 + - s_first_error_block - Number of block involved of first error. * - 0x1A8 - - \_\_u8 - - s\_first\_error\_func[32] + - __u8 + - s_first_error_func[32] - Name of function where the error happened. * - 0x1C8 - - \_\_le32 - - s\_first\_error\_line + - __le32 + - s_first_error_line - Line number where error happened. * - 0x1CC - - \_\_le32 - - s\_last\_error\_time + - __le32 + - s_last_error_time - Time of most recent error, in seconds since the epoch. * - 0x1D0 - - \_\_le32 - - s\_last\_error\_ino + - __le32 + - s_last_error_ino - inode involved in most recent error. * - 0x1D4 - - \_\_le32 - - s\_last\_error\_line + - __le32 + - s_last_error_line - Line number where most recent error happened. * - 0x1D8 - - \_\_le64 - - s\_last\_error\_block + - __le64 + - s_last_error_block - Number of block involved in most recent error. * - 0x1E0 - - \_\_u8 - - s\_last\_error\_func[32] + - __u8 + - s_last_error_func[32] - Name of function where the most recent error happened. * - 0x200 - - \_\_u8 - - s\_mount\_opts[64] + - __u8 + - s_mount_opts[64] - ASCIIZ string of mount options. * - 0x240 - - \_\_le32 - - s\_usr\_quota\_inum + - __le32 + - s_usr_quota_inum - Inode number of user `quota `__ file. * - 0x244 - - \_\_le32 - - s\_grp\_quota\_inum + - __le32 + - s_grp_quota_inum - Inode number of group `quota `__ file. * - 0x248 - - \_\_le32 - - s\_overhead\_blocks + - __le32 + - s_overhead_blocks - Overhead blocks/clusters in fs. (Huh? This field is always zero, which means that the kernel calculates it dynamically.) * - 0x24C - - \_\_le32 - - s\_backup\_bgs[2] - - Block groups containing superblock backups (if sparse\_super2) + - __le32 + - s_backup_bgs[2] + - Block groups containing superblock backups (if sparse_super2) * - 0x254 - - \_\_u8 - - s\_encrypt\_algos[4] + - __u8 + - s_encrypt_algos[4] - Encryption algorithms in use. There can be up to four algorithms in use at any time; valid algorithm codes are given in the super_encrypt_ table below. * - 0x258 - - \_\_u8 - - s\_encrypt\_pw\_salt[16] + - __u8 + - s_encrypt_pw_salt[16] - Salt for the string2key algorithm for encryption. * - 0x268 - - \_\_le32 - - s\_lpf\_ino + - __le32 + - s_lpf_ino - Inode number of lost+found * - 0x26C - - \_\_le32 - - s\_prj\_quota\_inum + - __le32 + - s_prj_quota_inum - Inode that tracks project quotas. * - 0x270 - - \_\_le32 - - s\_checksum\_seed - - Checksum seed used for metadata\_csum calculations. This value is - crc32c(~0, $orig\_fs\_uuid). + - __le32 + - s_checksum_seed + - Checksum seed used for metadata_csum calculations. This value is + crc32c(~0, $orig_fs_uuid). * - 0x274 - - \_\_u8 - - s\_wtime_hi + - __u8 + - s_wtime_hi - Upper 8 bits of the s_wtime field. * - 0x275 - - \_\_u8 - - s\_mtime_hi + - __u8 + - s_mtime_hi - Upper 8 bits of the s_mtime field. * - 0x276 - - \_\_u8 - - s\_mkfs_time_hi + - __u8 + - s_mkfs_time_hi - Upper 8 bits of the s_mkfs_time field. * - 0x277 - - \_\_u8 - - s\_lastcheck_hi + - __u8 + - s_lastcheck_hi - Upper 8 bits of the s_lastcheck_hi field. * - 0x278 - - \_\_u8 - - s\_first_error_time_hi + - __u8 + - s_first_error_time_hi - Upper 8 bits of the s_first_error_time_hi field. * - 0x279 - - \_\_u8 - - s\_last_error_time_hi + - __u8 + - s_last_error_time_hi - Upper 8 bits of the s_last_error_time_hi field. * - 0x27A - - \_\_u8 - - s\_pad[2] + - __u8 + - s_pad[2] - Zero padding. * - 0x27C - - \_\_le16 - - s\_encoding + - __le16 + - s_encoding - Filename charset encoding. * - 0x27E - - \_\_le16 - - s\_encoding_flags + - __le16 + - s_encoding_flags - Filename charset encoding flags. * - 0x280 - - \_\_le32 - - s\_orphan\_file\_inum + - __le32 + - s_orphan_file_inum - Orphan file inode number. * - 0x284 - - \_\_le32 - - s\_reserved[94] + - __le32 + - s_reserved[94] - Padding to the end of the block. * - 0x3FC - - \_\_le32 - - s\_checksum + - __le32 + - s_checksum - Superblock checksum. .. _super_state: @@ -574,44 +574,44 @@ following: * - Value - Description * - 0x1 - - Directory preallocation (COMPAT\_DIR\_PREALLOC). + - Directory preallocation (COMPAT_DIR_PREALLOC). * - 0x2 - “imagic inodes”. Not clear from the code what this does - (COMPAT\_IMAGIC\_INODES). + (COMPAT_IMAGIC_INODES). * - 0x4 - - Has a journal (COMPAT\_HAS\_JOURNAL). + - Has a journal (COMPAT_HAS_JOURNAL). * - 0x8 - - Supports extended attributes (COMPAT\_EXT\_ATTR). + - Supports extended attributes (COMPAT_EXT_ATTR). * - 0x10 - Has reserved GDT blocks for filesystem expansion - (COMPAT\_RESIZE\_INODE). Requires RO\_COMPAT\_SPARSE\_SUPER. + (COMPAT_RESIZE_INODE). Requires RO_COMPAT_SPARSE_SUPER. * - 0x20 - - Has directory indices (COMPAT\_DIR\_INDEX). + - Has directory indices (COMPAT_DIR_INDEX). * - 0x40 - “Lazy BG”. Not in Linux kernel, seems to have been for uninitialized - block groups? (COMPAT\_LAZY\_BG) + block groups? (COMPAT_LAZY_BG) * - 0x80 - - “Exclude inode”. Not used. (COMPAT\_EXCLUDE\_INODE). + - “Exclude inode”. Not used. (COMPAT_EXCLUDE_INODE). * - 0x100 - “Exclude bitmap”. Seems to be used to indicate the presence of snapshot-related exclude bitmaps? Not defined in kernel or used in - e2fsprogs (COMPAT\_EXCLUDE\_BITMAP). + e2fsprogs (COMPAT_EXCLUDE_BITMAP). * - 0x200 - - Sparse Super Block, v2. If this flag is set, the SB field s\_backup\_bgs + - Sparse Super Block, v2. If this flag is set, the SB field s_backup_bgs points to the two block groups that contain backup superblocks - (COMPAT\_SPARSE\_SUPER2). + (COMPAT_SPARSE_SUPER2). * - 0x400 - Fast commits supported. Although fast commits blocks are backward incompatible, fast commit blocks are not always present in the journal. If fast commit blocks are present in the journal, JBD2 incompat feature - (JBD2\_FEATURE\_INCOMPAT\_FAST\_COMMIT) gets - set (COMPAT\_FAST\_COMMIT). + (JBD2_FEATURE_INCOMPAT_FAST_COMMIT) gets + set (COMPAT_FAST_COMMIT). * - 0x1000 - Orphan file allocated. This is the special file for more efficient tracking of unlinked but still open inodes. When there may be any entries in the file, we additionally set proper rocompat feature - (RO\_COMPAT\_ORPHAN\_PRESENT). + (RO_COMPAT_ORPHAN_PRESENT). .. _super_incompat: @@ -625,45 +625,45 @@ following: * - Value - Description * - 0x1 - - Compression (INCOMPAT\_COMPRESSION). + - Compression (INCOMPAT_COMPRESSION). * - 0x2 - - Directory entries record the file type. See ext4\_dir\_entry\_2 below - (INCOMPAT\_FILETYPE). + - Directory entries record the file type. See ext4_dir_entry_2 below + (INCOMPAT_FILETYPE). * - 0x4 - - Filesystem needs recovery (INCOMPAT\_RECOVER). + - Filesystem needs recovery (INCOMPAT_RECOVER). * - 0x8 - - Filesystem has a separate journal device (INCOMPAT\_JOURNAL\_DEV). + - Filesystem has a separate journal device (INCOMPAT_JOURNAL_DEV). * - 0x10 - Meta block groups. See the earlier discussion of this feature - (INCOMPAT\_META\_BG). + (INCOMPAT_META_BG). * - 0x40 - - Files in this filesystem use extents (INCOMPAT\_EXTENTS). + - Files in this filesystem use extents (INCOMPAT_EXTENTS). * - 0x80 - - Enable a filesystem size of 2^64 blocks (INCOMPAT\_64BIT). + - Enable a filesystem size of 2^64 blocks (INCOMPAT_64BIT). * - 0x100 - - Multiple mount protection (INCOMPAT\_MMP). + - Multiple mount protection (INCOMPAT_MMP). * - 0x200 - Flexible block groups. See the earlier discussion of this feature - (INCOMPAT\_FLEX\_BG). + (INCOMPAT_FLEX_BG). * - 0x400 - Inodes can be used to store large extended attribute values - (INCOMPAT\_EA\_INODE). + (INCOMPAT_EA_INODE). * - 0x1000 - - Data in directory entry (INCOMPAT\_DIRDATA). (Not implemented?) + - Data in directory entry (INCOMPAT_DIRDATA). (Not implemented?) * - 0x2000 - Metadata checksum seed is stored in the superblock. This feature enables - the administrator to change the UUID of a metadata\_csum filesystem + the administrator to change the UUID of a metadata_csum filesystem while the filesystem is mounted; without it, the checksum definition - requires all metadata blocks to be rewritten (INCOMPAT\_CSUM\_SEED). + requires all metadata blocks to be rewritten (INCOMPAT_CSUM_SEED). * - 0x4000 - - Large directory >2GB or 3-level htree (INCOMPAT\_LARGEDIR). Prior to + - Large directory >2GB or 3-level htree (INCOMPAT_LARGEDIR). Prior to this feature, directories could not be larger than 4GiB and could not have an htree more than 2 levels deep. If this feature is enabled, directories can be larger than 4GiB and have a maximum htree depth of 3. * - 0x8000 - - Data in inode (INCOMPAT\_INLINE\_DATA). + - Data in inode (INCOMPAT_INLINE_DATA). * - 0x10000 - - Encrypted inodes are present on the filesystem. (INCOMPAT\_ENCRYPT). + - Encrypted inodes are present on the filesystem. (INCOMPAT_ENCRYPT). .. _super_rocompat: @@ -678,54 +678,54 @@ the following: - Description * - 0x1 - Sparse superblocks. See the earlier discussion of this feature - (RO\_COMPAT\_SPARSE\_SUPER). + (RO_COMPAT_SPARSE_SUPER). * - 0x2 - This filesystem has been used to store a file greater than 2GiB - (RO\_COMPAT\_LARGE\_FILE). + (RO_COMPAT_LARGE_FILE). * - 0x4 - - Not used in kernel or e2fsprogs (RO\_COMPAT\_BTREE\_DIR). + - Not used in kernel or e2fsprogs (RO_COMPAT_BTREE_DIR). * - 0x8 - This filesystem has files whose sizes are represented in units of logical blocks, not 512-byte sectors. This implies a very large file - indeed! (RO\_COMPAT\_HUGE\_FILE) + indeed! (RO_COMPAT_HUGE_FILE) * - 0x10 - Group descriptors have checksums. In addition to detecting corruption, this is useful for lazy formatting with uninitialized groups - (RO\_COMPAT\_GDT\_CSUM). + (RO_COMPAT_GDT_CSUM). * - 0x20 - Indicates that the old ext3 32,000 subdirectory limit no longer applies - (RO\_COMPAT\_DIR\_NLINK). A directory's i\_links\_count will be set to 1 + (RO_COMPAT_DIR_NLINK). A directory's i_links_count will be set to 1 if it is incremented past 64,999. * - 0x40 - Indicates that large inodes exist on this filesystem - (RO\_COMPAT\_EXTRA\_ISIZE). + (RO_COMPAT_EXTRA_ISIZE). * - 0x80 - - This filesystem has a snapshot (RO\_COMPAT\_HAS\_SNAPSHOT). + - This filesystem has a snapshot (RO_COMPAT_HAS_SNAPSHOT). * - 0x100 - - `Quota `__ (RO\_COMPAT\_QUOTA). + - `Quota `__ (RO_COMPAT_QUOTA). * - 0x200 - This filesystem supports “bigalloc”, which means that file extents are tracked in units of clusters (of blocks) instead of blocks - (RO\_COMPAT\_BIGALLOC). + (RO_COMPAT_BIGALLOC). * - 0x400 - This filesystem supports metadata checksumming. - (RO\_COMPAT\_METADATA\_CSUM; implies RO\_COMPAT\_GDT\_CSUM, though - GDT\_CSUM must not be set) + (RO_COMPAT_METADATA_CSUM; implies RO_COMPAT_GDT_CSUM, though + GDT_CSUM must not be set) * - 0x800 - Filesystem supports replicas. This feature is neither in the kernel nor - e2fsprogs. (RO\_COMPAT\_REPLICA) + e2fsprogs. (RO_COMPAT_REPLICA) * - 0x1000 - Read-only filesystem image; the kernel will not mount this image read-write and most tools will refuse to write to the image. - (RO\_COMPAT\_READONLY) + (RO_COMPAT_READONLY) * - 0x2000 - - Filesystem tracks project quotas. (RO\_COMPAT\_PROJECT) + - Filesystem tracks project quotas. (RO_COMPAT_PROJECT) * - 0x8000 - - Verity inodes may be present on the filesystem. (RO\_COMPAT\_VERITY) + - Verity inodes may be present on the filesystem. (RO_COMPAT_VERITY) * - 0x10000 - Indicates orphan file may have valid orphan entries and thus we need to clean them up when mounting the filesystem - (RO\_COMPAT\_ORPHAN\_PRESENT). + (RO_COMPAT_ORPHAN_PRESENT). .. _super_def_hash: @@ -761,36 +761,36 @@ The ``s_default_mount_opts`` field is any combination of the following: * - Value - Description * - 0x0001 - - Print debugging info upon (re)mount. (EXT4\_DEFM\_DEBUG) + - Print debugging info upon (re)mount. (EXT4_DEFM_DEBUG) * - 0x0002 - New files take the gid of the containing directory (instead of the fsgid - of the current process). (EXT4\_DEFM\_BSDGROUPS) + of the current process). (EXT4_DEFM_BSDGROUPS) * - 0x0004 - - Support userspace-provided extended attributes. (EXT4\_DEFM\_XATTR\_USER) + - Support userspace-provided extended attributes. (EXT4_DEFM_XATTR_USER) * - 0x0008 - - Support POSIX access control lists (ACLs). (EXT4\_DEFM\_ACL) + - Support POSIX access control lists (ACLs). (EXT4_DEFM_ACL) * - 0x0010 - - Do not support 32-bit UIDs. (EXT4\_DEFM\_UID16) + - Do not support 32-bit UIDs. (EXT4_DEFM_UID16) * - 0x0020 - All data and metadata are commited to the journal. - (EXT4\_DEFM\_JMODE\_DATA) + (EXT4_DEFM_JMODE_DATA) * - 0x0040 - All data are flushed to the disk before metadata are committed to the - journal. (EXT4\_DEFM\_JMODE\_ORDERED) + journal. (EXT4_DEFM_JMODE_ORDERED) * - 0x0060 - Data ordering is not preserved; data may be written after the metadata - has been written. (EXT4\_DEFM\_JMODE\_WBACK) + has been written. (EXT4_DEFM_JMODE_WBACK) * - 0x0100 - - Disable write flushes. (EXT4\_DEFM\_NOBARRIER) + - Disable write flushes. (EXT4_DEFM_NOBARRIER) * - 0x0200 - Track which blocks in a filesystem are metadata and therefore should not be used as data blocks. This option will be enabled by default on 3.18, - hopefully. (EXT4\_DEFM\_BLOCK\_VALIDITY) + hopefully. (EXT4_DEFM_BLOCK_VALIDITY) * - 0x0400 - Enable DISCARD support, where the storage device is told about blocks - becoming unused. (EXT4\_DEFM\_DISCARD) + becoming unused. (EXT4_DEFM_DISCARD) * - 0x0800 - - Disable delayed allocation. (EXT4\_DEFM\_NODELALLOC) + - Disable delayed allocation. (EXT4_DEFM_NODELALLOC) .. _super_flags: @@ -820,12 +820,12 @@ The ``s_encrypt_algos`` list can contain any of the following: * - Value - Description * - 0 - - Invalid algorithm (ENCRYPTION\_MODE\_INVALID). + - Invalid algorithm (ENCRYPTION_MODE_INVALID). * - 1 - - 256-bit AES in XTS mode (ENCRYPTION\_MODE\_AES\_256\_XTS). + - 256-bit AES in XTS mode (ENCRYPTION_MODE_AES_256_XTS). * - 2 - - 256-bit AES in GCM mode (ENCRYPTION\_MODE\_AES\_256\_GCM). + - 256-bit AES in GCM mode (ENCRYPTION_MODE_AES_256_GCM). * - 3 - - 256-bit AES in CBC mode (ENCRYPTION\_MODE\_AES\_256\_CBC). + - 256-bit AES in CBC mode (ENCRYPTION_MODE_AES_256_CBC). Total size of the superblock is 1024 bytes. diff --git a/Documentation/filesystems/netfs_library.rst b/Documentation/filesystems/netfs_library.rst index 4d19b19bcc0867a834f81498f73727eac2030508..73a4176144b3b80aff4db87bd4ab3bd8e96c35c4 100644 --- a/Documentation/filesystems/netfs_library.rst +++ b/Documentation/filesystems/netfs_library.rst @@ -301,7 +301,7 @@ through which it can issue requests and negotiate:: void (*issue_read)(struct netfs_io_subrequest *subreq); bool (*is_still_valid)(struct netfs_io_request *rreq); int (*check_write_begin)(struct file *file, loff_t pos, unsigned len, - struct folio *folio, void **_fsdata); + struct folio **foliop, void **_fsdata); void (*done)(struct netfs_io_request *rreq); }; @@ -381,8 +381,10 @@ The operations are as follows: allocated/grabbed the folio to be modified to allow the filesystem to flush conflicting state before allowing it to be modified. - It should return 0 if everything is now fine, -EAGAIN if the folio should be - regrabbed and any other error code to abort the operation. + It may unlock and discard the folio it was given and set the caller's folio + pointer to NULL. It should return 0 if everything is now fine (``*foliop`` + left set) or the op should be retried (``*foliop`` cleared) and any other + error code to abort the operation. * ``done`` diff --git a/Documentation/filesystems/overlayfs.rst b/Documentation/filesystems/overlayfs.rst index 7da6c30ed596ad72a446b5dc81b52b305b77b069..316cfd8b1891e53ddf82f9352779484b42cdc0a8 100644 --- a/Documentation/filesystems/overlayfs.rst +++ b/Documentation/filesystems/overlayfs.rst @@ -466,6 +466,10 @@ overlay filesystem and the value of st_ino for filesystem objects may not be persistent and could change even while the overlay filesystem is mounted, as summarized in the `Inode properties`_ table above. +4) "idmapped mounts" +When the upper or lower layers are idmapped mounts overlayfs will be mounted +without support for POSIX Access Control Lists (ACLs). This limitation will +eventually be lifted. Changes to underlying filesystems --------------------------------- diff --git a/Documentation/kbuild/llvm.rst b/Documentation/kbuild/llvm.rst index b854bb4131647dc1ff311dcb699fe3b5a8685a7e..6b2bac8e9ce0885fbd1b43465ef3e390456093e4 100644 --- a/Documentation/kbuild/llvm.rst +++ b/Documentation/kbuild/llvm.rst @@ -129,18 +129,24 @@ yet. Bug reports are always welcome at the issue tracker below! * - arm64 - Supported - ``LLVM=1`` + * - hexagon + - Maintained + - ``LLVM=1`` * - mips - Maintained - - ``CC=clang`` + - ``LLVM=1`` * - powerpc - Maintained - ``CC=clang`` * - riscv - Maintained - - ``CC=clang`` + - ``LLVM=1`` * - s390 - Maintained - ``CC=clang`` + * - um (User Mode) + - Maintained + - ``LLVM=1`` * - x86 - Supported - ``LLVM=1`` diff --git a/Documentation/livepatch/module-elf-format.rst b/Documentation/livepatch/module-elf-format.rst index dbe9b400e39fe041bd36ce038a8ed7fe58e43650..7347638895a00d475348735e503b9d0caa546def 100644 --- a/Documentation/livepatch/module-elf-format.rst +++ b/Documentation/livepatch/module-elf-format.rst @@ -210,11 +210,11 @@ module->symtab. ===================================== Normally, a stripped down copy of a module's symbol table (containing only "core" symbols) is made available through module->symtab (See layout_symtab() -in kernel/module.c). For livepatch modules, the symbol table copied into memory -on module load must be exactly the same as the symbol table produced when the -patch module was compiled. This is because the relocations in each livepatch -relocation section refer to their respective symbols with their symbol indices, -and the original symbol indices (and thus the symtab ordering) must be +in kernel/module/kallsyms.c). For livepatch modules, the symbol table copied +into memory on module load must be exactly the same as the symbol table produced +when the patch module was compiled. This is because the relocations in each +livepatch relocation section refer to their respective symbols with their symbol +indices, and the original symbol indices (and thus the symtab ordering) must be preserved in order for apply_relocate_add() to find the right symbol. For example, take this particular rela from a livepatch module::: diff --git a/Documentation/loongarch/introduction.rst b/Documentation/loongarch/introduction.rst index 2bf40ad370dfadf73a253dbbadf66bfce5c7f3d2..216b3f390e806ae4d366d43a6260031b7fca2cb4 100644 --- a/Documentation/loongarch/introduction.rst +++ b/Documentation/loongarch/introduction.rst @@ -45,10 +45,12 @@ Name Alias Usage Preserved ``$r23``-``$r31`` ``$s0``-``$s8`` Static registers Yes ================= =============== =================== ============ -Note: The register ``$r21`` is reserved in the ELF psABI, but used by the Linux -kernel for storing the percpu base address. It normally has no ABI name, but is -called ``$u0`` in the kernel. You may also see ``$v0`` or ``$v1`` in some old code, -however they are deprecated aliases of ``$a0`` and ``$a1`` respectively. +.. Note:: + The register ``$r21`` is reserved in the ELF psABI, but used by the Linux + kernel for storing the percpu base address. It normally has no ABI name, + but is called ``$u0`` in the kernel. You may also see ``$v0`` or ``$v1`` + in some old code,however they are deprecated aliases of ``$a0`` and ``$a1`` + respectively. FPRs ---- @@ -69,8 +71,9 @@ Name Alias Usage Preserved ``$f24``-``$f31`` ``$fs0``-``$fs7`` Static registers Yes ================= ================== =================== ============ -Note: You may see ``$fv0`` or ``$fv1`` in some old code, however they are deprecated -aliases of ``$fa0`` and ``$fa1`` respectively. +.. Note:: + You may see ``$fv0`` or ``$fv1`` in some old code, however they are + deprecated aliases of ``$fa0`` and ``$fa1`` respectively. VRs ---- diff --git a/Documentation/loongarch/irq-chip-model.rst b/Documentation/loongarch/irq-chip-model.rst index 8d88f7ab2e5e91e6c0f3f8030cb8a55e60384e0a..7988f41923639dd3e2fc93e1709b2a5f4abe3405 100644 --- a/Documentation/loongarch/irq-chip-model.rst +++ b/Documentation/loongarch/irq-chip-model.rst @@ -145,12 +145,16 @@ Documentation of Loongson's LS7A chipset: https://github.com/loongson/LoongArch-Documentation/releases/latest/download/Loongson-7A1000-usermanual-2.00-EN.pdf (in English) -Note: CPUINTC is CSR.ECFG/CSR.ESTAT and its interrupt controller described -in Section 7.4 of "LoongArch Reference Manual, Vol 1"; LIOINTC is "Legacy I/O -Interrupts" described in Section 11.1 of "Loongson 3A5000 Processor Reference -Manual"; EIOINTC is "Extended I/O Interrupts" described in Section 11.2 of -"Loongson 3A5000 Processor Reference Manual"; HTVECINTC is "HyperTransport -Interrupts" described in Section 14.3 of "Loongson 3A5000 Processor Reference -Manual"; PCH-PIC/PCH-MSI is "Interrupt Controller" described in Section 5 of -"Loongson 7A1000 Bridge User Manual"; PCH-LPC is "LPC Interrupts" described in -Section 24.3 of "Loongson 7A1000 Bridge User Manual". +.. Note:: + - CPUINTC is CSR.ECFG/CSR.ESTAT and its interrupt controller described + in Section 7.4 of "LoongArch Reference Manual, Vol 1"; + - LIOINTC is "Legacy I/OInterrupts" described in Section 11.1 of + "Loongson 3A5000 Processor Reference Manual"; + - EIOINTC is "Extended I/O Interrupts" described in Section 11.2 of + "Loongson 3A5000 Processor Reference Manual"; + - HTVECINTC is "HyperTransport Interrupts" described in Section 14.3 of + "Loongson 3A5000 Processor Reference Manual"; + - PCH-PIC/PCH-MSI is "Interrupt Controller" described in Section 5 of + "Loongson 7A1000 Bridge User Manual"; + - PCH-LPC is "LPC Interrupts" described in Section 24.3 of + "Loongson 7A1000 Bridge User Manual". diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 04216564a03cb5951e82a8eb96ffcbd62a76af70..b3a534ed0e7c5fa1de2f847a67c8308f8b8342cd 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1085,7 +1085,7 @@ cipso_cache_enable - BOOLEAN cipso_cache_bucket_size - INTEGER The CIPSO label cache consists of a fixed size hash table with each hash bucket containing a number of cache entries. This variable limits - the number of entries in each hash bucket; the larger the value the + the number of entries in each hash bucket; the larger the value is, the more CIPSO label mappings that can be cached. When the number of entries in a given hash bucket reaches this limit adding new entries causes the oldest entry in the bucket to be removed to make room. @@ -1179,7 +1179,7 @@ ip_autobind_reuse - BOOLEAN option should only be set by experts. Default: 0 -ip_dynaddr - BOOLEAN +ip_dynaddr - INTEGER If set non-zero, enables support for dynamic addresses. If set to a non-zero value larger than 1, a kernel log message will be printed when dynamic address rewriting @@ -2925,6 +2925,43 @@ plpmtud_probe_interval - INTEGER Default: 0 +reconf_enable - BOOLEAN + Enable or disable extension of Stream Reconfiguration functionality + specified in RFC6525. This extension provides the ability to "reset" + a stream, and it includes the Parameters of "Outgoing/Incoming SSN + Reset", "SSN/TSN Reset" and "Add Outgoing/Incoming Streams". + + - 1: Enable extension. + - 0: Disable extension. + + Default: 0 + +intl_enable - BOOLEAN + Enable or disable extension of User Message Interleaving functionality + specified in RFC8260. This extension allows the interleaving of user + messages sent on different streams. With this feature enabled, I-DATA + chunk will replace DATA chunk to carry user messages if also supported + by the peer. Note that to use this feature, one needs to set this option + to 1 and also needs to set socket options SCTP_FRAGMENT_INTERLEAVE to 2 + and SCTP_INTERLEAVING_SUPPORTED to 1. + + - 1: Enable extension. + - 0: Disable extension. + + Default: 0 + +ecn_enable - BOOLEAN + Control use of Explicit Congestion Notification (ECN) by SCTP. + Like in TCP, ECN is used only when both ends of the SCTP connection + indicate support for it. This feature is useful in avoiding losses + due to congestion by allowing supporting routers to signal congestion + before having to drop packets. + + 1: Enable ecn. + 0: Disable ecn. + + Default: 1 + ``/proc/sys/net/core/*`` ======================== diff --git a/Documentation/networking/phy.rst b/Documentation/networking/phy.rst index d43da709bf40a25e64e05042d7af0c7bd3ef4d8e..704f31da51672e3ae4c057e8aeab6f225206f3a2 100644 --- a/Documentation/networking/phy.rst +++ b/Documentation/networking/phy.rst @@ -104,7 +104,7 @@ Whenever possible, use the PHY side RGMII delay for these reasons: * PHY device drivers in PHYLIB being reusable by nature, being able to configure correctly a specified delay enables more designs with similar delay - requirements to be operate correctly + requirements to be operated correctly For cases where the PHY is not capable of providing this delay, but the Ethernet MAC driver is capable of doing so, the correct phy_interface_t value diff --git a/Documentation/process/maintainer-netdev.rst b/Documentation/process/maintainer-netdev.rst index c456b5225d6645754355e85b967ee0ce9db1c1e7..d140070815955fc710fd2dcac854438b24c910ba 100644 --- a/Documentation/process/maintainer-netdev.rst +++ b/Documentation/process/maintainer-netdev.rst @@ -6,6 +6,15 @@ netdev FAQ ========== +tl;dr +----- + + - designate your patch to a tree - ``[PATCH net]`` or ``[PATCH net-next]`` + - for fixes the ``Fixes:`` tag is required, regardless of the tree + - don't post large series (> 15 patches), break them up + - don't repost your patches within one 24h period + - reverse xmas tree + What is netdev? --------------- It is a mailing list for all network-related Linux stuff. This @@ -136,6 +145,20 @@ it to the maintainer to figure out what is the most recent and current version that should be applied. If there is any doubt, the maintainer will reply and ask what should be done. +How do I divide my work into patches? +------------------------------------- + +Put yourself in the shoes of the reviewer. Each patch is read separately +and therefore should constitute a comprehensible step towards your stated +goal. + +Avoid sending series longer than 15 patches. Larger series takes longer +to review as reviewers will defer looking at it until they find a large +chunk of time. A small series can be reviewed in a short time, so Maintainers +just do it. As a result, a sequence of smaller series gets merged quicker and +with better review coverage. Re-posting large series also increases the mailing +list traffic. + I made changes to only a few patches in a patch series should I resend only those changed? ------------------------------------------------------------------------------------------ No, please resend the entire patch series and make sure you do number your @@ -183,6 +206,19 @@ it is requested that you make it look like this:: * another line of text */ +What is "reverse xmas tree"? +---------------------------- + +Netdev has a convention for ordering local variables in functions. +Order the variable declaration lines longest to shortest, e.g.:: + + struct scatterlist *sg; + struct sk_buff *skb; + int err, i; + +If there are dependencies between the variables preventing the ordering +move the initialization out of line. + I am working in existing code which uses non-standard formatting. Which formatting should I use? ------------------------------------------------------------------------------------------------ Make your code follow the most recent guidelines, so that eventually all code diff --git a/Documentation/sound/soc/dai.rst b/Documentation/sound/soc/dai.rst index 009b07e5a0f38284ff1c57fe62febf6cd99f0ffe..bf8431386d26983f20c2d8c4047eca6561258fbf 100644 --- a/Documentation/sound/soc/dai.rst +++ b/Documentation/sound/soc/dai.rst @@ -10,7 +10,7 @@ AC97 ==== AC97 is a five wire interface commonly found on many PC sound cards. It is -now also popular in many portable devices. This DAI has a reset line and time +now also popular in many portable devices. This DAI has a RESET line and time multiplexes its data on its SDATA_OUT (playback) and SDATA_IN (capture) lines. The bit clock (BCLK) is always driven by the CODEC (usually 12.288MHz) and the frame (FRAME) (usually 48kHz) is always driven by the controller. Each AC97 diff --git a/Documentation/translations/it_IT/core-api/symbol-namespaces.rst b/Documentation/translations/it_IT/core-api/symbol-namespaces.rst index 42f5d04e38ec03e59aed8472f2e6419fba904c67..0f6898860d6d1ece70c9efb9e76011007c693668 100644 --- a/Documentation/translations/it_IT/core-api/symbol-namespaces.rst +++ b/Documentation/translations/it_IT/core-api/symbol-namespaces.rst @@ -50,9 +50,9 @@ Di conseguenza, nella tabella dei simboli del kernel ci sarà una voce rappresentata dalla struttura ``kernel_symbol`` che avrà il campo ``namespace`` (spazio dei nomi) impostato. Un simbolo esportato senza uno spazio dei nomi avrà questo campo impostato a ``NULL``. Non esiste uno spazio dei nomi -di base. Il programma ``modpost`` e il codice in kernel/module.c usano lo spazio -dei nomi, rispettivamente, durante la compilazione e durante il caricamento -di un modulo. +di base. Il programma ``modpost`` e il codice in kernel/module/main.c usano lo +spazio dei nomi, rispettivamente, durante la compilazione e durante il +caricamento di un modulo. 2.2 Usare il simbolo di preprocessore DEFAULT_SYMBOL_NAMESPACE ============================================================== diff --git a/Documentation/translations/zh_CN/core-api/kernel-api.rst b/Documentation/translations/zh_CN/core-api/kernel-api.rst index e45fe80d1cd88439b7f5f3a1780881ea2dd5199a..962d31d019d74331cda09e55e557306f0a52b0dc 100644 --- a/Documentation/translations/zh_CN/core-api/kernel-api.rst +++ b/Documentation/translations/zh_CN/core-api/kernel-api.rst @@ -224,7 +224,7 @@ kernel/kmod.c 模块接口支持 ------------ -更多信息请参考文件kernel/module.c。 +更多信息请参阅kernel/module/目录下的文件。 硬件接口 ======== diff --git a/Documentation/translations/zh_CN/core-api/symbol-namespaces.rst b/Documentation/translations/zh_CN/core-api/symbol-namespaces.rst index 6abf7ed534ca0c4ddac05d199be922f1ed109483..bb16f0611046d3435b095ef8325bb2298291f05b 100644 --- a/Documentation/translations/zh_CN/core-api/symbol-namespaces.rst +++ b/Documentation/translations/zh_CN/core-api/symbol-namespaces.rst @@ -52,7 +52,7 @@ 相应的 ksymtab 条目结构体 ``kernel_symbol`` 将有相应的成员 ``命名空间`` 集。 导出时未指明命名空间的符号将指向 ``NULL`` 。如果没有定义命名空间,则默认没有。 -``modpost`` 和kernel/module.c分别在构建时或模块加载时使用名称空间。 +``modpost`` 和kernel/module/main.c分别在构建时或模块加载时使用名称空间。 2.2 使用DEFAULT_SYMBOL_NAMESPACE定义 ==================================== diff --git a/Documentation/translations/zh_CN/loongarch/introduction.rst b/Documentation/translations/zh_CN/loongarch/introduction.rst index e31a1a928c4842a4843edc9553f3cfc963125f12..11686ee0caeb1e4b7c45a5e9fdefd9c0a4018362 100644 --- a/Documentation/translations/zh_CN/loongarch/introduction.rst +++ b/Documentation/translations/zh_CN/loongarch/introduction.rst @@ -46,10 +46,11 @@ LA64中每个寄存器为64位宽。 ``$r0`` 的内容总是固定为0,而其 ``$r23``-``$r31`` ``$s0``-``$s8`` 静态寄存器 是 ================= =============== =================== ========== -注意:``$r21``寄存器在ELF psABI中保留未使用,但是在Linux内核用于保存每CPU -变量基地址。该寄存器没有ABI命名,不过在内核中称为``$u0``。在一些遗留代码 -中有时可能见到``$v0``和``$v1``,它们是``$a0``和``$a1``的别名,属于已经废弃 -的用法。 +.. note:: + 注意: ``$r21`` 寄存器在ELF psABI中保留未使用,但是在Linux内核用于保 + 存每CPU变量基地址。该寄存器没有ABI命名,不过在内核中称为 ``$u0`` 。在 + 一些遗留代码中有时可能见到 ``$v0`` 和 ``$v1`` ,它们是 ``$a0`` 和 + ``$a1`` 的别名,属于已经废弃的用法。 浮点寄存器 ---------- @@ -68,8 +69,9 @@ LA64中每个寄存器为64位宽。 ``$r0`` 的内容总是固定为0,而其 ``$f24``-``$f31`` ``$fs0``-``$fs7`` 静态寄存器 是 ================= ================== =================== ========== -注意:在一些遗留代码中有时可能见到 ``$v0`` 和 ``$v1`` ,它们是 ``$a0`` -和 ``$a1`` 的别名,属于已经废弃的用法。 +.. note:: + 注意:在一些遗留代码中有时可能见到 ``$v0`` 和 ``$v1`` ,它们是 + ``$a0`` 和 ``$a1`` 的别名,属于已经废弃的用法。 向量寄存器 diff --git a/Documentation/translations/zh_CN/loongarch/irq-chip-model.rst b/Documentation/translations/zh_CN/loongarch/irq-chip-model.rst index 2a4c3ad38be4a67453b34e6fdb3217a3c65c54a6..fb5d23b49ed552fb5fc4ec577c8ec2acc782f401 100644 --- a/Documentation/translations/zh_CN/loongarch/irq-chip-model.rst +++ b/Documentation/translations/zh_CN/loongarch/irq-chip-model.rst @@ -147,9 +147,11 @@ PCH-LPC:: https://github.com/loongson/LoongArch-Documentation/releases/latest/download/Loongson-7A1000-usermanual-2.00-EN.pdf (英文版) -注:CPUINTC即《龙芯架构参考手册卷一》第7.4节所描述的CSR.ECFG/CSR.ESTAT寄存器及其中断 -控制逻辑;LIOINTC即《龙芯3A5000处理器使用手册》第11.1节所描述的“传统I/O中断”;EIOINTC -即《龙芯3A5000处理器使用手册》第11.2节所描述的“扩展I/O中断”;HTVECINTC即《龙芯3A5000 -处理器使用手册》第14.3节所描述的“HyperTransport中断”;PCH-PIC/PCH-MSI即《龙芯7A1000桥 -片用户手册》第5章所描述的“中断控制器”;PCH-LPC即《龙芯7A1000桥片用户手册》第24.3节所 -描述的“LPC中断”。 +.. note:: + - CPUINTC:即《龙芯架构参考手册卷一》第7.4节所描述的CSR.ECFG/CSR.ESTAT寄存器及其 + 中断控制逻辑; + - LIOINTC:即《龙芯3A5000处理器使用手册》第11.1节所描述的“传统I/O中断”; + - EIOINTC:即《龙芯3A5000处理器使用手册》第11.2节所描述的“扩展I/O中断”; + - HTVECINTC:即《龙芯3A5000处理器使用手册》第14.3节所描述的“HyperTransport中断”; + - PCH-PIC/PCH-MSI:即《龙芯7A1000桥片用户手册》第5章所描述的“中断控制器”; + - PCH-LPC:即《龙芯7A1000桥片用户手册》第24.3节所描述的“LPC中断”。 diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 11e00a46c610f913455ae86df942d1e98709a0a4..6e090fb96a0edb4d83c386b01f8a839056cc9206 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -5657,6 +5657,7 @@ by a string of size ``name_size``. #define KVM_STATS_UNIT_BYTES (0x1 << KVM_STATS_UNIT_SHIFT) #define KVM_STATS_UNIT_SECONDS (0x2 << KVM_STATS_UNIT_SHIFT) #define KVM_STATS_UNIT_CYCLES (0x3 << KVM_STATS_UNIT_SHIFT) + #define KVM_STATS_UNIT_BOOLEAN (0x4 << KVM_STATS_UNIT_SHIFT) #define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_CYCLES #define KVM_STATS_BASE_SHIFT 8 @@ -5702,14 +5703,13 @@ Bits 0-3 of ``flags`` encode the type: by the ``hist_param`` field. The range of the Nth bucket (1 <= N < ``size``) is [``hist_param``*(N-1), ``hist_param``*N), while the range of the last bucket is [``hist_param``*(``size``-1), +INF). (+INF means positive infinity - value.) The bucket value indicates how many samples fell in the bucket's range. + value.) * ``KVM_STATS_TYPE_LOG_HIST`` The statistic is reported as a logarithmic histogram. The number of buckets is specified by the ``size`` field. The range of the first bucket is [0, 1), while the range of the last bucket is [pow(2, ``size``-2), +INF). Otherwise, The Nth bucket (1 < N < ``size``) covers - [pow(2, N-2), pow(2, N-1)). The bucket value indicates how many samples fell - in the bucket's range. + [pow(2, N-2), pow(2, N-1)). Bits 4-7 of ``flags`` encode the unit: @@ -5724,6 +5724,15 @@ Bits 4-7 of ``flags`` encode the unit: It indicates that the statistics data is used to measure time or latency. * ``KVM_STATS_UNIT_CYCLES`` It indicates that the statistics data is used to measure CPU clock cycles. + * ``KVM_STATS_UNIT_BOOLEAN`` + It indicates that the statistic will always be either 0 or 1. Boolean + statistics of "peak" type will never go back from 1 to 0. Boolean + statistics can be linear histograms (with two buckets) but not logarithmic + histograms. + +Note that, in the case of histograms, the unit applies to the bucket +ranges, while the bucket value indicates how many samples fell in the +bucket's range. Bits 8-11 of ``flags``, together with ``exponent``, encode the scale of the unit: @@ -5746,7 +5755,7 @@ the corresponding statistics data. The ``bucket_size`` field is used as a parameter for histogram statistics data. It is only used by linear histogram statistics data, specifying the size of a -bucket. +bucket in the unit expressed by bits 4-11 of ``flags`` together with ``exponent``. The ``name`` field is the name string of the statistics data. The name string starts at the end of ``struct kvm_stats_desc``. The maximum length including diff --git a/Documentation/vm/hwpoison.rst b/Documentation/vm/hwpoison.rst index c742de1769d18d779111829ce67de517e5d3cba8..b9d5253c1305762248c35be3d3512904c99a0ff0 100644 --- a/Documentation/vm/hwpoison.rst +++ b/Documentation/vm/hwpoison.rst @@ -120,7 +120,8 @@ Testing unpoison-pfn Software-unpoison page at PFN echoed into this file. This way a page can be reused again. This only works for Linux - injected failures, not for real memory failures. + injected failures, not for real memory failures. Once any hardware + memory failure happens, this feature is disabled. Note these injection interfaces are not stable and might change between kernel versions diff --git a/MAINTAINERS b/MAINTAINERS index 1fc9ead83d2aa3e60ccc4cfa8ee16df09ef579bf..651616ed8ae25e8e790915721d86c6404cb171fa 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -426,7 +426,7 @@ F: drivers/acpi/*thermal* ACPI VIOT DRIVER M: Jean-Philippe Brucker L: linux-acpi@vger.kernel.org -L: iommu@lists.linux-foundation.org +L: iommu@lists.linux.dev S: Maintained F: drivers/acpi/viot.c F: include/linux/acpi_viot.h @@ -959,7 +959,7 @@ F: drivers/video/fbdev/geode/ AMD IOMMU (AMD-VI) M: Joerg Roedel R: Suravee Suthikulpanit -L: iommu@lists.linux-foundation.org +L: iommu@lists.linux.dev S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git F: drivers/iommu/amd/ @@ -1038,6 +1038,7 @@ F: arch/arm64/boot/dts/amd/ AMD XGBE DRIVER M: Tom Lendacky +M: "Shyam Sundar S K" L: netdev@vger.kernel.org S: Supported F: arch/arm64/boot/dts/amd/amd-seattle-xgbe*.dtsi @@ -2467,6 +2468,7 @@ ARM/NXP S32G ARCHITECTURE M: Chester Lin R: Andreas Färber R: Matthias Brugger +R: NXP S32 Linux Team L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: arch/arm64/boot/dts/freescale/s32g*.dts* @@ -2496,10 +2498,8 @@ F: drivers/power/reset/oxnas-restart.c N: oxnas ARM/PALM TREO SUPPORT -M: Tomas Cech L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) -S: Maintained -W: http://hackndev.com +S: Orphan F: arch/arm/mach-pxa/palmtreo.* ARM/PALMTX,PALMT5,PALMLD,PALMTE2,PALMTC SUPPORT @@ -2537,6 +2537,7 @@ W: http://www.armlinux.org.uk/ ARM/QUALCOMM SUPPORT M: Andy Gross M: Bjorn Andersson +R: Konrad Dybcio L: linux-arm-msm@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/qcom/linux.git @@ -3614,16 +3615,18 @@ S: Maintained F: Documentation/devicetree/bindings/iio/accel/bosch,bma400.yaml F: drivers/iio/accel/bma400* -BPF (Safe dynamic programs and tools) +BPF [GENERAL] (Safe Dynamic Programs and Tools) M: Alexei Starovoitov M: Daniel Borkmann M: Andrii Nakryiko -R: Martin KaFai Lau -R: Song Liu +R: Martin KaFai Lau +R: Song Liu R: Yonghong Song R: John Fastabend R: KP Singh -L: netdev@vger.kernel.org +R: Stanislav Fomichev +R: Hao Luo +R: Jiri Olsa L: bpf@vger.kernel.org S: Supported W: https://bpf.io/ @@ -3655,21 +3658,17 @@ F: scripts/pahole-version.sh F: tools/bpf/ F: tools/lib/bpf/ F: tools/testing/selftests/bpf/ -N: bpf -K: bpf BPF JIT for ARM M: Shubham Bansal -L: netdev@vger.kernel.org L: bpf@vger.kernel.org -S: Maintained +S: Odd Fixes F: arch/arm/net/ BPF JIT for ARM64 M: Daniel Borkmann M: Alexei Starovoitov M: Zi Shen Lim -L: netdev@vger.kernel.org L: bpf@vger.kernel.org S: Supported F: arch/arm64/net/ @@ -3677,29 +3676,26 @@ F: arch/arm64/net/ BPF JIT for MIPS (32-BIT AND 64-BIT) M: Johan Almbladh M: Paul Burton -L: netdev@vger.kernel.org L: bpf@vger.kernel.org S: Maintained F: arch/mips/net/ BPF JIT for NFP NICs M: Jakub Kicinski -L: netdev@vger.kernel.org L: bpf@vger.kernel.org -S: Supported +S: Odd Fixes F: drivers/net/ethernet/netronome/nfp/bpf/ BPF JIT for POWERPC (32-BIT AND 64-BIT) M: Naveen N. Rao -L: netdev@vger.kernel.org +M: Michael Ellerman L: bpf@vger.kernel.org -S: Maintained +S: Supported F: arch/powerpc/net/ BPF JIT for RISC-V (32-bit) M: Luke Nelson M: Xi Wang -L: netdev@vger.kernel.org L: bpf@vger.kernel.org S: Maintained F: arch/riscv/net/ @@ -3707,7 +3703,6 @@ X: arch/riscv/net/bpf_jit_comp64.c BPF JIT for RISC-V (64-bit) M: Björn Töpel -L: netdev@vger.kernel.org L: bpf@vger.kernel.org S: Maintained F: arch/riscv/net/ @@ -3717,36 +3712,80 @@ BPF JIT for S390 M: Ilya Leoshkevich M: Heiko Carstens M: Vasily Gorbik -L: netdev@vger.kernel.org L: bpf@vger.kernel.org -S: Maintained +S: Supported F: arch/s390/net/ X: arch/s390/net/pnet.c BPF JIT for SPARC (32-BIT AND 64-BIT) M: David S. Miller -L: netdev@vger.kernel.org L: bpf@vger.kernel.org -S: Maintained +S: Odd Fixes F: arch/sparc/net/ BPF JIT for X86 32-BIT M: Wang YanQing -L: netdev@vger.kernel.org L: bpf@vger.kernel.org -S: Maintained +S: Odd Fixes F: arch/x86/net/bpf_jit_comp32.c BPF JIT for X86 64-BIT M: Alexei Starovoitov M: Daniel Borkmann -L: netdev@vger.kernel.org L: bpf@vger.kernel.org S: Supported F: arch/x86/net/ X: arch/x86/net/bpf_jit_comp32.c -BPF LSM (Security Audit and Enforcement using BPF) +BPF [CORE] +M: Alexei Starovoitov +M: Daniel Borkmann +R: John Fastabend +L: bpf@vger.kernel.org +S: Maintained +F: kernel/bpf/verifier.c +F: kernel/bpf/tnum.c +F: kernel/bpf/core.c +F: kernel/bpf/syscall.c +F: kernel/bpf/dispatcher.c +F: kernel/bpf/trampoline.c +F: include/linux/bpf* +F: include/linux/filter.h + +BPF [BTF] +M: Martin KaFai Lau +L: bpf@vger.kernel.org +S: Maintained +F: kernel/bpf/btf.c +F: include/linux/btf* + +BPF [TRACING] +M: Song Liu +R: Jiri Olsa +L: bpf@vger.kernel.org +S: Maintained +F: kernel/trace/bpf_trace.c +F: kernel/bpf/stackmap.c + +BPF [NETWORKING] (tc BPF, sock_addr) +M: Martin KaFai Lau +M: Daniel Borkmann +R: John Fastabend +L: bpf@vger.kernel.org +L: netdev@vger.kernel.org +S: Maintained +F: net/core/filter.c +F: net/sched/act_bpf.c +F: net/sched/cls_bpf.c + +BPF [NETWORKING] (struct_ops, reuseport) +M: Martin KaFai Lau +L: bpf@vger.kernel.org +L: netdev@vger.kernel.org +S: Maintained +F: kernel/bpf/bpf_struct* + +BPF [SECURITY & LSM] (Security Audit and Enforcement using BPF) M: KP Singh R: Florent Revest R: Brendan Jackman @@ -3757,13 +3796,64 @@ F: include/linux/bpf_lsm.h F: kernel/bpf/bpf_lsm.c F: security/bpf/ -BPFTOOL +BPF [STORAGE & CGROUPS] +M: Martin KaFai Lau +L: bpf@vger.kernel.org +S: Maintained +F: kernel/bpf/cgroup.c +F: kernel/bpf/*storage.c +F: kernel/bpf/bpf_lru* + +BPF [RINGBUF] +M: Andrii Nakryiko +L: bpf@vger.kernel.org +S: Maintained +F: kernel/bpf/ringbuf.c + +BPF [ITERATOR] +M: Yonghong Song +L: bpf@vger.kernel.org +S: Maintained +F: kernel/bpf/*iter.c + +BPF [L7 FRAMEWORK] (sockmap) +M: John Fastabend +M: Jakub Sitnicki +L: netdev@vger.kernel.org +L: bpf@vger.kernel.org +S: Maintained +F: include/linux/skmsg.h +F: net/core/skmsg.c +F: net/core/sock_map.c +F: net/ipv4/tcp_bpf.c +F: net/ipv4/udp_bpf.c +F: net/unix/unix_bpf.c + +BPF [LIBRARY] (libbpf) +M: Andrii Nakryiko +L: bpf@vger.kernel.org +S: Maintained +F: tools/lib/bpf/ + +BPF [TOOLING] (bpftool) M: Quentin Monnet L: bpf@vger.kernel.org S: Maintained F: kernel/bpf/disasm.* F: tools/bpf/bpftool/ +BPF [SELFTESTS] (Test Runners & Infrastructure) +M: Andrii Nakryiko +R: Mykola Lysenko +L: bpf@vger.kernel.org +S: Maintained +F: tools/testing/selftests/bpf/ + +BPF [MISC] +L: bpf@vger.kernel.org +S: Odd Fixes +K: (?:\b|_)bpf(?:\b|_) + BROADCOM B44 10/100 ETHERNET DRIVER M: Michael Chan L: netdev@vger.kernel.org @@ -3796,12 +3886,12 @@ N: bcmbca N: bcm[9]?47622 BROADCOM BCM2711/BCM2835 ARM ARCHITECTURE -M: Nicolas Saenz Julienne +M: Florian Fainelli R: Broadcom internal kernel review list L: linux-rpi-kernel@lists.infradead.org (moderated for non-subscribers) L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained -T: git git://git.kernel.org/pub/scm/linux/kernel/git/nsaenz/linux-rpi.git +T: git git://github.com/broadcom/stblinux.git F: Documentation/devicetree/bindings/pci/brcm,stb-pcie.yaml F: drivers/pci/controller/pcie-brcmstb.c F: drivers/staging/vc04_services @@ -4959,6 +5049,7 @@ Q: http://patchwork.kernel.org/project/linux-clk/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/clk/linux.git F: Documentation/devicetree/bindings/clock/ F: drivers/clk/ +F: include/dt-bindings/clock/ F: include/linux/clk-pr* F: include/linux/clk/ F: include/linux/of_clk.h @@ -5009,7 +5100,7 @@ COMPUTE EXPRESS LINK (CXL) M: Alison Schofield M: Vishal Verma M: Ira Weiny -M: Ben Widawsky +M: Ben Widawsky M: Dan Williams L: linux-cxl@vger.kernel.org S: Maintained @@ -5961,7 +6052,7 @@ DMA MAPPING HELPERS M: Christoph Hellwig M: Marek Szyprowski R: Robin Murphy -L: iommu@lists.linux-foundation.org +L: iommu@lists.linux.dev S: Supported W: http://git.infradead.org/users/hch/dma-mapping.git T: git git://git.infradead.org/users/hch/dma-mapping.git @@ -5973,7 +6064,7 @@ F: kernel/dma/ DMA MAPPING BENCHMARK M: Xiang Chen -L: iommu@lists.linux-foundation.org +L: iommu@lists.linux.dev F: kernel/dma/map_benchmark.c F: tools/testing/selftests/dma/ @@ -7557,7 +7648,7 @@ F: drivers/gpu/drm/exynos/exynos_dp* EXYNOS SYSMMU (IOMMU) driver M: Marek Szyprowski -L: iommu@lists.linux-foundation.org +L: iommu@lists.linux.dev S: Maintained F: drivers/iommu/exynos-iommu.c @@ -8479,6 +8570,7 @@ F: Documentation/devicetree/bindings/gpio/ F: Documentation/driver-api/gpio/ F: drivers/gpio/ F: include/asm-generic/gpio.h +F: include/dt-bindings/gpio/ F: include/linux/gpio.h F: include/linux/gpio/ F: include/linux/of_gpio.h @@ -9132,6 +9224,7 @@ F: drivers/media/platform/st/sti/hva HWPOISON MEMORY FAILURE HANDLING M: Naoya Horiguchi +R: Miaohe Lin L: linux-mm@kvack.org S: Maintained F: mm/hwpoison-inject.c @@ -9276,6 +9369,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux.git F: Documentation/devicetree/bindings/i2c/i2c.txt F: Documentation/i2c/ F: drivers/i2c/* +F: include/dt-bindings/i2c/i2c.h F: include/linux/i2c-dev.h F: include/linux/i2c-smbus.h F: include/linux/i2c.h @@ -9291,6 +9385,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux.git F: Documentation/devicetree/bindings/i2c/ F: drivers/i2c/algos/ F: drivers/i2c/busses/ +F: include/dt-bindings/i2c/ I2C-TAOS-EVM DRIVER M: Jean Delvare @@ -9811,7 +9906,10 @@ INTEL ASoC DRIVERS M: Cezary Rojewski M: Pierre-Louis Bossart M: Liam Girdwood -M: Jie Yang +M: Peter Ujfalusi +M: Bard Liao +M: Ranjani Sridharan +M: Kai Vehmanen L: alsa-devel@alsa-project.org (moderated for non-subscribers) S: Supported F: sound/soc/intel/ @@ -9974,7 +10072,7 @@ F: drivers/hid/intel-ish-hid/ INTEL IOMMU (VT-d) M: David Woodhouse M: Lu Baolu -L: iommu@lists.linux-foundation.org +L: iommu@lists.linux.dev S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git F: drivers/iommu/intel/ @@ -10353,7 +10451,7 @@ F: include/linux/iomap.h IOMMU DRIVERS M: Joerg Roedel M: Will Deacon -L: iommu@lists.linux-foundation.org +L: iommu@lists.linux.dev S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git F: Documentation/devicetree/bindings/iommu/ @@ -10830,6 +10928,7 @@ M: Marc Zyngier R: James Morse R: Alexandru Elisei R: Suzuki K Poulose +R: Oliver Upton L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: kvmarm@lists.cs.columbia.edu (moderated for non-subscribers) S: Maintained @@ -10872,7 +10971,6 @@ F: arch/riscv/include/asm/kvm* F: arch/riscv/include/uapi/asm/kvm* F: arch/riscv/kvm/ F: tools/testing/selftests/kvm/*/riscv/ -F: tools/testing/selftests/kvm/riscv/ KERNEL VIRTUAL MACHINE for s390 (KVM/s390) M: Christian Borntraeger @@ -10897,28 +10995,51 @@ F: tools/testing/selftests/kvm/*/s390x/ F: tools/testing/selftests/kvm/s390x/ KERNEL VIRTUAL MACHINE FOR X86 (KVM/x86) +M: Sean Christopherson M: Paolo Bonzini -R: Sean Christopherson -R: Vitaly Kuznetsov -R: Wanpeng Li -R: Jim Mattson -R: Joerg Roedel L: kvm@vger.kernel.org S: Supported -W: http://www.linux-kvm.org T: git git://git.kernel.org/pub/scm/virt/kvm/kvm.git F: arch/x86/include/asm/kvm* -F: arch/x86/include/asm/pvclock-abi.h F: arch/x86/include/asm/svm.h F: arch/x86/include/asm/vmx*.h F: arch/x86/include/uapi/asm/kvm* F: arch/x86/include/uapi/asm/svm.h F: arch/x86/include/uapi/asm/vmx.h -F: arch/x86/kernel/kvm.c -F: arch/x86/kernel/kvmclock.c F: arch/x86/kvm/ F: arch/x86/kvm/*/ +KVM PARAVIRT (KVM/paravirt) +M: Paolo Bonzini +R: Wanpeng Li +R: Vitaly Kuznetsov +L: kvm@vger.kernel.org +S: Supported +T: git git://git.kernel.org/pub/scm/virt/kvm/kvm.git +F: arch/x86/kernel/kvm.c +F: arch/x86/kernel/kvmclock.c +F: arch/x86/include/asm/pvclock-abi.h +F: include/linux/kvm_para.h +F: include/uapi/linux/kvm_para.h +F: include/uapi/asm-generic/kvm_para.h +F: include/asm-generic/kvm_para.h +F: arch/um/include/asm/kvm_para.h +F: arch/x86/include/asm/kvm_para.h +F: arch/x86/include/uapi/asm/kvm_para.h + +KVM X86 HYPER-V (KVM/hyper-v) +M: Vitaly Kuznetsov +M: Sean Christopherson +M: Paolo Bonzini +L: kvm@vger.kernel.org +S: Supported +T: git git://git.kernel.org/pub/scm/virt/kvm/kvm.git +F: arch/x86/kvm/hyperv.* +F: arch/x86/kvm/kvm_onhyperv.* +F: arch/x86/kvm/svm/hyperv.* +F: arch/x86/kvm/svm/svm_onhyperv.* +F: arch/x86/kvm/vmx/evmcs.* + KERNFS M: Greg Kroah-Hartman M: Tejun Heo @@ -11097,20 +11218,6 @@ S: Maintained F: include/net/l3mdev.h F: net/l3mdev -L7 BPF FRAMEWORK -M: John Fastabend -M: Daniel Borkmann -M: Jakub Sitnicki -L: netdev@vger.kernel.org -L: bpf@vger.kernel.org -S: Maintained -F: include/linux/skmsg.h -F: net/core/skmsg.c -F: net/core/sock_map.c -F: net/ipv4/tcp_bpf.c -F: net/ipv4/udp_bpf.c -F: net/unix/unix_bpf.c - LANDLOCK SECURITY MODULE M: Mickaël Salaün L: linux-security-module@vger.kernel.org @@ -11590,6 +11697,7 @@ F: drivers/gpu/drm/bridge/lontium-lt8912b.c LOONGARCH M: Huacai Chen R: WANG Xuerui +L: loongarch@lists.linux.dev S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson.git F: arch/loongarch/ @@ -12502,7 +12610,7 @@ F: drivers/i2c/busses/i2c-mt65xx.c MEDIATEK IOMMU DRIVER M: Yong Wu -L: iommu@lists.linux-foundation.org +L: iommu@lists.linux.dev L: linux-mediatek@lists.infradead.org (moderated for non-subscribers) S: Supported F: Documentation/devicetree/bindings/iommu/mediatek* @@ -12845,9 +12953,8 @@ M: Andrew Morton L: linux-mm@kvack.org S: Maintained W: http://www.linux-mm.org -T: quilt https://ozlabs.org/~akpm/mmotm/ -T: quilt https://ozlabs.org/~akpm/mmots/ -T: git git://github.com/hnaz/linux-mm.git +T: git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm +T: quilt git://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new F: include/linux/gfp.h F: include/linux/memory_hotplug.h F: include/linux/mm.h @@ -12857,6 +12964,18 @@ F: include/linux/vmalloc.h F: mm/ F: tools/testing/selftests/vm/ +MEMORY HOT(UN)PLUG +M: David Hildenbrand +M: Oscar Salvador +L: linux-mm@kvack.org +S: Maintained +F: Documentation/admin-guide/mm/memory-hotplug.rst +F: Documentation/core-api/memory-hotplug.rst +F: drivers/base/memory.c +F: include/linux/memory_hotplug.h +F: mm/memory_hotplug.c +F: tools/testing/selftests/memory-hotplug/ + MEMORY TECHNOLOGY DEVICES (MTD) M: Miquel Raynal M: Richard Weinberger @@ -13801,6 +13920,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git F: Documentation/devicetree/bindings/net/ F: drivers/connector/ F: drivers/net/ +F: include/dt-bindings/net/ F: include/linux/etherdevice.h F: include/linux/fcdevice.h F: include/linux/fddidevice.h @@ -13952,7 +14072,6 @@ F: net/ipv6/tcp*.c NETWORKING [TLS] M: Boris Pismenny M: John Fastabend -M: Daniel Borkmann M: Jakub Kicinski L: netdev@vger.kernel.org S: Maintained @@ -14243,7 +14362,8 @@ S: Maintained F: drivers/net/phy/nxp-c45-tja11xx.c NXP FSPI DRIVER -M: Ashish Kumar +M: Han Xu +M: Haibo Chen R: Yogesh Gaur L: linux-spi@vger.kernel.org S: Maintained @@ -14261,7 +14381,7 @@ F: drivers/iio/gyro/fxas21002c_i2c.c F: drivers/iio/gyro/fxas21002c_spi.c NXP i.MX CLOCK DRIVERS -M: Abel Vesa +M: Abel Vesa L: linux-clk@vger.kernel.org L: linux-imx@nxp.com S: Maintained @@ -14349,9 +14469,8 @@ F: Documentation/devicetree/bindings/sound/nxp,tfa989x.yaml F: sound/soc/codecs/tfa989x.c NXP-NCI NFC DRIVER -R: Charles Gorand L: linux-nfc@lists.01.org (subscribers-only) -S: Supported +S: Orphan F: Documentation/devicetree/bindings/net/nfc/nxp,nci.yaml F: drivers/nfc/nxp-nci @@ -14869,6 +14988,7 @@ F: include/dt-bindings/ OPENCOMPUTE PTP CLOCK DRIVER M: Jonathan Lemon +M: Vadim Fedorenko L: netdev@vger.kernel.org S: Maintained F: drivers/ptp/ptp_ocp.c @@ -15739,7 +15859,7 @@ F: drivers/pinctrl/freescale/ PIN CONTROLLER - INTEL M: Mika Westerberg M: Andy Shevchenko -S: Maintained +S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/pinctrl/intel.git F: drivers/pinctrl/intel/ @@ -16261,7 +16381,7 @@ F: drivers/crypto/qat/ QCOM AUDIO (ASoC) DRIVERS M: Srinivas Kandagatla -M: Banajit Goswami +M: Banajit Goswami L: alsa-devel@alsa-project.org (moderated for non-subscribers) S: Supported F: sound/soc/codecs/lpass-va-macro.c @@ -16488,7 +16608,7 @@ F: Documentation/devicetree/bindings/opp/opp-v2-kryo-cpu.yaml F: drivers/cpufreq/qcom-cpufreq-nvmem.c QUALCOMM CRYPTO DRIVERS -M: Thara Gopinath +M: Thara Gopinath L: linux-crypto@vger.kernel.org L: linux-arm-msm@vger.kernel.org S: Maintained @@ -16542,7 +16662,7 @@ F: drivers/i2c/busses/i2c-qcom-cci.c QUALCOMM IOMMU M: Rob Clark -L: iommu@lists.linux-foundation.org +L: iommu@lists.linux.dev L: linux-arm-msm@vger.kernel.org S: Maintained F: drivers/iommu/arm/arm-smmu/qcom_iommu.c @@ -16598,7 +16718,7 @@ F: include/linux/if_rmnet.h QUALCOMM TSENS THERMAL DRIVER M: Amit Kucheria -M: Thara Gopinath +M: Thara Gopinath L: linux-pm@vger.kernel.org L: linux-arm-msm@vger.kernel.org S: Maintained @@ -17153,12 +17273,15 @@ N: riscv K: riscv RISC-V/MICROCHIP POLARFIRE SOC SUPPORT -M: Lewis Hanly M: Conor Dooley +M: Daire McNamara L: linux-riscv@lists.infradead.org S: Supported F: arch/riscv/boot/dts/microchip/ +F: drivers/char/hw_random/mpfs-rng.c +F: drivers/clk/microchip/clk-mpfs.c F: drivers/mailbox/mailbox-mpfs.c +F: drivers/pci/controller/pcie-microchip-host.c F: drivers/soc/microchip/ F: include/soc/microchip/mpfs.h @@ -18055,6 +18178,7 @@ F: drivers/misc/sgi-xp/ SHARED MEMORY COMMUNICATIONS (SMC) SOCKETS M: Karsten Graul +M: Wenjia Zhang L: linux-s390@vger.kernel.org S: Supported W: http://www.ibm.com/developerworks/linux/linux390/ @@ -18687,8 +18811,10 @@ F: sound/soc/ SOUND - SOUND OPEN FIRMWARE (SOF) DRIVERS M: Pierre-Louis Bossart M: Liam Girdwood +M: Peter Ujfalusi +M: Bard Liao M: Ranjani Sridharan -M: Kai Vehmanen +R: Kai Vehmanen M: Daniel Baluta L: sound-open-firmware@alsa-project.org (moderated for non-subscribers) S: Supported @@ -19167,7 +19293,7 @@ F: arch/x86/boot/video* SWIOTLB SUBSYSTEM M: Christoph Hellwig -L: iommu@lists.linux-foundation.org +L: iommu@lists.linux.dev S: Supported W: http://git.infradead.org/users/hch/dma-mapping.git T: git git://git.infradead.org/users/hch/dma-mapping.git @@ -19305,7 +19431,7 @@ R: Andy Shevchenko R: Mika Westerberg R: Jan Dabros L: linux-i2c@vger.kernel.org -S: Maintained +S: Supported F: drivers/i2c/busses/i2c-designware-* SYNOPSYS DESIGNWARE MMC/SD/SDIO DRIVER @@ -20712,6 +20838,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git F: Documentation/devicetree/bindings/usb/ F: Documentation/usb/ F: drivers/usb/ +F: include/dt-bindings/usb/ F: include/linux/usb.h F: include/linux/usb/ @@ -21841,7 +21968,7 @@ XEN SWIOTLB SUBSYSTEM M: Juergen Gross M: Stefano Stabellini L: xen-devel@lists.xenproject.org (moderated for non-subscribers) -L: iommu@lists.linux-foundation.org +L: iommu@lists.linux.dev S: Supported F: arch/x86/xen/*swiotlb* F: drivers/xen/*swiotlb* diff --git a/Makefile b/Makefile index 1a6678d817bd1ae70586cb4c1e70e9074d5f342f..00fd80c5dd6e65bd332632dbc1ebb3c35015129f 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 19 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc7 NAME = Superb Owl # *DOCUMENTATION* @@ -1141,7 +1141,7 @@ KBUILD_MODULES := 1 autoksyms_recursive: descend modules.order $(Q)$(CONFIG_SHELL) $(srctree)/scripts/adjust_autoksyms.sh \ - "$(MAKE) -f $(srctree)/Makefile vmlinux" + "$(MAKE) -f $(srctree)/Makefile autoksyms_recursive" endif autoksyms_h := $(if $(CONFIG_TRIM_UNUSED_KSYMS), include/generated/autoksyms.h) diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index 184899808ee73795a6081e31370a4eb6c3d829cb..5112f493f49462eac8ba7f2cee38deab059a2b93 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile @@ -1586,7 +1586,6 @@ dtb-$(CONFIG_ARCH_ASPEED) += \ aspeed-bmc-lenovo-hr630.dtb \ aspeed-bmc-lenovo-hr855xg2.dtb \ aspeed-bmc-microsoft-olympus.dtb \ - aspeed-bmc-nuvia-dc-scm.dtb \ aspeed-bmc-opp-lanyang.dtb \ aspeed-bmc-opp-mihawk.dtb \ aspeed-bmc-opp-mowgli.dtb \ @@ -1599,6 +1598,7 @@ dtb-$(CONFIG_ARCH_ASPEED) += \ aspeed-bmc-opp-witherspoon.dtb \ aspeed-bmc-opp-zaius.dtb \ aspeed-bmc-portwell-neptune.dtb \ + aspeed-bmc-qcom-dc-scm-v1.dtb \ aspeed-bmc-quanta-q71l.dtb \ aspeed-bmc-quanta-s6q.dtb \ aspeed-bmc-supermicro-x11spi.dtb \ diff --git a/arch/arm/boot/dts/aspeed-bmc-nuvia-dc-scm.dts b/arch/arm/boot/dts/aspeed-bmc-qcom-dc-scm-v1.dts similarity index 97% rename from arch/arm/boot/dts/aspeed-bmc-nuvia-dc-scm.dts rename to arch/arm/boot/dts/aspeed-bmc-qcom-dc-scm-v1.dts index f4a97cfb0f238ccb6d65b64a0254a5f6ca2439b4..259ef3f54c5ccde976088f7a117f96d5a3494bb4 100644 --- a/arch/arm/boot/dts/aspeed-bmc-nuvia-dc-scm.dts +++ b/arch/arm/boot/dts/aspeed-bmc-qcom-dc-scm-v1.dts @@ -6,8 +6,8 @@ #include "aspeed-g6.dtsi" / { - model = "Nuvia DC-SCM BMC"; - compatible = "nuvia,dc-scm-bmc", "aspeed,ast2600"; + model = "Qualcomm DC-SCM V1 BMC"; + compatible = "qcom,dc-scm-v1-bmc", "aspeed,ast2600"; aliases { serial4 = &uart5; diff --git a/arch/arm/boot/dts/at91-sam9x60ek.dts b/arch/arm/boot/dts/at91-sam9x60ek.dts index 7719ea3d4933c76cd121087fbd22a972c483a7c5..81ccb0636a009c5c340e597cb925f6d0a137fdf3 100644 --- a/arch/arm/boot/dts/at91-sam9x60ek.dts +++ b/arch/arm/boot/dts/at91-sam9x60ek.dts @@ -233,10 +233,9 @@ status = "okay"; eeprom@53 { - compatible = "atmel,24c32"; + compatible = "atmel,24c02"; reg = <0x53>; pagesize = <16>; - size = <128>; status = "okay"; }; }; diff --git a/arch/arm/boot/dts/at91-sama5d2_icp.dts b/arch/arm/boot/dts/at91-sama5d2_icp.dts index 806eb1d911d7ce29579048ba1e65dd2ffaa984c1..164201a8fbf2d876e11758d07eaed3829cd9d25d 100644 --- a/arch/arm/boot/dts/at91-sama5d2_icp.dts +++ b/arch/arm/boot/dts/at91-sama5d2_icp.dts @@ -329,21 +329,21 @@ status = "okay"; eeprom@50 { - compatible = "atmel,24c32"; + compatible = "atmel,24c02"; reg = <0x50>; pagesize = <16>; status = "okay"; }; eeprom@52 { - compatible = "atmel,24c32"; + compatible = "atmel,24c02"; reg = <0x52>; pagesize = <16>; status = "disabled"; }; eeprom@53 { - compatible = "atmel,24c32"; + compatible = "atmel,24c02"; reg = <0x53>; pagesize = <16>; status = "disabled"; diff --git a/arch/arm/boot/dts/at91-sama5d3_ksz9477_evb.dts b/arch/arm/boot/dts/at91-sama5d3_ksz9477_evb.dts index 443e8b02289773630ac9b53d35512b18047ed835..14af1fd6d247d42ff3729baa67c19c7503e912e9 100644 --- a/arch/arm/boot/dts/at91-sama5d3_ksz9477_evb.dts +++ b/arch/arm/boot/dts/at91-sama5d3_ksz9477_evb.dts @@ -120,26 +120,31 @@ port@0 { reg = <0>; label = "lan1"; + phy-mode = "internal"; }; port@1 { reg = <1>; label = "lan2"; + phy-mode = "internal"; }; port@2 { reg = <2>; label = "lan3"; + phy-mode = "internal"; }; port@3 { reg = <3>; label = "lan4"; + phy-mode = "internal"; }; port@4 { reg = <4>; label = "lan5"; + phy-mode = "internal"; }; port@5 { diff --git a/arch/arm/boot/dts/bcm2711-rpi-400.dts b/arch/arm/boot/dts/bcm2711-rpi-400.dts index f4d2fc20397c7018372ae360d0c2b0591501d33a..c53d9eb0b80270dbcb024db02281043e3d5e6720 100644 --- a/arch/arm/boot/dts/bcm2711-rpi-400.dts +++ b/arch/arm/boot/dts/bcm2711-rpi-400.dts @@ -28,12 +28,12 @@ &expgpio { gpio-line-names = "BT_ON", "WL_ON", - "", + "PWR_LED_OFF", "GLOBAL_RESET", "VDD_SD_IO_SEL", - "CAM_GPIO", + "GLOBAL_SHUTDOWN", "SD_PWR_ON", - "SD_OC_N"; + "SHUTDOWN_REQUEST"; }; &genet_mdio { diff --git a/arch/arm/boot/dts/imx6qdl-colibri.dtsi b/arch/arm/boot/dts/imx6qdl-colibri.dtsi index c383e0e4110c897b8f0854fd9397f0e92669e72e..7df270cea2928ca18a1ffc324b30bab77c090769 100644 --- a/arch/arm/boot/dts/imx6qdl-colibri.dtsi +++ b/arch/arm/boot/dts/imx6qdl-colibri.dtsi @@ -593,7 +593,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_atmel_conn>; reg = <0x4a>; - reset-gpios = <&gpio1 14 GPIO_ACTIVE_HIGH>; /* SODIMM 106 */ + reset-gpios = <&gpio1 14 GPIO_ACTIVE_LOW>; /* SODIMM 106 */ status = "disabled"; }; }; diff --git a/arch/arm/boot/dts/imx6qdl-ts7970.dtsi b/arch/arm/boot/dts/imx6qdl-ts7970.dtsi index fded07f370b390b292466ec67477819ccaaba7cb..d6ba4b2a60f6f53f1e4f6a64eb783de13eae7a4f 100644 --- a/arch/arm/boot/dts/imx6qdl-ts7970.dtsi +++ b/arch/arm/boot/dts/imx6qdl-ts7970.dtsi @@ -226,7 +226,7 @@ reg = <0x28>; #gpio-cells = <2>; gpio-controller; - ngpio = <32>; + ngpios = <62>; }; sgtl5000: codec@a { diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi index d27beb47f9a3bc6fb8d60e356feb45bbbb669040..652feff334966be07cbce7b0149705db989295e1 100644 --- a/arch/arm/boot/dts/imx6qdl.dtsi +++ b/arch/arm/boot/dts/imx6qdl.dtsi @@ -762,7 +762,7 @@ regulator-name = "vddpu"; regulator-min-microvolt = <725000>; regulator-max-microvolt = <1450000>; - regulator-enable-ramp-delay = <150>; + regulator-enable-ramp-delay = <380>; anatop-reg-offset = <0x140>; anatop-vol-bit-shift = <9>; anatop-vol-bit-width = <5>; diff --git a/arch/arm/boot/dts/imx6ull-colibri.dtsi b/arch/arm/boot/dts/imx6ull-colibri.dtsi index 15621e03fa4d46f6f269744d78cb691ffcc216e3..2c3ae715c683d68c7920e78d65f96ac4b8f660d9 100644 --- a/arch/arm/boot/dts/imx6ull-colibri.dtsi +++ b/arch/arm/boot/dts/imx6ull-colibri.dtsi @@ -166,7 +166,7 @@ atmel_mxt_ts: touchscreen@4a { compatible = "atmel,maxtouch"; pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_atmel_conn>; + pinctrl-0 = <&pinctrl_atmel_conn &pinctrl_atmel_snvs_conn>; reg = <0x4a>; interrupt-parent = <&gpio5>; interrupts = <4 IRQ_TYPE_EDGE_FALLING>; /* SODIMM 107 / INT */ @@ -331,7 +331,6 @@ pinctrl_atmel_conn: atmelconngrp { fsl,pins = < MX6UL_PAD_JTAG_MOD__GPIO1_IO10 0xb0a0 /* SODIMM 106 */ - MX6ULL_PAD_SNVS_TAMPER4__GPIO5_IO04 0xb0a0 /* SODIMM 107 */ >; }; @@ -684,6 +683,12 @@ }; &iomuxc_snvs { + pinctrl_atmel_snvs_conn: atmelsnvsconngrp { + fsl,pins = < + MX6ULL_PAD_SNVS_TAMPER4__GPIO5_IO04 0xb0a0 /* SODIMM 107 */ + >; + }; + pinctrl_snvs_gpio1: snvsgpio1grp { fsl,pins = < MX6ULL_PAD_SNVS_TAMPER6__GPIO5_IO06 0x110a0 /* SODIMM 93 */ diff --git a/arch/arm/boot/dts/imx7d-smegw01.dts b/arch/arm/boot/dts/imx7d-smegw01.dts index c6b32064a0096a3b95c353e67fd7cf750a5f79c2..21b509c43393e7e0e766b2b652314bedf0a7e5a3 100644 --- a/arch/arm/boot/dts/imx7d-smegw01.dts +++ b/arch/arm/boot/dts/imx7d-smegw01.dts @@ -216,10 +216,8 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc2>; bus-width = <4>; + no-1-8-v; non-removable; - cap-sd-highspeed; - sd-uhs-ddr50; - mmc-ddr-1_8v; vmmc-supply = <®_wifi>; enable-sdio-wakeup; status = "okay"; diff --git a/arch/arm/boot/dts/imx7s.dtsi b/arch/arm/boot/dts/imx7s.dtsi index 008e3da460f1b288b1a20a62fc7b15b0b45593d7..039eed79d2e730c5c0c4ec9aca844b9b852e5621 100644 --- a/arch/arm/boot/dts/imx7s.dtsi +++ b/arch/arm/boot/dts/imx7s.dtsi @@ -120,6 +120,7 @@ compatible = "usb-nop-xceiv"; clocks = <&clks IMX7D_USB_HSIC_ROOT_CLK>; clock-names = "main_clk"; + power-domains = <&pgc_hsic_phy>; #phy-cells = <0>; }; @@ -1153,7 +1154,6 @@ compatible = "fsl,imx7d-usb", "fsl,imx27-usb"; reg = <0x30b30000 0x200>; interrupts = ; - power-domains = <&pgc_hsic_phy>; clocks = <&clks IMX7D_USB_CTRL_CLK>; fsl,usbphy = <&usbphynop3>; fsl,usbmisc = <&usbmisc3 0>; diff --git a/arch/arm/boot/dts/lan966x-kontron-kswitch-d10-mmt.dtsi b/arch/arm/boot/dts/lan966x-kontron-kswitch-d10-mmt.dtsi index 4cab1b3b3b29a5dd0e1ead930e9d2a1507e4a4e1..725dcf707b315c04208c21ea1639dbe6aeec56b8 100644 --- a/arch/arm/boot/dts/lan966x-kontron-kswitch-d10-mmt.dtsi +++ b/arch/arm/boot/dts/lan966x-kontron-kswitch-d10-mmt.dtsi @@ -87,22 +87,22 @@ phy4: ethernet-phy@5 { reg = <5>; - coma-mode-gpios = <&gpio 37 GPIO_ACTIVE_HIGH>; + coma-mode-gpios = <&gpio 37 GPIO_OPEN_DRAIN>; }; phy5: ethernet-phy@6 { reg = <6>; - coma-mode-gpios = <&gpio 37 GPIO_ACTIVE_HIGH>; + coma-mode-gpios = <&gpio 37 GPIO_OPEN_DRAIN>; }; phy6: ethernet-phy@7 { reg = <7>; - coma-mode-gpios = <&gpio 37 GPIO_ACTIVE_HIGH>; + coma-mode-gpios = <&gpio 37 GPIO_OPEN_DRAIN>; }; phy7: ethernet-phy@8 { reg = <8>; - coma-mode-gpios = <&gpio 37 GPIO_ACTIVE_HIGH>; + coma-mode-gpios = <&gpio 37 GPIO_OPEN_DRAIN>; }; }; diff --git a/arch/arm/boot/dts/qcom-msm8974.dtsi b/arch/arm/boot/dts/qcom-msm8974.dtsi index 814ad0b46232654da103923b0eaf43bae0d694be..c3b8a6d6302753d4b233cb9495bddd54208e9c74 100644 --- a/arch/arm/boot/dts/qcom-msm8974.dtsi +++ b/arch/arm/boot/dts/qcom-msm8974.dtsi @@ -506,6 +506,8 @@ interrupts = ; clocks = <&gcc GCC_BLSP1_UART2_APPS_CLK>, <&gcc GCC_BLSP1_AHB_CLK>; clock-names = "core", "iface"; + pinctrl-names = "default"; + pinctrl-0 = <&blsp1_uart2_default>; status = "disabled"; }; @@ -581,6 +583,9 @@ interrupts = ; clocks = <&gcc GCC_BLSP2_UART1_APPS_CLK>, <&gcc GCC_BLSP2_AHB_CLK>; clock-names = "core", "iface"; + pinctrl-names = "default", "sleep"; + pinctrl-0 = <&blsp2_uart1_default>; + pinctrl-1 = <&blsp2_uart1_sleep>; status = "disabled"; }; @@ -599,6 +604,8 @@ interrupts = ; clocks = <&gcc GCC_BLSP2_UART4_APPS_CLK>, <&gcc GCC_BLSP2_AHB_CLK>; clock-names = "core", "iface"; + pinctrl-names = "default"; + pinctrl-0 = <&blsp2_uart4_default>; status = "disabled"; }; @@ -639,6 +646,9 @@ interrupts = <0 106 IRQ_TYPE_LEVEL_HIGH>; clocks = <&gcc GCC_BLSP2_QUP6_I2C_APPS_CLK>, <&gcc GCC_BLSP2_AHB_CLK>; clock-names = "core", "iface"; + pinctrl-names = "default", "sleep"; + pinctrl-0 = <&blsp2_i2c6_default>; + pinctrl-1 = <&blsp2_i2c6_sleep>; #address-cells = <1>; #size-cells = <0>; }; @@ -1256,7 +1266,7 @@ }; }; - blsp1_uart2_active: blsp1-uart2-active { + blsp1_uart2_default: blsp1-uart2-default { rx { pins = "gpio5"; function = "blsp_uart2"; @@ -1272,7 +1282,7 @@ }; }; - blsp2_uart1_active: blsp2-uart1-active { + blsp2_uart1_default: blsp2-uart1-default { tx-rts { pins = "gpio41", "gpio44"; function = "blsp_uart7"; @@ -1295,7 +1305,7 @@ bias-pull-down; }; - blsp2_uart4_active: blsp2-uart4-active { + blsp2_uart4_default: blsp2-uart4-default { tx-rts { pins = "gpio53", "gpio56"; function = "blsp_uart10"; @@ -1406,7 +1416,19 @@ bias-pull-up; }; - /* BLSP2_I2C6 info is missing - nobody uses it though? */ + blsp2_i2c6_default: blsp2-i2c6-default { + pins = "gpio87", "gpio88"; + function = "blsp_i2c12"; + drive-strength = <2>; + bias-disable; + }; + + blsp2_i2c6_sleep: blsp2-i2c6-sleep { + pins = "gpio87", "gpio88"; + function = "blsp_i2c12"; + drive-strength = <2>; + bias-pull-up; + }; spi8_default: spi8_default { mosi { diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi index 89c71d419f82ab78d532577352a2345277f4e0f5..659a17fc755cf234206670512de09c3cd876732d 100644 --- a/arch/arm/boot/dts/sama5d2.dtsi +++ b/arch/arm/boot/dts/sama5d2.dtsi @@ -1124,7 +1124,7 @@ clocks = <&pmc PMC_TYPE_PERIPHERAL 55>, <&pmc PMC_TYPE_GCK 55>; clock-names = "pclk", "gclk"; assigned-clocks = <&pmc PMC_TYPE_CORE PMC_I2S1_MUX>; - assigned-parrents = <&pmc PMC_TYPE_GCK 55>; + assigned-clock-parents = <&pmc PMC_TYPE_GCK 55>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/stm32mp15-scmi.dtsi b/arch/arm/boot/dts/stm32mp15-scmi.dtsi new file mode 100644 index 0000000000000000000000000000000000000000..543f24c2f4f6bccceed836886eca70863e9eddd6 --- /dev/null +++ b/arch/arm/boot/dts/stm32mp15-scmi.dtsi @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) +/* + * Copyright (C) STMicroelectronics 2022 - All Rights Reserved + * Author: Alexandre Torgue for STMicroelectronics. + */ + +/ { + firmware { + optee: optee { + compatible = "linaro,optee-tz"; + method = "smc"; + }; + + scmi: scmi { + compatible = "linaro,scmi-optee"; + #address-cells = <1>; + #size-cells = <0>; + linaro,optee-channel-id = <0>; + shmem = <&scmi_shm>; + + scmi_clk: protocol@14 { + reg = <0x14>; + #clock-cells = <1>; + }; + + scmi_reset: protocol@16 { + reg = <0x16>; + #reset-cells = <1>; + }; + + scmi_voltd: protocol@17 { + reg = <0x17>; + + scmi_reguls: regulators { + #address-cells = <1>; + #size-cells = <0>; + + scmi_reg11: reg11@0 { + reg = <0>; + regulator-name = "reg11"; + regulator-min-microvolt = <1100000>; + regulator-max-microvolt = <1100000>; + }; + + scmi_reg18: reg18@1 { + voltd-name = "reg18"; + reg = <1>; + regulator-name = "reg18"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + }; + + scmi_usb33: usb33@2 { + reg = <2>; + regulator-name = "usb33"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + }; + }; + }; + }; + }; + + soc { + scmi_sram: sram@2ffff000 { + compatible = "mmio-sram"; + reg = <0x2ffff000 0x1000>; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0 0x2ffff000 0x1000>; + + scmi_shm: scmi-sram@0 { + compatible = "arm,scmi-shmem"; + reg = <0 0x80>; + }; + }; + }; +}; + +®11 { + status = "disabled"; +}; + +®18 { + status = "disabled"; +}; + +&usb33 { + status = "disabled"; +}; + +&usbotg_hs { + usb33d-supply = <&scmi_usb33>; +}; + +&usbphyc { + vdda1v1-supply = <&scmi_reg11>; + vdda1v8-supply = <&scmi_reg18>; +}; + +/delete-node/ &clk_hse; +/delete-node/ &clk_hsi; +/delete-node/ &clk_lse; +/delete-node/ &clk_lsi; +/delete-node/ &clk_csi; diff --git a/arch/arm/boot/dts/stm32mp151.dtsi b/arch/arm/boot/dts/stm32mp151.dtsi index 1b2fd3426a810933893fadef533a078f294086bf..e04dda5ddd954e4246cae0b5a78b3d82bc98daa9 100644 --- a/arch/arm/boot/dts/stm32mp151.dtsi +++ b/arch/arm/boot/dts/stm32mp151.dtsi @@ -115,33 +115,6 @@ status = "disabled"; }; - firmware { - optee: optee { - compatible = "linaro,optee-tz"; - method = "smc"; - status = "disabled"; - }; - - scmi: scmi { - compatible = "linaro,scmi-optee"; - #address-cells = <1>; - #size-cells = <0>; - linaro,optee-channel-id = <0>; - shmem = <&scmi_shm>; - status = "disabled"; - - scmi_clk: protocol@14 { - reg = <0x14>; - #clock-cells = <1>; - }; - - scmi_reset: protocol@16 { - reg = <0x16>; - #reset-cells = <1>; - }; - }; - }; - soc { compatible = "simple-bus"; #address-cells = <1>; @@ -149,20 +122,6 @@ interrupt-parent = <&intc>; ranges; - scmi_sram: sram@2ffff000 { - compatible = "mmio-sram"; - reg = <0x2ffff000 0x1000>; - #address-cells = <1>; - #size-cells = <1>; - ranges = <0 0x2ffff000 0x1000>; - - scmi_shm: scmi-sram@0 { - compatible = "arm,scmi-shmem"; - reg = <0 0x80>; - status = "disabled"; - }; - }; - timers2: timer@40000000 { #address-cells = <1>; #size-cells = <0>; @@ -606,7 +565,7 @@ compatible = "st,stm32-cec"; reg = <0x40016000 0x400>; interrupts = ; - clocks = <&rcc CEC_K>, <&clk_lse>; + clocks = <&rcc CEC_K>, <&rcc CEC>; clock-names = "cec", "hdmi-cec"; status = "disabled"; }; @@ -1515,7 +1474,7 @@ usbh_ohci: usb@5800c000 { compatible = "generic-ohci"; reg = <0x5800c000 0x1000>; - clocks = <&rcc USBH>, <&usbphyc>; + clocks = <&usbphyc>, <&rcc USBH>; resets = <&rcc USBH_R>; interrupts = ; status = "disabled"; @@ -1524,7 +1483,7 @@ usbh_ehci: usb@5800d000 { compatible = "generic-ehci"; reg = <0x5800d000 0x1000>; - clocks = <&rcc USBH>; + clocks = <&usbphyc>, <&rcc USBH>; resets = <&rcc USBH_R>; interrupts = ; companion = <&usbh_ohci>; diff --git a/arch/arm/boot/dts/stm32mp157a-dk1-scmi.dts b/arch/arm/boot/dts/stm32mp157a-dk1-scmi.dts index e3d3f3f30c7dae4a1eb91998e27dc07df41e2aea..e539cc80bef8108cb4d55c3b4130958860c4e072 100644 --- a/arch/arm/boot/dts/stm32mp157a-dk1-scmi.dts +++ b/arch/arm/boot/dts/stm32mp157a-dk1-scmi.dts @@ -7,6 +7,7 @@ /dts-v1/; #include "stm32mp157a-dk1.dts" +#include "stm32mp15-scmi.dtsi" / { model = "STMicroelectronics STM32MP157A-DK1 SCMI Discovery Board"; @@ -28,6 +29,10 @@ clocks = <&scmi_clk CK_SCMI_MPU>; }; +&dsi { + clocks = <&rcc DSI_K>, <&scmi_clk CK_SCMI_HSE>, <&rcc DSI_PX>; +}; + &gpioz { clocks = <&scmi_clk CK_SCMI_GPIOZ>; }; @@ -54,10 +59,6 @@ resets = <&scmi_reset RST_SCMI_MCU>; }; -&optee { - status = "okay"; -}; - &rcc { compatible = "st,stm32mp1-rcc-secure", "syscon"; clock-names = "hse", "hsi", "csi", "lse", "lsi"; @@ -76,11 +77,3 @@ &rtc { clocks = <&scmi_clk CK_SCMI_RTCAPB>, <&scmi_clk CK_SCMI_RTC>; }; - -&scmi { - status = "okay"; -}; - -&scmi_shm { - status = "okay"; -}; diff --git a/arch/arm/boot/dts/stm32mp157c-dk2-scmi.dts b/arch/arm/boot/dts/stm32mp157c-dk2-scmi.dts index 45dcd299aa9e7e92854dd91aad6fbb20926a063d..97e4f94b0a24eeaedc34af98cccfbb76f7877152 100644 --- a/arch/arm/boot/dts/stm32mp157c-dk2-scmi.dts +++ b/arch/arm/boot/dts/stm32mp157c-dk2-scmi.dts @@ -7,6 +7,7 @@ /dts-v1/; #include "stm32mp157c-dk2.dts" +#include "stm32mp15-scmi.dtsi" / { model = "STMicroelectronics STM32MP157C-DK2 SCMI Discovery Board"; @@ -34,6 +35,7 @@ }; &dsi { + phy-dsi-supply = <&scmi_reg18>; clocks = <&rcc DSI_K>, <&scmi_clk CK_SCMI_HSE>, <&rcc DSI_PX>; }; @@ -63,10 +65,6 @@ resets = <&scmi_reset RST_SCMI_MCU>; }; -&optee { - status = "okay"; -}; - &rcc { compatible = "st,stm32mp1-rcc-secure", "syscon"; clock-names = "hse", "hsi", "csi", "lse", "lsi"; @@ -85,11 +83,3 @@ &rtc { clocks = <&scmi_clk CK_SCMI_RTCAPB>, <&scmi_clk CK_SCMI_RTC>; }; - -&scmi { - status = "okay"; -}; - -&scmi_shm { - status = "okay"; -}; diff --git a/arch/arm/boot/dts/stm32mp157c-ed1-scmi.dts b/arch/arm/boot/dts/stm32mp157c-ed1-scmi.dts index 458e0ca3cded3c53569c3ca252d5d3f67222f029..9cf0a44d2f47e9e0d0329d896709b07b3d91465e 100644 --- a/arch/arm/boot/dts/stm32mp157c-ed1-scmi.dts +++ b/arch/arm/boot/dts/stm32mp157c-ed1-scmi.dts @@ -7,6 +7,7 @@ /dts-v1/; #include "stm32mp157c-ed1.dts" +#include "stm32mp15-scmi.dtsi" / { model = "STMicroelectronics STM32MP157C-ED1 SCMI eval daughter"; @@ -33,6 +34,10 @@ resets = <&scmi_reset RST_SCMI_CRYP1>; }; +&dsi { + clocks = <&rcc DSI_K>, <&scmi_clk CK_SCMI_HSE>, <&rcc DSI_PX>; +}; + &gpioz { clocks = <&scmi_clk CK_SCMI_GPIOZ>; }; @@ -59,10 +64,6 @@ resets = <&scmi_reset RST_SCMI_MCU>; }; -&optee { - status = "okay"; -}; - &rcc { compatible = "st,stm32mp1-rcc-secure", "syscon"; clock-names = "hse", "hsi", "csi", "lse", "lsi"; @@ -81,11 +82,3 @@ &rtc { clocks = <&scmi_clk CK_SCMI_RTCAPB>, <&scmi_clk CK_SCMI_RTC>; }; - -&scmi { - status = "okay"; -}; - -&scmi_shm { - status = "okay"; -}; diff --git a/arch/arm/boot/dts/stm32mp157c-ev1-scmi.dts b/arch/arm/boot/dts/stm32mp157c-ev1-scmi.dts index df9c113edb4b5467e484fa1057ea53356177b6b3..3b9dd6f4ccc96aabb22ddc6ad7a0b9913a394d35 100644 --- a/arch/arm/boot/dts/stm32mp157c-ev1-scmi.dts +++ b/arch/arm/boot/dts/stm32mp157c-ev1-scmi.dts @@ -7,6 +7,7 @@ /dts-v1/; #include "stm32mp157c-ev1.dts" +#include "stm32mp15-scmi.dtsi" / { model = "STMicroelectronics STM32MP157C-EV1 SCMI eval daughter on eval mother"; @@ -35,6 +36,7 @@ }; &dsi { + phy-dsi-supply = <&scmi_reg18>; clocks = <&rcc DSI_K>, <&scmi_clk CK_SCMI_HSE>, <&rcc DSI_PX>; }; @@ -68,10 +70,6 @@ resets = <&scmi_reset RST_SCMI_MCU>; }; -&optee { - status = "okay"; -}; - &rcc { compatible = "st,stm32mp1-rcc-secure", "syscon"; clock-names = "hse", "hsi", "csi", "lse", "lsi"; @@ -90,11 +88,3 @@ &rtc { clocks = <&scmi_clk CK_SCMI_RTCAPB>, <&scmi_clk CK_SCMI_RTC>; }; - -&scmi { - status = "okay"; -}; - -&scmi_shm { - status = "okay"; -}; diff --git a/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts b/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts index f19ed981da9d92d4b842fcb799bb1681d173e266..3706216ffb40bac94501f3f4f529e2d5f7a87cfc 100644 --- a/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts +++ b/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts @@ -169,7 +169,7 @@ flash@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "mxicy,mx25l1606e", "winbond,w25q128"; + compatible = "mxicy,mx25l1606e", "jedec,spi-nor"; reg = <0>; spi-max-frequency = <40000000>; }; diff --git a/arch/arm/configs/mxs_defconfig b/arch/arm/configs/mxs_defconfig index ca32446b187f5d61da868e8a6a0d9deecc611757..f53086ddc48b032bbc58a06ffd9cc97abaed3731 100644 --- a/arch/arm/configs/mxs_defconfig +++ b/arch/arm/configs/mxs_defconfig @@ -93,6 +93,7 @@ CONFIG_REGULATOR_FIXED_VOLTAGE=y CONFIG_DRM=y CONFIG_DRM_PANEL_SEIKO_43WVF1G=y CONFIG_DRM_MXSFB=y +CONFIG_FB=y CONFIG_FB_MODE_HELPERS=y CONFIG_LCD_CLASS_DEVICE=y CONFIG_BACKLIGHT_CLASS_DEVICE=y diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h index f1d0a7807cd0e13beca85a00ea9a34ef504b104a..41536feb4392140798e33f1c736f70c9dd798563 100644 --- a/arch/arm/include/asm/domain.h +++ b/arch/arm/include/asm/domain.h @@ -112,19 +112,6 @@ static __always_inline void set_domain(unsigned int val) } #endif -#ifdef CONFIG_CPU_USE_DOMAINS -#define modify_domain(dom,type) \ - do { \ - unsigned int domain = get_domain(); \ - domain &= ~domain_mask(dom); \ - domain = domain | domain_val(dom, type); \ - set_domain(domain); \ - } while (0) - -#else -static inline void modify_domain(unsigned dom, unsigned type) { } -#endif - /* * Generate the T (user) versions of the LDR/STR and related * instructions (inline assembly) diff --git a/arch/arm/include/asm/mach/map.h b/arch/arm/include/asm/mach/map.h index 92282558caf7cdbb36b27037e0de39bbb6a5744c..2b8970d8e5a2ff84bffb8fc2302b53d5d08882b9 100644 --- a/arch/arm/include/asm/mach/map.h +++ b/arch/arm/include/asm/mach/map.h @@ -27,6 +27,7 @@ enum { MT_HIGH_VECTORS, MT_MEMORY_RWX, MT_MEMORY_RW, + MT_MEMORY_RO, MT_ROM, MT_MEMORY_RWX_NONCACHED, MT_MEMORY_RW_DTCM, diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h index 93051e2f402c8452a743e83ace1413258a0b6034..1408a6a15d0e03c81225c0f143f85011e8b0162f 100644 --- a/arch/arm/include/asm/ptrace.h +++ b/arch/arm/include/asm/ptrace.h @@ -163,5 +163,31 @@ static inline unsigned long user_stack_pointer(struct pt_regs *regs) ((current_stack_pointer | (THREAD_SIZE - 1)) - 7) - 1; \ }) + +/* + * Update ITSTATE after normal execution of an IT block instruction. + * + * The 8 IT state bits are split into two parts in CPSR: + * ITSTATE<1:0> are in CPSR<26:25> + * ITSTATE<7:2> are in CPSR<15:10> + */ +static inline unsigned long it_advance(unsigned long cpsr) +{ + if ((cpsr & 0x06000400) == 0) { + /* ITSTATE<2:0> == 0 means end of IT block, so clear IT state */ + cpsr &= ~PSR_IT_MASK; + } else { + /* We need to shift left ITSTATE<4:0> */ + const unsigned long mask = 0x06001c00; /* Mask ITSTATE<4:0> */ + unsigned long it = cpsr & mask; + it <<= 1; + it |= it >> (27 - 10); /* Carry ITSTATE<2> to correct place */ + it &= mask; + cpsr &= ~mask; + cpsr |= it; + } + return cpsr; +} + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 7aa3ded4af9292d006e2346efa554187467aa393..6a447ac67d80df7d8e79b60ad7e0d13d6076785d 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -302,6 +302,7 @@ local_restart: b ret_fast_syscall #endif ENDPROC(vector_swi) + .ltorg /* * This is the really slow path. We're going to be doing diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c index b1a43d7bc56ce1c403eadfa1fdbd3f54a246b16d..df6d673e83d563ec3423aef9e8d84fb78067a6f9 100644 --- a/arch/arm/mach-at91/pm.c +++ b/arch/arm/mach-at91/pm.c @@ -202,7 +202,7 @@ static const struct wakeup_source_info ws_info[] = { static const struct of_device_id sama5d2_ws_ids[] = { { .compatible = "atmel,sama5d2-gem", .data = &ws_info[0] }, - { .compatible = "atmel,at91rm9200-rtc", .data = &ws_info[1] }, + { .compatible = "atmel,sama5d2-rtc", .data = &ws_info[1] }, { .compatible = "atmel,sama5d3-udc", .data = &ws_info[2] }, { .compatible = "atmel,at91rm9200-ohci", .data = &ws_info[2] }, { .compatible = "usb-ohci", .data = &ws_info[2] }, @@ -213,24 +213,24 @@ static const struct of_device_id sama5d2_ws_ids[] = { }; static const struct of_device_id sam9x60_ws_ids[] = { - { .compatible = "atmel,at91sam9x5-rtc", .data = &ws_info[1] }, + { .compatible = "microchip,sam9x60-rtc", .data = &ws_info[1] }, { .compatible = "atmel,at91rm9200-ohci", .data = &ws_info[2] }, { .compatible = "usb-ohci", .data = &ws_info[2] }, { .compatible = "atmel,at91sam9g45-ehci", .data = &ws_info[2] }, { .compatible = "usb-ehci", .data = &ws_info[2] }, - { .compatible = "atmel,at91sam9260-rtt", .data = &ws_info[4] }, + { .compatible = "microchip,sam9x60-rtt", .data = &ws_info[4] }, { .compatible = "cdns,sam9x60-macb", .data = &ws_info[5] }, { /* sentinel */ } }; static const struct of_device_id sama7g5_ws_ids[] = { - { .compatible = "atmel,at91sam9x5-rtc", .data = &ws_info[1] }, + { .compatible = "microchip,sama7g5-rtc", .data = &ws_info[1] }, { .compatible = "microchip,sama7g5-ohci", .data = &ws_info[2] }, { .compatible = "usb-ohci", .data = &ws_info[2] }, { .compatible = "atmel,at91sam9g45-ehci", .data = &ws_info[2] }, { .compatible = "usb-ehci", .data = &ws_info[2] }, { .compatible = "microchip,sama7g5-sdhci", .data = &ws_info[3] }, - { .compatible = "atmel,at91sam9260-rtt", .data = &ws_info[4] }, + { .compatible = "microchip,sama7g5-rtt", .data = &ws_info[4] }, { /* sentinel */ } }; @@ -1079,7 +1079,7 @@ securam_fail: return ret; } -static void at91_pm_secure_init(void) +static void __init at91_pm_secure_init(void) { int suspend_mode; struct arm_smccc_res res; diff --git a/arch/arm/mach-axxia/platsmp.c b/arch/arm/mach-axxia/platsmp.c index 512943eae30a554690ddf7cd40c047e74e7d63bd..2e203626eda529edbe31b025da39fe16a991be98 100644 --- a/arch/arm/mach-axxia/platsmp.c +++ b/arch/arm/mach-axxia/platsmp.c @@ -39,6 +39,7 @@ static int axxia_boot_secondary(unsigned int cpu, struct task_struct *idle) return -ENOENT; syscon = of_iomap(syscon_np, 0); + of_node_put(syscon_np); if (!syscon) return -ENOMEM; diff --git a/arch/arm/mach-cns3xxx/core.c b/arch/arm/mach-cns3xxx/core.c index e4f4b20b83a2d985ccae7f56da5728e908848586..3fc4ec830e3a33f4089b2aa6d63abb0fb0982dca 100644 --- a/arch/arm/mach-cns3xxx/core.c +++ b/arch/arm/mach-cns3xxx/core.c @@ -372,6 +372,7 @@ static void __init cns3xxx_init(void) /* De-Asscer SATA Reset */ cns3xxx_pwr_soft_rst(CNS3XXX_PWR_SOFTWARE_RST(SATA)); } + of_node_put(dn); dn = of_find_compatible_node(NULL, NULL, "cavium,cns3420-sdhci"); if (of_device_is_available(dn)) { @@ -385,6 +386,7 @@ static void __init cns3xxx_init(void) cns3xxx_pwr_clk_en(CNS3XXX_PWR_CLK_EN(SDIO)); cns3xxx_pwr_soft_rst(CNS3XXX_PWR_SOFTWARE_RST(SDIO)); } + of_node_put(dn); pm_power_off = cns3xxx_power_off; diff --git a/arch/arm/mach-exynos/exynos.c b/arch/arm/mach-exynos/exynos.c index 8b48326be9fd57f46597d203ed1ea872ee1eefe9..51a247ca4da8c86244247431ee4f16e6a7da4418 100644 --- a/arch/arm/mach-exynos/exynos.c +++ b/arch/arm/mach-exynos/exynos.c @@ -149,6 +149,7 @@ static void exynos_map_pmu(void) np = of_find_matching_node(NULL, exynos_dt_pmu_match); if (np) pmu_base_addr = of_iomap(np, 0); + of_node_put(np); } static void __init exynos_init_irq(void) diff --git a/arch/arm/mach-meson/platsmp.c b/arch/arm/mach-meson/platsmp.c index 4b8ad728bb42aa68b852e3dfee0b1c656e388426..32ac60b89fdcc5e48716f67c48d91498abff5ab7 100644 --- a/arch/arm/mach-meson/platsmp.c +++ b/arch/arm/mach-meson/platsmp.c @@ -71,6 +71,7 @@ static void __init meson_smp_prepare_cpus(const char *scu_compatible, } sram_base = of_iomap(node, 0); + of_node_put(node); if (!sram_base) { pr_err("Couldn't map SRAM registers\n"); return; @@ -91,6 +92,7 @@ static void __init meson_smp_prepare_cpus(const char *scu_compatible, } scu_base = of_iomap(node, 0); + of_node_put(node); if (!scu_base) { pr_err("Couldn't map SCU registers\n"); return; diff --git a/arch/arm/mach-rockchip/pm.c b/arch/arm/mach-rockchip/pm.c index 87389d9456b95c30fc0730df589310d0c2523005..30d781d80fe0cba106d369df15d0968b8cd8c50a 100644 --- a/arch/arm/mach-rockchip/pm.c +++ b/arch/arm/mach-rockchip/pm.c @@ -311,7 +311,7 @@ void __init rockchip_suspend_init(void) &match); if (!match) { pr_err("Failed to find PMU node\n"); - return; + goto out_put; } pm_data = (struct rockchip_pm_data *) match->data; @@ -320,9 +320,12 @@ void __init rockchip_suspend_init(void) if (ret) { pr_err("%s: matches init error %d\n", __func__, ret); - return; + goto out_put; } } suspend_set_ops(pm_data->ops); + +out_put: + of_node_put(np); } diff --git a/arch/arm/mach-spear/time.c b/arch/arm/mach-spear/time.c index d1fdb6066f7bb47945f82847afd865ad7d48ef9a..c7c17c0f936cfbc2889970c703361f20664a9e9b 100644 --- a/arch/arm/mach-spear/time.c +++ b/arch/arm/mach-spear/time.c @@ -218,13 +218,13 @@ void __init spear_setup_of_timer(void) irq = irq_of_parse_and_map(np, 0); if (!irq) { pr_err("%s: No irq passed for timer via DT\n", __func__); - return; + goto err_put_np; } gpt_base = of_iomap(np, 0); if (!gpt_base) { pr_err("%s: of iomap failed\n", __func__); - return; + goto err_put_np; } gpt_clk = clk_get_sys("gpt0", NULL); @@ -239,6 +239,8 @@ void __init spear_setup_of_timer(void) goto err_prepare_enable_clk; } + of_node_put(np); + spear_clockevent_init(irq); spear_clocksource_init(); @@ -248,4 +250,6 @@ err_prepare_enable_clk: clk_put(gpt_clk); err_iomap: iounmap(gpt_base); +err_put_np: + of_node_put(np); } diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index a3a4589ec73b04ad14aa481195f2a0bc1377b5b8..fc439c2c16f83a2cd7fc556476637528605a218b 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -631,7 +631,11 @@ config CPU_USE_DOMAINS bool help This option enables or disables the use of domain switching - via the set_fs() function. + using the DACR (domain access control register) to protect memory + domains from each other. In Linux we use three domains: kernel, user + and IO. The domains are used to protect userspace from kernelspace + and to handle IO-space as a special type of memory by assigning + manager or client roles to running code (such as a process). config CPU_V7M_NUM_IRQ int "Number of external interrupts connected to the NVIC" diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 6f499559d19362885c289d38c3a854a33f10c2bb..f8dd0b3cc8e040cb7cb737e10ae465c11882c913 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -935,6 +935,9 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (type == TYPE_LDST) do_alignment_finish_ldst(addr, instr, regs, offset); + if (thumb_mode(regs)) + regs->ARM_cpsr = it_advance(regs->ARM_cpsr); + return 0; bad_or_fault: diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 5e2be37a198e29eefa1b0a3ce6b64296c4d0e2f6..cd17e324aa51ea6596cc83b76049b06ca8c329fb 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -296,6 +296,13 @@ static struct mem_type mem_types[] __ro_after_init = { .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE, .domain = DOMAIN_KERNEL, }, + [MT_MEMORY_RO] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | + L_PTE_XN | L_PTE_RDONLY, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PMD_TYPE_SECT, + .domain = DOMAIN_KERNEL, + }, [MT_ROM] = { .prot_sect = PMD_TYPE_SECT, .domain = DOMAIN_KERNEL, @@ -489,6 +496,7 @@ static void __init build_mem_type_table(void) /* Also setup NX memory mapping */ mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_XN; + mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_XN; } if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) { /* @@ -568,6 +576,7 @@ static void __init build_mem_type_table(void) mem_types[MT_ROM].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; + mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; #endif /* @@ -587,6 +596,8 @@ static void __init build_mem_type_table(void) mem_types[MT_MEMORY_RWX].prot_pte |= L_PTE_SHARED; mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_S; mem_types[MT_MEMORY_RW].prot_pte |= L_PTE_SHARED; + mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_S; + mem_types[MT_MEMORY_RO].prot_pte |= L_PTE_SHARED; mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED; mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= PMD_SECT_S; mem_types[MT_MEMORY_RWX_NONCACHED].prot_pte |= L_PTE_SHARED; @@ -647,6 +658,8 @@ static void __init build_mem_type_table(void) mem_types[MT_MEMORY_RWX].prot_pte |= kern_pgprot; mem_types[MT_MEMORY_RW].prot_sect |= ecc_mask | cp->pmd; mem_types[MT_MEMORY_RW].prot_pte |= kern_pgprot; + mem_types[MT_MEMORY_RO].prot_sect |= ecc_mask | cp->pmd; + mem_types[MT_MEMORY_RO].prot_pte |= kern_pgprot; mem_types[MT_MEMORY_DMA_READY].prot_pte |= kern_pgprot; mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= ecc_mask; mem_types[MT_ROM].prot_sect |= cp->pmd; @@ -1360,7 +1373,7 @@ static void __init devicemaps_init(const struct machine_desc *mdesc) map.pfn = __phys_to_pfn(__atags_pointer & SECTION_MASK); map.virtual = FDT_FIXED_BASE; map.length = FDT_FIXED_SIZE; - map.type = MT_ROM; + map.type = MT_MEMORY_RO; create_mapping(&map); } diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c index fb9f3eb6bf483d22041a06fc9eb17bdf87866558..8bc7a2d6d6c7f96cda00741fe76faa148cee7b51 100644 --- a/arch/arm/mm/proc-v7-bugs.c +++ b/arch/arm/mm/proc-v7-bugs.c @@ -108,8 +108,7 @@ static unsigned int spectre_v2_install_workaround(unsigned int method) #else static unsigned int spectre_v2_install_workaround(unsigned int method) { - pr_info("CPU%u: Spectre V2: workarounds disabled by configuration\n", - smp_processor_id()); + pr_info_once("Spectre V2: workarounds disabled by configuration\n"); return SPECTRE_VULNERABLE; } @@ -209,10 +208,10 @@ static int spectre_bhb_install_workaround(int method) return SPECTRE_VULNERABLE; spectre_bhb_method = method; - } - pr_info("CPU%u: Spectre BHB: using %s workaround\n", - smp_processor_id(), spectre_bhb_method_name(method)); + pr_info("CPU%u: Spectre BHB: enabling %s workaround for all CPUs\n", + smp_processor_id(), spectre_bhb_method_name(method)); + } return SPECTRE_MITIGATED; } diff --git a/arch/arm/probes/decode.h b/arch/arm/probes/decode.h index 97317359899218873e1cfa0560c66645a78e16c5..facc889d05eeeaf3ec9a2332bee31865d963adf4 100644 --- a/arch/arm/probes/decode.h +++ b/arch/arm/probes/decode.h @@ -14,6 +14,7 @@ #include #include #include +#include #include void __init arm_probes_decode_init(void); @@ -35,31 +36,6 @@ void __init find_str_pc_offset(void); #endif -/* - * Update ITSTATE after normal execution of an IT block instruction. - * - * The 8 IT state bits are split into two parts in CPSR: - * ITSTATE<1:0> are in CPSR<26:25> - * ITSTATE<7:2> are in CPSR<15:10> - */ -static inline unsigned long it_advance(unsigned long cpsr) - { - if ((cpsr & 0x06000400) == 0) { - /* ITSTATE<2:0> == 0 means end of IT block, so clear IT state */ - cpsr &= ~PSR_IT_MASK; - } else { - /* We need to shift left ITSTATE<4:0> */ - const unsigned long mask = 0x06001c00; /* Mask ITSTATE<4:0> */ - unsigned long it = cpsr & mask; - it <<= 1; - it |= it >> (27 - 10); /* Carry ITSTATE<2> to correct place */ - it &= mask; - cpsr &= ~mask; - cpsr |= it; - } - return cpsr; -} - static inline void __kprobes bx_write_pc(long pcv, struct pt_regs *regs) { long cpsr = regs->ARM_cpsr; diff --git a/arch/arm/xen/p2m.c b/arch/arm/xen/p2m.c index 84a1cea1f43b9a3331b35093e4c57cb78d670815..309648c17f48685c8a115bcb1e03dd0212a35f49 100644 --- a/arch/arm/xen/p2m.c +++ b/arch/arm/xen/p2m.c @@ -63,11 +63,12 @@ out: unsigned long __pfn_to_mfn(unsigned long pfn) { - struct rb_node *n = phys_to_mach.rb_node; + struct rb_node *n; struct xen_p2m_entry *entry; unsigned long irqflags; read_lock_irqsave(&p2m_lock, irqflags); + n = phys_to_mach.rb_node; while (n) { entry = rb_entry(n, struct xen_p2m_entry, rbnode_phys); if (entry->pfn <= pfn && @@ -152,10 +153,11 @@ bool __set_phys_to_machine_multi(unsigned long pfn, int rc; unsigned long irqflags; struct xen_p2m_entry *p2m_entry; - struct rb_node *n = phys_to_mach.rb_node; + struct rb_node *n; if (mfn == INVALID_P2M_ENTRY) { write_lock_irqsave(&p2m_lock, irqflags); + n = phys_to_mach.rb_node; while (n) { p2m_entry = rb_entry(n, struct xen_p2m_entry, rbnode_phys); if (p2m_entry->pfn <= pfn && diff --git a/arch/arm64/boot/dts/broadcom/bcm4908/bcm4906.dtsi b/arch/arm64/boot/dts/broadcom/bcm4908/bcm4906.dtsi index 66023d5535247df0dff768c09cbbfe1b48ab42cd..d084c33d5ca82317c5d861ab997b4c90aa10b3d5 100644 --- a/arch/arm64/boot/dts/broadcom/bcm4908/bcm4906.dtsi +++ b/arch/arm64/boot/dts/broadcom/bcm4908/bcm4906.dtsi @@ -9,6 +9,14 @@ /delete-node/ cpu@3; }; + timer { + compatible = "arm,armv8-timer"; + interrupts = , + , + , + ; + }; + pmu { compatible = "arm,cortex-a53-pmu"; interrupts = , diff --git a/arch/arm64/boot/dts/broadcom/bcm4908/bcm4908.dtsi b/arch/arm64/boot/dts/broadcom/bcm4908/bcm4908.dtsi index a4be040a00c0733186967379c2a1decd233765ad..967d2cd3c3cee4610de6d92416b84881d2e218b0 100644 --- a/arch/arm64/boot/dts/broadcom/bcm4908/bcm4908.dtsi +++ b/arch/arm64/boot/dts/broadcom/bcm4908/bcm4908.dtsi @@ -29,6 +29,8 @@ device_type = "cpu"; compatible = "brcm,brahma-b53"; reg = <0x0>; + enable-method = "spin-table"; + cpu-release-addr = <0x0 0xfff8>; next-level-cache = <&l2>; }; diff --git a/arch/arm64/boot/dts/exynos/exynos7885.dtsi b/arch/arm64/boot/dts/exynos/exynos7885.dtsi index 3170661f5b67258867e32aefca3c79276c225347..9c233c56558ce86fce37d1980cf7cc43f4c5cd5a 100644 --- a/arch/arm64/boot/dts/exynos/exynos7885.dtsi +++ b/arch/arm64/boot/dts/exynos/exynos7885.dtsi @@ -280,8 +280,8 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&uart0_bus>; - clocks = <&cmu_peri CLK_GOUT_UART0_EXT_UCLK>, - <&cmu_peri CLK_GOUT_UART0_PCLK>; + clocks = <&cmu_peri CLK_GOUT_UART0_PCLK>, + <&cmu_peri CLK_GOUT_UART0_EXT_UCLK>; clock-names = "uart", "clk_uart_baud0"; samsung,uart-fifosize = <64>; status = "disabled"; @@ -293,8 +293,8 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&uart1_bus>; - clocks = <&cmu_peri CLK_GOUT_UART1_EXT_UCLK>, - <&cmu_peri CLK_GOUT_UART1_PCLK>; + clocks = <&cmu_peri CLK_GOUT_UART1_PCLK>, + <&cmu_peri CLK_GOUT_UART1_EXT_UCLK>; clock-names = "uart", "clk_uart_baud0"; samsung,uart-fifosize = <256>; status = "disabled"; @@ -306,8 +306,8 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&uart2_bus>; - clocks = <&cmu_peri CLK_GOUT_UART2_EXT_UCLK>, - <&cmu_peri CLK_GOUT_UART2_PCLK>; + clocks = <&cmu_peri CLK_GOUT_UART2_PCLK>, + <&cmu_peri CLK_GOUT_UART2_EXT_UCLK>; clock-names = "uart", "clk_uart_baud0"; samsung,uart-fifosize = <256>; status = "disabled"; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi index 92465f7776035a1464c0543906d118098f7c38d7..d5cdd77e5a95906e157b478a91c591dea4d8f364 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi @@ -224,9 +224,12 @@ little-endian; }; - efuse@1e80000 { + sfp: efuse@1e80000 { compatible = "fsl,ls1028a-sfp"; reg = <0x0 0x1e80000 0x0 0x10000>; + clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL + QORIQ_CLK_PLL_DIV(4)>; + clock-names = "sfp"; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-evk.dts b/arch/arm64/boot/dts/freescale/imx8mp-evk.dts index 4c3ac4214a2cdfe35184954717d3f62f34b4ad9c..9a4de739e6a2a1356e61c53412961d3be02c4da9 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-evk.dts @@ -395,41 +395,41 @@ &iomuxc { pinctrl_eqos: eqosgrp { fsl,pins = < - MX8MP_IOMUXC_ENET_MDC__ENET_QOS_MDC 0x3 - MX8MP_IOMUXC_ENET_MDIO__ENET_QOS_MDIO 0x3 - MX8MP_IOMUXC_ENET_RD0__ENET_QOS_RGMII_RD0 0x91 - MX8MP_IOMUXC_ENET_RD1__ENET_QOS_RGMII_RD1 0x91 - MX8MP_IOMUXC_ENET_RD2__ENET_QOS_RGMII_RD2 0x91 - MX8MP_IOMUXC_ENET_RD3__ENET_QOS_RGMII_RD3 0x91 - MX8MP_IOMUXC_ENET_RXC__CCM_ENET_QOS_CLOCK_GENERATE_RX_CLK 0x91 - MX8MP_IOMUXC_ENET_RX_CTL__ENET_QOS_RGMII_RX_CTL 0x91 - MX8MP_IOMUXC_ENET_TD0__ENET_QOS_RGMII_TD0 0x1f - MX8MP_IOMUXC_ENET_TD1__ENET_QOS_RGMII_TD1 0x1f - MX8MP_IOMUXC_ENET_TD2__ENET_QOS_RGMII_TD2 0x1f - MX8MP_IOMUXC_ENET_TD3__ENET_QOS_RGMII_TD3 0x1f - MX8MP_IOMUXC_ENET_TX_CTL__ENET_QOS_RGMII_TX_CTL 0x1f - MX8MP_IOMUXC_ENET_TXC__CCM_ENET_QOS_CLOCK_GENERATE_TX_CLK 0x1f - MX8MP_IOMUXC_SAI2_RXC__GPIO4_IO22 0x19 + MX8MP_IOMUXC_ENET_MDC__ENET_QOS_MDC 0x2 + MX8MP_IOMUXC_ENET_MDIO__ENET_QOS_MDIO 0x2 + MX8MP_IOMUXC_ENET_RD0__ENET_QOS_RGMII_RD0 0x90 + MX8MP_IOMUXC_ENET_RD1__ENET_QOS_RGMII_RD1 0x90 + MX8MP_IOMUXC_ENET_RD2__ENET_QOS_RGMII_RD2 0x90 + MX8MP_IOMUXC_ENET_RD3__ENET_QOS_RGMII_RD3 0x90 + MX8MP_IOMUXC_ENET_RXC__CCM_ENET_QOS_CLOCK_GENERATE_RX_CLK 0x90 + MX8MP_IOMUXC_ENET_RX_CTL__ENET_QOS_RGMII_RX_CTL 0x90 + MX8MP_IOMUXC_ENET_TD0__ENET_QOS_RGMII_TD0 0x16 + MX8MP_IOMUXC_ENET_TD1__ENET_QOS_RGMII_TD1 0x16 + MX8MP_IOMUXC_ENET_TD2__ENET_QOS_RGMII_TD2 0x16 + MX8MP_IOMUXC_ENET_TD3__ENET_QOS_RGMII_TD3 0x16 + MX8MP_IOMUXC_ENET_TX_CTL__ENET_QOS_RGMII_TX_CTL 0x16 + MX8MP_IOMUXC_ENET_TXC__CCM_ENET_QOS_CLOCK_GENERATE_TX_CLK 0x16 + MX8MP_IOMUXC_SAI2_RXC__GPIO4_IO22 0x10 >; }; pinctrl_fec: fecgrp { fsl,pins = < - MX8MP_IOMUXC_SAI1_RXD2__ENET1_MDC 0x3 - MX8MP_IOMUXC_SAI1_RXD3__ENET1_MDIO 0x3 - MX8MP_IOMUXC_SAI1_RXD4__ENET1_RGMII_RD0 0x91 - MX8MP_IOMUXC_SAI1_RXD5__ENET1_RGMII_RD1 0x91 - MX8MP_IOMUXC_SAI1_RXD6__ENET1_RGMII_RD2 0x91 - MX8MP_IOMUXC_SAI1_RXD7__ENET1_RGMII_RD3 0x91 - MX8MP_IOMUXC_SAI1_TXC__ENET1_RGMII_RXC 0x91 - MX8MP_IOMUXC_SAI1_TXFS__ENET1_RGMII_RX_CTL 0x91 - MX8MP_IOMUXC_SAI1_TXD0__ENET1_RGMII_TD0 0x1f - MX8MP_IOMUXC_SAI1_TXD1__ENET1_RGMII_TD1 0x1f - MX8MP_IOMUXC_SAI1_TXD2__ENET1_RGMII_TD2 0x1f - MX8MP_IOMUXC_SAI1_TXD3__ENET1_RGMII_TD3 0x1f - MX8MP_IOMUXC_SAI1_TXD4__ENET1_RGMII_TX_CTL 0x1f - MX8MP_IOMUXC_SAI1_TXD5__ENET1_RGMII_TXC 0x1f - MX8MP_IOMUXC_SAI1_RXD0__GPIO4_IO02 0x19 + MX8MP_IOMUXC_SAI1_RXD2__ENET1_MDC 0x2 + MX8MP_IOMUXC_SAI1_RXD3__ENET1_MDIO 0x2 + MX8MP_IOMUXC_SAI1_RXD4__ENET1_RGMII_RD0 0x90 + MX8MP_IOMUXC_SAI1_RXD5__ENET1_RGMII_RD1 0x90 + MX8MP_IOMUXC_SAI1_RXD6__ENET1_RGMII_RD2 0x90 + MX8MP_IOMUXC_SAI1_RXD7__ENET1_RGMII_RD3 0x90 + MX8MP_IOMUXC_SAI1_TXC__ENET1_RGMII_RXC 0x90 + MX8MP_IOMUXC_SAI1_TXFS__ENET1_RGMII_RX_CTL 0x90 + MX8MP_IOMUXC_SAI1_TXD0__ENET1_RGMII_TD0 0x16 + MX8MP_IOMUXC_SAI1_TXD1__ENET1_RGMII_TD1 0x16 + MX8MP_IOMUXC_SAI1_TXD2__ENET1_RGMII_TD2 0x16 + MX8MP_IOMUXC_SAI1_TXD3__ENET1_RGMII_TD3 0x16 + MX8MP_IOMUXC_SAI1_TXD4__ENET1_RGMII_TX_CTL 0x16 + MX8MP_IOMUXC_SAI1_TXD5__ENET1_RGMII_TXC 0x16 + MX8MP_IOMUXC_SAI1_RXD0__GPIO4_IO02 0x10 >; }; @@ -461,28 +461,28 @@ pinctrl_gpio_led: gpioledgrp { fsl,pins = < - MX8MP_IOMUXC_NAND_READY_B__GPIO3_IO16 0x19 + MX8MP_IOMUXC_NAND_READY_B__GPIO3_IO16 0x140 >; }; pinctrl_i2c1: i2c1grp { fsl,pins = < - MX8MP_IOMUXC_I2C1_SCL__I2C1_SCL 0x400001c3 - MX8MP_IOMUXC_I2C1_SDA__I2C1_SDA 0x400001c3 + MX8MP_IOMUXC_I2C1_SCL__I2C1_SCL 0x400001c2 + MX8MP_IOMUXC_I2C1_SDA__I2C1_SDA 0x400001c2 >; }; pinctrl_i2c3: i2c3grp { fsl,pins = < - MX8MP_IOMUXC_I2C3_SCL__I2C3_SCL 0x400001c3 - MX8MP_IOMUXC_I2C3_SDA__I2C3_SDA 0x400001c3 + MX8MP_IOMUXC_I2C3_SCL__I2C3_SCL 0x400001c2 + MX8MP_IOMUXC_I2C3_SDA__I2C3_SDA 0x400001c2 >; }; pinctrl_i2c5: i2c5grp { fsl,pins = < - MX8MP_IOMUXC_SPDIF_RX__I2C5_SDA 0x400001c3 - MX8MP_IOMUXC_SPDIF_TX__I2C5_SCL 0x400001c3 + MX8MP_IOMUXC_SPDIF_RX__I2C5_SDA 0x400001c2 + MX8MP_IOMUXC_SPDIF_TX__I2C5_SCL 0x400001c2 >; }; @@ -500,20 +500,20 @@ pinctrl_reg_usdhc2_vmmc: regusdhc2vmmcgrp { fsl,pins = < - MX8MP_IOMUXC_SD2_RESET_B__GPIO2_IO19 0x41 + MX8MP_IOMUXC_SD2_RESET_B__GPIO2_IO19 0x40 >; }; pinctrl_uart2: uart2grp { fsl,pins = < - MX8MP_IOMUXC_UART2_RXD__UART2_DCE_RX 0x49 - MX8MP_IOMUXC_UART2_TXD__UART2_DCE_TX 0x49 + MX8MP_IOMUXC_UART2_RXD__UART2_DCE_RX 0x140 + MX8MP_IOMUXC_UART2_TXD__UART2_DCE_TX 0x140 >; }; pinctrl_usb1_vbus: usb1grp { fsl,pins = < - MX8MP_IOMUXC_GPIO1_IO14__USB2_OTG_PWR 0x19 + MX8MP_IOMUXC_GPIO1_IO14__USB2_OTG_PWR 0x10 >; }; @@ -525,7 +525,7 @@ MX8MP_IOMUXC_SD2_DATA1__USDHC2_DATA1 0x1d0 MX8MP_IOMUXC_SD2_DATA2__USDHC2_DATA2 0x1d0 MX8MP_IOMUXC_SD2_DATA3__USDHC2_DATA3 0x1d0 - MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc1 + MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc0 >; }; @@ -537,7 +537,7 @@ MX8MP_IOMUXC_SD2_DATA1__USDHC2_DATA1 0x1d4 MX8MP_IOMUXC_SD2_DATA2__USDHC2_DATA2 0x1d4 MX8MP_IOMUXC_SD2_DATA3__USDHC2_DATA3 0x1d4 - MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc1 + MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc0 >; }; @@ -549,7 +549,7 @@ MX8MP_IOMUXC_SD2_DATA1__USDHC2_DATA1 0x1d6 MX8MP_IOMUXC_SD2_DATA2__USDHC2_DATA2 0x1d6 MX8MP_IOMUXC_SD2_DATA3__USDHC2_DATA3 0x1d6 - MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc1 + MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc0 >; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-icore-mx8mp-edimm2.2.dts b/arch/arm64/boot/dts/freescale/imx8mp-icore-mx8mp-edimm2.2.dts index 70a701a624a678785855bc1d6a35c20d3ea86355..dd703b6a5e17c91dac8860687a408ca4765310dc 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-icore-mx8mp-edimm2.2.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-icore-mx8mp-edimm2.2.dts @@ -110,28 +110,28 @@ &iomuxc { pinctrl_eqos: eqosgrp { fsl,pins = < - MX8MP_IOMUXC_ENET_MDC__ENET_QOS_MDC 0x3 - MX8MP_IOMUXC_ENET_MDIO__ENET_QOS_MDIO 0x3 - MX8MP_IOMUXC_ENET_RD0__ENET_QOS_RGMII_RD0 0x91 - MX8MP_IOMUXC_ENET_RD1__ENET_QOS_RGMII_RD1 0x91 - MX8MP_IOMUXC_ENET_RD2__ENET_QOS_RGMII_RD2 0x91 - MX8MP_IOMUXC_ENET_RD3__ENET_QOS_RGMII_RD3 0x91 - MX8MP_IOMUXC_ENET_RXC__CCM_ENET_QOS_CLOCK_GENERATE_RX_CLK 0x91 - MX8MP_IOMUXC_ENET_RX_CTL__ENET_QOS_RGMII_RX_CTL 0x91 - MX8MP_IOMUXC_ENET_TD0__ENET_QOS_RGMII_TD0 0x1f - MX8MP_IOMUXC_ENET_TD1__ENET_QOS_RGMII_TD1 0x1f - MX8MP_IOMUXC_ENET_TD2__ENET_QOS_RGMII_TD2 0x1f - MX8MP_IOMUXC_ENET_TD3__ENET_QOS_RGMII_TD3 0x1f - MX8MP_IOMUXC_ENET_TX_CTL__ENET_QOS_RGMII_TX_CTL 0x1f - MX8MP_IOMUXC_ENET_TXC__CCM_ENET_QOS_CLOCK_GENERATE_TX_CLK 0x1f - MX8MP_IOMUXC_NAND_DATA01__GPIO3_IO07 0x19 + MX8MP_IOMUXC_ENET_MDC__ENET_QOS_MDC 0x2 + MX8MP_IOMUXC_ENET_MDIO__ENET_QOS_MDIO 0x2 + MX8MP_IOMUXC_ENET_RD0__ENET_QOS_RGMII_RD0 0x90 + MX8MP_IOMUXC_ENET_RD1__ENET_QOS_RGMII_RD1 0x90 + MX8MP_IOMUXC_ENET_RD2__ENET_QOS_RGMII_RD2 0x90 + MX8MP_IOMUXC_ENET_RD3__ENET_QOS_RGMII_RD3 0x90 + MX8MP_IOMUXC_ENET_RXC__CCM_ENET_QOS_CLOCK_GENERATE_RX_CLK 0x90 + MX8MP_IOMUXC_ENET_RX_CTL__ENET_QOS_RGMII_RX_CTL 0x90 + MX8MP_IOMUXC_ENET_TD0__ENET_QOS_RGMII_TD0 0x16 + MX8MP_IOMUXC_ENET_TD1__ENET_QOS_RGMII_TD1 0x16 + MX8MP_IOMUXC_ENET_TD2__ENET_QOS_RGMII_TD2 0x16 + MX8MP_IOMUXC_ENET_TD3__ENET_QOS_RGMII_TD3 0x16 + MX8MP_IOMUXC_ENET_TX_CTL__ENET_QOS_RGMII_TX_CTL 0x16 + MX8MP_IOMUXC_ENET_TXC__CCM_ENET_QOS_CLOCK_GENERATE_TX_CLK 0x16 + MX8MP_IOMUXC_NAND_DATA01__GPIO3_IO07 0x10 >; }; pinctrl_uart2: uart2grp { fsl,pins = < - MX8MP_IOMUXC_UART2_RXD__UART2_DCE_RX 0x49 - MX8MP_IOMUXC_UART2_TXD__UART2_DCE_TX 0x49 + MX8MP_IOMUXC_UART2_RXD__UART2_DCE_RX 0x40 + MX8MP_IOMUXC_UART2_TXD__UART2_DCE_TX 0x40 >; }; @@ -151,7 +151,7 @@ MX8MP_IOMUXC_SD2_DATA1__USDHC2_DATA1 0x1d0 MX8MP_IOMUXC_SD2_DATA2__USDHC2_DATA2 0x1d0 MX8MP_IOMUXC_SD2_DATA3__USDHC2_DATA3 0x1d0 - MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc1 + MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc0 >; }; @@ -163,13 +163,13 @@ pinctrl_reg_usb1: regusb1grp { fsl,pins = < - MX8MP_IOMUXC_GPIO1_IO14__GPIO1_IO14 0x19 + MX8MP_IOMUXC_GPIO1_IO14__GPIO1_IO14 0x10 >; }; pinctrl_reg_usdhc2_vmmc: regusdhc2vmmcgrp { fsl,pins = < - MX8MP_IOMUXC_SD2_RESET_B__GPIO2_IO19 0x41 + MX8MP_IOMUXC_SD2_RESET_B__GPIO2_IO19 0x40 >; }; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-phyboard-pollux-rdk.dts b/arch/arm64/boot/dts/freescale/imx8mp-phyboard-pollux-rdk.dts index 984a6b9ded8d7a8c4936f220d099ab2cc755c7c3..6aa720bafe2898593b80f292e768d83a0a29a105 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-phyboard-pollux-rdk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-phyboard-pollux-rdk.dts @@ -116,48 +116,48 @@ &iomuxc { pinctrl_eqos: eqosgrp { fsl,pins = < - MX8MP_IOMUXC_ENET_MDC__ENET_QOS_MDC 0x3 - MX8MP_IOMUXC_ENET_MDIO__ENET_QOS_MDIO 0x3 - MX8MP_IOMUXC_ENET_RD0__ENET_QOS_RGMII_RD0 0x91 - MX8MP_IOMUXC_ENET_RD1__ENET_QOS_RGMII_RD1 0x91 - MX8MP_IOMUXC_ENET_RD2__ENET_QOS_RGMII_RD2 0x91 - MX8MP_IOMUXC_ENET_RD3__ENET_QOS_RGMII_RD3 0x91 - MX8MP_IOMUXC_ENET_RXC__CCM_ENET_QOS_CLOCK_GENERATE_RX_CLK 0x91 - MX8MP_IOMUXC_ENET_RX_CTL__ENET_QOS_RGMII_RX_CTL 0x91 - MX8MP_IOMUXC_ENET_TD0__ENET_QOS_RGMII_TD0 0x1f - MX8MP_IOMUXC_ENET_TD1__ENET_QOS_RGMII_TD1 0x1f - MX8MP_IOMUXC_ENET_TD2__ENET_QOS_RGMII_TD2 0x1f - MX8MP_IOMUXC_ENET_TD3__ENET_QOS_RGMII_TD3 0x1f - MX8MP_IOMUXC_ENET_TX_CTL__ENET_QOS_RGMII_TX_CTL 0x1f - MX8MP_IOMUXC_ENET_TXC__CCM_ENET_QOS_CLOCK_GENERATE_TX_CLK 0x1f + MX8MP_IOMUXC_ENET_MDC__ENET_QOS_MDC 0x2 + MX8MP_IOMUXC_ENET_MDIO__ENET_QOS_MDIO 0x2 + MX8MP_IOMUXC_ENET_RD0__ENET_QOS_RGMII_RD0 0x90 + MX8MP_IOMUXC_ENET_RD1__ENET_QOS_RGMII_RD1 0x90 + MX8MP_IOMUXC_ENET_RD2__ENET_QOS_RGMII_RD2 0x90 + MX8MP_IOMUXC_ENET_RD3__ENET_QOS_RGMII_RD3 0x90 + MX8MP_IOMUXC_ENET_RXC__CCM_ENET_QOS_CLOCK_GENERATE_RX_CLK 0x90 + MX8MP_IOMUXC_ENET_RX_CTL__ENET_QOS_RGMII_RX_CTL 0x90 + MX8MP_IOMUXC_ENET_TD0__ENET_QOS_RGMII_TD0 0x16 + MX8MP_IOMUXC_ENET_TD1__ENET_QOS_RGMII_TD1 0x16 + MX8MP_IOMUXC_ENET_TD2__ENET_QOS_RGMII_TD2 0x16 + MX8MP_IOMUXC_ENET_TD3__ENET_QOS_RGMII_TD3 0x16 + MX8MP_IOMUXC_ENET_TX_CTL__ENET_QOS_RGMII_TX_CTL 0x16 + MX8MP_IOMUXC_ENET_TXC__CCM_ENET_QOS_CLOCK_GENERATE_TX_CLK 0x16 MX8MP_IOMUXC_SAI1_MCLK__GPIO4_IO20 0x10 >; }; pinctrl_i2c2: i2c2grp { fsl,pins = < - MX8MP_IOMUXC_I2C2_SCL__I2C2_SCL 0x400001c3 - MX8MP_IOMUXC_I2C2_SDA__I2C2_SDA 0x400001c3 + MX8MP_IOMUXC_I2C2_SCL__I2C2_SCL 0x400001c2 + MX8MP_IOMUXC_I2C2_SDA__I2C2_SDA 0x400001c2 >; }; pinctrl_i2c2_gpio: i2c2gpiogrp { fsl,pins = < - MX8MP_IOMUXC_I2C2_SCL__GPIO5_IO16 0x1e3 - MX8MP_IOMUXC_I2C2_SDA__GPIO5_IO17 0x1e3 + MX8MP_IOMUXC_I2C2_SCL__GPIO5_IO16 0x1e2 + MX8MP_IOMUXC_I2C2_SDA__GPIO5_IO17 0x1e2 >; }; pinctrl_reg_usdhc2_vmmc: regusdhc2vmmcgrp { fsl,pins = < - MX8MP_IOMUXC_SD2_RESET_B__GPIO2_IO19 0x41 + MX8MP_IOMUXC_SD2_RESET_B__GPIO2_IO19 0x40 >; }; pinctrl_uart1: uart1grp { fsl,pins = < - MX8MP_IOMUXC_UART1_RXD__UART1_DCE_RX 0x49 - MX8MP_IOMUXC_UART1_TXD__UART1_DCE_TX 0x49 + MX8MP_IOMUXC_UART1_RXD__UART1_DCE_RX 0x40 + MX8MP_IOMUXC_UART1_TXD__UART1_DCE_TX 0x40 >; }; @@ -175,7 +175,7 @@ MX8MP_IOMUXC_SD2_DATA1__USDHC2_DATA1 0x1d0 MX8MP_IOMUXC_SD2_DATA2__USDHC2_DATA2 0x1d0 MX8MP_IOMUXC_SD2_DATA3__USDHC2_DATA3 0x1d0 - MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc1 + MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc0 >; }; @@ -187,7 +187,7 @@ MX8MP_IOMUXC_SD2_DATA1__USDHC2_DATA1 0x1d4 MX8MP_IOMUXC_SD2_DATA2__USDHC2_DATA2 0x1d4 MX8MP_IOMUXC_SD2_DATA3__USDHC2_DATA3 0x1d4 - MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc1 + MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc0 >; }; @@ -199,7 +199,7 @@ MX8MP_IOMUXC_SD2_DATA1__USDHC2_DATA1 0x1d6 MX8MP_IOMUXC_SD2_DATA2__USDHC2_DATA2 0x1d6 MX8MP_IOMUXC_SD2_DATA3__USDHC2_DATA3 0x1d6 - MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc1 + MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc0 >; }; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts index 101d311476034bd2d82ce1ee61210a01d28d0c35..521215520a0f40acb3a9310f82eef1354328ee7c 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts @@ -622,15 +622,15 @@ pinctrl_hog: hoggrp { fsl,pins = < - MX8MP_IOMUXC_GPIO1_IO09__GPIO1_IO09 0x40000041 /* DIO0 */ - MX8MP_IOMUXC_GPIO1_IO11__GPIO1_IO11 0x40000041 /* DIO1 */ - MX8MP_IOMUXC_NAND_DQS__GPIO3_IO14 0x40000041 /* M2SKT_OFF# */ - MX8MP_IOMUXC_SD2_DATA2__GPIO2_IO17 0x40000159 /* PCIE1_WDIS# */ - MX8MP_IOMUXC_SD2_DATA3__GPIO2_IO18 0x40000159 /* PCIE2_WDIS# */ - MX8MP_IOMUXC_SD2_CMD__GPIO2_IO14 0x40000159 /* PCIE3_WDIS# */ - MX8MP_IOMUXC_NAND_DATA00__GPIO3_IO06 0x40000041 /* M2SKT_RST# */ - MX8MP_IOMUXC_SAI1_TXD6__GPIO4_IO18 0x40000159 /* M2SKT_WDIS# */ - MX8MP_IOMUXC_NAND_ALE__GPIO3_IO00 0x40000159 /* M2SKT_GDIS# */ + MX8MP_IOMUXC_GPIO1_IO09__GPIO1_IO09 0x40000040 /* DIO0 */ + MX8MP_IOMUXC_GPIO1_IO11__GPIO1_IO11 0x40000040 /* DIO1 */ + MX8MP_IOMUXC_NAND_DQS__GPIO3_IO14 0x40000040 /* M2SKT_OFF# */ + MX8MP_IOMUXC_SD2_DATA2__GPIO2_IO17 0x40000150 /* PCIE1_WDIS# */ + MX8MP_IOMUXC_SD2_DATA3__GPIO2_IO18 0x40000150 /* PCIE2_WDIS# */ + MX8MP_IOMUXC_SD2_CMD__GPIO2_IO14 0x40000150 /* PCIE3_WDIS# */ + MX8MP_IOMUXC_NAND_DATA00__GPIO3_IO06 0x40000040 /* M2SKT_RST# */ + MX8MP_IOMUXC_SAI1_TXD6__GPIO4_IO18 0x40000150 /* M2SKT_WDIS# */ + MX8MP_IOMUXC_NAND_ALE__GPIO3_IO00 0x40000150 /* M2SKT_GDIS# */ MX8MP_IOMUXC_SAI3_TXD__GPIO5_IO01 0x40000104 /* UART_TERM */ MX8MP_IOMUXC_SAI3_TXFS__GPIO4_IO31 0x40000104 /* UART_RS485 */ MX8MP_IOMUXC_SAI3_TXC__GPIO5_IO00 0x40000104 /* UART_HALF */ @@ -639,47 +639,47 @@ pinctrl_accel: accelgrp { fsl,pins = < - MX8MP_IOMUXC_GPIO1_IO07__GPIO1_IO07 0x159 + MX8MP_IOMUXC_GPIO1_IO07__GPIO1_IO07 0x150 >; }; pinctrl_eqos: eqosgrp { fsl,pins = < - MX8MP_IOMUXC_ENET_MDC__ENET_QOS_MDC 0x3 - MX8MP_IOMUXC_ENET_MDIO__ENET_QOS_MDIO 0x3 - MX8MP_IOMUXC_ENET_RD0__ENET_QOS_RGMII_RD0 0x91 - MX8MP_IOMUXC_ENET_RD1__ENET_QOS_RGMII_RD1 0x91 - MX8MP_IOMUXC_ENET_RD2__ENET_QOS_RGMII_RD2 0x91 - MX8MP_IOMUXC_ENET_RD3__ENET_QOS_RGMII_RD3 0x91 - MX8MP_IOMUXC_ENET_RXC__CCM_ENET_QOS_CLOCK_GENERATE_RX_CLK 0x91 - MX8MP_IOMUXC_ENET_RX_CTL__ENET_QOS_RGMII_RX_CTL 0x91 - MX8MP_IOMUXC_ENET_TD0__ENET_QOS_RGMII_TD0 0x1f - MX8MP_IOMUXC_ENET_TD1__ENET_QOS_RGMII_TD1 0x1f - MX8MP_IOMUXC_ENET_TD2__ENET_QOS_RGMII_TD2 0x1f - MX8MP_IOMUXC_ENET_TD3__ENET_QOS_RGMII_TD3 0x1f - MX8MP_IOMUXC_ENET_TX_CTL__ENET_QOS_RGMII_TX_CTL 0x1f - MX8MP_IOMUXC_ENET_TXC__CCM_ENET_QOS_CLOCK_GENERATE_TX_CLK 0x1f - MX8MP_IOMUXC_SAI3_RXD__GPIO4_IO30 0x141 /* RST# */ - MX8MP_IOMUXC_SAI3_RXFS__GPIO4_IO28 0x159 /* IRQ# */ + MX8MP_IOMUXC_ENET_MDC__ENET_QOS_MDC 0x2 + MX8MP_IOMUXC_ENET_MDIO__ENET_QOS_MDIO 0x2 + MX8MP_IOMUXC_ENET_RD0__ENET_QOS_RGMII_RD0 0x90 + MX8MP_IOMUXC_ENET_RD1__ENET_QOS_RGMII_RD1 0x90 + MX8MP_IOMUXC_ENET_RD2__ENET_QOS_RGMII_RD2 0x90 + MX8MP_IOMUXC_ENET_RD3__ENET_QOS_RGMII_RD3 0x90 + MX8MP_IOMUXC_ENET_RXC__CCM_ENET_QOS_CLOCK_GENERATE_RX_CLK 0x90 + MX8MP_IOMUXC_ENET_RX_CTL__ENET_QOS_RGMII_RX_CTL 0x90 + MX8MP_IOMUXC_ENET_TD0__ENET_QOS_RGMII_TD0 0x16 + MX8MP_IOMUXC_ENET_TD1__ENET_QOS_RGMII_TD1 0x16 + MX8MP_IOMUXC_ENET_TD2__ENET_QOS_RGMII_TD2 0x16 + MX8MP_IOMUXC_ENET_TD3__ENET_QOS_RGMII_TD3 0x16 + MX8MP_IOMUXC_ENET_TX_CTL__ENET_QOS_RGMII_TX_CTL 0x16 + MX8MP_IOMUXC_ENET_TXC__CCM_ENET_QOS_CLOCK_GENERATE_TX_CLK 0x16 + MX8MP_IOMUXC_SAI3_RXD__GPIO4_IO30 0x140 /* RST# */ + MX8MP_IOMUXC_SAI3_RXFS__GPIO4_IO28 0x150 /* IRQ# */ >; }; pinctrl_fec: fecgrp { fsl,pins = < - MX8MP_IOMUXC_SAI1_RXD4__ENET1_RGMII_RD0 0x91 - MX8MP_IOMUXC_SAI1_RXD5__ENET1_RGMII_RD1 0x91 - MX8MP_IOMUXC_SAI1_RXD6__ENET1_RGMII_RD2 0x91 - MX8MP_IOMUXC_SAI1_RXD7__ENET1_RGMII_RD3 0x91 - MX8MP_IOMUXC_SAI1_TXC__ENET1_RGMII_RXC 0x91 - MX8MP_IOMUXC_SAI1_TXFS__ENET1_RGMII_RX_CTL 0x91 - MX8MP_IOMUXC_SAI1_TXD0__ENET1_RGMII_TD0 0x1f - MX8MP_IOMUXC_SAI1_TXD1__ENET1_RGMII_TD1 0x1f - MX8MP_IOMUXC_SAI1_TXD2__ENET1_RGMII_TD2 0x1f - MX8MP_IOMUXC_SAI1_TXD3__ENET1_RGMII_TD3 0x1f - MX8MP_IOMUXC_SAI1_TXD4__ENET1_RGMII_TX_CTL 0x1f - MX8MP_IOMUXC_SAI1_TXD5__ENET1_RGMII_TXC 0x1f - MX8MP_IOMUXC_SAI1_RXFS__ENET1_1588_EVENT0_IN 0x141 - MX8MP_IOMUXC_SAI1_RXC__ENET1_1588_EVENT0_OUT 0x141 + MX8MP_IOMUXC_SAI1_RXD4__ENET1_RGMII_RD0 0x90 + MX8MP_IOMUXC_SAI1_RXD5__ENET1_RGMII_RD1 0x90 + MX8MP_IOMUXC_SAI1_RXD6__ENET1_RGMII_RD2 0x90 + MX8MP_IOMUXC_SAI1_RXD7__ENET1_RGMII_RD3 0x90 + MX8MP_IOMUXC_SAI1_TXC__ENET1_RGMII_RXC 0x90 + MX8MP_IOMUXC_SAI1_TXFS__ENET1_RGMII_RX_CTL 0x90 + MX8MP_IOMUXC_SAI1_TXD0__ENET1_RGMII_TD0 0x16 + MX8MP_IOMUXC_SAI1_TXD1__ENET1_RGMII_TD1 0x16 + MX8MP_IOMUXC_SAI1_TXD2__ENET1_RGMII_TD2 0x16 + MX8MP_IOMUXC_SAI1_TXD3__ENET1_RGMII_TD3 0x16 + MX8MP_IOMUXC_SAI1_TXD4__ENET1_RGMII_TX_CTL 0x16 + MX8MP_IOMUXC_SAI1_TXD5__ENET1_RGMII_TXC 0x16 + MX8MP_IOMUXC_SAI1_RXFS__ENET1_1588_EVENT0_IN 0x140 + MX8MP_IOMUXC_SAI1_RXC__ENET1_1588_EVENT0_OUT 0x140 >; }; @@ -692,61 +692,61 @@ pinctrl_gsc: gscgrp { fsl,pins = < - MX8MP_IOMUXC_SAI1_MCLK__GPIO4_IO20 0x159 + MX8MP_IOMUXC_SAI1_MCLK__GPIO4_IO20 0x150 >; }; pinctrl_i2c1: i2c1grp { fsl,pins = < - MX8MP_IOMUXC_I2C1_SCL__I2C1_SCL 0x400001c3 - MX8MP_IOMUXC_I2C1_SDA__I2C1_SDA 0x400001c3 + MX8MP_IOMUXC_I2C1_SCL__I2C1_SCL 0x400001c2 + MX8MP_IOMUXC_I2C1_SDA__I2C1_SDA 0x400001c2 >; }; pinctrl_i2c2: i2c2grp { fsl,pins = < - MX8MP_IOMUXC_I2C2_SCL__I2C2_SCL 0x400001c3 - MX8MP_IOMUXC_I2C2_SDA__I2C2_SDA 0x400001c3 + MX8MP_IOMUXC_I2C2_SCL__I2C2_SCL 0x400001c2 + MX8MP_IOMUXC_I2C2_SDA__I2C2_SDA 0x400001c2 >; }; pinctrl_i2c3: i2c3grp { fsl,pins = < - MX8MP_IOMUXC_I2C3_SCL__I2C3_SCL 0x400001c3 - MX8MP_IOMUXC_I2C3_SDA__I2C3_SDA 0x400001c3 + MX8MP_IOMUXC_I2C3_SCL__I2C3_SCL 0x400001c2 + MX8MP_IOMUXC_I2C3_SDA__I2C3_SDA 0x400001c2 >; }; pinctrl_i2c4: i2c4grp { fsl,pins = < - MX8MP_IOMUXC_I2C4_SCL__I2C4_SCL 0x400001c3 - MX8MP_IOMUXC_I2C4_SDA__I2C4_SDA 0x400001c3 + MX8MP_IOMUXC_I2C4_SCL__I2C4_SCL 0x400001c2 + MX8MP_IOMUXC_I2C4_SDA__I2C4_SDA 0x400001c2 >; }; pinctrl_ksz: kszgrp { fsl,pins = < - MX8MP_IOMUXC_SAI3_RXC__GPIO4_IO29 0x159 /* IRQ# */ - MX8MP_IOMUXC_SAI3_MCLK__GPIO5_IO02 0x141 /* RST# */ + MX8MP_IOMUXC_SAI3_RXC__GPIO4_IO29 0x150 /* IRQ# */ + MX8MP_IOMUXC_SAI3_MCLK__GPIO5_IO02 0x140 /* RST# */ >; }; pinctrl_gpio_leds: ledgrp { fsl,pins = < - MX8MP_IOMUXC_SD2_DATA0__GPIO2_IO15 0x19 - MX8MP_IOMUXC_SD2_DATA1__GPIO2_IO16 0x19 + MX8MP_IOMUXC_SD2_DATA0__GPIO2_IO15 0x10 + MX8MP_IOMUXC_SD2_DATA1__GPIO2_IO16 0x10 >; }; pinctrl_pmic: pmicgrp { fsl,pins = < - MX8MP_IOMUXC_NAND_DATA01__GPIO3_IO07 0x141 + MX8MP_IOMUXC_NAND_DATA01__GPIO3_IO07 0x140 >; }; pinctrl_pps: ppsgrp { fsl,pins = < - MX8MP_IOMUXC_GPIO1_IO12__GPIO1_IO12 0x141 + MX8MP_IOMUXC_GPIO1_IO12__GPIO1_IO12 0x140 >; }; @@ -758,13 +758,13 @@ pinctrl_reg_usb2: regusb2grp { fsl,pins = < - MX8MP_IOMUXC_GPIO1_IO06__GPIO1_IO06 0x141 + MX8MP_IOMUXC_GPIO1_IO06__GPIO1_IO06 0x140 >; }; pinctrl_reg_wifi: regwifigrp { fsl,pins = < - MX8MP_IOMUXC_NAND_DATA03__GPIO3_IO09 0x119 + MX8MP_IOMUXC_NAND_DATA03__GPIO3_IO09 0x110 >; }; @@ -811,7 +811,7 @@ pinctrl_uart3_gpio: uart3gpiogrp { fsl,pins = < - MX8MP_IOMUXC_NAND_DATA02__GPIO3_IO08 0x119 + MX8MP_IOMUXC_NAND_DATA02__GPIO3_IO08 0x110 >; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi index d9542dfff83fba7888c8477d270114b8542df639..410d0d5e6f1e524a5d0d5ffd974db6f953e2f9ac 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi @@ -595,7 +595,7 @@ pgc_ispdwp: power-domain@18 { #power-domain-cells = <0>; reg = ; - clocks = <&clk IMX8MP_CLK_MEDIA_ISP_DIV>; + clocks = <&clk IMX8MP_CLK_MEDIA_ISP_ROOT>; }; }; }; diff --git a/arch/arm64/boot/dts/freescale/s32g2.dtsi b/arch/arm64/boot/dts/freescale/s32g2.dtsi index 59ea8a25aa4c16d05d62d0cf4492915e0df4f4f1..824d401e7a2c5325299c8ce3b90303f4162382ac 100644 --- a/arch/arm64/boot/dts/freescale/s32g2.dtsi +++ b/arch/arm64/boot/dts/freescale/s32g2.dtsi @@ -79,7 +79,7 @@ }; }; - soc { + soc@0 { compatible = "simple-bus"; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm64/boot/dts/qcom/msm8992-lg-bullhead.dtsi b/arch/arm64/boot/dts/qcom/msm8992-lg-bullhead.dtsi index 3b0cc85d66742a4510ab18b2e7c05bff89f737f5..71e373b11de9d8bb990c852aa28469653100f43e 100644 --- a/arch/arm64/boot/dts/qcom/msm8992-lg-bullhead.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8992-lg-bullhead.dtsi @@ -74,7 +74,7 @@ vdd_l17_29-supply = <&vph_pwr>; vdd_l20_21-supply = <&vph_pwr>; vdd_l25-supply = <&pm8994_s5>; - vdd_lvs1_2 = <&pm8994_s4>; + vdd_lvs1_2-supply = <&pm8994_s4>; /* S1, S2, S6 and S12 are managed by RPMPD */ diff --git a/arch/arm64/boot/dts/qcom/msm8992-xiaomi-libra.dts b/arch/arm64/boot/dts/qcom/msm8992-xiaomi-libra.dts index 7748b745a5df527e613bb3ca76c56552648acfc6..afa91ca9a3dcd423b6e8b6cba5bb3745735536c5 100644 --- a/arch/arm64/boot/dts/qcom/msm8992-xiaomi-libra.dts +++ b/arch/arm64/boot/dts/qcom/msm8992-xiaomi-libra.dts @@ -171,7 +171,7 @@ vdd_l17_29-supply = <&vph_pwr>; vdd_l20_21-supply = <&vph_pwr>; vdd_l25-supply = <&pm8994_s5>; - vdd_lvs1_2 = <&pm8994_s4>; + vdd_lvs1_2-supply = <&pm8994_s4>; /* S1, S2, S6 and S12 are managed by RPMPD */ diff --git a/arch/arm64/boot/dts/qcom/msm8994.dtsi b/arch/arm64/boot/dts/qcom/msm8994.dtsi index 0318d42c573649d2c8e56a3bd0d1c906697165c8..1ac2913b182ccdfb45cef036314ec87c8c400914 100644 --- a/arch/arm64/boot/dts/qcom/msm8994.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8994.dtsi @@ -100,7 +100,7 @@ CPU6: cpu@102 { device_type = "cpu"; compatible = "arm,cortex-a57"; - reg = <0x0 0x101>; + reg = <0x0 0x102>; enable-method = "psci"; next-level-cache = <&L2_1>; }; @@ -108,7 +108,7 @@ CPU7: cpu@103 { device_type = "cpu"; compatible = "arm,cortex-a57"; - reg = <0x0 0x101>; + reg = <0x0 0x103>; enable-method = "psci"; next-level-cache = <&L2_1>; }; diff --git a/arch/arm64/boot/dts/qcom/sc7180-trogdor-homestar.dtsi b/arch/arm64/boot/dts/qcom/sc7180-trogdor-homestar.dtsi index 9b3e3d13c1658d80a517df6773cd7b223496a2a7..d1e2df5164eaab4befb4e741d21b02908f1e9df7 100644 --- a/arch/arm64/boot/dts/qcom/sc7180-trogdor-homestar.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180-trogdor-homestar.dtsi @@ -5,7 +5,7 @@ * Copyright 2021 Google LLC. */ -#include "sc7180-trogdor.dtsi" +/* This file must be included after sc7180-trogdor.dtsi */ / { /* BOARD-SPECIFIC TOP LEVEL NODES */ diff --git a/arch/arm64/boot/dts/qcom/sc7180-trogdor-lazor.dtsi b/arch/arm64/boot/dts/qcom/sc7180-trogdor-lazor.dtsi index fe2369c29aad2eed665843875575e58c416ae608..88f6a7d4d0203a708fe9a8d0f49eb85139b135cb 100644 --- a/arch/arm64/boot/dts/qcom/sc7180-trogdor-lazor.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180-trogdor-lazor.dtsi @@ -5,7 +5,7 @@ * Copyright 2020 Google LLC. */ -#include "sc7180-trogdor.dtsi" +/* This file must be included after sc7180-trogdor.dtsi */ &ap_sar_sensor { semtech,cs0-ground; diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi index 0692ae0e60a42dfce746510e412a1e92afea0269..038538c8c6141686dcb0d6d8dda20afa7ffe052b 100644 --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi @@ -4244,7 +4244,7 @@ power-domains = <&dispcc MDSS_GDSC>; - clocks = <&gcc GCC_DISP_AHB_CLK>, + clocks = <&dispcc DISP_CC_MDSS_AHB_CLK>, <&dispcc DISP_CC_MDSS_MDP_CLK>; clock-names = "iface", "core"; diff --git a/arch/arm64/boot/dts/qcom/sm8450.dtsi b/arch/arm64/boot/dts/qcom/sm8450.dtsi index 7d08fad76371734510ac07a32febb0e724fc6ef0..b87756bf1ce4481a6440182df05d362892bc7988 100644 --- a/arch/arm64/boot/dts/qcom/sm8450.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8450.dtsi @@ -2853,6 +2853,16 @@ reg = <0x0 0x17100000 0x0 0x10000>, /* GICD */ <0x0 0x17180000 0x0 0x200000>; /* GICR * 8 */ interrupts = ; + #address-cells = <2>; + #size-cells = <2>; + ranges; + + gic_its: msi-controller@17140000 { + compatible = "arm,gic-v3-its"; + reg = <0x0 0x17140000 0x0 0x20000>; + msi-controller; + #msi-cells = <1>; + }; }; timer@17420000 { @@ -3037,8 +3047,8 @@ iommus = <&apps_smmu 0xe0 0x0>; - interconnects = <&aggre1_noc MASTER_UFS_MEM &mc_virt SLAVE_EBI1>, - <&gem_noc MASTER_APPSS_PROC &config_noc SLAVE_UFS_MEM_CFG>; + interconnects = <&aggre1_noc MASTER_UFS_MEM 0 &mc_virt SLAVE_EBI1 0>, + <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_UFS_MEM_CFG 0>; interconnect-names = "ufs-ddr", "cpu-ufs"; clock-names = "core_clk", diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi index 913d845eb51a87a367e97c9e5d7e57c2cbe60940..1977103a5ef44c20074466a589ac037b4b691853 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi @@ -376,7 +376,8 @@ camera: &i2c7 { <&cru ACLK_VIO>, <&cru ACLK_GIC_PRE>, <&cru PCLK_DDR>, - <&cru ACLK_HDCP>; + <&cru ACLK_HDCP>, + <&cru ACLK_VDU>; assigned-clock-rates = <600000000>, <1600000000>, <1000000000>, @@ -388,6 +389,7 @@ camera: &i2c7 { <400000000>, <200000000>, <200000000>, + <400000000>, <400000000>; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index fbd0346624e69cadad2ef15854e55fc2b65289fd..9d5b0e8c9cca72af9630f4e26496b294458bc071 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -1462,7 +1462,8 @@ <&cru HCLK_PERILP1>, <&cru PCLK_PERILP1>, <&cru ACLK_VIO>, <&cru ACLK_HDCP>, <&cru ACLK_GIC_PRE>, - <&cru PCLK_DDR>; + <&cru PCLK_DDR>, + <&cru ACLK_VDU>; assigned-clock-rates = <594000000>, <800000000>, <1000000000>, @@ -1473,7 +1474,8 @@ <100000000>, <50000000>, <400000000>, <400000000>, <200000000>, - <200000000>; + <200000000>, + <400000000>; }; grf: syscon@ff770000 { diff --git a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts index 1534e11a9ad17373bb77de632bccc5aaae80f346..fa953b73664218328882b8dd7c559a9afea2f935 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts @@ -687,6 +687,7 @@ }; &usb_host0_xhci { + dr_mode = "host"; status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts index 7bdcecc0dfe4b078ba5877df74aabcf3a5782d36..02d5f5a8ca036171f715114212eb9df5b7042d0c 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts @@ -133,7 +133,7 @@ assigned-clocks = <&cru SCLK_GMAC1_RX_TX>, <&cru SCLK_GMAC1_RGMII_SPEED>, <&cru SCLK_GMAC1>; assigned-clock-parents = <&cru SCLK_GMAC1_RGMII_SPEED>, <&cru SCLK_GMAC1>, <&gmac1_clkin>; clock_in_out = "input"; - phy-mode = "rgmii-id"; + phy-mode = "rgmii"; phy-supply = <&vcc_3v3>; pinctrl-names = "default"; pinctrl-0 = <&gmac1m1_miim diff --git a/arch/arm64/boot/dts/ti/k3-am64-main.dtsi b/arch/arm64/boot/dts/ti/k3-am64-main.dtsi index f64b368c6c37175799bee972d92a5c4d4caa3a5c..cdb530597c5ebf9dd6b7e795c375caeae0c1d517 100644 --- a/arch/arm64/boot/dts/ti/k3-am64-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am64-main.dtsi @@ -456,13 +456,11 @@ clock-names = "clk_ahb", "clk_xin"; mmc-ddr-1_8v; mmc-hs200-1_8v; - mmc-hs400-1_8v; ti,trm-icp = <0x2>; ti,otap-del-sel-legacy = <0x0>; ti,otap-del-sel-mmc-hs = <0x0>; ti,otap-del-sel-ddr52 = <0x6>; ti,otap-del-sel-hs200 = <0x7>; - ti,otap-del-sel-hs400 = <0x4>; }; sdhci1: mmc@fa00000 { diff --git a/arch/arm64/boot/dts/ti/k3-j721s2-main.dtsi b/arch/arm64/boot/dts/ti/k3-j721s2-main.dtsi index be7f39299894e16e53b0f882cb4f3cad691371e6..19966f72c5b387b3ad403fba90c79154812a3fd7 100644 --- a/arch/arm64/boot/dts/ti/k3-j721s2-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j721s2-main.dtsi @@ -33,7 +33,7 @@ ranges; #interrupt-cells = <3>; interrupt-controller; - reg = <0x00 0x01800000 0x00 0x200000>, /* GICD */ + reg = <0x00 0x01800000 0x00 0x100000>, /* GICD */ <0x00 0x01900000 0x00 0x100000>, /* GICR */ <0x00 0x6f000000 0x00 0x2000>, /* GICC */ <0x00 0x6f010000 0x00 0x1000>, /* GICH */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 47a1e25e25bbc836e2c99cec064fd9dfd4ce7a09..de32152cea0484fb03ea11d6417fb410db577b68 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -362,11 +362,6 @@ struct kvm_vcpu_arch { struct arch_timer_cpu timer_cpu; struct kvm_pmu pmu; - /* - * Anything that is not used directly from assembly code goes - * here. - */ - /* * Guest registers we preserve during guest debugging. * diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 3c8af033a9976cdfd567a5a6086f013aa30e11dd..0e80db4327b604c3613890289fc07e01ed6df6ae 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -113,6 +113,9 @@ static __always_inline bool has_vhe(void) /* * Code only run in VHE/NVHE hyp context can assume VHE is present or * absent. Otherwise fall back to caps. + * This allows the compiler to discard VHE-specific code from the + * nVHE object, reducing the number of external symbol references + * needed to link. */ if (is_vhe_hyp_code()) return true; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 42ea2bd856c6059e3813d6219d754986aeca7669..8d88433de81da0a0ecf890f8328433fa4a0c4d9a 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1974,15 +1974,7 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap) #ifdef CONFIG_KVM static bool is_kvm_protected_mode(const struct arm64_cpu_capabilities *entry, int __unused) { - if (kvm_get_mode() != KVM_MODE_PROTECTED) - return false; - - if (is_kernel_in_hyp_mode()) { - pr_warn("Protected KVM not available with VHE\n"); - return false; - } - - return true; + return kvm_get_mode() == KVM_MODE_PROTECTED; } #endif /* CONFIG_KVM */ @@ -3109,7 +3101,6 @@ void cpu_set_feature(unsigned int num) WARN_ON(num >= MAX_CPU_FEATURES); elf_hwcap |= BIT(num); } -EXPORT_SYMBOL_GPL(cpu_set_feature); bool cpu_have_feature(unsigned int num) { diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S index d42a205ef6259c8354115aa05caa2b506bc0fb75..bd5df50e46432a4b0efcb55f081fd3278c948820 100644 --- a/arch/arm64/kernel/entry-ftrace.S +++ b/arch/arm64/kernel/entry-ftrace.S @@ -102,7 +102,6 @@ SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) * x19-x29 per the AAPCS, and we created frame records upon entry, so we need * to restore x0-x8, x29, and x30. */ -ftrace_common_return: /* Restore function arguments */ ldp x0, x1, [sp] ldp x2, x3, [sp, #S_X2] diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index f447c4a36f694ae72f24b379ceae4e71a426a53e..ea5dc7c90f465f8bada797603df16249b6979a09 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -78,47 +78,76 @@ static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr) } /* - * Turn on the call to ftrace_caller() in instrumented function + * Find the address the callsite must branch to in order to reach '*addr'. + * + * Due to the limited range of 'BL' instructions, modules may be placed too far + * away to branch directly and must use a PLT. + * + * Returns true when '*addr' contains a reachable target address, or has been + * modified to contain a PLT address. Returns false otherwise. */ -int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +static bool ftrace_find_callable_addr(struct dyn_ftrace *rec, + struct module *mod, + unsigned long *addr) { unsigned long pc = rec->ip; - u32 old, new; - long offset = (long)pc - (long)addr; + long offset = (long)*addr - (long)pc; + struct plt_entry *plt; - if (offset < -SZ_128M || offset >= SZ_128M) { - struct module *mod; - struct plt_entry *plt; + /* + * When the target is within range of the 'BL' instruction, use 'addr' + * as-is and branch to that directly. + */ + if (offset >= -SZ_128M && offset < SZ_128M) + return true; - if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS)) - return -EINVAL; + /* + * When the target is outside of the range of a 'BL' instruction, we + * must use a PLT to reach it. We can only place PLTs for modules, and + * only when module PLT support is built-in. + */ + if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS)) + return false; - /* - * On kernels that support module PLTs, the offset between the - * branch instruction and its target may legally exceed the - * range of an ordinary relative 'bl' opcode. In this case, we - * need to branch via a trampoline in the module. - * - * NOTE: __module_text_address() must be called with preemption - * disabled, but we can rely on ftrace_lock to ensure that 'mod' - * retains its validity throughout the remainder of this code. - */ + /* + * 'mod' is only set at module load time, but if we end up + * dealing with an out-of-range condition, we can assume it + * is due to a module being loaded far away from the kernel. + * + * NOTE: __module_text_address() must be called with preemption + * disabled, but we can rely on ftrace_lock to ensure that 'mod' + * retains its validity throughout the remainder of this code. + */ + if (!mod) { preempt_disable(); mod = __module_text_address(pc); preempt_enable(); + } - if (WARN_ON(!mod)) - return -EINVAL; + if (WARN_ON(!mod)) + return false; - plt = get_ftrace_plt(mod, addr); - if (!plt) { - pr_err("ftrace: no module PLT for %ps\n", (void *)addr); - return -EINVAL; - } - - addr = (unsigned long)plt; + plt = get_ftrace_plt(mod, *addr); + if (!plt) { + pr_err("ftrace: no module PLT for %ps\n", (void *)*addr); + return false; } + *addr = (unsigned long)plt; + return true; +} + +/* + * Turn on the call to ftrace_caller() in instrumented function + */ +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned long pc = rec->ip; + u32 old, new; + + if (!ftrace_find_callable_addr(rec, NULL, &addr)) + return -EINVAL; + old = aarch64_insn_gen_nop(); new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK); @@ -132,6 +161,11 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long pc = rec->ip; u32 old, new; + if (!ftrace_find_callable_addr(rec, NULL, &old_addr)) + return -EINVAL; + if (!ftrace_find_callable_addr(rec, NULL, &addr)) + return -EINVAL; + old = aarch64_insn_gen_branch_imm(pc, old_addr, AARCH64_INSN_BRANCH_LINK); new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK); @@ -181,54 +215,15 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { unsigned long pc = rec->ip; - bool validate = true; u32 old = 0, new; - long offset = (long)pc - (long)addr; - if (offset < -SZ_128M || offset >= SZ_128M) { - u32 replaced; - - if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS)) - return -EINVAL; - - /* - * 'mod' is only set at module load time, but if we end up - * dealing with an out-of-range condition, we can assume it - * is due to a module being loaded far away from the kernel. - */ - if (!mod) { - preempt_disable(); - mod = __module_text_address(pc); - preempt_enable(); - - if (WARN_ON(!mod)) - return -EINVAL; - } - - /* - * The instruction we are about to patch may be a branch and - * link instruction that was redirected via a PLT entry. In - * this case, the normal validation will fail, but we can at - * least check that we are dealing with a branch and link - * instruction that points into the right module. - */ - if (aarch64_insn_read((void *)pc, &replaced)) - return -EFAULT; - - if (!aarch64_insn_is_bl(replaced) || - !within_module(pc + aarch64_get_branch_offset(replaced), - mod)) - return -EINVAL; - - validate = false; - } else { - old = aarch64_insn_gen_branch_imm(pc, addr, - AARCH64_INSN_BRANCH_LINK); - } + if (!ftrace_find_callable_addr(rec, mod, &addr)) + return -EINVAL; + old = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK); new = aarch64_insn_gen_nop(); - return ftrace_modify_code(pc, old, new, validate); + return ftrace_modify_code(pc, old, new, true); } void arch_ftrace_update_code(int command) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index cf3a759f10d4ef55e6b58c3fc581dde5bff93a9f..fea3223704b6339a0381ddee334168ec5d83201b 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -303,14 +303,13 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p) early_fixmap_init(); early_ioremap_init(); + setup_machine_fdt(__fdt_pointer); + /* * Initialise the static keys early as they may be enabled by the - * cpufeature code, early parameters, and DT setup. + * cpufeature code and early parameters. */ jump_label_init(); - - setup_machine_fdt(__fdt_pointer); - parse_early_param(); /* diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 4e39ace073af0fa70ff255c301f869df6463ad2f..3b8d062e30ea412b148ccb6bc214b0907b3d16f2 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -1230,6 +1230,9 @@ bool kvm_arch_timer_get_input_level(int vintid) struct kvm_vcpu *vcpu = kvm_get_running_vcpu(); struct arch_timer_context *timer; + if (WARN(!vcpu, "No vcpu context!\n")) + return false; + if (vintid == vcpu_vtimer(vcpu)->irq.irq) timer = vcpu_vtimer(vcpu); else if (vintid == vcpu_ptimer(vcpu)->irq.irq) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 400bb0fe2745b4549a616b717a396f4df18ed697..83a7f61354d3f597cdbe328f3dd561efcb8d6982 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -150,8 +150,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (ret) goto out_free_stage2_pgd; - if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL)) + if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL)) { + ret = -ENOMEM; goto out_free_stage2_pgd; + } cpumask_copy(kvm->arch.supported_cpus, cpu_possible_mask); kvm_vgic_early_init(kvm); @@ -2110,11 +2112,11 @@ static int finalize_hyp_mode(void) return 0; /* - * Exclude HYP BSS from kmemleak so that it doesn't get peeked - * at, which would end badly once the section is inaccessible. - * None of other sections should ever be introspected. + * Exclude HYP sections from kmemleak so that they don't get peeked + * at, which would end badly once inaccessible. */ kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start); + kmemleak_free_part(__va(hyp_mem_base), hyp_mem_size); return pkvm_drop_host_privileges(); } @@ -2271,7 +2273,11 @@ static int __init early_kvm_mode_cfg(char *arg) return -EINVAL; if (strcmp(arg, "protected") == 0) { - kvm_mode = KVM_MODE_PROTECTED; + if (!is_kernel_in_hyp_mode()) + kvm_mode = KVM_MODE_PROTECTED; + else + pr_warn_once("Protected KVM not available with VHE\n"); + return 0; } diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 3d251a4d2cf7bfdb3ed82df94d30b8d6b48a458c..6012b08ecb14e7f6464374cf780c9bf271af1cca 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -80,6 +80,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) vcpu->arch.flags &= ~KVM_ARM64_FP_ENABLED; vcpu->arch.flags |= KVM_ARM64_FP_HOST; + vcpu->arch.flags &= ~KVM_ARM64_HOST_SVE_ENABLED; if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN) vcpu->arch.flags |= KVM_ARM64_HOST_SVE_ENABLED; @@ -93,6 +94,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) * operations. Do this for ZA as well for now for simplicity. */ if (system_supports_sme()) { + vcpu->arch.flags &= ~KVM_ARM64_HOST_SME_ENABLED; if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN) vcpu->arch.flags |= KVM_ARM64_HOST_SME_ENABLED; diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 78edf077fa3b61e9d99bcba88b8e370b8dd09bd3..1e78acf9662eb17490f2973ccd5a8831c876ee9b 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -314,15 +314,11 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range) int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot) { - hyp_assert_lock_held(&host_kvm.lock); - return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot); } int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id) { - hyp_assert_lock_held(&host_kvm.lock); - return host_stage2_try(kvm_pgtable_stage2_set_owner, &host_kvm.pgt, addr, size, &host_s2_pool, owner_id); } diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c index b6d86e423319f5cce0fda2fd191a1dc994652b88..35a4331ba5f31e6be1ebdcf8b87e65910346c023 100644 --- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -243,15 +243,9 @@ u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id) case SYS_ID_AA64MMFR2_EL1: return get_pvm_id_aa64mmfr2(vcpu); default: - /* - * Should never happen because all cases are covered in - * pvm_sys_reg_descs[]. - */ - WARN_ON(1); - break; + /* Unhandled ID register, RAZ */ + return 0; } - - return 0; } static u64 read_id_reg(const struct kvm_vcpu *vcpu, @@ -332,6 +326,16 @@ static bool pvm_gic_read_sre(struct kvm_vcpu *vcpu, /* Mark the specified system register as an AArch64 feature id register. */ #define AARCH64(REG) { SYS_DESC(REG), .access = pvm_access_id_aarch64 } +/* + * sys_reg_desc initialiser for architecturally unallocated cpufeature ID + * register with encoding Op0=3, Op1=0, CRn=0, CRm=crm, Op2=op2 + * (1 <= crm < 8, 0 <= Op2 < 8). + */ +#define ID_UNALLOCATED(crm, op2) { \ + Op0(3), Op1(0), CRn(0), CRm(crm), Op2(op2), \ + .access = pvm_access_id_aarch64, \ +} + /* Mark the specified system register as Read-As-Zero/Write-Ignored */ #define RAZ_WI(REG) { SYS_DESC(REG), .access = pvm_access_raz_wi } @@ -375,24 +379,46 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = { AARCH32(SYS_MVFR0_EL1), AARCH32(SYS_MVFR1_EL1), AARCH32(SYS_MVFR2_EL1), + ID_UNALLOCATED(3,3), AARCH32(SYS_ID_PFR2_EL1), AARCH32(SYS_ID_DFR1_EL1), AARCH32(SYS_ID_MMFR5_EL1), + ID_UNALLOCATED(3,7), /* AArch64 ID registers */ /* CRm=4 */ AARCH64(SYS_ID_AA64PFR0_EL1), AARCH64(SYS_ID_AA64PFR1_EL1), + ID_UNALLOCATED(4,2), + ID_UNALLOCATED(4,3), AARCH64(SYS_ID_AA64ZFR0_EL1), + ID_UNALLOCATED(4,5), + ID_UNALLOCATED(4,6), + ID_UNALLOCATED(4,7), AARCH64(SYS_ID_AA64DFR0_EL1), AARCH64(SYS_ID_AA64DFR1_EL1), + ID_UNALLOCATED(5,2), + ID_UNALLOCATED(5,3), AARCH64(SYS_ID_AA64AFR0_EL1), AARCH64(SYS_ID_AA64AFR1_EL1), + ID_UNALLOCATED(5,6), + ID_UNALLOCATED(5,7), AARCH64(SYS_ID_AA64ISAR0_EL1), AARCH64(SYS_ID_AA64ISAR1_EL1), + AARCH64(SYS_ID_AA64ISAR2_EL1), + ID_UNALLOCATED(6,3), + ID_UNALLOCATED(6,4), + ID_UNALLOCATED(6,5), + ID_UNALLOCATED(6,6), + ID_UNALLOCATED(6,7), AARCH64(SYS_ID_AA64MMFR0_EL1), AARCH64(SYS_ID_AA64MMFR1_EL1), AARCH64(SYS_ID_AA64MMFR2_EL1), + ID_UNALLOCATED(7,3), + ID_UNALLOCATED(7,4), + ID_UNALLOCATED(7,5), + ID_UNALLOCATED(7,6), + ID_UNALLOCATED(7,7), /* Scalable Vector Registers are restricted. */ diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v2.c b/arch/arm64/kvm/vgic/vgic-mmio-v2.c index 77a67e9d3d14b947fa8e8369da4a184024bf03df..e070cda86e12ffeb12a94284cc34e93d55cf4353 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v2.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v2.c @@ -429,11 +429,11 @@ static const struct vgic_register_region vgic_v2_dist_registers[] = { VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_SET, vgic_mmio_read_pending, vgic_mmio_write_spending, - NULL, vgic_uaccess_write_spending, 1, + vgic_uaccess_read_pending, vgic_uaccess_write_spending, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_CLEAR, vgic_mmio_read_pending, vgic_mmio_write_cpending, - NULL, vgic_uaccess_write_cpending, 1, + vgic_uaccess_read_pending, vgic_uaccess_write_cpending, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_SET, vgic_mmio_read_active, vgic_mmio_write_sactive, diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index f7aa7bcd6fb8cdaed3272b577fa5fa15de290203..f15e29cc63ce1bf6b1dbac3502c1ea370bae1e3d 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -353,42 +353,6 @@ static unsigned long vgic_mmio_read_v3_idregs(struct kvm_vcpu *vcpu, return 0; } -static unsigned long vgic_v3_uaccess_read_pending(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len) -{ - u32 intid = VGIC_ADDR_TO_INTID(addr, 1); - u32 value = 0; - int i; - - /* - * pending state of interrupt is latched in pending_latch variable. - * Userspace will save and restore pending state and line_level - * separately. - * Refer to Documentation/virt/kvm/devices/arm-vgic-v3.rst - * for handling of ISPENDR and ICPENDR. - */ - for (i = 0; i < len * 8; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - bool state = irq->pending_latch; - - if (irq->hw && vgic_irq_is_sgi(irq->intid)) { - int err; - - err = irq_get_irqchip_state(irq->host_irq, - IRQCHIP_STATE_PENDING, - &state); - WARN_ON(err); - } - - if (state) - value |= (1U << i); - - vgic_put_irq(vcpu->kvm, irq); - } - - return value; -} - static int vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val) @@ -666,7 +630,7 @@ static const struct vgic_register_region vgic_v3_dist_registers[] = { VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISPENDR, vgic_mmio_read_pending, vgic_mmio_write_spending, - vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 1, + vgic_uaccess_read_pending, vgic_v3_uaccess_write_pending, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICPENDR, vgic_mmio_read_pending, vgic_mmio_write_cpending, @@ -750,7 +714,7 @@ static const struct vgic_register_region vgic_v3_rd_registers[] = { VGIC_ACCESS_32bit), REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISPENDR0, vgic_mmio_read_pending, vgic_mmio_write_spending, - vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 4, + vgic_uaccess_read_pending, vgic_v3_uaccess_write_pending, 4, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ICPENDR0, vgic_mmio_read_pending, vgic_mmio_write_cpending, diff --git a/arch/arm64/kvm/vgic/vgic-mmio.c b/arch/arm64/kvm/vgic/vgic-mmio.c index 49837d3a3ef5625ba0c5e4d2fed57e362b55ffc0..997d0fce2088345891b22d45e7728412abd90f7d 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio.c +++ b/arch/arm64/kvm/vgic/vgic-mmio.c @@ -226,8 +226,9 @@ int vgic_uaccess_write_cenable(struct kvm_vcpu *vcpu, return 0; } -unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len) +static unsigned long __read_pending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + bool is_user) { u32 intid = VGIC_ADDR_TO_INTID(addr, 1); u32 value = 0; @@ -239,6 +240,15 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, unsigned long flags; bool val; + /* + * When used from userspace with a GICv3 model: + * + * Pending state of interrupt is latched in pending_latch + * variable. Userspace will save and restore pending state + * and line_level separately. + * Refer to Documentation/virt/kvm/devices/arm-vgic-v3.rst + * for handling of ISPENDR and ICPENDR. + */ raw_spin_lock_irqsave(&irq->irq_lock, flags); if (irq->hw && vgic_irq_is_sgi(irq->intid)) { int err; @@ -248,10 +258,20 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, IRQCHIP_STATE_PENDING, &val); WARN_RATELIMIT(err, "IRQ %d", irq->host_irq); - } else if (vgic_irq_is_mapped_level(irq)) { + } else if (!is_user && vgic_irq_is_mapped_level(irq)) { val = vgic_get_phys_line_level(irq); } else { - val = irq_is_pending(irq); + switch (vcpu->kvm->arch.vgic.vgic_model) { + case KVM_DEV_TYPE_ARM_VGIC_V3: + if (is_user) { + val = irq->pending_latch; + break; + } + fallthrough; + default: + val = irq_is_pending(irq); + break; + } } value |= ((u32)val << i); @@ -263,6 +283,18 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, return value; } +unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + return __read_pending(vcpu, addr, len, false); +} + +unsigned long vgic_uaccess_read_pending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + return __read_pending(vcpu, addr, len, true); +} + static bool is_vgic_v2_sgi(struct kvm_vcpu *vcpu, struct vgic_irq *irq) { return (vgic_irq_is_sgi(irq->intid) && diff --git a/arch/arm64/kvm/vgic/vgic-mmio.h b/arch/arm64/kvm/vgic/vgic-mmio.h index 3fa696f198a37ab9aad81832db4b76d61bcf1ece..6082d4b66d3983495967dc02f63db8d72106b77c 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio.h +++ b/arch/arm64/kvm/vgic/vgic-mmio.h @@ -149,6 +149,9 @@ int vgic_uaccess_write_cenable(struct kvm_vcpu *vcpu, unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len); +unsigned long vgic_uaccess_read_pending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len); + void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val); diff --git a/arch/arm64/kvm/vmid.c b/arch/arm64/kvm/vmid.c index 8d5f0506fd87f392de4792431c1fdf7c0b2c2745..d78ae63d7c15f802c3d4796d56daa5e6bde319c2 100644 --- a/arch/arm64/kvm/vmid.c +++ b/arch/arm64/kvm/vmid.c @@ -66,7 +66,7 @@ static void flush_context(void) * the next context-switch, we broadcast TLB flush + I-cache * invalidation over the inner shareable domain on rollover. */ - kvm_call_hyp(__kvm_flush_vm_context); + kvm_call_hyp(__kvm_flush_vm_context); } static bool check_update_reserved_vmid(u64 vmid, u64 newvmid) diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 0ea6cc25dc66356de86d262cc3d48bb993ec3cd0..21c907987080f60d4b25ef70f438332a6c1c24cc 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -218,8 +218,6 @@ SYM_FUNC_ALIAS(__dma_flush_area, __pi___dma_flush_area) */ SYM_FUNC_START(__pi___dma_map_area) add x1, x0, x1 - cmp w2, #DMA_FROM_DEVICE - b.eq __pi_dcache_inval_poc b __pi_dcache_clean_poc SYM_FUNC_END(__pi___dma_map_area) SYM_FUNC_ALIAS(__dma_map_area, __pi___dma_map_area) diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index e2a5ec9fdc0db0289c567090acbc11280e758fc1..3618ef3f6d8181efd61a59081e41d886cb4b34e3 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -214,6 +214,19 @@ static pte_t get_clear_contig(struct mm_struct *mm, return orig_pte; } +static pte_t get_clear_contig_flush(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep, + unsigned long pgsize, + unsigned long ncontig) +{ + pte_t orig_pte = get_clear_contig(mm, addr, ptep, pgsize, ncontig); + struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0); + + flush_tlb_range(&vma, addr, addr + (pgsize * ncontig)); + return orig_pte; +} + /* * Changing some bits of contiguous entries requires us to follow a * Break-Before-Make approach, breaking the whole contiguous set @@ -447,19 +460,20 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, int ncontig, i; size_t pgsize = 0; unsigned long pfn = pte_pfn(pte), dpfn; + struct mm_struct *mm = vma->vm_mm; pgprot_t hugeprot; pte_t orig_pte; if (!pte_cont(pte)) return ptep_set_access_flags(vma, addr, ptep, pte, dirty); - ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize); + ncontig = find_num_contig(mm, addr, ptep, &pgsize); dpfn = pgsize >> PAGE_SHIFT; if (!__cont_access_flags_changed(ptep, pte, ncontig)) return 0; - orig_pte = get_clear_contig(vma->vm_mm, addr, ptep, pgsize, ncontig); + orig_pte = get_clear_contig_flush(mm, addr, ptep, pgsize, ncontig); /* Make sure we don't lose the dirty or young state */ if (pte_dirty(orig_pte)) @@ -470,7 +484,7 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, hugeprot = pte_pgprot(pte); for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) - set_pte_at(vma->vm_mm, addr, ptep, pfn_pte(pfn, hugeprot)); + set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot)); return 1; } @@ -492,7 +506,7 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm, ncontig = find_num_contig(mm, addr, ptep, &pgsize); dpfn = pgsize >> PAGE_SHIFT; - pte = get_clear_contig(mm, addr, ptep, pgsize, ncontig); + pte = get_clear_contig_flush(mm, addr, ptep, pgsize, ncontig); pte = pte_wrprotect(pte); hugeprot = pte_pgprot(pte); @@ -505,17 +519,15 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm, pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { + struct mm_struct *mm = vma->vm_mm; size_t pgsize; int ncontig; - pte_t orig_pte; if (!pte_cont(READ_ONCE(*ptep))) return ptep_clear_flush(vma, addr, ptep); - ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize); - orig_pte = get_clear_contig(vma->vm_mm, addr, ptep, pgsize, ncontig); - flush_tlb_range(vma, addr, addr + pgsize * ncontig); - return orig_pte; + ncontig = find_num_contig(mm, addr, ptep, &pgsize); + return get_clear_contig_flush(mm, addr, ptep, pgsize, ncontig); } static int __init hugetlbpage_init(void) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 1920d52653b4150571d8551b713ae1ed4a9b4b69..53a912befb626b53bfb866fd63d8b6f62bd25c26 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -54,7 +54,6 @@ config LOONGARCH select GENERIC_CMOS_UPDATE select GENERIC_CPU_AUTOPROBE select GENERIC_ENTRY - select GENERIC_FIND_FIRST_BIT select GENERIC_GETTIMEOFDAY select GENERIC_IRQ_MULTI_HANDLER select GENERIC_IRQ_PROBE @@ -77,7 +76,6 @@ config LOONGARCH select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_ASM_MODVERSIONS select HAVE_CONTEXT_TRACKING - select HAVE_COPY_THREAD_TLS select HAVE_DEBUG_STACKOVERFLOW select HAVE_DMA_CONTIGUOUS select HAVE_EXIT_THREAD @@ -86,8 +84,6 @@ config LOONGARCH select HAVE_IOREMAP_PROT select HAVE_IRQ_EXIT_ON_IRQ_STACK select HAVE_IRQ_TIME_ACCOUNTING - select HAVE_MEMBLOCK - select HAVE_MEMBLOCK_NODE_MAP select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI select HAVE_PERF_EVENTS diff --git a/arch/loongarch/include/asm/branch.h b/arch/loongarch/include/asm/branch.h index 3f33c89f35b487bffe9e232ccb9f151862415d12..9a133e4c068e26c98fce575e67a1c174b2e0be03 100644 --- a/arch/loongarch/include/asm/branch.h +++ b/arch/loongarch/include/asm/branch.h @@ -12,10 +12,9 @@ static inline unsigned long exception_era(struct pt_regs *regs) return regs->csr_era; } -static inline int compute_return_era(struct pt_regs *regs) +static inline void compute_return_era(struct pt_regs *regs) { regs->csr_era += 4; - return 0; } #endif /* _ASM_BRANCH_H */ diff --git a/arch/loongarch/include/asm/fpregdef.h b/arch/loongarch/include/asm/fpregdef.h index adb16e4b43b01911a04fc248b20689c3073994ff..b6be527831dd9cc057215b39847470242a23019c 100644 --- a/arch/loongarch/include/asm/fpregdef.h +++ b/arch/loongarch/include/asm/fpregdef.h @@ -48,6 +48,5 @@ #define fcsr1 $r1 #define fcsr2 $r2 #define fcsr3 $r3 -#define vcsr16 $r16 #endif /* _ASM_FPREGDEF_H */ diff --git a/arch/loongarch/include/asm/page.h b/arch/loongarch/include/asm/page.h index 3dba4986f6c92eed4deba1da2ac286c993990527..dc47fc724fa17efd661b122f557d10951ddebc83 100644 --- a/arch/loongarch/include/asm/page.h +++ b/arch/loongarch/include/asm/page.h @@ -6,6 +6,7 @@ #define _ASM_PAGE_H #include +#include /* * PAGE_SHIFT determines the page size diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index 5dc84d8f18d605fedda4ec4c8aa7ea0b01b067e9..d9e86cfa53e2203afa60c09855d670d42a9efce5 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -426,6 +426,11 @@ static inline void update_mmu_cache_pmd(struct vm_area_struct *vma, #define kern_addr_valid(addr) (1) +static inline unsigned long pmd_pfn(pmd_t pmd) +{ + return (pmd_val(pmd) & _PFN_MASK) >> _PFN_SHIFT; +} + #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* We don't have hardware dirty/accessed bits, generic_pmdp_establish is fine.*/ @@ -497,11 +502,6 @@ static inline pmd_t pmd_mkyoung(pmd_t pmd) return pmd; } -static inline unsigned long pmd_pfn(pmd_t pmd) -{ - return (pmd_val(pmd) & _PFN_MASK) >> _PFN_SHIFT; -} - static inline struct page *pmd_page(pmd_t pmd) { if (pmd_trans_huge(pmd)) diff --git a/arch/loongarch/include/asm/processor.h b/arch/loongarch/include/asm/processor.h index 1d63c934b289c14cc6d7c0932ba799ee5a0321ed..57ec45aa078ec06f0e04a97a80cbe850a1e18c26 100644 --- a/arch/loongarch/include/asm/processor.h +++ b/arch/loongarch/include/asm/processor.h @@ -80,7 +80,6 @@ BUILD_FPR_ACCESS(64) struct loongarch_fpu { unsigned int fcsr; - unsigned int vcsr; uint64_t fcc; /* 8x8 */ union fpureg fpr[NUM_FPU_REGS]; }; @@ -161,7 +160,6 @@ struct thread_struct { */ \ .fpu = { \ .fcsr = 0, \ - .vcsr = 0, \ .fcc = 0, \ .fpr = {{{0,},},}, \ }, \ diff --git a/arch/loongarch/kernel/asm-offsets.c b/arch/loongarch/kernel/asm-offsets.c index bfb65eb2844f501b9ceb297fe9c0d4377e65079b..20cd9e16a95abb7b7677e2cfd8dc3f34212ae58d 100644 --- a/arch/loongarch/kernel/asm-offsets.c +++ b/arch/loongarch/kernel/asm-offsets.c @@ -166,7 +166,6 @@ void output_thread_fpu_defines(void) OFFSET(THREAD_FCSR, loongarch_fpu, fcsr); OFFSET(THREAD_FCC, loongarch_fpu, fcc); - OFFSET(THREAD_VCSR, loongarch_fpu, vcsr); BLANK(); } diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c index 6c87ea36b2573b3d4b152759894649ee25e88cb9..529ab8f44ec6d9fcecbba29099c224fa087d9c08 100644 --- a/arch/loongarch/kernel/cpu-probe.c +++ b/arch/loongarch/kernel/cpu-probe.c @@ -263,7 +263,7 @@ void cpu_probe(void) c->cputype = CPU_UNKNOWN; c->processor_id = read_cpucfg(LOONGARCH_CPUCFG0); - c->fpu_vers = (read_cpucfg(LOONGARCH_CPUCFG2) >> 3) & 0x3; + c->fpu_vers = (read_cpucfg(LOONGARCH_CPUCFG2) & CPUCFG2_FPVERS) >> 3; c->fpu_csr0 = FPU_CSR_RN; c->fpu_mask = FPU_CSR_RSVD; diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S index 75c6ce0682a2411b345c467280f9fdc0f30d19f9..a631a7137667bfce45c6adfdc2666c180abb0b0b 100644 --- a/arch/loongarch/kernel/fpu.S +++ b/arch/loongarch/kernel/fpu.S @@ -146,16 +146,6 @@ movgr2fcsr fcsr0, \tmp0 .endm - .macro sc_save_vcsr base, tmp0 - movfcsr2gr \tmp0, vcsr16 - EX st.w \tmp0, \base, 0 - .endm - - .macro sc_restore_vcsr base, tmp0 - EX ld.w \tmp0, \base, 0 - movgr2fcsr vcsr16, \tmp0 - .endm - /* * Save a thread's fp context. */ diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S index e596dfcd924b3d6e8135faf09d91171aba41fad6..d01e62dd414f20b73985eb9a5a8ef2b9eb788e09 100644 --- a/arch/loongarch/kernel/head.S +++ b/arch/loongarch/kernel/head.S @@ -14,8 +14,6 @@ __REF -SYM_ENTRY(_stext, SYM_L_GLOBAL, SYM_A_NONE) - SYM_CODE_START(kernel_entry) # kernel entry point /* Config direct window and set PG */ diff --git a/arch/loongarch/kernel/numa.c b/arch/loongarch/kernel/numa.c index a76f547a5aa3a84911c50a38bdc0c2bf3b162f50..a13f92593cfdad89294abe8d37582b40c2c12954 100644 --- a/arch/loongarch/kernel/numa.c +++ b/arch/loongarch/kernel/numa.c @@ -429,7 +429,6 @@ int __init init_numa_memory(void) return 0; } -EXPORT_SYMBOL(init_numa_memory); #endif void __init paging_init(void) diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index e4060f84a22169d71bdeafab3328777277fea2e1..1bf58c65e2bf0cc4afa7d38cdd434521d184d3b3 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -475,8 +475,7 @@ asmlinkage void noinstr do_ri(struct pt_regs *regs) die_if_kernel("Reserved instruction in kernel code", regs); - if (unlikely(compute_return_era(regs) < 0)) - goto out; + compute_return_era(regs); if (unlikely(get_user(opcode, era) < 0)) { status = SIGSEGV; diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S index 9d508158fe1a609eb2e75023fb84b2b2dce79dae..69c76f26c1c57e03ba4ff6389111abd8cbedf03a 100644 --- a/arch/loongarch/kernel/vmlinux.lds.S +++ b/arch/loongarch/kernel/vmlinux.lds.S @@ -37,6 +37,7 @@ SECTIONS HEAD_TEXT_SECTION . = ALIGN(PECOFF_SEGMENT_ALIGN); + _stext = .; .text : { TEXT_TEXT SCHED_TEXT @@ -101,6 +102,7 @@ SECTIONS STABS_DEBUG DWARF_DEBUG + ELF_DETAILS .gptab.sdata : { *(.gptab.data) diff --git a/arch/loongarch/mm/tlb.c b/arch/loongarch/mm/tlb.c index e272f8ac57d16677f9d807418c7d4987b579c802..9818ce11546bcb502415dbe7e000d1798ae4aac4 100644 --- a/arch/loongarch/mm/tlb.c +++ b/arch/loongarch/mm/tlb.c @@ -281,15 +281,16 @@ void setup_tlb_handler(int cpu) if (pcpu_handlers[cpu]) return; - page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, get_order(vec_sz)); + page = alloc_pages_node(cpu_to_node(cpu), GFP_ATOMIC, get_order(vec_sz)); if (!page) return; addr = page_address(page); - pcpu_handlers[cpu] = virt_to_phys(addr); + pcpu_handlers[cpu] = (unsigned long)addr; memcpy((void *)addr, (void *)eentry, vec_sz); local_flush_icache_range((unsigned long)addr, (unsigned long)addr + vec_sz); - csr_write64(pcpu_handlers[cpu], LOONGARCH_CSR_TLBRENTRY); + csr_write64(pcpu_handlers[cpu], LOONGARCH_CSR_EENTRY); + csr_write64(pcpu_handlers[cpu], LOONGARCH_CSR_MERRENTRY); csr_write64(pcpu_handlers[cpu] + 80*VECSIZE, LOONGARCH_CSR_TLBRENTRY); } #endif diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile index 6b6e16732c603fc09a4e61232b4bc14f207275ab..92e40403225783256ae816584c5abba19c32bcb3 100644 --- a/arch/loongarch/vdso/Makefile +++ b/arch/loongarch/vdso/Makefile @@ -21,6 +21,7 @@ ccflags-vdso += $(filter --target=%,$(KBUILD_CFLAGS)) endif cflags-vdso := $(ccflags-vdso) \ + -isystem $(shell $(CC) -print-file-name=include) \ $(filter -W%,$(filter-out -Wa$(comma)%,$(KBUILD_CFLAGS))) \ -O2 -g -fno-strict-aliasing -fno-common -fno-builtin -G0 \ -fno-stack-protector -fno-jump-tables -DDISABLE_BRANCH_PROFILING \ diff --git a/arch/mips/boot/dts/ingenic/x1000.dtsi b/arch/mips/boot/dts/ingenic/x1000.dtsi index b0a034b468bbba485a4dd83631bc2a57849be79a..42e69664efd93147b068adf9c8711757db54bedb 100644 --- a/arch/mips/boot/dts/ingenic/x1000.dtsi +++ b/arch/mips/boot/dts/ingenic/x1000.dtsi @@ -111,8 +111,9 @@ clocks = <&cgu X1000_CLK_RTCLK>, <&cgu X1000_CLK_EXCLK>, - <&cgu X1000_CLK_PCLK>; - clock-names = "rtc", "ext", "pclk"; + <&cgu X1000_CLK_PCLK>, + <&cgu X1000_CLK_TCU>; + clock-names = "rtc", "ext", "pclk", "tcu"; interrupt-controller; #interrupt-cells = <1>; diff --git a/arch/mips/boot/dts/ingenic/x1830.dtsi b/arch/mips/boot/dts/ingenic/x1830.dtsi index dbf21afaccb1a5df50ce2dcad8bf9a52c00e978a..65a5da71c1994726876d5106f865054bbe462dda 100644 --- a/arch/mips/boot/dts/ingenic/x1830.dtsi +++ b/arch/mips/boot/dts/ingenic/x1830.dtsi @@ -104,8 +104,9 @@ clocks = <&cgu X1830_CLK_RTCLK>, <&cgu X1830_CLK_EXCLK>, - <&cgu X1830_CLK_PCLK>; - clock-names = "rtc", "ext", "pclk"; + <&cgu X1830_CLK_PCLK>, + <&cgu X1830_CLK_TCU>; + clock-names = "rtc", "ext", "pclk", "tcu"; interrupt-controller; #interrupt-cells = <1>; diff --git a/arch/mips/generic/board-ranchu.c b/arch/mips/generic/board-ranchu.c index a89aaad59cb18615cbb0e86bcf45a61d03f6b665..930c450418822aab80cbfebb17041b5261b09190 100644 --- a/arch/mips/generic/board-ranchu.c +++ b/arch/mips/generic/board-ranchu.c @@ -44,6 +44,7 @@ static __init unsigned int ranchu_measure_hpt_freq(void) __func__); rtc_base = of_iomap(np, 0); + of_node_put(np); if (!rtc_base) panic("%s(): Failed to ioremap Goldfish RTC base!", __func__); diff --git a/arch/mips/lantiq/falcon/sysctrl.c b/arch/mips/lantiq/falcon/sysctrl.c index 5204fc6d6d502faf97c230b429d62dcd8d61592a..1187729d8cbb1b188e98a141f46fee54d20b35c8 100644 --- a/arch/mips/lantiq/falcon/sysctrl.c +++ b/arch/mips/lantiq/falcon/sysctrl.c @@ -208,6 +208,12 @@ void __init ltq_soc_init(void) of_address_to_resource(np_sysgpe, 0, &res_sys[2])) panic("Failed to get core resources"); + of_node_put(np_status); + of_node_put(np_ebu); + of_node_put(np_sys1); + of_node_put(np_syseth); + of_node_put(np_sysgpe); + if ((request_mem_region(res_status.start, resource_size(&res_status), res_status.name) < 0) || (request_mem_region(res_ebu.start, resource_size(&res_ebu), diff --git a/arch/mips/lantiq/irq.c b/arch/mips/lantiq/irq.c index b732495f138af0c2a58bf2fde1a413ae6422b563..20622bf0a9b3dea7fc9a85fc36abb38171e822fb 100644 --- a/arch/mips/lantiq/irq.c +++ b/arch/mips/lantiq/irq.c @@ -408,6 +408,7 @@ int __init icu_of_init(struct device_node *node, struct device_node *parent) if (!ltq_eiu_membase) panic("Failed to remap eiu memory"); } + of_node_put(eiu_node); return 0; } diff --git a/arch/mips/lantiq/xway/sysctrl.c b/arch/mips/lantiq/xway/sysctrl.c index 084f6caba5f23754bf5f2b892a87c17d325ae720..d444a1b98a724d07db257446daaf24a57ffe748c 100644 --- a/arch/mips/lantiq/xway/sysctrl.c +++ b/arch/mips/lantiq/xway/sysctrl.c @@ -441,6 +441,10 @@ void __init ltq_soc_init(void) of_address_to_resource(np_ebu, 0, &res_ebu)) panic("Failed to get core resources"); + of_node_put(np_pmu); + of_node_put(np_cgu); + of_node_put(np_ebu); + if (!request_mem_region(res_pmu.start, resource_size(&res_pmu), res_pmu.name) || !request_mem_region(res_cgu.start, resource_size(&res_cgu), diff --git a/arch/mips/mti-malta/malta-time.c b/arch/mips/mti-malta/malta-time.c index bbf1e38e14319742d2a85ebbc2926eb44f5b44e1..2cb708cdf01a0cc02c9f062825a4a102f1a9d5f5 100644 --- a/arch/mips/mti-malta/malta-time.c +++ b/arch/mips/mti-malta/malta-time.c @@ -214,6 +214,8 @@ static void update_gic_frequency_dt(void) if (of_update_property(node, &gic_frequency_prop) < 0) pr_err("error updating gic frequency property\n"); + + of_node_put(node); } #endif diff --git a/arch/mips/pic32/pic32mzda/init.c b/arch/mips/pic32/pic32mzda/init.c index 129915616763ac18e8c7e9e5c7be81061fe71439..d9c8c4e46aff99e1b86ba734a9a57f3522bec43d 100644 --- a/arch/mips/pic32/pic32mzda/init.c +++ b/arch/mips/pic32/pic32mzda/init.c @@ -98,13 +98,18 @@ static int __init pic32_of_prepare_platform_data(struct of_dev_auxdata *lookup) np = of_find_compatible_node(NULL, NULL, lookup->compatible); if (np) { lookup->name = (char *)np->name; - if (lookup->phys_addr) + if (lookup->phys_addr) { + of_node_put(np); continue; + } if (!of_address_to_resource(np, 0, &res)) lookup->phys_addr = res.start; + of_node_put(np); } } + of_node_put(root); + return 0; } diff --git a/arch/mips/pic32/pic32mzda/time.c b/arch/mips/pic32/pic32mzda/time.c index 7174e9abbb1ba891c51cea9f7858175eb3ecbd00..777b515c52c8da635f18eb0601d530b1af6d3cbf 100644 --- a/arch/mips/pic32/pic32mzda/time.c +++ b/arch/mips/pic32/pic32mzda/time.c @@ -32,6 +32,9 @@ static unsigned int pic32_xlate_core_timer_irq(void) goto default_map; irq = irq_of_parse_and_map(node, 0); + + of_node_put(node); + if (!irq) goto default_map; diff --git a/arch/mips/ralink/of.c b/arch/mips/ralink/of.c index 587c7b99876976d43c4996db81b431af1bc0d89f..ea8072acf8d94ebc66b121285091342adbeb21b0 100644 --- a/arch/mips/ralink/of.c +++ b/arch/mips/ralink/of.c @@ -40,6 +40,8 @@ __iomem void *plat_of_remap_node(const char *node) if (of_address_to_resource(np, 0, &res)) panic("Failed to get resource for %s", node); + of_node_put(np); + if (!request_mem_region(res.start, resource_size(&res), res.name)) diff --git a/arch/mips/vr41xx/common/icu.c b/arch/mips/vr41xx/common/icu.c index 7b7f25b4b057e47b9f91e259268714c9d7864608..9240bcdbe74e4b78bc2e1b2aa7eabc6bf3d1ecea 100644 --- a/arch/mips/vr41xx/common/icu.c +++ b/arch/mips/vr41xx/common/icu.c @@ -640,8 +640,6 @@ static int icu_get_irq(unsigned int irq) printk(KERN_ERR "spurious ICU interrupt: %04x,%04x\n", pend1, pend2); - atomic_inc(&irq_err_count); - return -1; } diff --git a/arch/openrisc/kernel/unwinder.c b/arch/openrisc/kernel/unwinder.c index 8ae15c2c18459eb08541998a07eaaabf4df7ea81..c6ad6f867a6a8f3e7b792180e67a8a030bebd758 100644 --- a/arch/openrisc/kernel/unwinder.c +++ b/arch/openrisc/kernel/unwinder.c @@ -25,7 +25,7 @@ struct or1k_frameinfo { /* * Verify a frameinfo structure. The return address should be a valid text * address. The frame pointer may be null if its the last frame, otherwise - * the frame pointer should point to a location in the stack after the the + * the frame pointer should point to a location in the stack after the * top of the next frame up. */ static inline int or1k_frameinfo_valid(struct or1k_frameinfo *frameinfo) diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 5f2448dc5a2b2ee77e08f7b3b470273eff388cf4..fa400055b2d5080f41090bfcbbf5fe35567eb06b 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -10,6 +10,7 @@ config PARISC select ARCH_WANT_FRAME_POINTERS select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_STRICT_KERNEL_RWX + select ARCH_HAS_STRICT_MODULE_RWX select ARCH_HAS_UBSAN_SANITIZE_ALL select ARCH_HAS_PTE_SPECIAL select ARCH_NO_SG_CHAIN diff --git a/arch/parisc/include/asm/fb.h b/arch/parisc/include/asm/fb.h index d63a2acb91f2b6ea805646b27afa4bc584457525..55d29c4f716e69e2ecf925f3551b4e76794dba5f 100644 --- a/arch/parisc/include/asm/fb.h +++ b/arch/parisc/include/asm/fb.h @@ -12,7 +12,7 @@ static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma, pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE; } -#if defined(CONFIG_STI_CONSOLE) || defined(CONFIG_FB_STI) +#if defined(CONFIG_FB_STI) int fb_is_primary_device(struct fb_info *info); #else static inline int fb_is_primary_device(struct fb_info *info) diff --git a/arch/parisc/kernel/asm-offsets.c b/arch/parisc/kernel/asm-offsets.c index 2673d57eeb0083e990e21e9ec18f0f1bfe092b2c..94652e13c2603ee42fb6dbc1f451ba9738577b7e 100644 --- a/arch/parisc/kernel/asm-offsets.c +++ b/arch/parisc/kernel/asm-offsets.c @@ -224,8 +224,13 @@ int main(void) BLANK(); DEFINE(ASM_SIGFRAME_SIZE, PARISC_RT_SIGFRAME_SIZE); DEFINE(SIGFRAME_CONTEXT_REGS, offsetof(struct rt_sigframe, uc.uc_mcontext) - PARISC_RT_SIGFRAME_SIZE); +#ifdef CONFIG_64BIT DEFINE(ASM_SIGFRAME_SIZE32, PARISC_RT_SIGFRAME_SIZE32); DEFINE(SIGFRAME_CONTEXT_REGS32, offsetof(struct compat_rt_sigframe, uc.uc_mcontext) - PARISC_RT_SIGFRAME_SIZE32); +#else + DEFINE(ASM_SIGFRAME_SIZE32, PARISC_RT_SIGFRAME_SIZE); + DEFINE(SIGFRAME_CONTEXT_REGS32, offsetof(struct rt_sigframe, uc.uc_mcontext) - PARISC_RT_SIGFRAME_SIZE); +#endif BLANK(); DEFINE(ICACHE_BASE, offsetof(struct pdc_cache_info, ic_base)); DEFINE(ICACHE_STRIDE, offsetof(struct pdc_cache_info, ic_stride)); diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index c8a11fcecf4c67b8df5ffadfb403e5d71e1c0d9f..a9bc578e4c52e57caa1f3d297cee5417f18722d1 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -722,7 +722,10 @@ void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned lon return; if (parisc_requires_coherency()) { - flush_user_cache_page(vma, vmaddr); + if (vma->vm_flags & VM_SHARED) + flush_data_cache(); + else + flush_user_cache_page(vma, vmaddr); return; } diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index ed1e88a74dc422cf0069820055507c69d23cf88a..bac581b5ecfc5f85e3cb9a3c63efaa020690756b 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -146,7 +146,7 @@ static int emulate_ldw(struct pt_regs *regs, int toreg, int flop) " depw %%r0,31,2,%4\n" "1: ldw 0(%%sr1,%4),%0\n" "2: ldw 4(%%sr1,%4),%3\n" -" subi 32,%4,%2\n" +" subi 32,%2,%2\n" " mtctl %2,11\n" " vshd %0,%3,%0\n" "3: \n" diff --git a/arch/parisc/math-emu/decode_exc.c b/arch/parisc/math-emu/decode_exc.c index 494ca41df05d00df6b950ce7f0c34ca6bf4ed7c7..d41ddb3430b5b57dab69da8933871993771dc233 100644 --- a/arch/parisc/math-emu/decode_exc.c +++ b/arch/parisc/math-emu/decode_exc.c @@ -102,7 +102,7 @@ decode_fpu(unsigned int Fpu_register[], unsigned int trap_counts[]) * that happen. Want to keep this overhead low, but still provide * some information to the customer. All exits from this routine * need to restore Fpu_register[0] - */ + */ bflags=(Fpu_register[0] & 0xf8000000); Fpu_register[0] &= 0x07ffffff; diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index c22f5815594847adab8d2ecb24126c79d5197036..fcbb81feb7ad82588d6c57d811bfd5cfc2d6831d 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -38,7 +38,7 @@ config RISCV select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU select ARCH_SUPPORTS_HUGETLBFS if MMU - select ARCH_SUPPORTS_PAGE_TABLE_CHECK + select ARCH_SUPPORTS_PAGE_TABLE_CHECK if MMU select ARCH_USE_MEMTEST select ARCH_USE_QUEUED_RWLOCKS select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU @@ -364,8 +364,13 @@ config RISCV_ISA_SVPBMT select RISCV_ALTERNATIVE default y help - Adds support to dynamically detect the presence of the SVPBMT extension - (Supervisor-mode: page-based memory types) and enable its usage. + Adds support to dynamically detect the presence of the SVPBMT + ISA-extension (Supervisor-mode: page-based memory types) and + enable its usage. + + The memory type for a page contains a combination of attributes + that indicate the cacheability, idempotency, and ordering + properties for access to that page. The SVPBMT extension is only available on 64Bit cpus. diff --git a/arch/riscv/Kconfig.erratas b/arch/riscv/Kconfig.erratas index ebfcd5cc6eaf2fc720d603a366f74657c7cab892..457ac72c9b36d6296064201a4bfd5318095c644a 100644 --- a/arch/riscv/Kconfig.erratas +++ b/arch/riscv/Kconfig.erratas @@ -35,6 +35,7 @@ config ERRATA_SIFIVE_CIP_1200 config ERRATA_THEAD bool "T-HEAD errata" + depends on !XIP_KERNEL select RISCV_ALTERNATIVE help All T-HEAD errata Kconfig depend on this Kconfig. Disabling diff --git a/arch/riscv/boot/dts/microchip/mpfs.dtsi b/arch/riscv/boot/dts/microchip/mpfs.dtsi index 8c32591341942fba260c42b1090cbda9ef7f0deb..496d3b7642bd13abe4b5b4591fb92aea3bc50999 100644 --- a/arch/riscv/boot/dts/microchip/mpfs.dtsi +++ b/arch/riscv/boot/dts/microchip/mpfs.dtsi @@ -50,6 +50,7 @@ riscv,isa = "rv64imafdc"; clocks = <&clkcfg CLK_CPU>; tlb-split; + next-level-cache = <&cctrllr>; status = "okay"; cpu1_intc: interrupt-controller { @@ -77,6 +78,7 @@ riscv,isa = "rv64imafdc"; clocks = <&clkcfg CLK_CPU>; tlb-split; + next-level-cache = <&cctrllr>; status = "okay"; cpu2_intc: interrupt-controller { @@ -104,6 +106,7 @@ riscv,isa = "rv64imafdc"; clocks = <&clkcfg CLK_CPU>; tlb-split; + next-level-cache = <&cctrllr>; status = "okay"; cpu3_intc: interrupt-controller { @@ -131,6 +134,7 @@ riscv,isa = "rv64imafdc"; clocks = <&clkcfg CLK_CPU>; tlb-split; + next-level-cache = <&cctrllr>; status = "okay"; cpu4_intc: interrupt-controller { #interrupt-cells = <1>; @@ -192,6 +196,15 @@ riscv,ndev = <186>; }; + pdma: dma-controller@3000000 { + compatible = "sifive,fu540-c000-pdma", "sifive,pdma0"; + reg = <0x0 0x3000000 0x0 0x8000>; + interrupt-parent = <&plic>; + interrupts = <5 6>, <7 8>, <9 10>, <11 12>; + dma-channels = <4>; + #dma-cells = <1>; + }; + clkcfg: clkcfg@20002000 { compatible = "microchip,mpfs-clkcfg"; reg = <0x0 0x20002000 0x0 0x1000>, <0x0 0x3E001000 0x0 0x1000>; diff --git a/arch/riscv/errata/sifive/errata.c b/arch/riscv/errata/sifive/errata.c index 672f02b21ce02de7a758a11b68955ed1c036f067..1031038423e748a79ee4919bfad45cb3592172aa 100644 --- a/arch/riscv/errata/sifive/errata.c +++ b/arch/riscv/errata/sifive/errata.c @@ -111,6 +111,7 @@ void __init_or_module sifive_errata_patch_func(struct alt_entry *begin, cpu_apply_errata |= tmp; } } - if (cpu_apply_errata != cpu_req_errata) + if (stage != RISCV_ALTERNATIVES_MODULE && + cpu_apply_errata != cpu_req_errata) warn_miss_errata(cpu_req_errata - cpu_apply_errata); } diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h index 9e2888dbb5b1316b8a66990067b9b9cd50c6246b..416ead0f9a655909b243ee52fc197d117f712bbf 100644 --- a/arch/riscv/include/asm/errata_list.h +++ b/arch/riscv/include/asm/errata_list.h @@ -75,20 +75,20 @@ asm volatile(ALTERNATIVE( \ "nop\n\t" \ "nop\n\t" \ "nop", \ - "li t3, %2\n\t" \ - "slli t3, t3, %4\n\t" \ + "li t3, %1\n\t" \ + "slli t3, t3, %3\n\t" \ "and t3, %0, t3\n\t" \ "bne t3, zero, 2f\n\t" \ - "li t3, %3\n\t" \ - "slli t3, t3, %4\n\t" \ + "li t3, %2\n\t" \ + "slli t3, t3, %3\n\t" \ "or %0, %0, t3\n\t" \ "2:", THEAD_VENDOR_ID, \ ERRATA_THEAD_PBMT, CONFIG_ERRATA_THEAD_PBMT) \ : "+r"(_val) \ - : "0"(_val), \ - "I"(_PAGE_MTMASK_THEAD >> ALT_THEAD_PBMT_SHIFT), \ + : "I"(_PAGE_MTMASK_THEAD >> ALT_THEAD_PBMT_SHIFT), \ "I"(_PAGE_PMA_THEAD >> ALT_THEAD_PBMT_SHIFT), \ - "I"(ALT_THEAD_PBMT_SHIFT)) + "I"(ALT_THEAD_PBMT_SHIFT) \ + : "t3") #else #define ALT_THEAD_PMA(_val) #endif diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h index 5c2aba5efbd0ecf4ff302af06f016e3fa55b36bf..dc42375c2357136356df1ca7f42480eeea98a3dc 100644 --- a/arch/riscv/include/asm/pgtable-64.h +++ b/arch/riscv/include/asm/pgtable-64.h @@ -175,7 +175,7 @@ static inline pud_t pfn_pud(unsigned long pfn, pgprot_t prot) static inline unsigned long _pud_pfn(pud_t pud) { - return pud_val(pud) >> _PAGE_PFN_SHIFT; + return __page_val_to_pfn(pud_val(pud)); } static inline pmd_t *pud_pgtable(pud_t pud) @@ -278,13 +278,13 @@ static inline p4d_t pfn_p4d(unsigned long pfn, pgprot_t prot) static inline unsigned long _p4d_pfn(p4d_t p4d) { - return p4d_val(p4d) >> _PAGE_PFN_SHIFT; + return __page_val_to_pfn(p4d_val(p4d)); } static inline pud_t *p4d_pgtable(p4d_t p4d) { if (pgtable_l4_enabled) - return (pud_t *)pfn_to_virt(p4d_val(p4d) >> _PAGE_PFN_SHIFT); + return (pud_t *)pfn_to_virt(__page_val_to_pfn(p4d_val(p4d))); return (pud_t *)pud_pgtable((pud_t) { p4d_val(p4d) }); } @@ -292,7 +292,7 @@ static inline pud_t *p4d_pgtable(p4d_t p4d) static inline struct page *p4d_page(p4d_t p4d) { - return pfn_to_page(p4d_val(p4d) >> _PAGE_PFN_SHIFT); + return pfn_to_page(__page_val_to_pfn(p4d_val(p4d))); } #define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) @@ -347,7 +347,7 @@ static inline void pgd_clear(pgd_t *pgd) static inline p4d_t *pgd_pgtable(pgd_t pgd) { if (pgtable_l5_enabled) - return (p4d_t *)pfn_to_virt(pgd_val(pgd) >> _PAGE_PFN_SHIFT); + return (p4d_t *)pfn_to_virt(__page_val_to_pfn(pgd_val(pgd))); return (p4d_t *)p4d_pgtable((p4d_t) { pgd_val(pgd) }); } @@ -355,7 +355,7 @@ static inline p4d_t *pgd_pgtable(pgd_t pgd) static inline struct page *pgd_page(pgd_t pgd) { - return pfn_to_page(pgd_val(pgd) >> _PAGE_PFN_SHIFT); + return pfn_to_page(__page_val_to_pfn(pgd_val(pgd))); } #define pgd_page(pgd) pgd_page(pgd) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 1d1be9d9419c1287a7b4ede8670e488408dae2b1..5dbd6610729bbf00ecbceee32d0dc9b55e7f107c 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -261,7 +261,7 @@ static inline pgd_t pfn_pgd(unsigned long pfn, pgprot_t prot) static inline unsigned long _pgd_pfn(pgd_t pgd) { - return pgd_val(pgd) >> _PAGE_PFN_SHIFT; + return __page_val_to_pfn(pgd_val(pgd)); } static inline struct page *pmd_page(pmd_t pmd) @@ -590,14 +590,14 @@ static inline pmd_t pmd_mkinvalid(pmd_t pmd) return __pmd(pmd_val(pmd) & ~(_PAGE_PRESENT|_PAGE_PROT_NONE)); } -#define __pmd_to_phys(pmd) (pmd_val(pmd) >> _PAGE_PFN_SHIFT << PAGE_SHIFT) +#define __pmd_to_phys(pmd) (__page_val_to_pfn(pmd_val(pmd)) << PAGE_SHIFT) static inline unsigned long pmd_pfn(pmd_t pmd) { return ((__pmd_to_phys(pmd) & PMD_MASK) >> PAGE_SHIFT); } -#define __pud_to_phys(pud) (pud_val(pud) >> _PAGE_PFN_SHIFT << PAGE_SHIFT) +#define __pud_to_phys(pud) (__page_val_to_pfn(pud_val(pud)) << PAGE_SHIFT) static inline unsigned long pud_pfn(pud_t pud) { diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index a6f62a6d1edd92ca1cade5fd802daf718b08f35c..12b05ce164bbe3afb4c370cd43a2094c268c84bf 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -293,7 +293,6 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin, unsigned int stage) { u32 cpu_req_feature = cpufeature_probe(stage); - u32 cpu_apply_feature = 0; struct alt_entry *alt; u32 tmp; @@ -307,10 +306,8 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin, } tmp = (1U << alt->errata_id); - if (cpu_req_feature & tmp) { + if (cpu_req_feature & tmp) patch_text_nosync(alt->old_ptr, alt->alt_ptr, alt->alt_len); - cpu_apply_feature |= tmp; - } } } #endif diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c index 1c00695ebee7ee1d41d36e6cff40322efa3a3f99..9826073fbc67d3c4a03390db49f7edf3eb25a542 100644 --- a/arch/riscv/kvm/mmu.c +++ b/arch/riscv/kvm/mmu.c @@ -54,7 +54,7 @@ static inline unsigned long gstage_pte_index(gpa_t addr, u32 level) static inline unsigned long gstage_pte_page_vaddr(pte_t pte) { - return (unsigned long)pfn_to_virt(pte_val(pte) >> _PAGE_PFN_SHIFT); + return (unsigned long)pfn_to_virt(__page_val_to_pfn(pte_val(pte))); } static int gstage_page_size_to_level(unsigned long page_size, u32 *out_level) diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index 7f4ad5e4373a9d72a9f1cd6acb962a4a929c9f9c..f3455dc013fa0cd8dfefd7a7116477c510985cf7 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -781,9 +781,11 @@ static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu) if (kvm_request_pending(vcpu)) { if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) { + kvm_vcpu_srcu_read_unlock(vcpu); rcuwait_wait_event(wait, (!vcpu->arch.power_off) && (!vcpu->arch.pause), TASK_INTERRUPTIBLE); + kvm_vcpu_srcu_read_lock(vcpu); if (vcpu->arch.power_off || vcpu->arch.pause) { /* diff --git a/arch/riscv/kvm/vmid.c b/arch/riscv/kvm/vmid.c index 9f764df125db9f1190249f6f3c69fac6d41e468e..6cd93995fb65e1931a5739dc19e5d58429e3fa1b 100644 --- a/arch/riscv/kvm/vmid.c +++ b/arch/riscv/kvm/vmid.c @@ -97,7 +97,7 @@ void kvm_riscv_gstage_vmid_update(struct kvm_vcpu *vcpu) * We ran out of VMIDs so we increment vmid_version and * start assigning VMIDs from 1. * - * This also means existing VMIDs assignement to all Guest + * This also means existing VMIDs assignment to all Guest * instances is invalid and we have force VMID re-assignement * for all Guest instances. The Guest instances that were not * running will automatically pick-up new VMIDs because will diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 91c0b80a8bf04a6f3b1e0e3638291ef249a33a5a..8cd9e56c629ba8c374ab9cd0c0117663809cbd27 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -484,7 +484,6 @@ config KEXEC config KEXEC_FILE bool "kexec file based system call" select KEXEC_CORE - select BUILD_BIN2C depends on CRYPTO depends on CRYPTO_SHA256 depends on CRYPTO_SHA256_S390 diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 495c68a4df1e9d25b98ae194d3c1095d3d29c65e..4cb5d17e7ead620ee1a84db0347cf68f18a009c9 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -82,7 +82,7 @@ endif ifdef CONFIG_EXPOLINE ifdef CONFIG_EXPOLINE_EXTERN - KBUILD_LDFLAGS_MODULE += arch/s390/lib/expoline.o + KBUILD_LDFLAGS_MODULE += arch/s390/lib/expoline/expoline.o CC_FLAGS_EXPOLINE := -mindirect-branch=thunk-extern CC_FLAGS_EXPOLINE += -mfunction-return=thunk-extern else @@ -163,6 +163,12 @@ vdso_prepare: prepare0 $(Q)$(MAKE) $(build)=arch/s390/kernel/vdso64 include/generated/vdso64-offsets.h $(if $(CONFIG_COMPAT),$(Q)$(MAKE) \ $(build)=arch/s390/kernel/vdso32 include/generated/vdso32-offsets.h) + +ifdef CONFIG_EXPOLINE_EXTERN +modules_prepare: expoline_prepare +expoline_prepare: + $(Q)$(MAKE) $(build)=arch/s390/lib/expoline arch/s390/lib/expoline/expoline.o +endif endif # Don't use tabs in echo arguments diff --git a/arch/s390/crypto/arch_random.c b/arch/s390/crypto/arch_random.c index 56007c763902ae05a47a86976c2af00283ec90f8..1f2d40993c4d2defdc1f8ebc813bd11b93dc135d 100644 --- a/arch/s390/crypto/arch_random.c +++ b/arch/s390/crypto/arch_random.c @@ -4,232 +4,15 @@ * * Copyright IBM Corp. 2017, 2020 * Author(s): Harald Freudenberger - * - * The s390_arch_random_generate() function may be called from random.c - * in interrupt context. So this implementation does the best to be very - * fast. There is a buffer of random data which is asynchronously checked - * and filled by a workqueue thread. - * If there are enough bytes in the buffer the s390_arch_random_generate() - * just delivers these bytes. Otherwise false is returned until the - * worker thread refills the buffer. - * The worker fills the rng buffer by pulling fresh entropy from the - * high quality (but slow) true hardware random generator. This entropy - * is then spread over the buffer with an pseudo random generator PRNG. - * As the arch_get_random_seed_long() fetches 8 bytes and the calling - * function add_interrupt_randomness() counts this as 1 bit entropy the - * distribution needs to make sure there is in fact 1 bit entropy contained - * in 8 bytes of the buffer. The current values pull 32 byte entropy - * and scatter this into a 2048 byte buffer. So 8 byte in the buffer - * will contain 1 bit of entropy. - * The worker thread is rescheduled based on the charge level of the - * buffer but at least with 500 ms delay to avoid too much CPU consumption. - * So the max. amount of rng data delivered via arch_get_random_seed is - * limited to 4k bytes per second. */ #include #include #include -#include #include -#include -#include #include DEFINE_STATIC_KEY_FALSE(s390_arch_random_available); atomic64_t s390_arch_random_counter = ATOMIC64_INIT(0); EXPORT_SYMBOL(s390_arch_random_counter); - -#define ARCH_REFILL_TICKS (HZ/2) -#define ARCH_PRNG_SEED_SIZE 32 -#define ARCH_RNG_BUF_SIZE 2048 - -static DEFINE_SPINLOCK(arch_rng_lock); -static u8 *arch_rng_buf; -static unsigned int arch_rng_buf_idx; - -static void arch_rng_refill_buffer(struct work_struct *); -static DECLARE_DELAYED_WORK(arch_rng_work, arch_rng_refill_buffer); - -bool s390_arch_random_generate(u8 *buf, unsigned int nbytes) -{ - /* max hunk is ARCH_RNG_BUF_SIZE */ - if (nbytes > ARCH_RNG_BUF_SIZE) - return false; - - /* lock rng buffer */ - if (!spin_trylock(&arch_rng_lock)) - return false; - - /* try to resolve the requested amount of bytes from the buffer */ - arch_rng_buf_idx -= nbytes; - if (arch_rng_buf_idx < ARCH_RNG_BUF_SIZE) { - memcpy(buf, arch_rng_buf + arch_rng_buf_idx, nbytes); - atomic64_add(nbytes, &s390_arch_random_counter); - spin_unlock(&arch_rng_lock); - return true; - } - - /* not enough bytes in rng buffer, refill is done asynchronously */ - spin_unlock(&arch_rng_lock); - - return false; -} -EXPORT_SYMBOL(s390_arch_random_generate); - -static void arch_rng_refill_buffer(struct work_struct *unused) -{ - unsigned int delay = ARCH_REFILL_TICKS; - - spin_lock(&arch_rng_lock); - if (arch_rng_buf_idx > ARCH_RNG_BUF_SIZE) { - /* buffer is exhausted and needs refill */ - u8 seed[ARCH_PRNG_SEED_SIZE]; - u8 prng_wa[240]; - /* fetch ARCH_PRNG_SEED_SIZE bytes of entropy */ - cpacf_trng(NULL, 0, seed, sizeof(seed)); - /* blow this entropy up to ARCH_RNG_BUF_SIZE with PRNG */ - memset(prng_wa, 0, sizeof(prng_wa)); - cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED, - &prng_wa, NULL, 0, seed, sizeof(seed)); - cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN, - &prng_wa, arch_rng_buf, ARCH_RNG_BUF_SIZE, NULL, 0); - arch_rng_buf_idx = ARCH_RNG_BUF_SIZE; - } - delay += (ARCH_REFILL_TICKS * arch_rng_buf_idx) / ARCH_RNG_BUF_SIZE; - spin_unlock(&arch_rng_lock); - - /* kick next check */ - queue_delayed_work(system_long_wq, &arch_rng_work, delay); -} - -/* - * Here follows the implementation of s390_arch_get_random_long(). - * - * The random longs to be pulled by arch_get_random_long() are - * prepared in an 4K buffer which is filled from the NIST 800-90 - * compliant s390 drbg. By default the random long buffer is refilled - * 256 times before the drbg itself needs a reseed. The reseed of the - * drbg is done with 32 bytes fetched from the high quality (but slow) - * trng which is assumed to deliver 100% entropy. So the 32 * 8 = 256 - * bits of entropy are spread over 256 * 4KB = 1MB serving 131072 - * arch_get_random_long() invocations before reseeded. - * - * How often the 4K random long buffer is refilled with the drbg - * before the drbg is reseeded can be adjusted. There is a module - * parameter 's390_arch_rnd_long_drbg_reseed' accessible via - * /sys/module/arch_random/parameters/rndlong_drbg_reseed - * or as kernel command line parameter - * arch_random.rndlong_drbg_reseed= - * This parameter tells how often the drbg fills the 4K buffer before - * it is re-seeded by fresh entropy from the trng. - * A value of 16 results in reseeding the drbg at every 16 * 4 KB = 64 - * KB with 32 bytes of fresh entropy pulled from the trng. So a value - * of 16 would result in 256 bits entropy per 64 KB. - * A value of 256 results in 1MB of drbg output before a reseed of the - * drbg is done. So this would spread the 256 bits of entropy among 1MB. - * Setting this parameter to 0 forces the reseed to take place every - * time the 4K buffer is depleted, so the entropy rises to 256 bits - * entropy per 4K or 0.5 bit entropy per arch_get_random_long(). With - * setting this parameter to negative values all this effort is - * disabled, arch_get_random long() returns false and thus indicating - * that the arch_get_random_long() feature is disabled at all. - */ - -static unsigned long rndlong_buf[512]; -static DEFINE_SPINLOCK(rndlong_lock); -static int rndlong_buf_index; - -static int rndlong_drbg_reseed = 256; -module_param_named(rndlong_drbg_reseed, rndlong_drbg_reseed, int, 0600); -MODULE_PARM_DESC(rndlong_drbg_reseed, "s390 arch_get_random_long() drbg reseed"); - -static inline void refill_rndlong_buf(void) -{ - static u8 prng_ws[240]; - static int drbg_counter; - - if (--drbg_counter < 0) { - /* need to re-seed the drbg */ - u8 seed[32]; - - /* fetch seed from trng */ - cpacf_trng(NULL, 0, seed, sizeof(seed)); - /* seed drbg */ - memset(prng_ws, 0, sizeof(prng_ws)); - cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED, - &prng_ws, NULL, 0, seed, sizeof(seed)); - /* re-init counter for drbg */ - drbg_counter = rndlong_drbg_reseed; - } - - /* fill the arch_get_random_long buffer from drbg */ - cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN, &prng_ws, - (u8 *) rndlong_buf, sizeof(rndlong_buf), - NULL, 0); -} - -bool s390_arch_get_random_long(unsigned long *v) -{ - bool rc = false; - unsigned long flags; - - /* arch_get_random_long() disabled ? */ - if (rndlong_drbg_reseed < 0) - return false; - - /* try to lock the random long lock */ - if (!spin_trylock_irqsave(&rndlong_lock, flags)) - return false; - - if (--rndlong_buf_index >= 0) { - /* deliver next long value from the buffer */ - *v = rndlong_buf[rndlong_buf_index]; - rc = true; - goto out; - } - - /* buffer is depleted and needs refill */ - if (in_interrupt()) { - /* delay refill in interrupt context to next caller */ - rndlong_buf_index = 0; - goto out; - } - - /* refill random long buffer */ - refill_rndlong_buf(); - rndlong_buf_index = ARRAY_SIZE(rndlong_buf); - - /* and provide one random long */ - *v = rndlong_buf[--rndlong_buf_index]; - rc = true; - -out: - spin_unlock_irqrestore(&rndlong_lock, flags); - return rc; -} -EXPORT_SYMBOL(s390_arch_get_random_long); - -static int __init s390_arch_random_init(void) -{ - /* all the needed PRNO subfunctions available ? */ - if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG) && - cpacf_query_func(CPACF_PRNO, CPACF_PRNO_SHA512_DRNG_GEN)) { - - /* alloc arch random working buffer */ - arch_rng_buf = kmalloc(ARCH_RNG_BUF_SIZE, GFP_KERNEL); - if (!arch_rng_buf) - return -ENOMEM; - - /* kick worker queue job to fill the random buffer */ - queue_delayed_work(system_long_wq, - &arch_rng_work, ARCH_REFILL_TICKS); - - /* enable arch random to the outside world */ - static_branch_enable(&s390_arch_random_available); - } - - return 0; -} -arch_initcall(s390_arch_random_init); diff --git a/arch/s390/include/asm/archrandom.h b/arch/s390/include/asm/archrandom.h index 5dc712fde3c7f078ca4d26910cd9ff3b130964d7..2c6e1c6ecbe780284c2374d7ee0d4394b1a66fda 100644 --- a/arch/s390/include/asm/archrandom.h +++ b/arch/s390/include/asm/archrandom.h @@ -15,17 +15,13 @@ #include #include +#include DECLARE_STATIC_KEY_FALSE(s390_arch_random_available); extern atomic64_t s390_arch_random_counter; -bool s390_arch_get_random_long(unsigned long *v); -bool s390_arch_random_generate(u8 *buf, unsigned int nbytes); - static inline bool __must_check arch_get_random_long(unsigned long *v) { - if (static_branch_likely(&s390_arch_random_available)) - return s390_arch_get_random_long(v); return false; } @@ -37,7 +33,9 @@ static inline bool __must_check arch_get_random_int(unsigned int *v) static inline bool __must_check arch_get_random_seed_long(unsigned long *v) { if (static_branch_likely(&s390_arch_random_available)) { - return s390_arch_random_generate((u8 *)v, sizeof(*v)); + cpacf_trng(NULL, 0, (u8 *)v, sizeof(*v)); + atomic64_add(sizeof(*v), &s390_arch_random_counter); + return true; } return false; } @@ -45,7 +43,9 @@ static inline bool __must_check arch_get_random_seed_long(unsigned long *v) static inline bool __must_check arch_get_random_seed_int(unsigned int *v) { if (static_branch_likely(&s390_arch_random_available)) { - return s390_arch_random_generate((u8 *)v, sizeof(*v)); + cpacf_trng(NULL, 0, (u8 *)v, sizeof(*v)); + atomic64_add(sizeof(*v), &s390_arch_random_counter); + return true; } return false; } diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h index d910d71b5bb50e72df5def006d748ee2b23614cb..7e9e99523e952517a55859651fd36ea3ccb12f5d 100644 --- a/arch/s390/include/asm/nospec-insn.h +++ b/arch/s390/include/asm/nospec-insn.h @@ -2,8 +2,6 @@ #ifndef _ASM_S390_NOSPEC_ASM_H #define _ASM_S390_NOSPEC_ASM_H -#include -#include #include #ifdef __ASSEMBLY__ diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index 54ae2dc65e3b20905e901f398e8fe0eb3ab6b9a5..2f983e0b95e09d98be327b75dd79e435fab8fff8 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -133,9 +133,9 @@ struct slibe { * @sb_count: number of storage blocks * @sba: storage block element addresses * @dcount: size of storage block elements - * @user0: user defineable value - * @res4: reserved paramater - * @user1: user defineable value + * @user0: user definable value + * @res4: reserved parameter + * @user1: user definable value */ struct qaob { u64 res0[6]; diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index a2c1c55daec03c8faee196c7edf5ed6b865b3a96..28124d0fa1d5e8839dc9a776f6739ca6b81950fb 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -219,6 +219,11 @@ ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, size_t csize, unsigned long src; int rc; + if (!(iter_is_iovec(iter) || iov_iter_is_kvec(iter))) + return -EINVAL; + /* Multi-segment iterators are not supported */ + if (iter->nr_segs > 1) + return -EINVAL; if (!csize) return 0; src = pfn_to_phys(pfn) + offset; @@ -228,7 +233,10 @@ ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, size_t csize, rc = copy_oldmem_user(iter->iov->iov_base, src, csize); else rc = copy_oldmem_kernel(iter->kvec->iov_base, src, csize); - return rc; + if (rc < 0) + return rc; + iov_iter_advance(iter, csize); + return csize; } /* diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 483ab5e10164d859d07112c578f52c1c85889cfb..f7dd3c849e68ca6e2dcc396113d788b0d4714f85 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -516,6 +516,26 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type) return err; } +/* Events CPU_CYLCES and INSTRUCTIONS can be submitted with two different + * attribute::type values: + * - PERF_TYPE_HARDWARE: + * - pmu->type: + * Handle both type of invocations identical. They address the same hardware. + * The result is different when event modifiers exclude_kernel and/or + * exclude_user are also set. + */ +static int cpumf_pmu_event_type(struct perf_event *event) +{ + u64 ev = event->attr.config; + + if (cpumf_generic_events_basic[PERF_COUNT_HW_CPU_CYCLES] == ev || + cpumf_generic_events_basic[PERF_COUNT_HW_INSTRUCTIONS] == ev || + cpumf_generic_events_user[PERF_COUNT_HW_CPU_CYCLES] == ev || + cpumf_generic_events_user[PERF_COUNT_HW_INSTRUCTIONS] == ev) + return PERF_TYPE_HARDWARE; + return PERF_TYPE_RAW; +} + static int cpumf_pmu_event_init(struct perf_event *event) { unsigned int type = event->attr.type; @@ -525,7 +545,7 @@ static int cpumf_pmu_event_init(struct perf_event *event) err = __hw_perf_event_init(event, type); else if (event->pmu->type == type) /* Registered as unknown PMU */ - err = __hw_perf_event_init(event, PERF_TYPE_RAW); + err = __hw_perf_event_init(event, cpumf_pmu_event_type(event)); else return -ENOENT; diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c index 8c1545946d85e79854d2d5249bac96521c61f6cc..b38b4ae01589b20d58fa91d78b05844977f2f5e1 100644 --- a/arch/s390/kernel/perf_pai_crypto.c +++ b/arch/s390/kernel/perf_pai_crypto.c @@ -193,8 +193,9 @@ static int paicrypt_event_init(struct perf_event *event) /* PAI crypto PMU registered as PERF_TYPE_RAW, check event type */ if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type) return -ENOENT; - /* PAI crypto event must be valid */ - if (a->config > PAI_CRYPTO_BASE + paicrypt_cnt) + /* PAI crypto event must be in valid range */ + if (a->config < PAI_CRYPTO_BASE || + a->config > PAI_CRYPTO_BASE + paicrypt_cnt) return -EINVAL; /* Allow only CPU wide operation, no process context for now. */ if (event->hw.target || event->cpu == -1) @@ -208,6 +209,12 @@ static int paicrypt_event_init(struct perf_event *event) if (rc) return rc; + /* Event initialization sets last_tag to 0. When later on the events + * are deleted and re-added, do not reset the event count value to zero. + * Events are added, deleted and re-added when 2 or more events + * are active at the same time. + */ + event->hw.last_tag = 0; cpump->event = event; event->destroy = paicrypt_event_destroy; @@ -242,9 +249,12 @@ static void paicrypt_start(struct perf_event *event, int flags) { u64 sum; - sum = paicrypt_getall(event); /* Get current value */ - local64_set(&event->hw.prev_count, sum); - local64_set(&event->count, 0); + if (!event->hw.last_tag) { + event->hw.last_tag = 1; + sum = paicrypt_getall(event); /* Get current value */ + local64_set(&event->count, 0); + local64_set(&event->hw.prev_count, sum); + } } static int paicrypt_add(struct perf_event *event, int flags) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 8d91eccc096325b47cf21f593da2826ccc420863..0a37f5de286316095598b60242318cf184fa3823 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -875,6 +875,11 @@ static void __init setup_randomness(void) if (stsi(vmms, 3, 2, 2) == 0 && vmms->count) add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count); memblock_free(vmms, PAGE_SIZE); + +#ifdef CONFIG_ARCH_RANDOM + if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG)) + static_branch_enable(&s390_arch_random_available); +#endif } /* diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index 5d415b3db6d14c626988667d1894d68805213cd1..580d2e3265cb2ce04523064b2c14c76b78ae42db 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -7,7 +7,6 @@ lib-y += delay.o string.o uaccess.o find.o spinlock.o obj-y += mem.o xor.o lib-$(CONFIG_KPROBES) += probes.o lib-$(CONFIG_UPROBES) += probes.o -obj-$(CONFIG_EXPOLINE_EXTERN) += expoline.o obj-$(CONFIG_S390_KPROBES_SANITY_TEST) += test_kprobes_s390.o test_kprobes_s390-objs += test_kprobes_asm.o test_kprobes.o @@ -22,3 +21,5 @@ obj-$(CONFIG_S390_MODULES_SANITY_TEST) += test_modules.o obj-$(CONFIG_S390_MODULES_SANITY_TEST_HELPERS) += test_modules_helpers.o lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o + +obj-$(CONFIG_EXPOLINE_EXTERN) += expoline/ diff --git a/arch/s390/lib/expoline/Makefile b/arch/s390/lib/expoline/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..854631d9cb03a8e2158285c2c746382bf12b8518 --- /dev/null +++ b/arch/s390/lib/expoline/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-y += expoline.o diff --git a/arch/s390/lib/expoline.S b/arch/s390/lib/expoline/expoline.S similarity index 100% rename from arch/s390/lib/expoline.S rename to arch/s390/lib/expoline/expoline.S diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile index 360ada80d20c3b72766f26fa1cbf138138d2dc2f..d237bc6841cb8608c2bb3118d549d8f9bf9c7c5b 100644 --- a/arch/s390/purgatory/Makefile +++ b/arch/s390/purgatory/Makefile @@ -48,7 +48,6 @@ OBJCOPYFLAGS_purgatory.ro += --remove-section='.note.*' $(obj)/purgatory.ro: $(obj)/purgatory $(obj)/purgatory.chk FORCE $(call if_changed,objcopy) -$(obj)/kexec-purgatory.o: $(obj)/kexec-purgatory.S $(obj)/purgatory.ro FORCE - $(call if_changed_rule,as_o_S) +$(obj)/kexec-purgatory.o: $(obj)/purgatory.ro -obj-$(CONFIG_ARCH_HAS_KEXEC_PURGATORY) += kexec-purgatory.o +obj-y += kexec-purgatory.o diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h index cf9a3ec32406f856181af0eff2908fa4e00a0b96..fba90e670ed41d4821a45edeac6649936d85419e 100644 --- a/arch/sh/include/asm/io.h +++ b/arch/sh/include/asm/io.h @@ -271,8 +271,12 @@ static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size, #endif /* CONFIG_HAVE_IOREMAP_PROT */ #else /* CONFIG_MMU */ -#define iounmap(addr) do { } while (0) -#define ioremap(offset, size) ((void __iomem *)(unsigned long)(offset)) +static inline void __iomem *ioremap(phys_addr_t offset, size_t size) +{ + return (void __iomem *)(unsigned long)offset; +} + +static inline void iounmap(volatile void __iomem *addr) { } #endif /* CONFIG_MMU */ #define ioremap_uc ioremap diff --git a/arch/um/include/asm/page.h b/arch/um/include/asm/page.h index 95af12e82a328448ce122a4eb81996d26bd1cb02..cdbd9653aa14e5b29ffba30d98a2741de6bc31aa 100644 --- a/arch/um/include/asm/page.h +++ b/arch/um/include/asm/page.h @@ -102,8 +102,8 @@ extern unsigned long uml_physmem; * casting is the right thing, but 32-bit UML can't have 64-bit virtual * addresses */ -#define __pa(virt) to_phys((void *) (unsigned long) (virt)) -#define __va(phys) to_virt((unsigned long) (phys)) +#define __pa(virt) uml_to_phys((void *) (unsigned long) (virt)) +#define __va(phys) uml_to_virt((unsigned long) (phys)) #define phys_to_pfn(p) ((p) >> PAGE_SHIFT) #define pfn_to_phys(pfn) PFN_PHYS(pfn) diff --git a/arch/um/include/shared/mem.h b/arch/um/include/shared/mem.h index 4862c91d4213c4f6d1c1e9430ca902e080aae0a2..98aacd54410801551d3b887e1181a9e7b0f7429f 100644 --- a/arch/um/include/shared/mem.h +++ b/arch/um/include/shared/mem.h @@ -9,12 +9,12 @@ extern int phys_mapping(unsigned long phys, unsigned long long *offset_out); extern unsigned long uml_physmem; -static inline unsigned long to_phys(void *virt) +static inline unsigned long uml_to_phys(void *virt) { return(((unsigned long) virt) - uml_physmem); } -static inline void *to_virt(unsigned long phys) +static inline void *uml_to_virt(unsigned long phys) { return((void *) uml_physmem + phys); } diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 0760e24f2ebabf38465c70275d6f2cda1cd35332..9838967d0b2f1032d8dd00f69394b21b141eef8c 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -432,6 +432,10 @@ void apply_retpolines(s32 *start, s32 *end) { } +void apply_returns(s32 *start, s32 *end) +{ +} + void apply_alternatives(struct alt_instr *start, struct alt_instr *end) { } diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index 87d3129e7362ef25659c3a3245dfcbc8e998fc85..c316c993a94918d1b5d2b73a49be2501cc8a609f 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -251,7 +251,7 @@ static int userspace_tramp(void *stack) signal(SIGTERM, SIG_DFL); signal(SIGWINCH, SIG_IGN); - fd = phys_mapping(to_phys(__syscall_stub_start), &offset); + fd = phys_mapping(uml_to_phys(__syscall_stub_start), &offset); addr = mmap64((void *) STUB_CODE, UM_KERN_PAGE_SIZE, PROT_EXEC, MAP_FIXED | MAP_PRIVATE, fd, offset); if (addr == MAP_FAILED) { @@ -261,7 +261,7 @@ static int userspace_tramp(void *stack) } if (stack != NULL) { - fd = phys_mapping(to_phys(stack), &offset); + fd = phys_mapping(uml_to_phys(stack), &offset); addr = mmap((void *) STUB_DATA, UM_KERN_PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED, fd, offset); @@ -534,7 +534,7 @@ int copy_context_skas0(unsigned long new_stack, int pid) struct stub_data *data = (struct stub_data *) current_stack; struct stub_data *child_data = (struct stub_data *) new_stack; unsigned long long new_offset; - int new_fd = phys_mapping(to_phys((void *)new_stack), &new_offset); + int new_fd = phys_mapping(uml_to_phys((void *)new_stack), &new_offset); /* * prepare offset and fd of child's stack as argument for parent's diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index be0b95e51df663f94a5c7e4c3fc3965179d26bdc..e58798f636d474d277af5eb59dc9b6a42b2c31d0 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -462,29 +462,6 @@ config GOLDFISH def_bool y depends on X86_GOLDFISH -config RETPOLINE - bool "Avoid speculative indirect branches in kernel" - select OBJTOOL if HAVE_OBJTOOL - default y - help - Compile kernel with the retpoline compiler options to guard against - kernel-to-user data leaks by avoiding speculative indirect - branches. Requires a compiler with -mindirect-branch=thunk-extern - support for full protection. The kernel may run slower. - -config CC_HAS_SLS - def_bool $(cc-option,-mharden-sls=all) - -config SLS - bool "Mitigate Straight-Line-Speculation" - depends on CC_HAS_SLS && X86_64 - select OBJTOOL if HAVE_OBJTOOL - default n - help - Compile the kernel with straight-line-speculation options to guard - against straight line speculation. The kernel image might be slightly - larger. - config X86_CPU_RESCTRL bool "x86 CPU resource control support" depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD) @@ -2453,6 +2430,91 @@ source "kernel/livepatch/Kconfig" endmenu +config CC_HAS_SLS + def_bool $(cc-option,-mharden-sls=all) + +config CC_HAS_RETURN_THUNK + def_bool $(cc-option,-mfunction-return=thunk-extern) + +menuconfig SPECULATION_MITIGATIONS + bool "Mitigations for speculative execution vulnerabilities" + default y + help + Say Y here to enable options which enable mitigations for + speculative execution hardware vulnerabilities. + + If you say N, all mitigations will be disabled. You really + should know what you are doing to say so. + +if SPECULATION_MITIGATIONS + +config PAGE_TABLE_ISOLATION + bool "Remove the kernel mapping in user mode" + default y + depends on (X86_64 || X86_PAE) + help + This feature reduces the number of hardware side channels by + ensuring that the majority of kernel addresses are not mapped + into userspace. + + See Documentation/x86/pti.rst for more details. + +config RETPOLINE + bool "Avoid speculative indirect branches in kernel" + select OBJTOOL if HAVE_OBJTOOL + default y + help + Compile kernel with the retpoline compiler options to guard against + kernel-to-user data leaks by avoiding speculative indirect + branches. Requires a compiler with -mindirect-branch=thunk-extern + support for full protection. The kernel may run slower. + +config RETHUNK + bool "Enable return-thunks" + depends on RETPOLINE && CC_HAS_RETURN_THUNK + select OBJTOOL if HAVE_OBJTOOL + default y + help + Compile the kernel with the return-thunks compiler option to guard + against kernel-to-user data leaks by avoiding return speculation. + Requires a compiler with -mfunction-return=thunk-extern + support for full protection. The kernel may run slower. + +config CPU_UNRET_ENTRY + bool "Enable UNRET on kernel entry" + depends on CPU_SUP_AMD && RETHUNK + default y + help + Compile the kernel with support for the retbleed=unret mitigation. + +config CPU_IBPB_ENTRY + bool "Enable IBPB on kernel entry" + depends on CPU_SUP_AMD + default y + help + Compile the kernel with support for the retbleed=ibpb mitigation. + +config CPU_IBRS_ENTRY + bool "Enable IBRS on kernel entry" + depends on CPU_SUP_INTEL + default y + help + Compile the kernel with support for the spectre_v2=ibrs mitigation. + This mitigates both spectre_v2 and retbleed at great cost to + performance. + +config SLS + bool "Mitigate Straight-Line-Speculation" + depends on CC_HAS_SLS && X86_64 + select OBJTOOL if HAVE_OBJTOOL + default n + help + Compile the kernel with straight-line-speculation options to guard + against straight line speculation. The kernel image might be slightly + larger. + +endif + config ARCH_HAS_ADD_PAGES def_bool y depends on ARCH_ENABLE_MEMORY_HOTPLUG diff --git a/arch/x86/Makefile b/arch/x86/Makefile index a74886aed3495f29b356c3a0c63b3f6c1f8fa8f1..1f40dad30d5084f50aaec155db49df961681cdd8 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -21,6 +21,12 @@ ifdef CONFIG_CC_IS_CLANG RETPOLINE_CFLAGS := -mretpoline-external-thunk RETPOLINE_VDSO_CFLAGS := -mretpoline endif + +ifdef CONFIG_RETHUNK +RETHUNK_CFLAGS := -mfunction-return=thunk-extern +RETPOLINE_CFLAGS += $(RETHUNK_CFLAGS) +endif + export RETPOLINE_CFLAGS export RETPOLINE_VDSO_CFLAGS diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c index 44c350d627c79b4b13e29bfe74cbec1e5c69879f..d4a314cc50d6ee6ecaa7268f59bbb946c71fb916 100644 --- a/arch/x86/boot/compressed/ident_map_64.c +++ b/arch/x86/boot/compressed/ident_map_64.c @@ -110,6 +110,7 @@ void kernel_add_identity_map(unsigned long start, unsigned long end) void initialize_identity_maps(void *rmode) { unsigned long cmdline; + struct setup_data *sd; /* Exclude the encryption mask from __PHYSICAL_MASK */ physical_mask &= ~sme_me_mask; @@ -163,6 +164,18 @@ void initialize_identity_maps(void *rmode) cmdline = get_cmd_line_ptr(); kernel_add_identity_map(cmdline, cmdline + COMMAND_LINE_SIZE); + /* + * Also map the setup_data entries passed via boot_params in case they + * need to be accessed by uncompressed kernel via the identity mapping. + */ + sd = (struct setup_data *)boot_params->hdr.setup_data; + while (sd) { + unsigned long sd_addr = (unsigned long)sd; + + kernel_add_identity_map(sd_addr, sd_addr + sizeof(*sd) + sd->len); + sd = (struct setup_data *)sd->next; + } + sev_prep_identity_maps(top_level_pgt); /* Load the new page-table. */ diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index 03deb4d6920d552b79f4654da5fe531baa3157d1..928dcf7a20d987a0e41ba400f584c0efaa5c26cd 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -124,6 +124,51 @@ static u64 get_cc_mask(void) return BIT_ULL(gpa_width - 1); } +/* + * The TDX module spec states that #VE may be injected for a limited set of + * reasons: + * + * - Emulation of the architectural #VE injection on EPT violation; + * + * - As a result of guest TD execution of a disallowed instruction, + * a disallowed MSR access, or CPUID virtualization; + * + * - A notification to the guest TD about anomalous behavior; + * + * The last one is opt-in and is not used by the kernel. + * + * The Intel Software Developer's Manual describes cases when instruction + * length field can be used in section "Information for VM Exits Due to + * Instruction Execution". + * + * For TDX, it ultimately means GET_VEINFO provides reliable instruction length + * information if #VE occurred due to instruction execution, but not for EPT + * violations. + */ +static int ve_instr_len(struct ve_info *ve) +{ + switch (ve->exit_reason) { + case EXIT_REASON_HLT: + case EXIT_REASON_MSR_READ: + case EXIT_REASON_MSR_WRITE: + case EXIT_REASON_CPUID: + case EXIT_REASON_IO_INSTRUCTION: + /* It is safe to use ve->instr_len for #VE due instructions */ + return ve->instr_len; + case EXIT_REASON_EPT_VIOLATION: + /* + * For EPT violations, ve->insn_len is not defined. For those, + * the kernel must decode instructions manually and should not + * be using this function. + */ + WARN_ONCE(1, "ve->instr_len is not defined for EPT violations"); + return 0; + default: + WARN_ONCE(1, "Unexpected #VE-type: %lld\n", ve->exit_reason); + return ve->instr_len; + } +} + static u64 __cpuidle __halt(const bool irq_disabled, const bool do_sti) { struct tdx_hypercall_args args = { @@ -147,7 +192,7 @@ static u64 __cpuidle __halt(const bool irq_disabled, const bool do_sti) return __tdx_hypercall(&args, do_sti ? TDX_HCALL_ISSUE_STI : 0); } -static bool handle_halt(void) +static int handle_halt(struct ve_info *ve) { /* * Since non safe halt is mainly used in CPU offlining @@ -158,9 +203,9 @@ static bool handle_halt(void) const bool do_sti = false; if (__halt(irq_disabled, do_sti)) - return false; + return -EIO; - return true; + return ve_instr_len(ve); } void __cpuidle tdx_safe_halt(void) @@ -180,7 +225,7 @@ void __cpuidle tdx_safe_halt(void) WARN_ONCE(1, "HLT instruction emulation failed\n"); } -static bool read_msr(struct pt_regs *regs) +static int read_msr(struct pt_regs *regs, struct ve_info *ve) { struct tdx_hypercall_args args = { .r10 = TDX_HYPERCALL_STANDARD, @@ -194,14 +239,14 @@ static bool read_msr(struct pt_regs *regs) * (GHCI), section titled "TDG.VP.VMCALL". */ if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT)) - return false; + return -EIO; regs->ax = lower_32_bits(args.r11); regs->dx = upper_32_bits(args.r11); - return true; + return ve_instr_len(ve); } -static bool write_msr(struct pt_regs *regs) +static int write_msr(struct pt_regs *regs, struct ve_info *ve) { struct tdx_hypercall_args args = { .r10 = TDX_HYPERCALL_STANDARD, @@ -215,10 +260,13 @@ static bool write_msr(struct pt_regs *regs) * can be found in TDX Guest-Host-Communication Interface * (GHCI) section titled "TDG.VP.VMCALL". */ - return !__tdx_hypercall(&args, 0); + if (__tdx_hypercall(&args, 0)) + return -EIO; + + return ve_instr_len(ve); } -static bool handle_cpuid(struct pt_regs *regs) +static int handle_cpuid(struct pt_regs *regs, struct ve_info *ve) { struct tdx_hypercall_args args = { .r10 = TDX_HYPERCALL_STANDARD, @@ -236,7 +284,7 @@ static bool handle_cpuid(struct pt_regs *regs) */ if (regs->ax < 0x40000000 || regs->ax > 0x4FFFFFFF) { regs->ax = regs->bx = regs->cx = regs->dx = 0; - return true; + return ve_instr_len(ve); } /* @@ -245,7 +293,7 @@ static bool handle_cpuid(struct pt_regs *regs) * (GHCI), section titled "VP.VMCALL". */ if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT)) - return false; + return -EIO; /* * As per TDX GHCI CPUID ABI, r12-r15 registers contain contents of @@ -257,7 +305,7 @@ static bool handle_cpuid(struct pt_regs *regs) regs->cx = args.r14; regs->dx = args.r15; - return true; + return ve_instr_len(ve); } static bool mmio_read(int size, unsigned long addr, unsigned long *val) @@ -283,10 +331,10 @@ static bool mmio_write(int size, unsigned long addr, unsigned long val) EPT_WRITE, addr, val); } -static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve) +static int handle_mmio(struct pt_regs *regs, struct ve_info *ve) { + unsigned long *reg, val, vaddr; char buffer[MAX_INSN_SIZE]; - unsigned long *reg, val; struct insn insn = {}; enum mmio_type mmio; int size, extend_size; @@ -294,34 +342,49 @@ static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve) /* Only in-kernel MMIO is supported */ if (WARN_ON_ONCE(user_mode(regs))) - return false; + return -EFAULT; if (copy_from_kernel_nofault(buffer, (void *)regs->ip, MAX_INSN_SIZE)) - return false; + return -EFAULT; if (insn_decode(&insn, buffer, MAX_INSN_SIZE, INSN_MODE_64)) - return false; + return -EINVAL; mmio = insn_decode_mmio(&insn, &size); if (WARN_ON_ONCE(mmio == MMIO_DECODE_FAILED)) - return false; + return -EINVAL; if (mmio != MMIO_WRITE_IMM && mmio != MMIO_MOVS) { reg = insn_get_modrm_reg_ptr(&insn, regs); if (!reg) - return false; + return -EINVAL; } - ve->instr_len = insn.length; + /* + * Reject EPT violation #VEs that split pages. + * + * MMIO accesses are supposed to be naturally aligned and therefore + * never cross page boundaries. Seeing split page accesses indicates + * a bug or a load_unaligned_zeropad() that stepped into an MMIO page. + * + * load_unaligned_zeropad() will recover using exception fixups. + */ + vaddr = (unsigned long)insn_get_addr_ref(&insn, regs); + if (vaddr / PAGE_SIZE != (vaddr + size - 1) / PAGE_SIZE) + return -EFAULT; /* Handle writes first */ switch (mmio) { case MMIO_WRITE: memcpy(&val, reg, size); - return mmio_write(size, ve->gpa, val); + if (!mmio_write(size, ve->gpa, val)) + return -EIO; + return insn.length; case MMIO_WRITE_IMM: val = insn.immediate.value; - return mmio_write(size, ve->gpa, val); + if (!mmio_write(size, ve->gpa, val)) + return -EIO; + return insn.length; case MMIO_READ: case MMIO_READ_ZERO_EXTEND: case MMIO_READ_SIGN_EXTEND: @@ -334,15 +397,15 @@ static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve) * decoded or handled properly. It was likely not using io.h * helpers or accessed MMIO accidentally. */ - return false; + return -EINVAL; default: WARN_ONCE(1, "Unknown insn_decode_mmio() decode value?"); - return false; + return -EINVAL; } /* Handle reads */ if (!mmio_read(size, ve->gpa, &val)) - return false; + return -EIO; switch (mmio) { case MMIO_READ: @@ -364,13 +427,13 @@ static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve) default: /* All other cases has to be covered with the first switch() */ WARN_ON_ONCE(1); - return false; + return -EINVAL; } if (extend_size) memset(reg, extend_val, extend_size); memcpy(reg, &val, size); - return true; + return insn.length; } static bool handle_in(struct pt_regs *regs, int size, int port) @@ -421,13 +484,14 @@ static bool handle_out(struct pt_regs *regs, int size, int port) * * Return True on success or False on failure. */ -static bool handle_io(struct pt_regs *regs, u32 exit_qual) +static int handle_io(struct pt_regs *regs, struct ve_info *ve) { + u32 exit_qual = ve->exit_qual; int size, port; - bool in; + bool in, ret; if (VE_IS_IO_STRING(exit_qual)) - return false; + return -EIO; in = VE_IS_IO_IN(exit_qual); size = VE_GET_IO_SIZE(exit_qual); @@ -435,9 +499,13 @@ static bool handle_io(struct pt_regs *regs, u32 exit_qual) if (in) - return handle_in(regs, size, port); + ret = handle_in(regs, size, port); else - return handle_out(regs, size, port); + ret = handle_out(regs, size, port); + if (!ret) + return -EIO; + + return ve_instr_len(ve); } /* @@ -447,13 +515,19 @@ static bool handle_io(struct pt_regs *regs, u32 exit_qual) __init bool tdx_early_handle_ve(struct pt_regs *regs) { struct ve_info ve; + int insn_len; tdx_get_ve_info(&ve); if (ve.exit_reason != EXIT_REASON_IO_INSTRUCTION) return false; - return handle_io(regs, ve.exit_qual); + insn_len = handle_io(regs, &ve); + if (insn_len < 0) + return false; + + regs->ip += insn_len; + return true; } void tdx_get_ve_info(struct ve_info *ve) @@ -486,54 +560,65 @@ void tdx_get_ve_info(struct ve_info *ve) ve->instr_info = upper_32_bits(out.r10); } -/* Handle the user initiated #VE */ -static bool virt_exception_user(struct pt_regs *regs, struct ve_info *ve) +/* + * Handle the user initiated #VE. + * + * On success, returns the number of bytes RIP should be incremented (>=0) + * or -errno on error. + */ +static int virt_exception_user(struct pt_regs *regs, struct ve_info *ve) { switch (ve->exit_reason) { case EXIT_REASON_CPUID: - return handle_cpuid(regs); + return handle_cpuid(regs, ve); default: pr_warn("Unexpected #VE: %lld\n", ve->exit_reason); - return false; + return -EIO; } } -/* Handle the kernel #VE */ -static bool virt_exception_kernel(struct pt_regs *regs, struct ve_info *ve) +/* + * Handle the kernel #VE. + * + * On success, returns the number of bytes RIP should be incremented (>=0) + * or -errno on error. + */ +static int virt_exception_kernel(struct pt_regs *regs, struct ve_info *ve) { switch (ve->exit_reason) { case EXIT_REASON_HLT: - return handle_halt(); + return handle_halt(ve); case EXIT_REASON_MSR_READ: - return read_msr(regs); + return read_msr(regs, ve); case EXIT_REASON_MSR_WRITE: - return write_msr(regs); + return write_msr(regs, ve); case EXIT_REASON_CPUID: - return handle_cpuid(regs); + return handle_cpuid(regs, ve); case EXIT_REASON_EPT_VIOLATION: return handle_mmio(regs, ve); case EXIT_REASON_IO_INSTRUCTION: - return handle_io(regs, ve->exit_qual); + return handle_io(regs, ve); default: pr_warn("Unexpected #VE: %lld\n", ve->exit_reason); - return false; + return -EIO; } } bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve) { - bool ret; + int insn_len; if (user_mode(regs)) - ret = virt_exception_user(regs, ve); + insn_len = virt_exception_user(regs, ve); else - ret = virt_exception_kernel(regs, ve); + insn_len = virt_exception_kernel(regs, ve); + if (insn_len < 0) + return false; /* After successful #VE handling, move the IP */ - if (ret) - regs->ip += ve->instr_len; + regs->ip += insn_len; - return ret; + return true; } static bool tdx_tlb_flush_required(bool private) diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile index 7fec5dcf643868aff199f72a839c7f8510309ddb..eeadbd7d92cc55e0b44163278525826a083940e5 100644 --- a/arch/x86/entry/Makefile +++ b/arch/x86/entry/Makefile @@ -11,7 +11,7 @@ CFLAGS_REMOVE_common.o = $(CC_FLAGS_FTRACE) CFLAGS_common.o += -fno-stack-protector -obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o +obj-y := entry.o entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o obj-y += common.o obj-y += vdso/ diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 29b36e9e4e741e2e36c69219a25934d0bfa2f4b4..f6907627172ba96d1be4976e65b843f294886327 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -7,6 +7,8 @@ #include #include #include +#include +#include /* @@ -282,6 +284,66 @@ For 32-bit we have the following conventions - kernel is built with #endif +/* + * IBRS kernel mitigation for Spectre_v2. + * + * Assumes full context is established (PUSH_REGS, CR3 and GS) and it clobbers + * the regs it uses (AX, CX, DX). Must be called before the first RET + * instruction (NOTE! UNTRAIN_RET includes a RET instruction) + * + * The optional argument is used to save/restore the current value, + * which is used on the paranoid paths. + * + * Assumes x86_spec_ctrl_{base,current} to have SPEC_CTRL_IBRS set. + */ +.macro IBRS_ENTER save_reg +#ifdef CONFIG_CPU_IBRS_ENTRY + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS + movl $MSR_IA32_SPEC_CTRL, %ecx + +.ifnb \save_reg + rdmsr + shl $32, %rdx + or %rdx, %rax + mov %rax, \save_reg + test $SPEC_CTRL_IBRS, %eax + jz .Ldo_wrmsr_\@ + lfence + jmp .Lend_\@ +.Ldo_wrmsr_\@: +.endif + + movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx + movl %edx, %eax + shr $32, %rdx + wrmsr +.Lend_\@: +#endif +.endm + +/* + * Similar to IBRS_ENTER, requires KERNEL GS,CR3 and clobbers (AX, CX, DX) + * regs. Must be called after the last RET. + */ +.macro IBRS_EXIT save_reg +#ifdef CONFIG_CPU_IBRS_ENTRY + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS + movl $MSR_IA32_SPEC_CTRL, %ecx + +.ifnb \save_reg + mov \save_reg, %rdx +.else + movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx + andl $(~SPEC_CTRL_IBRS), %edx +.endif + + movl %edx, %eax + shr $32, %rdx + wrmsr +.Lend_\@: +#endif +.endm + /* * Mitigate Spectre v1 for conditional swapgs code paths. * diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S new file mode 100644 index 0000000000000000000000000000000000000000..bfb7bcb362bcfca2f84947e0925766925687d77c --- /dev/null +++ b/arch/x86/entry/entry.S @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Common place for both 32- and 64-bit entry routines. + */ + +#include +#include +#include + +.pushsection .noinstr.text, "ax" + +SYM_FUNC_START(entry_ibpb) + movl $MSR_IA32_PRED_CMD, %ecx + movl $PRED_CMD_IBPB, %eax + xorl %edx, %edx + wrmsr + RET +SYM_FUNC_END(entry_ibpb) +/* For KVM */ +EXPORT_SYMBOL_GPL(entry_ibpb); + +.popsection diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 887420844066454f696f60dd78aa69a520580699..e309e7156038935ab92cd025616c5c765e3a5214 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -698,7 +698,6 @@ SYM_CODE_START(__switch_to_asm) movl %ebx, PER_CPU_VAR(__stack_chk_guard) #endif -#ifdef CONFIG_RETPOLINE /* * When switching from a shallower to a deeper call stack * the RSB may either underflow or use entries populated @@ -707,7 +706,6 @@ SYM_CODE_START(__switch_to_asm) * speculative execution to prevent attack. */ FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW -#endif /* Restore flags or the incoming task to restore AC state. */ popfl diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 4300ba49b5eeace08b31c83247e0e5a6a37f8d35..9953d966d12443115bc7ecb3d5c083c9b5c8847f 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -85,7 +85,7 @@ */ SYM_CODE_START(entry_SYSCALL_64) - UNWIND_HINT_EMPTY + UNWIND_HINT_ENTRY ENDBR swapgs @@ -112,6 +112,11 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL) movq %rsp, %rdi /* Sign extend the lower 32bit as syscall numbers are treated as int */ movslq %eax, %rsi + + /* clobbers %rax, make sure it is after saving the syscall nr */ + IBRS_ENTER + UNTRAIN_RET + call do_syscall_64 /* returns with IRQs disabled */ /* @@ -191,6 +196,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL) * perf profiles. Nothing jumps here. */ syscall_return_via_sysret: + IBRS_EXIT POP_REGS pop_rdi=0 /* @@ -249,7 +255,6 @@ SYM_FUNC_START(__switch_to_asm) movq %rbx, PER_CPU_VAR(fixed_percpu_data) + stack_canary_offset #endif -#ifdef CONFIG_RETPOLINE /* * When switching from a shallower to a deeper call stack * the RSB may either underflow or use entries populated @@ -258,7 +263,6 @@ SYM_FUNC_START(__switch_to_asm) * speculative execution to prevent attack. */ FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW -#endif /* restore callee-saved registers */ popq %r15 @@ -322,13 +326,13 @@ SYM_CODE_END(ret_from_fork) #endif .endm -/* Save all registers in pt_regs */ -SYM_CODE_START_LOCAL(push_and_clear_regs) +SYM_CODE_START_LOCAL(xen_error_entry) UNWIND_HINT_FUNC PUSH_AND_CLEAR_REGS save_ret=1 ENCODE_FRAME_POINTER 8 + UNTRAIN_RET RET -SYM_CODE_END(push_and_clear_regs) +SYM_CODE_END(xen_error_entry) /** * idtentry_body - Macro to emit code calling the C function @@ -337,9 +341,6 @@ SYM_CODE_END(push_and_clear_regs) */ .macro idtentry_body cfunc has_error_code:req - call push_and_clear_regs - UNWIND_HINT_REGS - /* * Call error_entry() and switch to the task stack if from userspace. * @@ -349,7 +350,7 @@ SYM_CODE_END(push_and_clear_regs) * switch the CR3. So it can skip invoking error_entry(). */ ALTERNATIVE "call error_entry; movq %rax, %rsp", \ - "", X86_FEATURE_XENPV + "call xen_error_entry", X86_FEATURE_XENPV ENCODE_FRAME_POINTER UNWIND_HINT_REGS @@ -612,6 +613,7 @@ __irqentry_text_end: SYM_CODE_START_LOCAL(common_interrupt_return) SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) + IBRS_EXIT #ifdef CONFIG_DEBUG_ENTRY /* Assert that pt_regs indicates user mode. */ testb $3, CS(%rsp) @@ -897,6 +899,9 @@ SYM_CODE_END(xen_failsafe_callback) * 1 -> no SWAPGS on exit * * Y GSBASE value at entry, must be restored in paranoid_exit + * + * R14 - old CR3 + * R15 - old SPEC_CTRL */ SYM_CODE_START_LOCAL(paranoid_entry) UNWIND_HINT_FUNC @@ -940,7 +945,7 @@ SYM_CODE_START_LOCAL(paranoid_entry) * is needed here. */ SAVE_AND_SET_GSBASE scratch_reg=%rax save_reg=%rbx - RET + jmp .Lparanoid_gsbase_done .Lparanoid_entry_checkgs: /* EBX = 1 -> kernel GSBASE active, no restore required */ @@ -959,8 +964,16 @@ SYM_CODE_START_LOCAL(paranoid_entry) xorl %ebx, %ebx swapgs .Lparanoid_kernel_gsbase: - FENCE_SWAPGS_KERNEL_ENTRY +.Lparanoid_gsbase_done: + + /* + * Once we have CR3 and %GS setup save and set SPEC_CTRL. Just like + * CR3 above, keep the old value in a callee saved register. + */ + IBRS_ENTER save_reg=%r15 + UNTRAIN_RET + RET SYM_CODE_END(paranoid_entry) @@ -982,9 +995,19 @@ SYM_CODE_END(paranoid_entry) * 1 -> no SWAPGS on exit * * Y User space GSBASE, must be restored unconditionally + * + * R14 - old CR3 + * R15 - old SPEC_CTRL */ SYM_CODE_START_LOCAL(paranoid_exit) UNWIND_HINT_REGS + + /* + * Must restore IBRS state before both CR3 and %GS since we need access + * to the per-CPU x86_spec_ctrl_shadow variable. + */ + IBRS_EXIT save_reg=%r15 + /* * The order of operations is important. RESTORE_CR3 requires * kernel GSBASE. @@ -1017,6 +1040,10 @@ SYM_CODE_END(paranoid_exit) */ SYM_CODE_START_LOCAL(error_entry) UNWIND_HINT_FUNC + + PUSH_AND_CLEAR_REGS save_ret=1 + ENCODE_FRAME_POINTER 8 + testb $3, CS+8(%rsp) jz .Lerror_kernelspace @@ -1028,9 +1055,12 @@ SYM_CODE_START_LOCAL(error_entry) FENCE_SWAPGS_USER_ENTRY /* We have user CR3. Change to kernel CR3. */ SWITCH_TO_KERNEL_CR3 scratch_reg=%rax + IBRS_ENTER + UNTRAIN_RET leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */ .Lerror_entry_from_usermode_after_swapgs: + /* Put us onto the real thread stack. */ call sync_regs RET @@ -1065,6 +1095,7 @@ SYM_CODE_START_LOCAL(error_entry) .Lerror_entry_done_lfence: FENCE_SWAPGS_KERNEL_ENTRY leaq 8(%rsp), %rax /* return pt_regs pointer */ + ANNOTATE_UNRET_END RET .Lbstep_iret: @@ -1080,6 +1111,8 @@ SYM_CODE_START_LOCAL(error_entry) swapgs FENCE_SWAPGS_USER_ENTRY SWITCH_TO_KERNEL_CR3 scratch_reg=%rax + IBRS_ENTER + UNTRAIN_RET /* * Pretend that the exception came from user mode: set up pt_regs @@ -1185,6 +1218,9 @@ SYM_CODE_START(asm_exc_nmi) PUSH_AND_CLEAR_REGS rdx=(%rdx) ENCODE_FRAME_POINTER + IBRS_ENTER + UNTRAIN_RET + /* * At this point we no longer need to worry about stack damage * due to nesting -- we're on the normal thread stack and we're @@ -1409,6 +1445,9 @@ end_repeat_nmi: movq $-1, %rsi call exc_nmi + /* Always restore stashed SPEC_CTRL value (see paranoid_entry) */ + IBRS_EXIT save_reg=%r15 + /* Always restore stashed CR3 value (see paranoid_entry) */ RESTORE_CR3 scratch_reg=%r15 save_reg=%r14 diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index d1052742ad0cd51c3628978ec53d1a50b12445be..682338e7e2a38dbedcdef57ac6cbb6078e8abe75 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -4,7 +4,6 @@ * * Copyright 2000-2002 Andi Kleen, SuSE Labs. */ -#include "calling.h" #include #include #include @@ -14,9 +13,12 @@ #include #include #include +#include #include #include +#include "calling.h" + .section .entry.text, "ax" /* @@ -47,7 +49,7 @@ * 0(%ebp) arg6 */ SYM_CODE_START(entry_SYSENTER_compat) - UNWIND_HINT_EMPTY + UNWIND_HINT_ENTRY ENDBR /* Interrupts are off on entry. */ swapgs @@ -88,6 +90,9 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) cld + IBRS_ENTER + UNTRAIN_RET + /* * SYSENTER doesn't filter flags, so we need to clear NT and AC * ourselves. To save a few cycles, we can check whether @@ -174,7 +179,7 @@ SYM_CODE_END(entry_SYSENTER_compat) * 0(%esp) arg6 */ SYM_CODE_START(entry_SYSCALL_compat) - UNWIND_HINT_EMPTY + UNWIND_HINT_ENTRY ENDBR /* Interrupts are off on entry. */ swapgs @@ -203,6 +208,9 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL) PUSH_AND_CLEAR_REGS rcx=%rbp rax=$-ENOSYS UNWIND_HINT_REGS + IBRS_ENTER + UNTRAIN_RET + movq %rsp, %rdi call do_fast_syscall_32 /* XEN PV guests always use IRET path */ @@ -217,6 +225,8 @@ sysret32_from_system_call: */ STACKLEAK_ERASE + IBRS_EXIT + movq RBX(%rsp), %rbx /* pt_regs->rbx */ movq RBP(%rsp), %rbp /* pt_regs->rbp */ movq EFLAGS(%rsp), %r11 /* pt_regs->flags (in r11) */ @@ -295,7 +305,7 @@ SYM_CODE_END(entry_SYSCALL_compat) * ebp arg6 */ SYM_CODE_START(entry_INT80_compat) - UNWIND_HINT_EMPTY + UNWIND_HINT_ENTRY ENDBR /* * Interrupts are off on entry. @@ -337,6 +347,9 @@ SYM_CODE_START(entry_INT80_compat) cld + IBRS_ENTER + UNTRAIN_RET + movq %rsp, %rdi call do_int80_syscall_32 jmp swapgs_restore_regs_and_return_to_usermode diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index c2a8b76ae0bce2d1b77d420fa588b65d6bbdf0a4..76cd790ed0bd565e5ccb16a64d574115f8c97014 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -92,6 +92,7 @@ endif endif $(vobjs): KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO) $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL) +$(vobjs): KBUILD_AFLAGS += -DBUILD_VDSO # # vDSO code runs in userspace and -pg doesn't help with profiling anyway. diff --git a/arch/x86/entry/vsyscall/vsyscall_emu_64.S b/arch/x86/entry/vsyscall/vsyscall_emu_64.S index 15e35159ebb68d950fb605de0d3868c5c77b46fa..ef2dd182724314f24db5ba592699c304ff231523 100644 --- a/arch/x86/entry/vsyscall/vsyscall_emu_64.S +++ b/arch/x86/entry/vsyscall/vsyscall_emu_64.S @@ -19,17 +19,20 @@ __vsyscall_page: mov $__NR_gettimeofday, %rax syscall - RET + ret + int3 .balign 1024, 0xcc mov $__NR_time, %rax syscall - RET + ret + int3 .balign 1024, 0xcc mov $__NR_getcpu, %rax syscall - RET + ret + int3 .balign 4096, 0xcc diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 8b392b6b7b93422fd9c4a72966a59035e881e4af..3de6d8b533672bea57e9b15468de3ce835e4e76b 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -405,6 +406,11 @@ void __init hyperv_init(void) } if (hv_isolation_type_snp()) { + /* Negotiate GHCB Version. */ + if (!hv_ghcb_negotiate_protocol()) + hv_ghcb_terminate(SEV_TERM_SET_GEN, + GHCB_SEV_ES_PROT_UNSUPPORTED); + hv_ghcb_pg = alloc_percpu(union hv_ghcb *); if (!hv_ghcb_pg) goto free_vp_assist_page; diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index 2b994117581e2f19226bbc481d3d9de64021f5f3..1dbcbd9da74d445e609e2c0bcc689bf922813740 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -53,6 +53,8 @@ union hv_ghcb { } hypercall; } __packed __aligned(HV_HYP_PAGE_SIZE); +static u16 hv_ghcb_version __ro_after_init; + u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size) { union hv_ghcb *hv_ghcb; @@ -96,12 +98,85 @@ u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size) return status; } +static inline u64 rd_ghcb_msr(void) +{ + return __rdmsr(MSR_AMD64_SEV_ES_GHCB); +} + +static inline void wr_ghcb_msr(u64 val) +{ + native_wrmsrl(MSR_AMD64_SEV_ES_GHCB, val); +} + +static enum es_result hv_ghcb_hv_call(struct ghcb *ghcb, u64 exit_code, + u64 exit_info_1, u64 exit_info_2) +{ + /* Fill in protocol and format specifiers */ + ghcb->protocol_version = hv_ghcb_version; + ghcb->ghcb_usage = GHCB_DEFAULT_USAGE; + + ghcb_set_sw_exit_code(ghcb, exit_code); + ghcb_set_sw_exit_info_1(ghcb, exit_info_1); + ghcb_set_sw_exit_info_2(ghcb, exit_info_2); + + VMGEXIT(); + + if (ghcb->save.sw_exit_info_1 & GENMASK_ULL(31, 0)) + return ES_VMM_ERROR; + else + return ES_OK; +} + +void hv_ghcb_terminate(unsigned int set, unsigned int reason) +{ + u64 val = GHCB_MSR_TERM_REQ; + + /* Tell the hypervisor what went wrong. */ + val |= GHCB_SEV_TERM_REASON(set, reason); + + /* Request Guest Termination from Hypvervisor */ + wr_ghcb_msr(val); + VMGEXIT(); + + while (true) + asm volatile("hlt\n" : : : "memory"); +} + +bool hv_ghcb_negotiate_protocol(void) +{ + u64 ghcb_gpa; + u64 val; + + /* Save ghcb page gpa. */ + ghcb_gpa = rd_ghcb_msr(); + + /* Do the GHCB protocol version negotiation */ + wr_ghcb_msr(GHCB_MSR_SEV_INFO_REQ); + VMGEXIT(); + val = rd_ghcb_msr(); + + if (GHCB_MSR_INFO(val) != GHCB_MSR_SEV_INFO_RESP) + return false; + + if (GHCB_MSR_PROTO_MAX(val) < GHCB_PROTOCOL_MIN || + GHCB_MSR_PROTO_MIN(val) > GHCB_PROTOCOL_MAX) + return false; + + hv_ghcb_version = min_t(size_t, GHCB_MSR_PROTO_MAX(val), + GHCB_PROTOCOL_MAX); + + /* Write ghcb page back after negotiating protocol. */ + wr_ghcb_msr(ghcb_gpa); + VMGEXIT(); + + return true; +} + void hv_ghcb_msr_write(u64 msr, u64 value) { union hv_ghcb *hv_ghcb; void **ghcb_base; unsigned long flags; - struct es_em_ctxt ctxt; if (!hv_ghcb_pg) return; @@ -120,8 +195,7 @@ void hv_ghcb_msr_write(u64 msr, u64 value) ghcb_set_rax(&hv_ghcb->ghcb, lower_32_bits(value)); ghcb_set_rdx(&hv_ghcb->ghcb, upper_32_bits(value)); - if (sev_es_ghcb_hv_call(&hv_ghcb->ghcb, false, &ctxt, - SVM_EXIT_MSR, 1, 0)) + if (hv_ghcb_hv_call(&hv_ghcb->ghcb, SVM_EXIT_MSR, 1, 0)) pr_warn("Fail to write msr via ghcb %llx.\n", msr); local_irq_restore(flags); @@ -133,7 +207,6 @@ void hv_ghcb_msr_read(u64 msr, u64 *value) union hv_ghcb *hv_ghcb; void **ghcb_base; unsigned long flags; - struct es_em_ctxt ctxt; /* Check size of union hv_ghcb here. */ BUILD_BUG_ON(sizeof(union hv_ghcb) != HV_HYP_PAGE_SIZE); @@ -152,8 +225,7 @@ void hv_ghcb_msr_read(u64 msr, u64 *value) } ghcb_set_rcx(&hv_ghcb->ghcb, msr); - if (sev_es_ghcb_hv_call(&hv_ghcb->ghcb, false, &ctxt, - SVM_EXIT_MSR, 0, 0)) + if (hv_ghcb_hv_call(&hv_ghcb->ghcb, SVM_EXIT_MSR, 0, 0)) pr_warn("Fail to read msr via ghcb %llx.\n", msr); else *value = (u64)lower_32_bits(hv_ghcb->ghcb.save.rax) diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 9b10c8c76087aabe701c7908d760710b53e181ed..9542c582d546b2ae270f3568794c57de4478d0b4 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -76,6 +76,7 @@ extern int alternatives_patched; extern void alternative_instructions(void); extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); extern void apply_retpolines(s32 *start, s32 *end); +extern void apply_returns(s32 *start, s32 *end); extern void apply_ibt_endbr(s32 *start, s32 *end); struct module; diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 393f2bbb5e3a8405e22094e45cdef4c6953cab01..00f5227c8459870d142a4dce759a997467448b50 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -203,8 +203,8 @@ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ #define X86_FEATURE_XCOMPACTED ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ -#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ -#define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */ +#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */ +#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ @@ -296,6 +296,12 @@ #define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */ #define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */ #define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */ +#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* "" Issue an IBPB on kernel entry */ +#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* "" RET prediction control */ +#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ +#define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */ +#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ @@ -316,6 +322,7 @@ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ #define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */ #define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */ +#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */ #define X86_FEATURE_BRS (13*32+31) /* Branch Sampling available */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ @@ -446,5 +453,7 @@ #define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */ #define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ #define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */ +#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */ +#define X86_BUG_RETBLEED X86_BUG(26) /* CPU is affected by RETBleed */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 36369e76cc631ecbb488c175d3c330cebee88737..33d2cd04d2544791b1363f8d7578a7d45b7fc749 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -50,6 +50,25 @@ # define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) #endif +#ifdef CONFIG_RETPOLINE +# define DISABLE_RETPOLINE 0 +#else +# define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \ + (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31))) +#endif + +#ifdef CONFIG_RETHUNK +# define DISABLE_RETHUNK 0 +#else +# define DISABLE_RETHUNK (1 << (X86_FEATURE_RETHUNK & 31)) +#endif + +#ifdef CONFIG_CPU_UNRET_ENTRY +# define DISABLE_UNRET 0 +#else +# define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31)) +#endif + #ifdef CONFIG_INTEL_IOMMU_SVM # define DISABLE_ENQCMD 0 #else @@ -82,7 +101,7 @@ #define DISABLED_MASK8 (DISABLE_TDX_GUEST) #define DISABLED_MASK9 (DISABLE_SGX) #define DISABLED_MASK10 0 -#define DISABLED_MASK11 0 +#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET) #define DISABLED_MASK12 0 #define DISABLED_MASK13 0 #define DISABLED_MASK14 0 diff --git a/arch/x86/include/asm/e820/api.h b/arch/x86/include/asm/e820/api.h index 5a39ed59b6db7d1effbdc46bd2c0ba1798f709b3..e8f58ddd06d97fbce6ef1f701f9f91766ef2847d 100644 --- a/arch/x86/include/asm/e820/api.h +++ b/arch/x86/include/asm/e820/api.h @@ -4,9 +4,6 @@ #include -struct device; -struct resource; - extern struct e820_table *e820_table; extern struct e820_table *e820_table_kexec; extern struct e820_table *e820_table_firmware; @@ -46,8 +43,6 @@ extern void e820__register_nosave_regions(unsigned long limit_pfn); extern int e820__get_entry_type(u64 start, u64 end); -extern void remove_e820_regions(struct device *dev, struct resource *avail); - /* * Returns true iff the specified range [start,end) is completely contained inside * the ISA region. diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 71943dce691ed72ce4839adf9fb3b0a51cc0525d..9636742a80f299c70a29a6b34551f1aa523d7ee9 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -323,7 +323,7 @@ static inline u32 efi64_convert_status(efi_status_t status) #define __efi64_argmap_get_memory_space_descriptor(phys, desc) \ (__efi64_split(phys), (desc)) -#define __efi64_argmap_set_memory_space_descriptor(phys, size, flags) \ +#define __efi64_argmap_set_memory_space_attributes(phys, size, flags) \ (__efi64_split(phys), __efi64_split(size), __efi64_split(flags)) /* diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3a240a64ac68f4274f093b99b54f4a8914f9372d..9217bd6cf0d14888bd2e8e7034858100934feaf1 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1047,14 +1047,77 @@ struct kvm_x86_msr_filter { }; enum kvm_apicv_inhibit { + + /********************************************************************/ + /* INHIBITs that are relevant to both Intel's APICv and AMD's AVIC. */ + /********************************************************************/ + + /* + * APIC acceleration is disabled by a module parameter + * and/or not supported in hardware. + */ APICV_INHIBIT_REASON_DISABLE, + + /* + * APIC acceleration is inhibited because AutoEOI feature is + * being used by a HyperV guest. + */ APICV_INHIBIT_REASON_HYPERV, + + /* + * APIC acceleration is inhibited because the userspace didn't yet + * enable the kernel/split irqchip. + */ + APICV_INHIBIT_REASON_ABSENT, + + /* APIC acceleration is inhibited because KVM_GUESTDBG_BLOCKIRQ + * (out of band, debug measure of blocking all interrupts on this vCPU) + * was enabled, to avoid AVIC/APICv bypassing it. + */ + APICV_INHIBIT_REASON_BLOCKIRQ, + + /* + * For simplicity, the APIC acceleration is inhibited + * first time either APIC ID or APIC base are changed by the guest + * from their reset values. + */ + APICV_INHIBIT_REASON_APIC_ID_MODIFIED, + APICV_INHIBIT_REASON_APIC_BASE_MODIFIED, + + /******************************************************/ + /* INHIBITs that are relevant only to the AMD's AVIC. */ + /******************************************************/ + + /* + * AVIC is inhibited on a vCPU because it runs a nested guest. + * + * This is needed because unlike APICv, the peers of this vCPU + * cannot use the doorbell mechanism to signal interrupts via AVIC when + * a vCPU runs nested. + */ APICV_INHIBIT_REASON_NESTED, + + /* + * On SVM, the wait for the IRQ window is implemented with pending vIRQ, + * which cannot be injected when the AVIC is enabled, thus AVIC + * is inhibited while KVM waits for IRQ window. + */ APICV_INHIBIT_REASON_IRQWIN, + + /* + * PIT (i8254) 're-inject' mode, relies on EOI intercept, + * which AVIC doesn't support for edge triggered interrupts. + */ APICV_INHIBIT_REASON_PIT_REINJ, + + /* + * AVIC is inhibited because the guest has x2apic in its CPUID. + */ APICV_INHIBIT_REASON_X2APIC, - APICV_INHIBIT_REASON_BLOCKIRQ, - APICV_INHIBIT_REASON_ABSENT, + + /* + * AVIC is disabled because SEV doesn't support it. + */ APICV_INHIBIT_REASON_SEV, }; diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 85865f1645bd3cced0d77b202a3edc77b616990e..73ca2004983562ca3774d5a6796cf80e231c312f 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -19,19 +19,27 @@ #define __ALIGN_STR __stringify(__ALIGN) #endif +#if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) +#define RET jmp __x86_return_thunk +#else /* CONFIG_RETPOLINE */ #ifdef CONFIG_SLS #define RET ret; int3 #else #define RET ret #endif +#endif /* CONFIG_RETPOLINE */ #else /* __ASSEMBLY__ */ +#if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) +#define ASM_RET "jmp __x86_return_thunk\n\t" +#else /* CONFIG_RETPOLINE */ #ifdef CONFIG_SLS #define ASM_RET "ret; int3\n\t" #else #define ASM_RET "ret\n\t" #endif +#endif /* CONFIG_RETPOLINE */ #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index a82f603d4312d16e77b51ef404e227f2980728c3..61f0c206bff0f638fb1caeba3bb42694deb088cc 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -179,9 +179,13 @@ int hv_set_mem_host_visibility(unsigned long addr, int numpages, bool visible); #ifdef CONFIG_AMD_MEM_ENCRYPT void hv_ghcb_msr_write(u64 msr, u64 value); void hv_ghcb_msr_read(u64 msr, u64 *value); +bool hv_ghcb_negotiate_protocol(void); +void hv_ghcb_terminate(unsigned int set, unsigned int reason); #else static inline void hv_ghcb_msr_write(u64 msr, u64 value) {} static inline void hv_ghcb_msr_read(u64 msr, u64 *value) {} +static inline bool hv_ghcb_negotiate_protocol(void) { return false; } +static inline void hv_ghcb_terminate(unsigned int set, unsigned int reason) {} #endif extern bool hv_isolation_type_snp(void); diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 403e83b4adc88ea3d61337279920de39f75df6ae..cc615be27a54b2daf55352ab1d34c244ee0790c6 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -51,6 +51,8 @@ #define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ #define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ +#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */ +#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT) #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ #define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ @@ -93,6 +95,7 @@ #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a #define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */ #define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */ +#define ARCH_CAP_RSBA BIT(2) /* RET may use alternative branch predictors */ #define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */ #define ARCH_CAP_SSB_NO BIT(4) /* * Not susceptible to Speculative Store Bypass @@ -116,6 +119,37 @@ * Not susceptible to * TSX Async Abort (TAA) vulnerabilities. */ +#define ARCH_CAP_SBDR_SSDP_NO BIT(13) /* + * Not susceptible to SBDR and SSDP + * variants of Processor MMIO stale data + * vulnerabilities. + */ +#define ARCH_CAP_FBSDP_NO BIT(14) /* + * Not susceptible to FBSDP variant of + * Processor MMIO stale data + * vulnerabilities. + */ +#define ARCH_CAP_PSDP_NO BIT(15) /* + * Not susceptible to PSDP variant of + * Processor MMIO stale data + * vulnerabilities. + */ +#define ARCH_CAP_FB_CLEAR BIT(17) /* + * VERW clears CPU fill buffer + * even on MDS_NO CPUs. + */ +#define ARCH_CAP_FB_CLEAR_CTRL BIT(18) /* + * MSR_IA32_MCU_OPT_CTRL[FB_CLEAR_DIS] + * bit available to control VERW + * behavior. + */ +#define ARCH_CAP_RRSBA BIT(19) /* + * Indicates RET may use predictors + * other than the RSB. With eIBRS + * enabled predictions in kernel mode + * are restricted to targets in + * kernel. + */ #define MSR_IA32_FLUSH_CMD 0x0000010b #define L1D_FLUSH BIT(0) /* @@ -133,6 +167,7 @@ #define MSR_IA32_MCU_OPT_CTRL 0x00000123 #define RNGDS_MITG_DIS BIT(0) /* SRBDS support */ #define RTM_ALLOW BIT(1) /* TSX development mode */ +#define FB_CLEAR_DIS BIT(3) /* CPU Fill buffer clear disable */ #define MSR_IA32_SYSENTER_CS 0x00000174 #define MSR_IA32_SYSENTER_ESP 0x00000175 @@ -542,6 +577,9 @@ /* Fam 17h MSRs */ #define MSR_F17H_IRPERF 0xc00000e9 +#define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3 +#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1) + /* Fam 16h MSRs */ #define MSR_F16H_L2I_PERF_CTL 0xc0010230 #define MSR_F16H_L2I_PERF_CTR 0xc0010231 diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index acbaeaf83b61adc1f9d0e103eab18e4328047775..10a3bfc1eb230e7c1c7f49e29d7e7f453341b75c 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -11,6 +11,7 @@ #include #include #include +#include #define RETPOLINE_THUNK_SIZE 32 @@ -75,6 +76,23 @@ .popsection .endm +/* + * (ab)use RETPOLINE_SAFE on RET to annotate away 'bare' RET instructions + * vs RETBleed validation. + */ +#define ANNOTATE_UNRET_SAFE ANNOTATE_RETPOLINE_SAFE + +/* + * Abuse ANNOTATE_RETPOLINE_SAFE on a NOP to indicate UNRET_END, should + * eventually turn into it's own annotation. + */ +.macro ANNOTATE_UNRET_END +#ifdef CONFIG_DEBUG_ENTRY + ANNOTATE_RETPOLINE_SAFE + nop +#endif +.endm + /* * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple * indirect jmp/call which may be susceptible to the Spectre variant 2 @@ -105,10 +123,34 @@ * monstrosity above, manually. */ .macro FILL_RETURN_BUFFER reg:req nr:req ftr:req -#ifdef CONFIG_RETPOLINE ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr __FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP) .Lskip_rsb_\@: +.endm + +#ifdef CONFIG_CPU_UNRET_ENTRY +#define CALL_ZEN_UNTRAIN_RET "call zen_untrain_ret" +#else +#define CALL_ZEN_UNTRAIN_RET "" +#endif + +/* + * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the + * return thunk isn't mapped into the userspace tables (then again, AMD + * typically has NO_MELTDOWN). + * + * While zen_untrain_ret() doesn't clobber anything but requires stack, + * entry_ibpb() will clobber AX, CX, DX. + * + * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point + * where we have a stack but before any RET instruction. + */ +.macro UNTRAIN_RET +#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) + ANNOTATE_UNRET_END + ALTERNATIVE_2 "", \ + CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \ + "call entry_ibpb", X86_FEATURE_ENTRY_IBPB #endif .endm @@ -120,17 +162,20 @@ _ASM_PTR " 999b\n\t" \ ".popsection\n\t" -#ifdef CONFIG_RETPOLINE - typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; +extern retpoline_thunk_t __x86_indirect_thunk_array[]; + +extern void __x86_return_thunk(void); +extern void zen_untrain_ret(void); +extern void entry_ibpb(void); + +#ifdef CONFIG_RETPOLINE #define GEN(reg) \ extern retpoline_thunk_t __x86_indirect_thunk_ ## reg; #include #undef GEN -extern retpoline_thunk_t __x86_indirect_thunk_array[]; - #ifdef CONFIG_X86_64 /* @@ -193,6 +238,7 @@ enum spectre_v2_mitigation { SPECTRE_V2_EIBRS, SPECTRE_V2_EIBRS_RETPOLINE, SPECTRE_V2_EIBRS_LFENCE, + SPECTRE_V2_IBRS, }; /* The indirect branch speculation control variants */ @@ -235,6 +281,9 @@ static inline void indirect_branch_prediction_barrier(void) /* The Intel SPEC CTRL MSR base value cache */ extern u64 x86_spec_ctrl_base; +DECLARE_PER_CPU(u64, x86_spec_ctrl_current); +extern void write_spec_ctrl_current(u64 val, bool force); +extern u64 spec_ctrl_current(void); /* * With retpoline, we must use IBRS to restrict branch prediction @@ -244,18 +293,16 @@ extern u64 x86_spec_ctrl_base; */ #define firmware_restrict_branch_speculation_start() \ do { \ - u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS; \ - \ preempt_disable(); \ - alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ + alternative_msr_write(MSR_IA32_SPEC_CTRL, \ + spec_ctrl_current() | SPEC_CTRL_IBRS, \ X86_FEATURE_USE_IBRS_FW); \ } while (0) #define firmware_restrict_branch_speculation_end() \ do { \ - u64 val = x86_spec_ctrl_base; \ - \ - alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ + alternative_msr_write(MSR_IA32_SPEC_CTRL, \ + spec_ctrl_current(), \ X86_FEATURE_USE_IBRS_FW); \ preempt_enable(); \ } while (0) @@ -269,6 +316,8 @@ DECLARE_STATIC_KEY_FALSE(mds_idle_clear); DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush); +DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear); + #include /** diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index f52a886d35cf8a97bfdae701496af4a943605666..70533fdcbf02c9a6fce1dedfd80694a5a5e9d837 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -69,6 +69,8 @@ void pcibios_scan_specific_bus(int busn); /* pci-irq.c */ +struct pci_dev; + struct irq_info { u8 bus, devfn; /* Bus, device and function */ struct { @@ -246,3 +248,9 @@ static inline void mmio_config_writel(void __iomem *pos, u32 val) # define x86_default_pci_init_irq NULL # define x86_default_pci_fixup_irqs NULL #endif + +#if defined(CONFIG_PCI) && defined(CONFIG_ACPI) +extern bool pci_use_e820; +#else +#define pci_use_e820 false +#endif diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 7590ac2570b964a80ff0e78f8044a1046626cec7..f37cbff7354c49e211e51743f1b229286dae656d 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -108,21 +108,21 @@ extern unsigned long _brk_end; void *extend_brk(size_t size, size_t align); /* - * Reserve space in the brk section. The name must be unique within the file, - * and somewhat descriptive. The size is in bytes. + * Reserve space in the .brk section, which is a block of memory from which the + * caller is allowed to allocate very early (before even memblock is available) + * by calling extend_brk(). All allocated memory will be eventually converted + * to memblock. Any leftover unallocated memory will be freed. * - * The allocation is done using inline asm (rather than using a section - * attribute on a normal variable) in order to allow the use of @nobits, so - * that it doesn't take up any space in the vmlinux file. + * The size is in bytes. */ -#define RESERVE_BRK(name, size) \ - asm(".pushsection .brk_reservation,\"aw\",@nobits\n\t" \ - ".brk." #name ":\n\t" \ - ".skip " __stringify(size) "\n\t" \ - ".size .brk." #name ", " __stringify(size) "\n\t" \ - ".popsection\n\t") +#define RESERVE_BRK(name, size) \ + __section(".bss..brk") __aligned(1) __used \ + static char __brk_##name[size] extern void probe_roms(void); + +void clear_bss(void); + #ifdef __i386__ asmlinkage void __init i386_start_kernel(void); @@ -133,12 +133,19 @@ asmlinkage void __init x86_64_start_reservations(char *real_mode_data); #endif /* __i386__ */ #endif /* _SETUP */ -#else -#define RESERVE_BRK(name,sz) \ - .pushsection .brk_reservation,"aw",@nobits; \ -.brk.name: \ -1: .skip sz; \ - .size .brk.name,.-1b; \ + +#else /* __ASSEMBLY */ + +.macro __RESERVE_BRK name, size + .pushsection .bss..brk, "aw" +SYM_DATA_START(__brk_\name) + .skip \size +SYM_DATA_END(__brk_\name) .popsection +.endm + +#define RESERVE_BRK(name, size) __RESERVE_BRK name, size + #endif /* __ASSEMBLY__ */ + #endif /* _ASM_X86_SETUP_H */ diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h index 2d8dacd026437a2968c29226f7f9aeaefa96f45e..343b722ccaf21d0654b5d7dc19596a4ef548a8e5 100644 --- a/arch/x86/include/asm/static_call.h +++ b/arch/x86/include/asm/static_call.h @@ -21,6 +21,16 @@ * relative displacement across sections. */ +/* + * The trampoline is 8 bytes and of the general form: + * + * jmp.d32 \func + * ud1 %esp, %ecx + * + * That trailing #UD provides both a speculation stop and serves as a unique + * 3 byte signature identifying static call trampolines. Also see tramp_ud[] + * and __static_call_fixup(). + */ #define __ARCH_DEFINE_STATIC_CALL_TRAMP(name, insns) \ asm(".pushsection .static_call.text, \"ax\" \n" \ ".align 4 \n" \ @@ -28,7 +38,7 @@ STATIC_CALL_TRAMP_STR(name) ": \n" \ ANNOTATE_NOENDBR \ insns " \n" \ - ".byte 0x53, 0x43, 0x54 \n" \ + ".byte 0x0f, 0xb9, 0xcc \n" \ ".type " STATIC_CALL_TRAMP_STR(name) ", @function \n" \ ".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \ ".popsection \n") @@ -36,8 +46,13 @@ #define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. + 4)") +#ifdef CONFIG_RETHUNK +#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \ + __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "jmp __x86_return_thunk") +#else #define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; int3; nop; nop; nop") +#endif #define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) \ ARCH_DEFINE_STATIC_CALL_TRAMP(name, __static_call_return0) @@ -48,4 +63,6 @@ ".long " STATIC_CALL_KEY_STR(name) " - . \n" \ ".popsection \n") +extern bool __static_call_fixup(void *tramp, u8 op, void *dest); + #endif /* _ASM_STATIC_CALL_H */ diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h index 8b33674288ea7b2bbe23f4ddbca32555fc6d7daf..f66fbe6537dd7abac82b6bca5e6258f88a81d4b7 100644 --- a/arch/x86/include/asm/unwind_hints.h +++ b/arch/x86/include/asm/unwind_hints.h @@ -8,7 +8,11 @@ #ifdef __ASSEMBLY__ .macro UNWIND_HINT_EMPTY - UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_CALL end=1 + UNWIND_HINT type=UNWIND_HINT_TYPE_CALL end=1 +.endm + +.macro UNWIND_HINT_ENTRY + UNWIND_HINT type=UNWIND_HINT_TYPE_ENTRY end=1 .endm .macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 partial=0 @@ -52,6 +56,14 @@ UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=8 type=UNWIND_HINT_TYPE_FUNC .endm +.macro UNWIND_HINT_SAVE + UNWIND_HINT type=UNWIND_HINT_TYPE_SAVE +.endm + +.macro UNWIND_HINT_RESTORE + UNWIND_HINT type=UNWIND_HINT_TYPE_RESTORE +.endm + #else #define UNWIND_HINT_FUNC \ diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index bea5cdcdf53252bf4fed4199ee2c7b11a48416ac..e02a8a8ef23cec816ab256135623baa11b0de2f3 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h @@ -15,7 +15,7 @@ #define SETUP_INDIRECT (1<<31) /* SETUP_INDIRECT | max(SETUP_*) */ -#define SETUP_TYPE_MAX (SETUP_INDIRECT | SETUP_JAILHOUSE) +#define SETUP_TYPE_MAX (SETUP_INDIRECT | SETUP_CC_BLOB) /* ram_size flags */ #define RAMDISK_IMAGE_START_MASK 0x07FF diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 03364dc40d8d9195a0115f8bb5245fc08cbf4fb1..4c8b6ae802ac397dca639fae33b8e24538618892 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -36,10 +36,6 @@ KCSAN_SANITIZE := n OBJECT_FILES_NON_STANDARD_test_nx.o := y -ifdef CONFIG_FRAME_POINTER -OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y -endif - # If instrumentation of this dir is enabled, boot hangs during first second. # Probably could be more selective here, but note that files related to irqs, # boot, dumpstack/stacktrace, etc are either non-interesting or can lead to diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c index 8b8cbf22461a4feff2249a2d731b538a31c30501..8d8752b44f11395298a40acce79de2027614248b 100644 --- a/arch/x86/kernel/acpi/cppc.c +++ b/arch/x86/kernel/acpi/cppc.c @@ -11,6 +11,22 @@ /* Refer to drivers/acpi/cppc_acpi.c for the description of functions */ +bool cpc_supported_by_cpu(void) +{ + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + case X86_VENDOR_HYGON: + if (boot_cpu_data.x86 == 0x19 && ((boot_cpu_data.x86_model <= 0x0f) || + (boot_cpu_data.x86_model >= 0x20 && boot_cpu_data.x86_model <= 0x2f))) + return true; + else if (boot_cpu_data.x86 == 0x17 && + boot_cpu_data.x86_model >= 0x70 && boot_cpu_data.x86_model <= 0x7f) + return true; + return boot_cpu_has(X86_FEATURE_CPPC); + } + return false; +} + bool cpc_ffh_supported(void) { return true; diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index e257f6c80372139d83401fcda0b64828fabd0408..d6858533e6e595bfc2a0de630e503b0f3d6a460d 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -115,6 +115,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len) } extern s32 __retpoline_sites[], __retpoline_sites_end[]; +extern s32 __return_sites[], __return_sites_end[]; extern s32 __ibt_endbr_seal[], __ibt_endbr_seal_end[]; extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; extern s32 __smp_locks[], __smp_locks_end[]; @@ -507,9 +508,76 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) } } +#ifdef CONFIG_RETHUNK +/* + * Rewrite the compiler generated return thunk tail-calls. + * + * For example, convert: + * + * JMP __x86_return_thunk + * + * into: + * + * RET + */ +static int patch_return(void *addr, struct insn *insn, u8 *bytes) +{ + int i = 0; + + if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) + return -1; + + bytes[i++] = RET_INSN_OPCODE; + + for (; i < insn->length;) + bytes[i++] = INT3_INSN_OPCODE; + + return i; +} + +void __init_or_module noinline apply_returns(s32 *start, s32 *end) +{ + s32 *s; + + for (s = start; s < end; s++) { + void *dest = NULL, *addr = (void *)s + *s; + struct insn insn; + int len, ret; + u8 bytes[16]; + u8 op; + + ret = insn_decode_kernel(&insn, addr); + if (WARN_ON_ONCE(ret < 0)) + continue; + + op = insn.opcode.bytes[0]; + if (op == JMP32_INSN_OPCODE) + dest = addr + insn.length + insn.immediate.value; + + if (__static_call_fixup(addr, op, dest) || + WARN_ON_ONCE(dest != &__x86_return_thunk)) + continue; + + DPRINTK("return thunk at: %pS (%px) len: %d to: %pS", + addr, addr, insn.length, + addr + insn.length + insn.immediate.value); + + len = patch_return(addr, &insn, bytes); + if (len == insn.length) { + DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr); + DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr); + text_poke_early(addr, bytes, len); + } + } +} +#else +void __init_or_module noinline apply_returns(s32 *start, s32 *end) { } +#endif /* CONFIG_RETHUNK */ + #else /* !CONFIG_RETPOLINE || !CONFIG_OBJTOOL */ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { } +void __init_or_module noinline apply_returns(s32 *start, s32 *end) { } #endif /* CONFIG_RETPOLINE && CONFIG_OBJTOOL */ @@ -860,6 +928,7 @@ void __init alternative_instructions(void) * those can rewrite the retpoline thunks. */ apply_retpolines(__retpoline_sites, __retpoline_sites_end); + apply_returns(__return_sites, __return_sites_end); /* * Then patch alternatives, such that those paravirt calls that are in diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 437308004ef2e4f1345947ce318c62bc56a6036f..cb50589a7102fa3825703d6a7a87c1d00b906149 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -19,6 +19,7 @@ #include #include #include +#include "../kvm/vmx/vmx.h" #ifdef CONFIG_XEN #include @@ -107,4 +108,9 @@ static void __used common(void) OFFSET(TSS_sp0, tss_struct, x86_tss.sp0); OFFSET(TSS_sp1, tss_struct, x86_tss.sp1); OFFSET(TSS_sp2, tss_struct, x86_tss.sp2); + + if (IS_ENABLED(CONFIG_KVM_INTEL)) { + BLANK(); + OFFSET(VMX_spec_ctrl, vcpu_vmx, spec_ctrl); + } } diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 0c0b09796ced30153adbaf5cee1c686ba6063201..35d5288394cb82f187b4314e0795f6cef5343dd8 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -862,6 +862,28 @@ static void init_amd_bd(struct cpuinfo_x86 *c) clear_rdrand_cpuid_bit(c); } +void init_spectral_chicken(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_CPU_UNRET_ENTRY + u64 value; + + /* + * On Zen2 we offer this chicken (bit) on the altar of Speculation. + * + * This suppresses speculation from the middle of a basic block, i.e. it + * suppresses non-branch predictions. + * + * We use STIBP as a heuristic to filter out Zen2 from the rest of F17H + */ + if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && cpu_has(c, X86_FEATURE_AMD_STIBP)) { + if (!rdmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, &value)) { + value |= MSR_ZEN2_SPECTRAL_CHICKEN_BIT; + wrmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, value); + } + } +#endif +} + static void init_amd_zn(struct cpuinfo_x86 *c) { set_cpu_cap(c, X86_FEATURE_ZEN); @@ -870,12 +892,21 @@ static void init_amd_zn(struct cpuinfo_x86 *c) node_reclaim_distance = 32; #endif - /* - * Fix erratum 1076: CPB feature bit not being set in CPUID. - * Always set it, except when running under a hypervisor. - */ - if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_CPB)) - set_cpu_cap(c, X86_FEATURE_CPB); + /* Fix up CPUID bits, but only if not virtualised. */ + if (!cpu_has(c, X86_FEATURE_HYPERVISOR)) { + + /* Erratum 1076: CPB feature bit not being set in CPUID. */ + if (!cpu_has(c, X86_FEATURE_CPB)) + set_cpu_cap(c, X86_FEATURE_CPB); + + /* + * Zen3 (Fam19 model < 0x10) parts are not susceptible to + * Branch Type Confusion, but predate the allocation of the + * BTC_NO bit. + */ + if (c->x86 == 0x19 && !cpu_has(c, X86_FEATURE_BTC_NO)) + set_cpu_cap(c, X86_FEATURE_BTC_NO); + } } static void init_amd(struct cpuinfo_x86 *c) @@ -907,7 +938,8 @@ static void init_amd(struct cpuinfo_x86 *c) case 0x12: init_amd_ln(c); break; case 0x15: init_amd_bd(c); break; case 0x16: init_amd_jg(c); break; - case 0x17: fallthrough; + case 0x17: init_spectral_chicken(c); + fallthrough; case 0x19: init_amd_zn(c); break; } diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d879a6c93609a6f655bd76f1efbb7750b68de5b7..aa34f908c39ff0136dd9f0055167611c377eabd7 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -38,24 +38,52 @@ static void __init spectre_v1_select_mitigation(void); static void __init spectre_v2_select_mitigation(void); +static void __init retbleed_select_mitigation(void); +static void __init spectre_v2_user_select_mitigation(void); static void __init ssb_select_mitigation(void); static void __init l1tf_select_mitigation(void); static void __init mds_select_mitigation(void); -static void __init mds_print_mitigation(void); +static void __init md_clear_update_mitigation(void); +static void __init md_clear_select_mitigation(void); static void __init taa_select_mitigation(void); +static void __init mmio_select_mitigation(void); static void __init srbds_select_mitigation(void); static void __init l1d_flush_select_mitigation(void); -/* The base value of the SPEC_CTRL MSR that always has to be preserved. */ +/* The base value of the SPEC_CTRL MSR without task-specific bits set */ u64 x86_spec_ctrl_base; EXPORT_SYMBOL_GPL(x86_spec_ctrl_base); + +/* The current value of the SPEC_CTRL MSR with task-specific bits set */ +DEFINE_PER_CPU(u64, x86_spec_ctrl_current); +EXPORT_SYMBOL_GPL(x86_spec_ctrl_current); + static DEFINE_MUTEX(spec_ctrl_mutex); /* - * The vendor and possibly platform specific bits which can be modified in - * x86_spec_ctrl_base. + * Keep track of the SPEC_CTRL MSR value for the current task, which may differ + * from x86_spec_ctrl_base due to STIBP/SSB in __speculation_ctrl_update(). */ -static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS; +void write_spec_ctrl_current(u64 val, bool force) +{ + if (this_cpu_read(x86_spec_ctrl_current) == val) + return; + + this_cpu_write(x86_spec_ctrl_current, val); + + /* + * When KERNEL_IBRS this MSR is written on return-to-user, unless + * forced the update can be delayed until that time. + */ + if (force || !cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS)) + wrmsrl(MSR_IA32_SPEC_CTRL, val); +} + +u64 spec_ctrl_current(void) +{ + return this_cpu_read(x86_spec_ctrl_current); +} +EXPORT_SYMBOL_GPL(spec_ctrl_current); /* * AMD specific MSR info for Speculative Store Bypass control. @@ -85,6 +113,10 @@ EXPORT_SYMBOL_GPL(mds_idle_clear); */ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush); +/* Controls CPU Fill buffer clear before KVM guest MMIO accesses */ +DEFINE_STATIC_KEY_FALSE(mmio_stale_data_clear); +EXPORT_SYMBOL_GPL(mmio_stale_data_clear); + void __init check_bugs(void) { identify_boot_cpu(); @@ -108,26 +140,27 @@ void __init check_bugs(void) if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); - /* Allow STIBP in MSR_SPEC_CTRL if supported */ - if (boot_cpu_has(X86_FEATURE_STIBP)) - x86_spec_ctrl_mask |= SPEC_CTRL_STIBP; - /* Select the proper CPU mitigations before patching alternatives: */ spectre_v1_select_mitigation(); spectre_v2_select_mitigation(); + /* + * retbleed_select_mitigation() relies on the state set by + * spectre_v2_select_mitigation(); specifically it wants to know about + * spectre_v2=ibrs. + */ + retbleed_select_mitigation(); + /* + * spectre_v2_user_select_mitigation() relies on the state set by + * retbleed_select_mitigation(); specifically the STIBP selection is + * forced for UNRET. + */ + spectre_v2_user_select_mitigation(); ssb_select_mitigation(); l1tf_select_mitigation(); - mds_select_mitigation(); - taa_select_mitigation(); + md_clear_select_mitigation(); srbds_select_mitigation(); l1d_flush_select_mitigation(); - /* - * As MDS and TAA mitigations are inter-related, print MDS - * mitigation until after TAA mitigation selection is done. - */ - mds_print_mitigation(); - arch_smt_update(); #ifdef CONFIG_X86_32 @@ -162,31 +195,17 @@ void __init check_bugs(void) #endif } +/* + * NOTE: This function is *only* called for SVM. VMX spec_ctrl handling is + * done in vmenter.S. + */ void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) { - u64 msrval, guestval, hostval = x86_spec_ctrl_base; + u64 msrval, guestval = guest_spec_ctrl, hostval = spec_ctrl_current(); struct thread_info *ti = current_thread_info(); - /* Is MSR_SPEC_CTRL implemented ? */ if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) { - /* - * Restrict guest_spec_ctrl to supported values. Clear the - * modifiable bits in the host base value and or the - * modifiable bits from the guest value. - */ - guestval = hostval & ~x86_spec_ctrl_mask; - guestval |= guest_spec_ctrl & x86_spec_ctrl_mask; - - /* SSBD controlled in MSR_SPEC_CTRL */ - if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || - static_cpu_has(X86_FEATURE_AMD_SSBD)) - hostval |= ssbd_tif_to_spec_ctrl(ti->flags); - - /* Conditional STIBP enabled? */ - if (static_branch_unlikely(&switch_to_cond_stibp)) - hostval |= stibp_tif_to_spec_ctrl(ti->flags); - if (hostval != guestval) { msrval = setguest ? guestval : hostval; wrmsrl(MSR_IA32_SPEC_CTRL, msrval); @@ -267,14 +286,6 @@ static void __init mds_select_mitigation(void) } } -static void __init mds_print_mitigation(void) -{ - if (!boot_cpu_has_bug(X86_BUG_MDS) || cpu_mitigations_off()) - return; - - pr_info("%s\n", mds_strings[mds_mitigation]); -} - static int __init mds_cmdline(char *str) { if (!boot_cpu_has_bug(X86_BUG_MDS)) @@ -329,7 +340,7 @@ static void __init taa_select_mitigation(void) /* TSX previously disabled by tsx=off */ if (!boot_cpu_has(X86_FEATURE_RTM)) { taa_mitigation = TAA_MITIGATION_TSX_DISABLED; - goto out; + return; } if (cpu_mitigations_off()) { @@ -343,7 +354,7 @@ static void __init taa_select_mitigation(void) */ if (taa_mitigation == TAA_MITIGATION_OFF && mds_mitigation == MDS_MITIGATION_OFF) - goto out; + return; if (boot_cpu_has(X86_FEATURE_MD_CLEAR)) taa_mitigation = TAA_MITIGATION_VERW; @@ -375,18 +386,6 @@ static void __init taa_select_mitigation(void) if (taa_nosmt || cpu_mitigations_auto_nosmt()) cpu_smt_disable(false); - - /* - * Update MDS mitigation, if necessary, as the mds_user_clear is - * now enabled for TAA mitigation. - */ - if (mds_mitigation == MDS_MITIGATION_OFF && - boot_cpu_has_bug(X86_BUG_MDS)) { - mds_mitigation = MDS_MITIGATION_FULL; - mds_select_mitigation(); - } -out: - pr_info("%s\n", taa_strings[taa_mitigation]); } static int __init tsx_async_abort_parse_cmdline(char *str) @@ -410,6 +409,151 @@ static int __init tsx_async_abort_parse_cmdline(char *str) } early_param("tsx_async_abort", tsx_async_abort_parse_cmdline); +#undef pr_fmt +#define pr_fmt(fmt) "MMIO Stale Data: " fmt + +enum mmio_mitigations { + MMIO_MITIGATION_OFF, + MMIO_MITIGATION_UCODE_NEEDED, + MMIO_MITIGATION_VERW, +}; + +/* Default mitigation for Processor MMIO Stale Data vulnerabilities */ +static enum mmio_mitigations mmio_mitigation __ro_after_init = MMIO_MITIGATION_VERW; +static bool mmio_nosmt __ro_after_init = false; + +static const char * const mmio_strings[] = { + [MMIO_MITIGATION_OFF] = "Vulnerable", + [MMIO_MITIGATION_UCODE_NEEDED] = "Vulnerable: Clear CPU buffers attempted, no microcode", + [MMIO_MITIGATION_VERW] = "Mitigation: Clear CPU buffers", +}; + +static void __init mmio_select_mitigation(void) +{ + u64 ia32_cap; + + if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) || + cpu_mitigations_off()) { + mmio_mitigation = MMIO_MITIGATION_OFF; + return; + } + + if (mmio_mitigation == MMIO_MITIGATION_OFF) + return; + + ia32_cap = x86_read_arch_cap_msr(); + + /* + * Enable CPU buffer clear mitigation for host and VMM, if also affected + * by MDS or TAA. Otherwise, enable mitigation for VMM only. + */ + if (boot_cpu_has_bug(X86_BUG_MDS) || (boot_cpu_has_bug(X86_BUG_TAA) && + boot_cpu_has(X86_FEATURE_RTM))) + static_branch_enable(&mds_user_clear); + else + static_branch_enable(&mmio_stale_data_clear); + + /* + * If Processor-MMIO-Stale-Data bug is present and Fill Buffer data can + * be propagated to uncore buffers, clearing the Fill buffers on idle + * is required irrespective of SMT state. + */ + if (!(ia32_cap & ARCH_CAP_FBSDP_NO)) + static_branch_enable(&mds_idle_clear); + + /* + * Check if the system has the right microcode. + * + * CPU Fill buffer clear mitigation is enumerated by either an explicit + * FB_CLEAR or by the presence of both MD_CLEAR and L1D_FLUSH on MDS + * affected systems. + */ + if ((ia32_cap & ARCH_CAP_FB_CLEAR) || + (boot_cpu_has(X86_FEATURE_MD_CLEAR) && + boot_cpu_has(X86_FEATURE_FLUSH_L1D) && + !(ia32_cap & ARCH_CAP_MDS_NO))) + mmio_mitigation = MMIO_MITIGATION_VERW; + else + mmio_mitigation = MMIO_MITIGATION_UCODE_NEEDED; + + if (mmio_nosmt || cpu_mitigations_auto_nosmt()) + cpu_smt_disable(false); +} + +static int __init mmio_stale_data_parse_cmdline(char *str) +{ + if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) + return 0; + + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) { + mmio_mitigation = MMIO_MITIGATION_OFF; + } else if (!strcmp(str, "full")) { + mmio_mitigation = MMIO_MITIGATION_VERW; + } else if (!strcmp(str, "full,nosmt")) { + mmio_mitigation = MMIO_MITIGATION_VERW; + mmio_nosmt = true; + } + + return 0; +} +early_param("mmio_stale_data", mmio_stale_data_parse_cmdline); + +#undef pr_fmt +#define pr_fmt(fmt) "" fmt + +static void __init md_clear_update_mitigation(void) +{ + if (cpu_mitigations_off()) + return; + + if (!static_key_enabled(&mds_user_clear)) + goto out; + + /* + * mds_user_clear is now enabled. Update MDS, TAA and MMIO Stale Data + * mitigation, if necessary. + */ + if (mds_mitigation == MDS_MITIGATION_OFF && + boot_cpu_has_bug(X86_BUG_MDS)) { + mds_mitigation = MDS_MITIGATION_FULL; + mds_select_mitigation(); + } + if (taa_mitigation == TAA_MITIGATION_OFF && + boot_cpu_has_bug(X86_BUG_TAA)) { + taa_mitigation = TAA_MITIGATION_VERW; + taa_select_mitigation(); + } + if (mmio_mitigation == MMIO_MITIGATION_OFF && + boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) { + mmio_mitigation = MMIO_MITIGATION_VERW; + mmio_select_mitigation(); + } +out: + if (boot_cpu_has_bug(X86_BUG_MDS)) + pr_info("MDS: %s\n", mds_strings[mds_mitigation]); + if (boot_cpu_has_bug(X86_BUG_TAA)) + pr_info("TAA: %s\n", taa_strings[taa_mitigation]); + if (boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) + pr_info("MMIO Stale Data: %s\n", mmio_strings[mmio_mitigation]); +} + +static void __init md_clear_select_mitigation(void) +{ + mds_select_mitigation(); + taa_select_mitigation(); + mmio_select_mitigation(); + + /* + * As MDS, TAA and MMIO Stale Data mitigations are inter-related, update + * and print their mitigation after MDS, TAA and MMIO Stale Data + * mitigation selection is done. + */ + md_clear_update_mitigation(); +} + #undef pr_fmt #define pr_fmt(fmt) "SRBDS: " fmt @@ -478,11 +622,13 @@ static void __init srbds_select_mitigation(void) return; /* - * Check to see if this is one of the MDS_NO systems supporting - * TSX that are only exposed to SRBDS when TSX is enabled. + * Check to see if this is one of the MDS_NO systems supporting TSX that + * are only exposed to SRBDS when TSX is enabled or when CPU is affected + * by Processor MMIO Stale Data vulnerability. */ ia32_cap = x86_read_arch_cap_msr(); - if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM)) + if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) && + !boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) srbds_mitigation = SRBDS_MITIGATION_TSX_OFF; else if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) srbds_mitigation = SRBDS_MITIGATION_HYPERVISOR; @@ -626,12 +772,180 @@ static int __init nospectre_v1_cmdline(char *str) } early_param("nospectre_v1", nospectre_v1_cmdline); -#undef pr_fmt -#define pr_fmt(fmt) "Spectre V2 : " fmt - static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = SPECTRE_V2_NONE; +#undef pr_fmt +#define pr_fmt(fmt) "RETBleed: " fmt + +enum retbleed_mitigation { + RETBLEED_MITIGATION_NONE, + RETBLEED_MITIGATION_UNRET, + RETBLEED_MITIGATION_IBPB, + RETBLEED_MITIGATION_IBRS, + RETBLEED_MITIGATION_EIBRS, +}; + +enum retbleed_mitigation_cmd { + RETBLEED_CMD_OFF, + RETBLEED_CMD_AUTO, + RETBLEED_CMD_UNRET, + RETBLEED_CMD_IBPB, +}; + +static const char * const retbleed_strings[] = { + [RETBLEED_MITIGATION_NONE] = "Vulnerable", + [RETBLEED_MITIGATION_UNRET] = "Mitigation: untrained return thunk", + [RETBLEED_MITIGATION_IBPB] = "Mitigation: IBPB", + [RETBLEED_MITIGATION_IBRS] = "Mitigation: IBRS", + [RETBLEED_MITIGATION_EIBRS] = "Mitigation: Enhanced IBRS", +}; + +static enum retbleed_mitigation retbleed_mitigation __ro_after_init = + RETBLEED_MITIGATION_NONE; +static enum retbleed_mitigation_cmd retbleed_cmd __ro_after_init = + RETBLEED_CMD_AUTO; + +static int __ro_after_init retbleed_nosmt = false; + +static int __init retbleed_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + while (str) { + char *next = strchr(str, ','); + if (next) { + *next = 0; + next++; + } + + if (!strcmp(str, "off")) { + retbleed_cmd = RETBLEED_CMD_OFF; + } else if (!strcmp(str, "auto")) { + retbleed_cmd = RETBLEED_CMD_AUTO; + } else if (!strcmp(str, "unret")) { + retbleed_cmd = RETBLEED_CMD_UNRET; + } else if (!strcmp(str, "ibpb")) { + retbleed_cmd = RETBLEED_CMD_IBPB; + } else if (!strcmp(str, "nosmt")) { + retbleed_nosmt = true; + } else { + pr_err("Ignoring unknown retbleed option (%s).", str); + } + + str = next; + } + + return 0; +} +early_param("retbleed", retbleed_parse_cmdline); + +#define RETBLEED_UNTRAIN_MSG "WARNING: BTB untrained return thunk mitigation is only effective on AMD/Hygon!\n" +#define RETBLEED_INTEL_MSG "WARNING: Spectre v2 mitigation leaves CPU vulnerable to RETBleed attacks, data leaks possible!\n" + +static void __init retbleed_select_mitigation(void) +{ + bool mitigate_smt = false; + + if (!boot_cpu_has_bug(X86_BUG_RETBLEED) || cpu_mitigations_off()) + return; + + switch (retbleed_cmd) { + case RETBLEED_CMD_OFF: + return; + + case RETBLEED_CMD_UNRET: + if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY)) { + retbleed_mitigation = RETBLEED_MITIGATION_UNRET; + } else { + pr_err("WARNING: kernel not compiled with CPU_UNRET_ENTRY.\n"); + goto do_cmd_auto; + } + break; + + case RETBLEED_CMD_IBPB: + if (!boot_cpu_has(X86_FEATURE_IBPB)) { + pr_err("WARNING: CPU does not support IBPB.\n"); + goto do_cmd_auto; + } else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) { + retbleed_mitigation = RETBLEED_MITIGATION_IBPB; + } else { + pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n"); + goto do_cmd_auto; + } + break; + +do_cmd_auto: + case RETBLEED_CMD_AUTO: + default: + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) { + if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY)) + retbleed_mitigation = RETBLEED_MITIGATION_UNRET; + else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY) && boot_cpu_has(X86_FEATURE_IBPB)) + retbleed_mitigation = RETBLEED_MITIGATION_IBPB; + } + + /* + * The Intel mitigation (IBRS or eIBRS) was already selected in + * spectre_v2_select_mitigation(). 'retbleed_mitigation' will + * be set accordingly below. + */ + + break; + } + + switch (retbleed_mitigation) { + case RETBLEED_MITIGATION_UNRET: + setup_force_cpu_cap(X86_FEATURE_RETHUNK); + setup_force_cpu_cap(X86_FEATURE_UNRET); + + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && + boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) + pr_err(RETBLEED_UNTRAIN_MSG); + + mitigate_smt = true; + break; + + case RETBLEED_MITIGATION_IBPB: + setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB); + mitigate_smt = true; + break; + + default: + break; + } + + if (mitigate_smt && !boot_cpu_has(X86_FEATURE_STIBP) && + (retbleed_nosmt || cpu_mitigations_auto_nosmt())) + cpu_smt_disable(false); + + /* + * Let IBRS trump all on Intel without affecting the effects of the + * retbleed= cmdline option. + */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { + switch (spectre_v2_enabled) { + case SPECTRE_V2_IBRS: + retbleed_mitigation = RETBLEED_MITIGATION_IBRS; + break; + case SPECTRE_V2_EIBRS: + case SPECTRE_V2_EIBRS_RETPOLINE: + case SPECTRE_V2_EIBRS_LFENCE: + retbleed_mitigation = RETBLEED_MITIGATION_EIBRS; + break; + default: + pr_err(RETBLEED_INTEL_MSG); + } + } + + pr_info("%s\n", retbleed_strings[retbleed_mitigation]); +} + +#undef pr_fmt +#define pr_fmt(fmt) "Spectre V2 : " fmt + static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init = SPECTRE_V2_USER_NONE; static enum spectre_v2_user_mitigation spectre_v2_user_ibpb __ro_after_init = @@ -702,6 +1016,7 @@ enum spectre_v2_mitigation_cmd { SPECTRE_V2_CMD_EIBRS, SPECTRE_V2_CMD_EIBRS_RETPOLINE, SPECTRE_V2_CMD_EIBRS_LFENCE, + SPECTRE_V2_CMD_IBRS, }; enum spectre_v2_user_cmd { @@ -742,13 +1057,15 @@ static void __init spec_v2_user_print_cond(const char *reason, bool secure) pr_info("spectre_v2_user=%s forced on command line.\n", reason); } +static __ro_after_init enum spectre_v2_mitigation_cmd spectre_v2_cmd; + static enum spectre_v2_user_cmd __init -spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd) +spectre_v2_parse_user_cmdline(void) { char arg[20]; int ret, i; - switch (v2_cmd) { + switch (spectre_v2_cmd) { case SPECTRE_V2_CMD_NONE: return SPECTRE_V2_USER_CMD_NONE; case SPECTRE_V2_CMD_FORCE: @@ -774,15 +1091,16 @@ spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd) return SPECTRE_V2_USER_CMD_AUTO; } -static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode) +static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode) { - return (mode == SPECTRE_V2_EIBRS || - mode == SPECTRE_V2_EIBRS_RETPOLINE || - mode == SPECTRE_V2_EIBRS_LFENCE); + return mode == SPECTRE_V2_IBRS || + mode == SPECTRE_V2_EIBRS || + mode == SPECTRE_V2_EIBRS_RETPOLINE || + mode == SPECTRE_V2_EIBRS_LFENCE; } static void __init -spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) +spectre_v2_user_select_mitigation(void) { enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE; bool smt_possible = IS_ENABLED(CONFIG_SMP); @@ -795,7 +1113,7 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) cpu_smt_control == CPU_SMT_NOT_SUPPORTED) smt_possible = false; - cmd = spectre_v2_parse_user_cmdline(v2_cmd); + cmd = spectre_v2_parse_user_cmdline(); switch (cmd) { case SPECTRE_V2_USER_CMD_NONE: goto set_mode; @@ -843,12 +1161,12 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) } /* - * If no STIBP, enhanced IBRS is enabled or SMT impossible, STIBP is not - * required. + * If no STIBP, IBRS or enhanced IBRS is enabled, or SMT impossible, + * STIBP is not required. */ if (!boot_cpu_has(X86_FEATURE_STIBP) || !smt_possible || - spectre_v2_in_eibrs_mode(spectre_v2_enabled)) + spectre_v2_in_ibrs_mode(spectre_v2_enabled)) return; /* @@ -860,6 +1178,13 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON)) mode = SPECTRE_V2_USER_STRICT_PREFERRED; + if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET) { + if (mode != SPECTRE_V2_USER_STRICT && + mode != SPECTRE_V2_USER_STRICT_PREFERRED) + pr_info("Selecting STIBP always-on mode to complement retbleed mitigation\n"); + mode = SPECTRE_V2_USER_STRICT_PREFERRED; + } + spectre_v2_user_stibp = mode; set_mode: @@ -873,6 +1198,7 @@ static const char * const spectre_v2_strings[] = { [SPECTRE_V2_EIBRS] = "Mitigation: Enhanced IBRS", [SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced IBRS + LFENCE", [SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced IBRS + Retpolines", + [SPECTRE_V2_IBRS] = "Mitigation: IBRS", }; static const struct { @@ -890,6 +1216,7 @@ static const struct { { "eibrs,lfence", SPECTRE_V2_CMD_EIBRS_LFENCE, false }, { "eibrs,retpoline", SPECTRE_V2_CMD_EIBRS_RETPOLINE, false }, { "auto", SPECTRE_V2_CMD_AUTO, false }, + { "ibrs", SPECTRE_V2_CMD_IBRS, false }, }; static void __init spec_v2_print_cond(const char *reason, bool secure) @@ -952,6 +1279,30 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) return SPECTRE_V2_CMD_AUTO; } + if (cmd == SPECTRE_V2_CMD_IBRS && !IS_ENABLED(CONFIG_CPU_IBRS_ENTRY)) { + pr_err("%s selected but not compiled in. Switching to AUTO select\n", + mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; + } + + if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { + pr_err("%s selected but not Intel CPU. Switching to AUTO select\n", + mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; + } + + if (cmd == SPECTRE_V2_CMD_IBRS && !boot_cpu_has(X86_FEATURE_IBRS)) { + pr_err("%s selected but CPU doesn't have IBRS. Switching to AUTO select\n", + mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; + } + + if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_has(X86_FEATURE_XENPV)) { + pr_err("%s selected but running as XenPV guest. Switching to AUTO select\n", + mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; + } + spec_v2_print_cond(mitigation_options[i].option, mitigation_options[i].secure); return cmd; @@ -967,6 +1318,22 @@ static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void) return SPECTRE_V2_RETPOLINE; } +/* Disable in-kernel use of non-RSB RET predictors */ +static void __init spec_ctrl_disable_kernel_rrsba(void) +{ + u64 ia32_cap; + + if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL)) + return; + + ia32_cap = x86_read_arch_cap_msr(); + + if (ia32_cap & ARCH_CAP_RRSBA) { + x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S; + write_spec_ctrl_current(x86_spec_ctrl_base, true); + } +} + static void __init spectre_v2_select_mitigation(void) { enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); @@ -991,6 +1358,15 @@ static void __init spectre_v2_select_mitigation(void) break; } + if (IS_ENABLED(CONFIG_CPU_IBRS_ENTRY) && + boot_cpu_has_bug(X86_BUG_RETBLEED) && + retbleed_cmd != RETBLEED_CMD_OFF && + boot_cpu_has(X86_FEATURE_IBRS) && + boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { + mode = SPECTRE_V2_IBRS; + break; + } + mode = spectre_v2_select_retpoline(); break; @@ -1007,6 +1383,10 @@ static void __init spectre_v2_select_mitigation(void) mode = spectre_v2_select_retpoline(); break; + case SPECTRE_V2_CMD_IBRS: + mode = SPECTRE_V2_IBRS; + break; + case SPECTRE_V2_CMD_EIBRS: mode = SPECTRE_V2_EIBRS; break; @@ -1023,10 +1403,9 @@ static void __init spectre_v2_select_mitigation(void) if (mode == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled()) pr_err(SPECTRE_V2_EIBRS_EBPF_MSG); - if (spectre_v2_in_eibrs_mode(mode)) { - /* Force it so VMEXIT will restore correctly */ + if (spectre_v2_in_ibrs_mode(mode)) { x86_spec_ctrl_base |= SPEC_CTRL_IBRS; - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + write_spec_ctrl_current(x86_spec_ctrl_base, true); } switch (mode) { @@ -1034,6 +1413,10 @@ static void __init spectre_v2_select_mitigation(void) case SPECTRE_V2_EIBRS: break; + case SPECTRE_V2_IBRS: + setup_force_cpu_cap(X86_FEATURE_KERNEL_IBRS); + break; + case SPECTRE_V2_LFENCE: case SPECTRE_V2_EIBRS_LFENCE: setup_force_cpu_cap(X86_FEATURE_RETPOLINE_LFENCE); @@ -1045,43 +1428,107 @@ static void __init spectre_v2_select_mitigation(void) break; } + /* + * Disable alternate RSB predictions in kernel when indirect CALLs and + * JMPs gets protection against BHI and Intramode-BTI, but RET + * prediction from a non-RSB predictor is still a risk. + */ + if (mode == SPECTRE_V2_EIBRS_LFENCE || + mode == SPECTRE_V2_EIBRS_RETPOLINE || + mode == SPECTRE_V2_RETPOLINE) + spec_ctrl_disable_kernel_rrsba(); + spectre_v2_enabled = mode; pr_info("%s\n", spectre_v2_strings[mode]); /* - * If spectre v2 protection has been enabled, unconditionally fill - * RSB during a context switch; this protects against two independent - * issues: + * If Spectre v2 protection has been enabled, fill the RSB during a + * context switch. In general there are two types of RSB attacks + * across context switches, for which the CALLs/RETs may be unbalanced. + * + * 1) RSB underflow + * + * Some Intel parts have "bottomless RSB". When the RSB is empty, + * speculated return targets may come from the branch predictor, + * which could have a user-poisoned BTB or BHB entry. + * + * AMD has it even worse: *all* returns are speculated from the BTB, + * regardless of the state of the RSB. + * + * When IBRS or eIBRS is enabled, the "user -> kernel" attack + * scenario is mitigated by the IBRS branch prediction isolation + * properties, so the RSB buffer filling wouldn't be necessary to + * protect against this type of attack. + * + * The "user -> user" attack scenario is mitigated by RSB filling. + * + * 2) Poisoned RSB entry + * + * If the 'next' in-kernel return stack is shorter than 'prev', + * 'next' could be tricked into speculating with a user-poisoned RSB + * entry. * - * - RSB underflow (and switch to BTB) on Skylake+ - * - SpectreRSB variant of spectre v2 on X86_BUG_SPECTRE_V2 CPUs + * The "user -> kernel" attack scenario is mitigated by SMEP and + * eIBRS. + * + * The "user -> user" scenario, also known as SpectreBHB, requires + * RSB clearing. + * + * So to mitigate all cases, unconditionally fill RSB on context + * switches. + * + * FIXME: Is this pointless for retbleed-affected AMD? */ setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n"); /* - * Retpoline means the kernel is safe because it has no indirect - * branches. Enhanced IBRS protects firmware too, so, enable restricted - * speculation around firmware calls only when Enhanced IBRS isn't - * supported. + * Similar to context switches, there are two types of RSB attacks + * after vmexit: + * + * 1) RSB underflow + * + * 2) Poisoned RSB entry + * + * When retpoline is enabled, both are mitigated by filling/clearing + * the RSB. + * + * When IBRS is enabled, while #1 would be mitigated by the IBRS branch + * prediction isolation protections, RSB still needs to be cleared + * because of #2. Note that SMEP provides no protection here, unlike + * user-space-poisoned RSB entries. + * + * eIBRS, on the other hand, has RSB-poisoning protections, so it + * doesn't need RSB clearing after vmexit. + */ + if (boot_cpu_has(X86_FEATURE_RETPOLINE) || + boot_cpu_has(X86_FEATURE_KERNEL_IBRS)) + setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT); + + /* + * Retpoline protects the kernel, but doesn't protect firmware. IBRS + * and Enhanced IBRS protect firmware too, so enable IBRS around + * firmware calls only when IBRS / Enhanced IBRS aren't otherwise + * enabled. * * Use "mode" to check Enhanced IBRS instead of boot_cpu_has(), because * the user might select retpoline on the kernel command line and if * the CPU supports Enhanced IBRS, kernel might un-intentionally not * enable IBRS around firmware calls. */ - if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_eibrs_mode(mode)) { + if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_ibrs_mode(mode)) { setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW); pr_info("Enabling Restricted Speculation for firmware calls\n"); } /* Set up IBPB and STIBP depending on the general spectre V2 command */ - spectre_v2_user_select_mitigation(cmd); + spectre_v2_cmd = cmd; } static void update_stibp_msr(void * __unused) { - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + u64 val = spec_ctrl_current() | (x86_spec_ctrl_base & SPEC_CTRL_STIBP); + write_spec_ctrl_current(val, true); } /* Update x86_spec_ctrl_base in case SMT state changed. */ @@ -1116,6 +1563,8 @@ static void update_indir_branch_cond(void) /* Update the static key controlling the MDS CPU buffer clear in idle */ static void update_mds_branch_idle(void) { + u64 ia32_cap = x86_read_arch_cap_msr(); + /* * Enable the idle clearing if SMT is active on CPUs which are * affected only by MSBDS and not any other MDS variant. @@ -1127,14 +1576,17 @@ static void update_mds_branch_idle(void) if (!boot_cpu_has_bug(X86_BUG_MSBDS_ONLY)) return; - if (sched_smt_active()) + if (sched_smt_active()) { static_branch_enable(&mds_idle_clear); - else + } else if (mmio_mitigation == MMIO_MITIGATION_OFF || + (ia32_cap & ARCH_CAP_FBSDP_NO)) { static_branch_disable(&mds_idle_clear); + } } #define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n" #define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n" +#define MMIO_MSG_SMT "MMIO Stale Data CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/processor_mmio_stale_data.html for more details.\n" void cpu_bugs_smt_update(void) { @@ -1179,6 +1631,16 @@ void cpu_bugs_smt_update(void) break; } + switch (mmio_mitigation) { + case MMIO_MITIGATION_VERW: + case MMIO_MITIGATION_UCODE_NEEDED: + if (sched_smt_active()) + pr_warn_once(MMIO_MSG_SMT); + break; + case MMIO_MITIGATION_OFF: + break; + } + mutex_unlock(&spec_ctrl_mutex); } @@ -1282,16 +1744,6 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) break; } - /* - * If SSBD is controlled by the SPEC_CTRL MSR, then set the proper - * bit in the mask to allow guests to use the mitigation even in the - * case where the host does not enable it. - */ - if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || - static_cpu_has(X86_FEATURE_AMD_SSBD)) { - x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; - } - /* * We have three CPU feature flags that are in play here: * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible. @@ -1309,7 +1761,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) x86_amd_ssb_disable(); } else { x86_spec_ctrl_base |= SPEC_CTRL_SSBD; - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + write_spec_ctrl_current(x86_spec_ctrl_base, true); } } @@ -1560,7 +2012,7 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) void x86_spec_ctrl_setup_ap(void) { if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + write_spec_ctrl_current(x86_spec_ctrl_base, true); if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) x86_amd_ssb_disable(); @@ -1781,9 +2233,23 @@ static ssize_t tsx_async_abort_show_state(char *buf) sched_smt_active() ? "vulnerable" : "disabled"); } +static ssize_t mmio_stale_data_show_state(char *buf) +{ + if (mmio_mitigation == MMIO_MITIGATION_OFF) + return sysfs_emit(buf, "%s\n", mmio_strings[mmio_mitigation]); + + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) { + return sysfs_emit(buf, "%s; SMT Host state unknown\n", + mmio_strings[mmio_mitigation]); + } + + return sysfs_emit(buf, "%s; SMT %s\n", mmio_strings[mmio_mitigation], + sched_smt_active() ? "vulnerable" : "disabled"); +} + static char *stibp_state(void) { - if (spectre_v2_in_eibrs_mode(spectre_v2_enabled)) + if (spectre_v2_in_ibrs_mode(spectre_v2_enabled)) return ""; switch (spectre_v2_user_stibp) { @@ -1839,6 +2305,24 @@ static ssize_t srbds_show_state(char *buf) return sprintf(buf, "%s\n", srbds_strings[srbds_mitigation]); } +static ssize_t retbleed_show_state(char *buf) +{ + if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET) { + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && + boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) + return sprintf(buf, "Vulnerable: untrained return thunk on non-Zen uarch\n"); + + return sprintf(buf, "%s; SMT %s\n", + retbleed_strings[retbleed_mitigation], + !sched_smt_active() ? "disabled" : + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED ? + "enabled with STIBP protection" : "vulnerable"); + } + + return sprintf(buf, "%s\n", retbleed_strings[retbleed_mitigation]); +} + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { @@ -1881,6 +2365,12 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_SRBDS: return srbds_show_state(buf); + case X86_BUG_MMIO_STALE_DATA: + return mmio_stale_data_show_state(buf); + + case X86_BUG_RETBLEED: + return retbleed_show_state(buf); + default: break; } @@ -1932,4 +2422,14 @@ ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char * { return cpu_show_common(dev, attr, buf, X86_BUG_SRBDS); } + +ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA); +} + +ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_RETBLEED); +} #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c296cb1c01133e7cd195576e7ed6fa0d1df288b2..736262a76a12b74541c855dd231a47befa81ebbc 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1205,24 +1205,60 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { {} }; +#define VULNBL(vendor, family, model, blacklist) \ + X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, blacklist) + #define VULNBL_INTEL_STEPPINGS(model, steppings, issues) \ X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6, \ INTEL_FAM6_##model, steppings, \ X86_FEATURE_ANY, issues) +#define VULNBL_AMD(family, blacklist) \ + VULNBL(AMD, family, X86_MODEL_ANY, blacklist) + +#define VULNBL_HYGON(family, blacklist) \ + VULNBL(HYGON, family, X86_MODEL_ANY, blacklist) + #define SRBDS BIT(0) +/* CPU is affected by X86_BUG_MMIO_STALE_DATA */ +#define MMIO BIT(1) +/* CPU is affected by Shared Buffers Data Sampling (SBDS), a variant of X86_BUG_MMIO_STALE_DATA */ +#define MMIO_SBDS BIT(2) +/* CPU is affected by RETbleed, speculating where you would not expect it */ +#define RETBLEED BIT(3) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), VULNBL_INTEL_STEPPINGS(HASWELL, X86_STEPPING_ANY, SRBDS), VULNBL_INTEL_STEPPINGS(HASWELL_L, X86_STEPPING_ANY, SRBDS), VULNBL_INTEL_STEPPINGS(HASWELL_G, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(HASWELL_X, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPING_ANY, MMIO), VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO), VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x0, 0xC), SRBDS), - VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x0, 0xD), SRBDS), + VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(CANNONLAKE_L, X86_STEPPING_ANY, RETBLEED), + VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS), + + VULNBL_AMD(0x15, RETBLEED), + VULNBL_AMD(0x16, RETBLEED), + VULNBL_AMD(0x17, RETBLEED), + VULNBL_HYGON(0x18, RETBLEED), {} }; @@ -1243,6 +1279,13 @@ u64 x86_read_arch_cap_msr(void) return ia32_cap; } +static bool arch_cap_mmio_immune(u64 ia32_cap) +{ + return (ia32_cap & ARCH_CAP_FBSDP_NO && + ia32_cap & ARCH_CAP_PSDP_NO && + ia32_cap & ARCH_CAP_SBDR_SSDP_NO); +} + static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { u64 ia32_cap = x86_read_arch_cap_msr(); @@ -1296,12 +1339,32 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) /* * SRBDS affects CPUs which support RDRAND or RDSEED and are listed * in the vulnerability blacklist. + * + * Some of the implications and mitigation of Shared Buffers Data + * Sampling (SBDS) are similar to SRBDS. Give SBDS same treatment as + * SRBDS. */ if ((cpu_has(c, X86_FEATURE_RDRAND) || cpu_has(c, X86_FEATURE_RDSEED)) && - cpu_matches(cpu_vuln_blacklist, SRBDS)) + cpu_matches(cpu_vuln_blacklist, SRBDS | MMIO_SBDS)) setup_force_cpu_bug(X86_BUG_SRBDS); + /* + * Processor MMIO Stale Data bug enumeration + * + * Affected CPU list is generally enough to enumerate the vulnerability, + * but for virtualization case check for ARCH_CAP MSR bits also, VMM may + * not want the guest to enumerate the bug. + */ + if (cpu_matches(cpu_vuln_blacklist, MMIO) && + !arch_cap_mmio_immune(ia32_cap)) + setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); + + if (!cpu_has(c, X86_FEATURE_BTC_NO)) { + if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA)) + setup_force_cpu_bug(X86_BUG_RETBLEED); + } + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 2a8e584fc991384813cab20de6b1746fbe8f1e38..7c9b5893c30aba0d3eb21c758264292d26a2e767 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -61,6 +61,8 @@ static inline void tsx_init(void) { } static inline void tsx_ap_init(void) { } #endif /* CONFIG_CPU_SUP_INTEL */ +extern void init_spectral_chicken(struct cpuinfo_x86 *c); + extern void get_cpu_cap(struct cpuinfo_x86 *c); extern void get_cpu_address_sizes(struct cpuinfo_x86 *c); extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c index 3fcdda4c1e114f6a83a589f92508df1061cad467..21fd425088fe5801ff5b62ca212a44477f62c5fb 100644 --- a/arch/x86/kernel/cpu/hygon.c +++ b/arch/x86/kernel/cpu/hygon.c @@ -302,6 +302,12 @@ static void init_hygon(struct cpuinfo_x86 *c) /* get apicid instead of initial apic id from cpuid */ c->apicid = hard_smp_processor_id(); + /* + * XXX someone from Hygon needs to confirm this DTRT + * + init_spectral_chicken(c); + */ + set_cpu_cap(c, X86_FEATURE_ZEN); set_cpu_cap(c, X86_FEATURE_CPB); diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index dbaa8326d6f289d16a3dc431261c80fa063cebb7..fd44b54c90d50483646909e581e8cc7eaad67188 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -27,6 +27,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 }, { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, { X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 }, + { X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 }, { X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 }, { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 }, { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 }, diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 5b4efc927d808b68d5d5a18adaf1991dcd6fef52..24b9fa89aa276cb4a589ab315499230fce7b1b4d 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -301,7 +301,7 @@ union ftrace_op_code_union { } __attribute__((packed)); }; -#define RET_SIZE 1 + IS_ENABLED(CONFIG_SLS) +#define RET_SIZE (IS_ENABLED(CONFIG_RETPOLINE) ? 5 : 1 + IS_ENABLED(CONFIG_SLS)) static unsigned long create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) @@ -357,7 +357,10 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) goto fail; ip = trampoline + size; - memcpy(ip, retq, RET_SIZE); + if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) + __text_gen_insn(ip, JMP32_INSN_OPCODE, ip, &__x86_return_thunk, JMP32_INSN_SIZE); + else + memcpy(ip, retq, sizeof(retq)); /* No need to test direct calls on created trampolines */ if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) { diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index 4ec13608d3c62bd8acae0d897b264cde3eb41e14..dfeb227de5617c6d55ad6ace1f321b7ac87b6a70 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -175,6 +175,7 @@ SYM_INNER_LABEL(ftrace_caller_end, SYM_L_GLOBAL) jmp ftrace_epilogue SYM_FUNC_END(ftrace_caller); +STACK_FRAME_NON_STANDARD_FP(ftrace_caller) SYM_FUNC_START(ftrace_epilogue) /* @@ -282,6 +283,7 @@ SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL) jmp ftrace_epilogue SYM_FUNC_END(ftrace_regs_caller) +STACK_FRAME_NON_STANDARD_FP(ftrace_regs_caller) #else /* ! CONFIG_DYNAMIC_FTRACE */ @@ -311,10 +313,14 @@ trace: jmp ftrace_stub SYM_FUNC_END(__fentry__) EXPORT_SYMBOL(__fentry__) +STACK_FRAME_NON_STANDARD_FP(__fentry__) + #endif /* CONFIG_DYNAMIC_FTRACE */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER -SYM_FUNC_START(return_to_handler) +SYM_CODE_START(return_to_handler) + UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR subq $16, %rsp /* Save the return values */ @@ -339,7 +345,6 @@ SYM_FUNC_START(return_to_handler) int3 .Ldo_rop: mov %rdi, (%rsp) - UNWIND_HINT_FUNC RET -SYM_FUNC_END(return_to_handler) +SYM_CODE_END(return_to_handler) #endif diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index bd4a34100ed0cd53acf56114dcb976ec6186a669..6a3cfaf6b72ad6351dadcb837459eedefe3834ba 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -426,10 +426,12 @@ void __init do_early_exception(struct pt_regs *regs, int trapnr) /* Don't add a printk in there. printk relies on the PDA which is not initialized yet. */ -static void __init clear_bss(void) +void __init clear_bss(void) { memset(__bss_start, 0, (unsigned long) __bss_stop - (unsigned long) __bss_start); + memset(__brk_base, 0, + (unsigned long) __brk_limit - (unsigned long) __brk_base); } static unsigned long get_cmd_line_ptr(void) diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index eb8656bac99b6be6ccc4ebb6b182278ec41ce4ab..9b7acc9c7874c135fc83d4e62806977e01837212 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 92c4afa2b7298d2a45f6fc3e67843fdb73354bfe..d860d437631b649dd37476ff96195fadfbf0b2e5 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -389,6 +389,8 @@ SYM_CODE_START_NOALIGN(vc_boot_ghcb) UNWIND_HINT_IRET_REGS offset=8 ENDBR + ANNOTATE_UNRET_END + /* Build pt_regs */ PUSH_AND_CLEAR_REGS @@ -448,6 +450,7 @@ SYM_CODE_END(early_idt_handler_array) SYM_CODE_START_LOCAL(early_idt_handler_common) UNWIND_HINT_IRET_REGS offset=16 + ANNOTATE_UNRET_END /* * The stack is the hardware frame, an error code or zero, and the * vector number. @@ -497,6 +500,8 @@ SYM_CODE_START_NOALIGN(vc_no_ghcb) UNWIND_HINT_IRET_REGS offset=8 ENDBR + ANNOTATE_UNRET_END + /* Build pt_regs */ PUSH_AND_CLEAR_REGS diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index b98ffcf4d250f1e62a145d4c79483fe2440a5828..67828d97338902b7fe3aca46bf297b369c00ee01 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -253,7 +253,7 @@ int module_finalize(const Elf_Ehdr *hdr, { const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, *para = NULL, *orc = NULL, *orc_ip = NULL, - *retpolines = NULL, *ibt_endbr = NULL; + *retpolines = NULL, *returns = NULL, *ibt_endbr = NULL; char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { @@ -271,6 +271,8 @@ int module_finalize(const Elf_Ehdr *hdr, orc_ip = s; if (!strcmp(".retpoline_sites", secstrings + s->sh_name)) retpolines = s; + if (!strcmp(".return_sites", secstrings + s->sh_name)) + returns = s; if (!strcmp(".ibt_endbr_seal", secstrings + s->sh_name)) ibt_endbr = s; } @@ -287,6 +289,10 @@ int module_finalize(const Elf_Ehdr *hdr, void *rseg = (void *)retpolines->sh_addr; apply_retpolines(rseg, rseg + retpolines->sh_size); } + if (returns) { + void *rseg = (void *)returns->sh_addr; + apply_returns(rseg, rseg + returns->sh_size); + } if (alt) { /* patch .altinstructions */ void *aseg = (void *)alt->sh_addr; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 9b2772b7e1f3990eee3760100880054cf7942a31..d456ce21c255269a49b70f387d8e0580f85dea11 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -600,7 +600,7 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp, } if (updmsr) - wrmsrl(MSR_IA32_SPEC_CTRL, msr); + write_spec_ctrl_current(msr, false); } static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk) diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S index fcc8a7699103a4ad416c1bf5cdf08e07c4b6063c..c7c4b1917336d1d95b5b998b0b065a89513287cd 100644 --- a/arch/x86/kernel/relocate_kernel_32.S +++ b/arch/x86/kernel/relocate_kernel_32.S @@ -7,10 +7,12 @@ #include #include #include +#include #include /* - * Must be relocatable PIC code callable as a C function + * Must be relocatable PIC code callable as a C function, in particular + * there must be a plain RET and not jump to return thunk. */ #define PTR(x) (x << 2) @@ -91,7 +93,9 @@ SYM_CODE_START_NOALIGN(relocate_kernel) movl %edi, %eax addl $(identity_mapped - relocate_kernel), %eax pushl %eax - RET + ANNOTATE_UNRET_SAFE + ret + int3 SYM_CODE_END(relocate_kernel) SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) @@ -159,12 +163,15 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) xorl %edx, %edx xorl %esi, %esi xorl %ebp, %ebp - RET + ANNOTATE_UNRET_SAFE + ret + int3 1: popl %edx movl CP_PA_SWAP_PAGE(%edi), %esp addl $PAGE_SIZE, %esp 2: + ANNOTATE_RETPOLINE_SAFE call *%edx /* get the re-entry point of the peer system */ @@ -190,7 +197,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) movl %edi, %eax addl $(virtual_mapped - relocate_kernel), %eax pushl %eax - RET + ANNOTATE_UNRET_SAFE + ret + int3 SYM_CODE_END(identity_mapped) SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) @@ -208,7 +217,9 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) popl %edi popl %esi popl %ebx - RET + ANNOTATE_UNRET_SAFE + ret + int3 SYM_CODE_END(virtual_mapped) /* Do the copies */ @@ -271,7 +282,9 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages) popl %edi popl %ebx popl %ebp - RET + ANNOTATE_UNRET_SAFE + ret + int3 SYM_CODE_END(swap_pages) .globl kexec_control_code_size diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index c1d8626c53b663e952c06f69468596f51eabbc6b..4809c0dc4eb0ceb64b8dbab19cf5cc9525352609 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -13,7 +13,8 @@ #include /* - * Must be relocatable PIC code callable as a C function + * Must be relocatable PIC code callable as a C function, in particular + * there must be a plain RET and not jump to return thunk. */ #define PTR(x) (x << 3) @@ -105,7 +106,9 @@ SYM_CODE_START_NOALIGN(relocate_kernel) /* jump to identity mapped page */ addq $(identity_mapped - relocate_kernel), %r8 pushq %r8 - RET + ANNOTATE_UNRET_SAFE + ret + int3 SYM_CODE_END(relocate_kernel) SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) @@ -200,7 +203,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) xorl %r14d, %r14d xorl %r15d, %r15d - RET + ANNOTATE_UNRET_SAFE + ret + int3 1: popq %rdx @@ -219,7 +224,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) call swap_pages movq $virtual_mapped, %rax pushq %rax - RET + ANNOTATE_UNRET_SAFE + ret + int3 SYM_CODE_END(identity_mapped) SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) @@ -241,7 +248,9 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) popq %r12 popq %rbp popq %rbx - RET + ANNOTATE_UNRET_SAFE + ret + int3 SYM_CODE_END(virtual_mapped) /* Do the copies */ @@ -298,7 +307,9 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages) lea PAGE_SIZE(%rax), %rsi jmp 0b 3: - RET + ANNOTATE_UNRET_SAFE + ret + int3 SYM_CODE_END(swap_pages) .globl kexec_control_code_size diff --git a/arch/x86/kernel/resource.c b/arch/x86/kernel/resource.c index db2b350a37b774c051f37129860c9a4fe73daa3b..bba1abd05bfeb1d94144ad2593048e7ddfbe74c0 100644 --- a/arch/x86/kernel/resource.c +++ b/arch/x86/kernel/resource.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 -#include #include +#include #include +#include static void resource_clip(struct resource *res, resource_size_t start, resource_size_t end) @@ -24,14 +25,14 @@ static void resource_clip(struct resource *res, resource_size_t start, res->start = end + 1; } -void remove_e820_regions(struct device *dev, struct resource *avail) +static void remove_e820_regions(struct resource *avail) { int i; struct e820_entry *entry; u64 e820_start, e820_end; struct resource orig = *avail; - if (!(avail->flags & IORESOURCE_MEM)) + if (!pci_use_e820) return; for (i = 0; i < e820_table->nr_entries; i++) { @@ -41,7 +42,7 @@ void remove_e820_regions(struct device *dev, struct resource *avail) resource_clip(avail, e820_start, e820_end); if (orig.start != avail->start || orig.end != avail->end) { - dev_info(dev, "clipped %pR to %pR for e820 entry [mem %#010Lx-%#010Lx]\n", + pr_info("clipped %pR to %pR for e820 entry [mem %#010Lx-%#010Lx]\n", &orig, avail, e820_start, e820_end); orig = *avail; } @@ -55,6 +56,9 @@ void arch_remove_reservations(struct resource *avail) * the low 1MB unconditionally, as this area is needed for some ISA * cards requiring a memory range, e.g. the i82365 PCMCIA controller. */ - if (avail->flags & IORESOURCE_MEM) + if (avail->flags & IORESOURCE_MEM) { resource_clip(avail, BIOS_ROM_BASE, BIOS_ROM_END); + + remove_e820_regions(avail); + } } diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 3ebb85327edb1b5f21070e89c1bb939a87f42fa8..bd6c6fd373aeed4301018a11decaec7271e28e11 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -67,11 +67,6 @@ RESERVE_BRK(dmi_alloc, 65536); #endif -/* - * Range of the BSS area. The size of the BSS area is determined - * at link time, with RESERVE_BRK() facility reserving additional - * chunks. - */ unsigned long _brk_start = (unsigned long)__brk_base; unsigned long _brk_end = (unsigned long)__brk_base; diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index aa72cefdd5be61552e820f6ff65a949bf424fd97..aaaba85d6d7ff01e596f8b77c42ac72c707b0f0d 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -11,6 +11,13 @@ enum insn_type { RET = 3, /* tramp / site cond-tail-call */ }; +/* + * ud1 %esp, %ecx - a 3 byte #UD that is unique to trampolines, chosen such + * that there is no false-positive trampoline identification while also being a + * speculation stop. + */ +static const u8 tramp_ud[] = { 0x0f, 0xb9, 0xcc }; + /* * cs cs cs xorl %eax, %eax - a single 5 byte instruction that clears %[er]ax */ @@ -18,7 +25,8 @@ static const u8 xor5rax[] = { 0x2e, 0x2e, 0x2e, 0x31, 0xc0 }; static const u8 retinsn[] = { RET_INSN_OPCODE, 0xcc, 0xcc, 0xcc, 0xcc }; -static void __ref __static_call_transform(void *insn, enum insn_type type, void *func) +static void __ref __static_call_transform(void *insn, enum insn_type type, + void *func, bool modinit) { const void *emulate = NULL; int size = CALL_INSN_SIZE; @@ -43,14 +51,17 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, void break; case RET: - code = &retinsn; + if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) + code = text_gen_insn(JMP32_INSN_OPCODE, insn, &__x86_return_thunk); + else + code = &retinsn; break; } if (memcmp(insn, code, size) == 0) return; - if (unlikely(system_state == SYSTEM_BOOTING)) + if (system_state == SYSTEM_BOOTING || modinit) return text_poke_early(insn, code, size); text_poke_bp(insn, code, size, emulate); @@ -60,7 +71,7 @@ static void __static_call_validate(void *insn, bool tail, bool tramp) { u8 opcode = *(u8 *)insn; - if (tramp && memcmp(insn+5, "SCT", 3)) { + if (tramp && memcmp(insn+5, tramp_ud, 3)) { pr_err("trampoline signature fail"); BUG(); } @@ -104,14 +115,42 @@ void arch_static_call_transform(void *site, void *tramp, void *func, bool tail) if (tramp) { __static_call_validate(tramp, true, true); - __static_call_transform(tramp, __sc_insn(!func, true), func); + __static_call_transform(tramp, __sc_insn(!func, true), func, false); } if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site) { __static_call_validate(site, tail, false); - __static_call_transform(site, __sc_insn(!func, tail), func); + __static_call_transform(site, __sc_insn(!func, tail), func, false); } mutex_unlock(&text_mutex); } EXPORT_SYMBOL_GPL(arch_static_call_transform); + +#ifdef CONFIG_RETHUNK +/* + * This is called by apply_returns() to fix up static call trampolines, + * specifically ARCH_DEFINE_STATIC_CALL_NULL_TRAMP which is recorded as + * having a return trampoline. + * + * The problem is that static_call() is available before determining + * X86_FEATURE_RETHUNK and, by implication, running alternatives. + * + * This means that __static_call_transform() above can have overwritten the + * return trampoline and we now need to fix things up to be consistent. + */ +bool __static_call_fixup(void *tramp, u8 op, void *dest) +{ + if (memcmp(tramp+5, tramp_ud, 3)) { + /* Not a trampoline site, not our problem. */ + return false; + } + + mutex_lock(&text_mutex); + if (op == RET_INSN_OPCODE || dest == &__x86_return_thunk) + __static_call_transform(tramp, RET, NULL, true); + mutex_unlock(&text_mutex); + + return true; +} +#endif diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index f5f6dc2e80072e0a160ff4151a40b37442fd2911..15f29053cec46e462aa01c84c2b1f33e7c89ceaf 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -141,7 +141,7 @@ SECTIONS #ifdef CONFIG_RETPOLINE __indirect_thunk_start = .; - *(.text.__x86.indirect_thunk) + *(.text.__x86.*) __indirect_thunk_end = .; #endif } :text =0xcccc @@ -283,6 +283,13 @@ SECTIONS *(.retpoline_sites) __retpoline_sites_end = .; } + + . = ALIGN(8); + .return_sites : AT(ADDR(.return_sites) - LOAD_OFFSET) { + __return_sites = .; + *(.return_sites) + __return_sites_end = .; + } #endif #ifdef CONFIG_X86_KERNEL_IBT @@ -388,7 +395,7 @@ SECTIONS .brk : AT(ADDR(.brk) - LOAD_OFFSET) { __brk_base = .; . += 64 * 1024; /* 64k alignment slop space */ - *(.brk_reservation) /* areas brk users have reserved */ + *(.bss..brk) /* areas brk users have reserved */ __brk_limit = .; } diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 89b11e7dca8aa5d994122fdf7d1831bda5952df7..f8382abe22ff811f9faddf6995f4f8cf5899416c 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -189,9 +189,6 @@ #define X8(x...) X4(x), X4(x) #define X16(x...) X8(x), X8(x) -#define NR_FASTOP (ilog2(sizeof(ulong)) + 1) -#define FASTOP_SIZE (8 * (1 + HAS_KERNEL_IBT)) - struct opcode { u64 flags; u8 intercept; @@ -306,9 +303,15 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt) * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for * different operand sizes can be reached by calculation, rather than a jump * table (which would be bigger than the code). + * + * The 16 byte alignment, considering 5 bytes for the RET thunk, 3 for ENDBR + * and 1 for the straight line speculation INT3, leaves 7 bytes for the + * body of the function. Currently none is larger than 4. */ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); +#define FASTOP_SIZE 16 + #define __FOP_FUNC(name) \ ".align " __stringify(FASTOP_SIZE) " \n\t" \ ".type " name ", @function \n\t" \ @@ -325,13 +328,15 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); #define FOP_RET(name) \ __FOP_RET(#name) -#define FOP_START(op) \ +#define __FOP_START(op, align) \ extern void em_##op(struct fastop *fake); \ asm(".pushsection .text, \"ax\" \n\t" \ ".global em_" #op " \n\t" \ - ".align " __stringify(FASTOP_SIZE) " \n\t" \ + ".align " __stringify(align) " \n\t" \ "em_" #op ":\n\t" +#define FOP_START(op) __FOP_START(op, FASTOP_SIZE) + #define FOP_END \ ".popsection") @@ -435,17 +440,12 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); /* * Depending on .config the SETcc functions look like: * - * ENDBR [4 bytes; CONFIG_X86_KERNEL_IBT] - * SETcc %al [3 bytes] - * RET [1 byte] - * INT3 [1 byte; CONFIG_SLS] - * - * Which gives possible sizes 4, 5, 8 or 9. When rounded up to the - * next power-of-two alignment they become 4, 8 or 16 resp. + * ENDBR [4 bytes; CONFIG_X86_KERNEL_IBT] + * SETcc %al [3 bytes] + * RET | JMP __x86_return_thunk [1,5 bytes; CONFIG_RETHUNK] + * INT3 [1 byte; CONFIG_SLS] */ -#define SETCC_LENGTH (ENDBR_INSN_SIZE + 4 + IS_ENABLED(CONFIG_SLS)) -#define SETCC_ALIGN (4 << IS_ENABLED(CONFIG_SLS) << HAS_KERNEL_IBT) -static_assert(SETCC_LENGTH <= SETCC_ALIGN); +#define SETCC_ALIGN 16 #define FOP_SETCC(op) \ ".align " __stringify(SETCC_ALIGN) " \n\t" \ @@ -453,9 +453,10 @@ static_assert(SETCC_LENGTH <= SETCC_ALIGN); #op ": \n\t" \ ASM_ENDBR \ #op " %al \n\t" \ - __FOP_RET(#op) + __FOP_RET(#op) \ + ".skip " __stringify(SETCC_ALIGN) " - (.-" #op "), 0xcc \n\t" -FOP_START(setcc) +__FOP_START(setcc, SETCC_ALIGN) FOP_SETCC(seto) FOP_SETCC(setno) FOP_SETCC(setc) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index f1bdac3f5aa8a845cb34dbb02cb3321878be8048..0e68b4c937fcd9971d5f24c943a8868618bb3fb2 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2039,6 +2039,19 @@ static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) } } +static void kvm_lapic_xapic_id_updated(struct kvm_lapic *apic) +{ + struct kvm *kvm = apic->vcpu->kvm; + + if (KVM_BUG_ON(apic_x2apic_mode(apic), kvm)) + return; + + if (kvm_xapic_id(apic) == apic->vcpu->vcpu_id) + return; + + kvm_set_apicv_inhibit(apic->vcpu->kvm, APICV_INHIBIT_REASON_APIC_ID_MODIFIED); +} + static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) { int ret = 0; @@ -2047,10 +2060,12 @@ static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) switch (reg) { case APIC_ID: /* Local APIC ID */ - if (!apic_x2apic_mode(apic)) + if (!apic_x2apic_mode(apic)) { kvm_apic_set_xapic_id(apic, val >> 24); - else + kvm_lapic_xapic_id_updated(apic); + } else { ret = 1; + } break; case APIC_TASKPRI: @@ -2336,8 +2351,10 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) MSR_IA32_APICBASE_BASE; if ((value & MSR_IA32_APICBASE_ENABLE) && - apic->base_address != APIC_DEFAULT_PHYS_BASE) - pr_warn_once("APIC base relocation is unsupported by KVM"); + apic->base_address != APIC_DEFAULT_PHYS_BASE) { + kvm_set_apicv_inhibit(apic->vcpu->kvm, + APICV_INHIBIT_REASON_APIC_BASE_MODIFIED); + } } void kvm_apic_update_apicv(struct kvm_vcpu *vcpu) @@ -2648,6 +2665,8 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR); __kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32); } + } else { + kvm_lapic_xapic_id_updated(vcpu->arch.apic); } return 0; diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index e826ee9138fa895dcc48ce5b7d784dd202c0c4bd..17252f39bd7c2ef466b1519e6c97de025f0f6562 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3411,7 +3411,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) root = mmu_alloc_root(vcpu, i << (30 - PAGE_SHIFT), i << 30, PT32_ROOT_LEVEL, true); mmu->pae_root[i] = root | PT_PRESENT_MASK | - shadow_me_mask; + shadow_me_value; } mmu->root.hpa = __pa(mmu->pae_root); } else { diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 54fe03714f8a6af9cc929f595d2f0b2ff86cb439..d1bc5820ea469349bedb6f61cc8d209cdb75190f 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -291,58 +291,91 @@ void avic_ring_doorbell(struct kvm_vcpu *vcpu) static int avic_kick_target_vcpus_fast(struct kvm *kvm, struct kvm_lapic *source, u32 icrl, u32 icrh, u32 index) { - u32 dest, apic_id; - struct kvm_vcpu *vcpu; + u32 l1_physical_id, dest; + struct kvm_vcpu *target_vcpu; int dest_mode = icrl & APIC_DEST_MASK; int shorthand = icrl & APIC_SHORT_MASK; struct kvm_svm *kvm_svm = to_kvm_svm(kvm); - u32 *avic_logical_id_table = page_address(kvm_svm->avic_logical_id_table_page); if (shorthand != APIC_DEST_NOSHORT) return -EINVAL; - /* - * The AVIC incomplete IPI #vmexit info provides index into - * the physical APIC ID table, which can be used to derive - * guest physical APIC ID. - */ + if (apic_x2apic_mode(source)) + dest = icrh; + else + dest = GET_APIC_DEST_FIELD(icrh); + if (dest_mode == APIC_DEST_PHYSICAL) { - apic_id = index; + /* broadcast destination, use slow path */ + if (apic_x2apic_mode(source) && dest == X2APIC_BROADCAST) + return -EINVAL; + if (!apic_x2apic_mode(source) && dest == APIC_BROADCAST) + return -EINVAL; + + l1_physical_id = dest; + + if (WARN_ON_ONCE(l1_physical_id != index)) + return -EINVAL; + } else { - if (!apic_x2apic_mode(source)) { - /* For xAPIC logical mode, the index is for logical APIC table. */ - apic_id = avic_logical_id_table[index] & 0x1ff; + u32 bitmap, cluster; + int logid_index; + + if (apic_x2apic_mode(source)) { + /* 16 bit dest mask, 16 bit cluster id */ + bitmap = dest & 0xFFFF0000; + cluster = (dest >> 16) << 4; + } else if (kvm_lapic_get_reg(source, APIC_DFR) == APIC_DFR_FLAT) { + /* 8 bit dest mask*/ + bitmap = dest; + cluster = 0; } else { - return -EINVAL; + /* 4 bit desk mask, 4 bit cluster id */ + bitmap = dest & 0xF; + cluster = (dest >> 4) << 2; } - } - /* - * Assuming vcpu ID is the same as physical apic ID, - * and use it to retrieve the target vCPU. - */ - vcpu = kvm_get_vcpu_by_id(kvm, apic_id); - if (!vcpu) - return -EINVAL; + if (unlikely(!bitmap)) + /* guest bug: nobody to send the logical interrupt to */ + return 0; - if (apic_x2apic_mode(vcpu->arch.apic)) - dest = icrh; - else - dest = GET_APIC_DEST_FIELD(icrh); + if (!is_power_of_2(bitmap)) + /* multiple logical destinations, use slow path */ + return -EINVAL; - /* - * Try matching the destination APIC ID with the vCPU. - */ - if (kvm_apic_match_dest(vcpu, source, shorthand, dest, dest_mode)) { - vcpu->arch.apic->irr_pending = true; - svm_complete_interrupt_delivery(vcpu, - icrl & APIC_MODE_MASK, - icrl & APIC_INT_LEVELTRIG, - icrl & APIC_VECTOR_MASK); - return 0; + logid_index = cluster + __ffs(bitmap); + + if (apic_x2apic_mode(source)) { + l1_physical_id = logid_index; + } else { + u32 *avic_logical_id_table = + page_address(kvm_svm->avic_logical_id_table_page); + + u32 logid_entry = avic_logical_id_table[logid_index]; + + if (WARN_ON_ONCE(index != logid_index)) + return -EINVAL; + + /* guest bug: non existing/reserved logical destination */ + if (unlikely(!(logid_entry & AVIC_LOGICAL_ID_ENTRY_VALID_MASK))) + return 0; + + l1_physical_id = logid_entry & + AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK; + } } - return -EINVAL; + target_vcpu = kvm_get_vcpu_by_id(kvm, l1_physical_id); + if (unlikely(!target_vcpu)) + /* guest bug: non existing vCPU is a target of this IPI*/ + return 0; + + target_vcpu->arch.apic->irr_pending = true; + svm_complete_interrupt_delivery(target_vcpu, + icrl & APIC_MODE_MASK, + icrl & APIC_INT_LEVELTRIG, + icrl & APIC_VECTOR_MASK); + return 0; } static void avic_kick_target_vcpus(struct kvm *kvm, struct kvm_lapic *source, @@ -508,35 +541,6 @@ static int avic_handle_ldr_update(struct kvm_vcpu *vcpu) return ret; } -static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu) -{ - u64 *old, *new; - struct vcpu_svm *svm = to_svm(vcpu); - u32 id = kvm_xapic_id(vcpu->arch.apic); - - if (vcpu->vcpu_id == id) - return 0; - - old = avic_get_physical_id_entry(vcpu, vcpu->vcpu_id); - new = avic_get_physical_id_entry(vcpu, id); - if (!new || !old) - return 1; - - /* We need to move physical_id_entry to new offset */ - *new = *old; - *old = 0ULL; - to_svm(vcpu)->avic_physical_id_cache = new; - - /* - * Also update the guest physical APIC ID in the logical - * APIC ID table entry if already setup the LDR. - */ - if (svm->ldr_reg) - avic_handle_ldr_update(vcpu); - - return 0; -} - static void avic_handle_dfr_update(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -555,10 +559,6 @@ static int avic_unaccel_trap_write(struct kvm_vcpu *vcpu) AVIC_UNACCEL_ACCESS_OFFSET_MASK; switch (offset) { - case APIC_ID: - if (avic_handle_apic_id_update(vcpu)) - return 0; - break; case APIC_LDR: if (avic_handle_ldr_update(vcpu)) return 0; @@ -650,8 +650,6 @@ int avic_init_vcpu(struct vcpu_svm *svm) void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu) { - if (avic_handle_apic_id_update(vcpu) != 0) - return; avic_handle_dfr_update(vcpu); avic_handle_ldr_update(vcpu); } @@ -910,7 +908,9 @@ bool avic_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason) BIT(APICV_INHIBIT_REASON_PIT_REINJ) | BIT(APICV_INHIBIT_REASON_X2APIC) | BIT(APICV_INHIBIT_REASON_BLOCKIRQ) | - BIT(APICV_INHIBIT_REASON_SEV); + BIT(APICV_INHIBIT_REASON_SEV) | + BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) | + BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED); return supported & BIT(reason); } @@ -946,7 +946,7 @@ out: return ret; } -void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { u64 entry; int h_physical_id = kvm_cpu_get_apicid(cpu); @@ -978,7 +978,7 @@ void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, true); } -void __avic_vcpu_put(struct kvm_vcpu *vcpu) +void avic_vcpu_put(struct kvm_vcpu *vcpu) { u64 entry; struct vcpu_svm *svm = to_svm(vcpu); @@ -997,25 +997,6 @@ void __avic_vcpu_put(struct kvm_vcpu *vcpu) WRITE_ONCE(*(svm->avic_physical_id_cache), entry); } -static void avic_vcpu_load(struct kvm_vcpu *vcpu) -{ - int cpu = get_cpu(); - - WARN_ON(cpu != vcpu->cpu); - - __avic_vcpu_load(vcpu, cpu); - - put_cpu(); -} - -static void avic_vcpu_put(struct kvm_vcpu *vcpu) -{ - preempt_disable(); - - __avic_vcpu_put(vcpu); - - preempt_enable(); -} void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) { @@ -1042,7 +1023,7 @@ void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) vmcb_mark_dirty(vmcb, VMCB_AVIC); if (activated) - avic_vcpu_load(vcpu); + avic_vcpu_load(vcpu, vcpu->cpu); else avic_vcpu_put(vcpu); @@ -1075,5 +1056,5 @@ void avic_vcpu_unblocking(struct kvm_vcpu *vcpu) if (!kvm_vcpu_apicv_active(vcpu)) return; - avic_vcpu_load(vcpu); + avic_vcpu_load(vcpu, vcpu->cpu); } diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 3361258640a27e15f3ab04f46e6daedbfb09079a..ba7cd26f438fc8a1eb7bb87e053a7e2117d2eb3b 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -616,6 +616,8 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm) struct kvm_vcpu *vcpu = &svm->vcpu; struct vmcb *vmcb01 = svm->vmcb01.ptr; struct vmcb *vmcb02 = svm->nested.vmcb02.ptr; + u32 pause_count12; + u32 pause_thresh12; /* * Filled at exit: exit_code, exit_code_hi, exit_info_1, exit_info_2, @@ -671,27 +673,25 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm) if (!nested_vmcb_needs_vls_intercept(svm)) vmcb02->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK; + pause_count12 = svm->pause_filter_enabled ? svm->nested.ctl.pause_filter_count : 0; + pause_thresh12 = svm->pause_threshold_enabled ? svm->nested.ctl.pause_filter_thresh : 0; if (kvm_pause_in_guest(svm->vcpu.kvm)) { - /* use guest values since host doesn't use them */ - vmcb02->control.pause_filter_count = - svm->pause_filter_enabled ? - svm->nested.ctl.pause_filter_count : 0; + /* use guest values since host doesn't intercept PAUSE */ + vmcb02->control.pause_filter_count = pause_count12; + vmcb02->control.pause_filter_thresh = pause_thresh12; - vmcb02->control.pause_filter_thresh = - svm->pause_threshold_enabled ? - svm->nested.ctl.pause_filter_thresh : 0; - - } else if (!vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_PAUSE)) { - /* use host values when guest doesn't use them */ + } else { + /* start from host values otherwise */ vmcb02->control.pause_filter_count = vmcb01->control.pause_filter_count; vmcb02->control.pause_filter_thresh = vmcb01->control.pause_filter_thresh; - } else { - /* - * Intercept every PAUSE otherwise and - * ignore both host and guest values - */ - vmcb02->control.pause_filter_count = 0; - vmcb02->control.pause_filter_thresh = 0; + + /* ... but ensure filtering is disabled if so requested. */ + if (vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_PAUSE)) { + if (!pause_count12) + vmcb02->control.pause_filter_count = 0; + if (!pause_thresh12) + vmcb02->control.pause_filter_thresh = 0; + } } nested_svm_transition_tlb_flush(vcpu); @@ -951,8 +951,11 @@ int nested_svm_vmexit(struct vcpu_svm *svm) vmcb12->control.event_inj = svm->nested.ctl.event_inj; vmcb12->control.event_inj_err = svm->nested.ctl.event_inj_err; - if (!kvm_pause_in_guest(vcpu->kvm) && vmcb02->control.pause_filter_count) + if (!kvm_pause_in_guest(vcpu->kvm)) { vmcb01->control.pause_filter_count = vmcb02->control.pause_filter_count; + vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS); + + } nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm->vmcb01.ptr); diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 51fd985cf21dde4c51885c743d3080d3a3891694..0c240ed04f96c5ade0085c3b5c770090c956057f 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -844,7 +844,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr, /* If source buffer is not aligned then use an intermediate buffer */ if (!IS_ALIGNED((unsigned long)vaddr, 16)) { - src_tpage = alloc_page(GFP_KERNEL); + src_tpage = alloc_page(GFP_KERNEL_ACCOUNT); if (!src_tpage) return -ENOMEM; @@ -865,7 +865,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr, if (!IS_ALIGNED((unsigned long)dst_vaddr, 16) || !IS_ALIGNED(size, 16)) { int dst_offset; - dst_tpage = alloc_page(GFP_KERNEL); + dst_tpage = alloc_page(GFP_KERNEL_ACCOUNT); if (!dst_tpage) { ret = -ENOMEM; goto e_free; @@ -1665,19 +1665,24 @@ static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm) { struct kvm_sev_info *dst = &to_kvm_svm(dst_kvm)->sev_info; struct kvm_sev_info *src = &to_kvm_svm(src_kvm)->sev_info; + struct kvm_vcpu *dst_vcpu, *src_vcpu; + struct vcpu_svm *dst_svm, *src_svm; struct kvm_sev_info *mirror; + unsigned long i; dst->active = true; dst->asid = src->asid; dst->handle = src->handle; dst->pages_locked = src->pages_locked; dst->enc_context_owner = src->enc_context_owner; + dst->es_active = src->es_active; src->asid = 0; src->active = false; src->handle = 0; src->pages_locked = 0; src->enc_context_owner = NULL; + src->es_active = false; list_cut_before(&dst->regions_list, &src->regions_list, &src->regions_list); @@ -1704,26 +1709,21 @@ static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm) list_del(&src->mirror_entry); list_add_tail(&dst->mirror_entry, &owner_sev_info->mirror_vms); } -} -static int sev_es_migrate_from(struct kvm *dst, struct kvm *src) -{ - unsigned long i; - struct kvm_vcpu *dst_vcpu, *src_vcpu; - struct vcpu_svm *dst_svm, *src_svm; + kvm_for_each_vcpu(i, dst_vcpu, dst_kvm) { + dst_svm = to_svm(dst_vcpu); - if (atomic_read(&src->online_vcpus) != atomic_read(&dst->online_vcpus)) - return -EINVAL; + sev_init_vmcb(dst_svm); - kvm_for_each_vcpu(i, src_vcpu, src) { - if (!src_vcpu->arch.guest_state_protected) - return -EINVAL; - } + if (!dst->es_active) + continue; - kvm_for_each_vcpu(i, src_vcpu, src) { + /* + * Note, the source is not required to have the same number of + * vCPUs as the destination when migrating a vanilla SEV VM. + */ + src_vcpu = kvm_get_vcpu(dst_kvm, i); src_svm = to_svm(src_vcpu); - dst_vcpu = kvm_get_vcpu(dst, i); - dst_svm = to_svm(dst_vcpu); /* * Transfer VMSA and GHCB state to the destination. Nullify and @@ -1740,8 +1740,23 @@ static int sev_es_migrate_from(struct kvm *dst, struct kvm *src) src_svm->vmcb->control.vmsa_pa = INVALID_PAGE; src_vcpu->arch.guest_state_protected = false; } - to_kvm_svm(src)->sev_info.es_active = false; - to_kvm_svm(dst)->sev_info.es_active = true; +} + +static int sev_check_source_vcpus(struct kvm *dst, struct kvm *src) +{ + struct kvm_vcpu *src_vcpu; + unsigned long i; + + if (!sev_es_guest(src)) + return 0; + + if (atomic_read(&src->online_vcpus) != atomic_read(&dst->online_vcpus)) + return -EINVAL; + + kvm_for_each_vcpu(i, src_vcpu, src) { + if (!src_vcpu->arch.guest_state_protected) + return -EINVAL; + } return 0; } @@ -1789,11 +1804,9 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd) if (ret) goto out_dst_vcpu; - if (sev_es_guest(source_kvm)) { - ret = sev_es_migrate_from(kvm, source_kvm); - if (ret) - goto out_source_vcpu; - } + ret = sev_check_source_vcpus(kvm, source_kvm); + if (ret) + goto out_source_vcpu; sev_migrate_from(kvm, source_kvm); kvm_vm_dead(source_kvm); @@ -2914,7 +2927,7 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in) count, in); } -void sev_es_init_vmcb(struct vcpu_svm *svm) +static void sev_es_init_vmcb(struct vcpu_svm *svm) { struct kvm_vcpu *vcpu = &svm->vcpu; @@ -2967,6 +2980,15 @@ void sev_es_init_vmcb(struct vcpu_svm *svm) } } +void sev_init_vmcb(struct vcpu_svm *svm) +{ + svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE; + clr_exception_intercept(svm, UD_VECTOR); + + if (sev_es_guest(svm->vcpu.kvm)) + sev_es_init_vmcb(svm); +} + void sev_es_vcpu_reset(struct vcpu_svm *svm) { /* diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 1dc02cdf69602e19934f9ac08f7cdcc2ce7c25ee..44bbf25dfeb950b0fd4df14262a5803144824e08 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -921,7 +921,7 @@ static void grow_ple_window(struct kvm_vcpu *vcpu) struct vmcb_control_area *control = &svm->vmcb->control; int old = control->pause_filter_count; - if (kvm_pause_in_guest(vcpu->kvm) || !old) + if (kvm_pause_in_guest(vcpu->kvm)) return; control->pause_filter_count = __grow_ple_window(old, @@ -942,7 +942,7 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu) struct vmcb_control_area *control = &svm->vmcb->control; int old = control->pause_filter_count; - if (kvm_pause_in_guest(vcpu->kvm) || !old) + if (kvm_pause_in_guest(vcpu->kvm)) return; control->pause_filter_count = @@ -1212,15 +1212,8 @@ static void init_vmcb(struct kvm_vcpu *vcpu) svm->vmcb->control.int_ctl |= V_GIF_ENABLE_MASK; } - if (sev_guest(vcpu->kvm)) { - svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE; - clr_exception_intercept(svm, UD_VECTOR); - - if (sev_es_guest(vcpu->kvm)) { - /* Perform SEV-ES specific VMCB updates */ - sev_es_init_vmcb(svm); - } - } + if (sev_guest(vcpu->kvm)) + sev_init_vmcb(svm); svm_hv_init_vmcb(vmcb); init_vmcb_after_set_cpuid(vcpu); @@ -1400,13 +1393,13 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) indirect_branch_prediction_barrier(); } if (kvm_vcpu_apicv_active(vcpu)) - __avic_vcpu_load(vcpu, cpu); + avic_vcpu_load(vcpu, cpu); } static void svm_vcpu_put(struct kvm_vcpu *vcpu) { if (kvm_vcpu_apicv_active(vcpu)) - __avic_vcpu_put(vcpu); + avic_vcpu_put(vcpu); svm_prepare_host_switch(vcpu); diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 500348c1cb350871b0d996d3154a1a509662ad70..9223ac100ef57fdf283103b89a1841a2946a3262 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -610,8 +610,8 @@ void avic_init_vmcb(struct vcpu_svm *svm, struct vmcb *vmcb); int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu); int avic_unaccelerated_access_interception(struct kvm_vcpu *vcpu); int avic_init_vcpu(struct vcpu_svm *svm); -void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu); -void __avic_vcpu_put(struct kvm_vcpu *vcpu); +void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu); +void avic_vcpu_put(struct kvm_vcpu *vcpu); void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu); void avic_set_virtual_apic_mode(struct kvm_vcpu *vcpu); void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu); @@ -649,10 +649,10 @@ void __init sev_set_cpu_caps(void); void __init sev_hardware_setup(void); void sev_hardware_unsetup(void); int sev_cpu_init(struct svm_cpu_data *sd); +void sev_init_vmcb(struct vcpu_svm *svm); void sev_free_vcpu(struct kvm_vcpu *vcpu); int sev_handle_vmgexit(struct kvm_vcpu *vcpu); int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in); -void sev_es_init_vmcb(struct vcpu_svm *svm); void sev_es_vcpu_reset(struct vcpu_svm *svm); void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector); void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa); diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index dfaeb47fcf2a75cbd2756931efe22a9d2a70ec43..723f8534986c31b505a2e5d314c347f720fbecd9 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -110,6 +110,15 @@ SYM_FUNC_START(__svm_vcpu_run) mov %r15, VCPU_R15(%_ASM_AX) #endif + /* + * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be + * untrained as soon as we exit the VM and are back to the + * kernel. This should be done before re-enabling interrupts + * because interrupt handlers won't sanitize 'ret' if the return is + * from the kernel. + */ + UNTRAIN_RET + /* * Clear all general purpose registers except RSP and RAX to prevent * speculative use of the guest's values, even those that are reloaded @@ -190,6 +199,15 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE #endif + /* + * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be + * untrained as soon as we exit the VM and are back to the + * kernel. This should be done before re-enabling interrupts + * because interrupt handlers won't sanitize RET if the return is + * from the kernel. + */ + UNTRAIN_RET + pop %_ASM_BX #ifdef CONFIG_X86_64 diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index 3f430e2183753a5d594d8308a3d73477d921ebdc..c0e24826a86f718bcb7b2859dc8739c04d75d8a3 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -4,8 +4,8 @@ #include -#include "lapic.h" -#include "x86.h" +#include "../lapic.h" +#include "../x86.h" extern bool __read_mostly enable_vpid; extern bool __read_mostly flexpriority_enabled; diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index f5cb18e00e789259b591c677ad5ddac18a3ce621..ab135f9ef52f28673e3f3b9e906166944d669488 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2278,7 +2278,6 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_ENABLE_VMFUNC | - SECONDARY_EXEC_TSC_SCALING | SECONDARY_EXEC_DESC); if (nested_cpu_has(vmcs12, @@ -3087,7 +3086,7 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) } vm_fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs, - vmx->loaded_vmcs->launched); + __vmx_vcpu_run_flags(vmx)); if (vmx->msr_autoload.host.nr) vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h new file mode 100644 index 0000000000000000000000000000000000000000..edc3f16cc1896f29e4eef46da685d22b4c31c668 --- /dev/null +++ b/arch/x86/kvm/vmx/run_flags.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __KVM_X86_VMX_RUN_FLAGS_H +#define __KVM_X86_VMX_RUN_FLAGS_H + +#define VMX_RUN_VMRESUME (1 << 0) +#define VMX_RUN_SAVE_SPEC_CTRL (1 << 1) + +#endif /* __KVM_X86_VMX_RUN_FLAGS_H */ diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 435c187927c4853deec9f5d14032e8964c4ed75e..4182c7ffc90910a30a8e7f465cc44bfdc8b35d7a 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -1,10 +1,13 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include #include +#include #include #include #include +#include #include +#include "run_flags.h" #define WORD_SIZE (BITS_PER_LONG / 8) @@ -30,73 +33,12 @@ .section .noinstr.text, "ax" -/** - * vmx_vmenter - VM-Enter the current loaded VMCS - * - * %RFLAGS.ZF: !VMCS.LAUNCHED, i.e. controls VMLAUNCH vs. VMRESUME - * - * Returns: - * %RFLAGS.CF is set on VM-Fail Invalid - * %RFLAGS.ZF is set on VM-Fail Valid - * %RFLAGS.{CF,ZF} are cleared on VM-Success, i.e. VM-Exit - * - * Note that VMRESUME/VMLAUNCH fall-through and return directly if - * they VM-Fail, whereas a successful VM-Enter + VM-Exit will jump - * to vmx_vmexit. - */ -SYM_FUNC_START_LOCAL(vmx_vmenter) - /* EFLAGS.ZF is set if VMCS.LAUNCHED == 0 */ - je 2f - -1: vmresume - RET - -2: vmlaunch - RET - -3: cmpb $0, kvm_rebooting - je 4f - RET -4: ud2 - - _ASM_EXTABLE(1b, 3b) - _ASM_EXTABLE(2b, 3b) - -SYM_FUNC_END(vmx_vmenter) - -/** - * vmx_vmexit - Handle a VMX VM-Exit - * - * Returns: - * %RFLAGS.{CF,ZF} are cleared on VM-Success, i.e. VM-Exit - * - * This is vmx_vmenter's partner in crime. On a VM-Exit, control will jump - * here after hardware loads the host's state, i.e. this is the destination - * referred to by VMCS.HOST_RIP. - */ -SYM_FUNC_START(vmx_vmexit) -#ifdef CONFIG_RETPOLINE - ALTERNATIVE "jmp .Lvmexit_skip_rsb", "", X86_FEATURE_RETPOLINE - /* Preserve guest's RAX, it's used to stuff the RSB. */ - push %_ASM_AX - - /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ - FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE - - /* Clear RFLAGS.CF and RFLAGS.ZF to preserve VM-Exit, i.e. !VM-Fail. */ - or $1, %_ASM_AX - - pop %_ASM_AX -.Lvmexit_skip_rsb: -#endif - RET -SYM_FUNC_END(vmx_vmexit) - /** * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode - * @vmx: struct vcpu_vmx * (forwarded to vmx_update_host_rsp) + * @vmx: struct vcpu_vmx * * @regs: unsigned long * (to guest registers) - * @launched: %true if the VMCS has been launched + * @flags: VMX_RUN_VMRESUME: use VMRESUME instead of VMLAUNCH + * VMX_RUN_SAVE_SPEC_CTRL: save guest SPEC_CTRL into vmx->spec_ctrl * * Returns: * 0 on VM-Exit, 1 on VM-Fail @@ -115,24 +57,56 @@ SYM_FUNC_START(__vmx_vcpu_run) #endif push %_ASM_BX + /* Save @vmx for SPEC_CTRL handling */ + push %_ASM_ARG1 + + /* Save @flags for SPEC_CTRL handling */ + push %_ASM_ARG3 + /* * Save @regs, _ASM_ARG2 may be modified by vmx_update_host_rsp() and * @regs is needed after VM-Exit to save the guest's register values. */ push %_ASM_ARG2 - /* Copy @launched to BL, _ASM_ARG3 is volatile. */ + /* Copy @flags to BL, _ASM_ARG3 is volatile. */ mov %_ASM_ARG3B, %bl - /* Adjust RSP to account for the CALL to vmx_vmenter(). */ - lea -WORD_SIZE(%_ASM_SP), %_ASM_ARG2 + lea (%_ASM_SP), %_ASM_ARG2 call vmx_update_host_rsp + ALTERNATIVE "jmp .Lspec_ctrl_done", "", X86_FEATURE_MSR_SPEC_CTRL + + /* + * SPEC_CTRL handling: if the guest's SPEC_CTRL value differs from the + * host's, write the MSR. + * + * IMPORTANT: To avoid RSB underflow attacks and any other nastiness, + * there must not be any returns or indirect branches between this code + * and vmentry. + */ + mov 2*WORD_SIZE(%_ASM_SP), %_ASM_DI + movl VMX_spec_ctrl(%_ASM_DI), %edi + movl PER_CPU_VAR(x86_spec_ctrl_current), %esi + cmp %edi, %esi + je .Lspec_ctrl_done + mov $MSR_IA32_SPEC_CTRL, %ecx + xor %edx, %edx + mov %edi, %eax + wrmsr + +.Lspec_ctrl_done: + + /* + * Since vmentry is serializing on affected CPUs, there's no need for + * an LFENCE to stop speculation from skipping the wrmsr. + */ + /* Load @regs to RAX. */ mov (%_ASM_SP), %_ASM_AX /* Check if vmlaunch or vmresume is needed */ - testb %bl, %bl + testb $VMX_RUN_VMRESUME, %bl /* Load guest registers. Don't clobber flags. */ mov VCPU_RCX(%_ASM_AX), %_ASM_CX @@ -154,11 +128,37 @@ SYM_FUNC_START(__vmx_vcpu_run) /* Load guest RAX. This kills the @regs pointer! */ mov VCPU_RAX(%_ASM_AX), %_ASM_AX - /* Enter guest mode */ - call vmx_vmenter + /* Check EFLAGS.ZF from 'testb' above */ + jz .Lvmlaunch + + /* + * After a successful VMRESUME/VMLAUNCH, control flow "magically" + * resumes below at 'vmx_vmexit' due to the VMCS HOST_RIP setting. + * So this isn't a typical function and objtool needs to be told to + * save the unwind state here and restore it below. + */ + UNWIND_HINT_SAVE + +/* + * If VMRESUME/VMLAUNCH and corresponding vmexit succeed, execution resumes at + * the 'vmx_vmexit' label below. + */ +.Lvmresume: + vmresume + jmp .Lvmfail + +.Lvmlaunch: + vmlaunch + jmp .Lvmfail - /* Jump on VM-Fail. */ - jbe 2f + _ASM_EXTABLE(.Lvmresume, .Lfixup) + _ASM_EXTABLE(.Lvmlaunch, .Lfixup) + +SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL) + + /* Restore unwind state from before the VMRESUME/VMLAUNCH. */ + UNWIND_HINT_RESTORE + ENDBR /* Temporarily save guest's RAX. */ push %_ASM_AX @@ -185,21 +185,23 @@ SYM_FUNC_START(__vmx_vcpu_run) mov %r15, VCPU_R15(%_ASM_AX) #endif - /* Clear RAX to indicate VM-Exit (as opposed to VM-Fail). */ - xor %eax, %eax + /* Clear return value to indicate VM-Exit (as opposed to VM-Fail). */ + xor %ebx, %ebx +.Lclear_regs: /* - * Clear all general purpose registers except RSP and RAX to prevent + * Clear all general purpose registers except RSP and RBX to prevent * speculative use of the guest's values, even those that are reloaded * via the stack. In theory, an L1 cache miss when restoring registers * could lead to speculative execution with the guest's values. * Zeroing XORs are dirt cheap, i.e. the extra paranoia is essentially * free. RSP and RAX are exempt as RSP is restored by hardware during - * VM-Exit and RAX is explicitly loaded with 0 or 1 to return VM-Fail. + * VM-Exit and RBX is explicitly loaded with 0 or 1 to hold the return + * value. */ -1: xor %ecx, %ecx + xor %eax, %eax + xor %ecx, %ecx xor %edx, %edx - xor %ebx, %ebx xor %ebp, %ebp xor %esi, %esi xor %edi, %edi @@ -216,8 +218,30 @@ SYM_FUNC_START(__vmx_vcpu_run) /* "POP" @regs. */ add $WORD_SIZE, %_ASM_SP - pop %_ASM_BX + /* + * IMPORTANT: RSB filling and SPEC_CTRL handling must be done before + * the first unbalanced RET after vmexit! + * + * For retpoline or IBRS, RSB filling is needed to prevent poisoned RSB + * entries and (in some cases) RSB underflow. + * + * eIBRS has its own protection against poisoned RSB, so it doesn't + * need the RSB filling sequence. But it does need to be enabled + * before the first unbalanced RET. + */ + + FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT + + pop %_ASM_ARG2 /* @flags */ + pop %_ASM_ARG1 /* @vmx */ + + call vmx_spec_ctrl_restore_host + + /* Put return value in AX */ + mov %_ASM_BX, %_ASM_AX + + pop %_ASM_BX #ifdef CONFIG_X86_64 pop %r12 pop %r13 @@ -230,9 +254,15 @@ SYM_FUNC_START(__vmx_vcpu_run) pop %_ASM_BP RET - /* VM-Fail. Out-of-line to avoid a taken Jcc after VM-Exit. */ -2: mov $1, %eax - jmp 1b +.Lfixup: + cmpb $0, kvm_rebooting + jne .Lvmfail + ud2 +.Lvmfail: + /* VM-Fail: set return value to 1 */ + mov $1, %_ASM_BX + jmp .Lclear_regs + SYM_FUNC_END(__vmx_vcpu_run) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 9bd86ecccdab56d455fdcc30e7fab6039b127eee..be7c19374fdd94d7347ea4c83588e9454e164530 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -229,6 +229,9 @@ static const struct { #define L1D_CACHE_ORDER 4 static void *vmx_l1d_flush_pages; +/* Control for disabling CPU Fill buffer clear */ +static bool __read_mostly vmx_fb_clear_ctrl_available; + static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf) { struct page *page; @@ -360,6 +363,60 @@ static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp) return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option); } +static void vmx_setup_fb_clear_ctrl(void) +{ + u64 msr; + + if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES) && + !boot_cpu_has_bug(X86_BUG_MDS) && + !boot_cpu_has_bug(X86_BUG_TAA)) { + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr); + if (msr & ARCH_CAP_FB_CLEAR_CTRL) + vmx_fb_clear_ctrl_available = true; + } +} + +static __always_inline void vmx_disable_fb_clear(struct vcpu_vmx *vmx) +{ + u64 msr; + + if (!vmx->disable_fb_clear) + return; + + msr = __rdmsr(MSR_IA32_MCU_OPT_CTRL); + msr |= FB_CLEAR_DIS; + native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr); + /* Cache the MSR value to avoid reading it later */ + vmx->msr_ia32_mcu_opt_ctrl = msr; +} + +static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx) +{ + if (!vmx->disable_fb_clear) + return; + + vmx->msr_ia32_mcu_opt_ctrl &= ~FB_CLEAR_DIS; + native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl); +} + +static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx) +{ + vmx->disable_fb_clear = vmx_fb_clear_ctrl_available; + + /* + * If guest will not execute VERW, there is no need to set FB_CLEAR_DIS + * at VMEntry. Skip the MSR read/write when a guest has no use case to + * execute VERW. + */ + if ((vcpu->arch.arch_capabilities & ARCH_CAP_FB_CLEAR) || + ((vcpu->arch.arch_capabilities & ARCH_CAP_MDS_NO) && + (vcpu->arch.arch_capabilities & ARCH_CAP_TAA_NO) && + (vcpu->arch.arch_capabilities & ARCH_CAP_PSDP_NO) && + (vcpu->arch.arch_capabilities & ARCH_CAP_FBSDP_NO) && + (vcpu->arch.arch_capabilities & ARCH_CAP_SBDR_SSDP_NO))) + vmx->disable_fb_clear = false; +} + static const struct kernel_param_ops vmentry_l1d_flush_ops = { .set = vmentry_l1d_flush_set, .get = vmentry_l1d_flush_get, @@ -782,6 +839,24 @@ static bool msr_write_intercepted(struct vcpu_vmx *vmx, u32 msr) MSR_IA32_SPEC_CTRL); } +unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx) +{ + unsigned int flags = 0; + + if (vmx->loaded_vmcs->launched) + flags |= VMX_RUN_VMRESUME; + + /* + * If writes to the SPEC_CTRL MSR aren't intercepted, the guest is free + * to change it directly without causing a vmexit. In that case read + * it after vmexit and store it in vmx->spec_ctrl. + */ + if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))) + flags |= VMX_RUN_SAVE_SPEC_CTRL; + + return flags; +} + static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, unsigned long entry, unsigned long exit) { @@ -2252,6 +2327,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) ret = kvm_set_msr_common(vcpu, msr_info); } + /* FB_CLEAR may have changed, also update the FB_CLEAR_DIS behavior */ + if (msr_index == MSR_IA32_ARCH_CAPABILITIES) + vmx_update_fb_clear_dis(vcpu, vmx); + return ret; } @@ -4553,6 +4632,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); vpid_sync_context(vmx->vpid); + + vmx_update_fb_clear_dis(vcpu, vmx); } static void vmx_enable_irq_window(struct kvm_vcpu *vcpu) @@ -6750,6 +6831,31 @@ void noinstr vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp) } } +void noinstr vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, + unsigned int flags) +{ + u64 hostval = this_cpu_read(x86_spec_ctrl_current); + + if (!cpu_feature_enabled(X86_FEATURE_MSR_SPEC_CTRL)) + return; + + if (flags & VMX_RUN_SAVE_SPEC_CTRL) + vmx->spec_ctrl = __rdmsr(MSR_IA32_SPEC_CTRL); + + /* + * If the guest/host SPEC_CTRL values differ, restore the host value. + * + * For legacy IBRS, the IBRS bit always needs to be written after + * transitioning from a less privileged predictor mode, regardless of + * whether the guest/host values differ. + */ + if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) || + vmx->spec_ctrl != hostval) + native_wrmsrl(MSR_IA32_SPEC_CTRL, hostval); + + barrier_nospec(); +} + static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu) { switch (to_vmx(vcpu)->exit_reason.basic) { @@ -6763,7 +6869,8 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu) } static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, - struct vcpu_vmx *vmx) + struct vcpu_vmx *vmx, + unsigned long flags) { guest_state_enter_irqoff(); @@ -6772,15 +6879,22 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, vmx_l1d_flush(vcpu); else if (static_branch_unlikely(&mds_user_clear)) mds_clear_cpu_buffers(); + else if (static_branch_unlikely(&mmio_stale_data_clear) && + kvm_arch_has_assigned_device(vcpu->kvm)) + mds_clear_cpu_buffers(); + + vmx_disable_fb_clear(vmx); if (vcpu->arch.cr2 != native_read_cr2()) native_write_cr2(vcpu->arch.cr2); vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs, - vmx->loaded_vmcs->launched); + flags); vcpu->arch.cr2 = native_read_cr2(); + vmx_enable_fb_clear(vmx); + guest_state_exit_irqoff(); } @@ -6874,36 +6988,8 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) kvm_wait_lapic_expire(vcpu); - /* - * If this vCPU has touched SPEC_CTRL, restore the guest's value if - * it's non-zero. Since vmentry is serialising on affected CPUs, there - * is no need to worry about the conditional branch over the wrmsr - * being speculatively taken. - */ - x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0); - /* The actual VMENTER/EXIT is in the .noinstr.text section. */ - vmx_vcpu_enter_exit(vcpu, vmx); - - /* - * We do not use IBRS in the kernel. If this vCPU has used the - * SPEC_CTRL MSR it may have left it on; save the value and - * turn it off. This is much more efficient than blindly adding - * it to the atomic save/restore list. Especially as the former - * (Saving guest MSRs on vmexit) doesn't even exist in KVM. - * - * For non-nested case: - * If the L01 MSR bitmap does not intercept the MSR, then we need to - * save it. - * - * For nested case: - * If the L02 MSR bitmap does not intercept the MSR, then we need to - * save it. - */ - if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))) - vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); - - x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0); + vmx_vcpu_enter_exit(vcpu, vmx, __vmx_vcpu_run_flags(vmx)); /* All fields are clean at this point */ if (static_branch_unlikely(&enable_evmcs)) { @@ -7709,7 +7795,9 @@ static bool vmx_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason) ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) | BIT(APICV_INHIBIT_REASON_ABSENT) | BIT(APICV_INHIBIT_REASON_HYPERV) | - BIT(APICV_INHIBIT_REASON_BLOCKIRQ); + BIT(APICV_INHIBIT_REASON_BLOCKIRQ) | + BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) | + BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED); return supported & BIT(reason); } @@ -8212,6 +8300,8 @@ static int __init vmx_init(void) return r; } + vmx_setup_fb_clear_ctrl(); + for_each_possible_cpu(cpu) { INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu)); diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index b98c7e96697a9a4925909a418831cdf5d5138b24..1e7f9453894b1456984d51d4485307c2f3ffb876 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -8,11 +8,12 @@ #include #include "capabilities.h" -#include "kvm_cache_regs.h" +#include "../kvm_cache_regs.h" #include "posted_intr.h" #include "vmcs.h" #include "vmx_ops.h" -#include "cpuid.h" +#include "../cpuid.h" +#include "run_flags.h" #define MSR_TYPE_R 1 #define MSR_TYPE_W 2 @@ -348,6 +349,8 @@ struct vcpu_vmx { u64 msr_ia32_feature_control_valid_bits; /* SGX Launch Control public key hash */ u64 msr_ia32_sgxlepubkeyhash[4]; + u64 msr_ia32_mcu_opt_ctrl; + bool disable_fb_clear; struct pt_desc pt_desc; struct lbr_desc lbr_desc; @@ -402,7 +405,10 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu); struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr); void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu); void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp); -bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched); +void vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, unsigned int flags); +unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx); +bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, + unsigned int flags); int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr); void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h index 5e7f4122578014a630b3ba99d82e464b6dd7bf7a..5cfc49ddb1b442801bf058779e86f0102a7536ec 100644 --- a/arch/x86/kvm/vmx/vmx_ops.h +++ b/arch/x86/kvm/vmx/vmx_ops.h @@ -8,7 +8,7 @@ #include "evmcs.h" #include "vmcs.h" -#include "x86.h" +#include "../x86.h" asmlinkage void vmread_error(unsigned long field, bool fault); __attribute__((regparm(0))) void vmread_error_trampoline(unsigned long field, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 03fbfbbec460fab308dc3a490bf20c22d119ba39..143e37298d8a41f7c8616cb2b7d77188d96dbd1e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -298,7 +298,7 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, directed_yield_successful), STATS_DESC_COUNTER(VCPU, preemption_reported), STATS_DESC_COUNTER(VCPU, preemption_other), - STATS_DESC_ICOUNTER(VCPU, guest_mode) + STATS_DESC_IBOOLEAN(VCPU, guest_mode) }; const struct kvm_stats_header kvm_vcpu_stats_header = { @@ -1617,6 +1617,9 @@ static u64 kvm_get_arch_capabilities(void) */ } + /* Guests don't need to know "Fill buffer clear control" exists */ + data &= ~ARCH_CAP_FB_CLEAR_CTRL; + return data; } @@ -9140,15 +9143,17 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr, */ static void kvm_pv_kick_cpu_op(struct kvm *kvm, int apicid) { - struct kvm_lapic_irq lapic_irq; - - lapic_irq.shorthand = APIC_DEST_NOSHORT; - lapic_irq.dest_mode = APIC_DEST_PHYSICAL; - lapic_irq.level = 0; - lapic_irq.dest_id = apicid; - lapic_irq.msi_redir_hint = false; + /* + * All other fields are unused for APIC_DM_REMRD, but may be consumed by + * common code, e.g. for tracing. Defer initialization to the compiler. + */ + struct kvm_lapic_irq lapic_irq = { + .delivery_mode = APIC_DM_REMRD, + .dest_mode = APIC_DEST_PHYSICAL, + .shorthand = APIC_DEST_NOSHORT, + .dest_id = apicid, + }; - lapic_irq.delivery_mode = APIC_DM_REMRD; kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL); } @@ -9850,6 +9855,7 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu) return; down_read(&vcpu->kvm->arch.apicv_update_lock); + preempt_disable(); activate = kvm_vcpu_apicv_activated(vcpu); @@ -9870,6 +9876,7 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu) kvm_make_request(KVM_REQ_EVENT, vcpu); out: + preempt_enable(); up_read(&vcpu->kvm->arch.apicv_update_lock); } EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv); @@ -12626,9 +12633,9 @@ void kvm_arch_end_assignment(struct kvm *kvm) } EXPORT_SYMBOL_GPL(kvm_arch_end_assignment); -bool kvm_arch_has_assigned_device(struct kvm *kvm) +bool noinstr kvm_arch_has_assigned_device(struct kvm *kvm) { - return atomic_read(&kvm->arch.assigned_device_count); + return arch_atomic_read(&kvm->arch.assigned_device_count); } EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device); diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index d83cba364e31d1e9b3c653971c79e5057a33a149..724bbf83eb5b0910819b83a8cb5b349d45a6fe8d 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S @@ -39,7 +39,7 @@ SYM_FUNC_START(__memmove) /* FSRM implies ERMS => no length checks, do the copy directly */ .Lmemmove_begin_forward: ALTERNATIVE "cmp $0x20, %rdx; jb 1f", "", X86_FEATURE_FSRM - ALTERNATIVE "", __stringify(movq %rdx, %rcx; rep movsb; RET), X86_FEATURE_ERMS + ALTERNATIVE "", "jmp .Lmemmove_erms", X86_FEATURE_ERMS /* * movsq instruction have many startup latency @@ -205,6 +205,11 @@ SYM_FUNC_START(__memmove) movb %r11b, (%rdi) 13: RET + +.Lmemmove_erms: + movq %rdx, %rcx + rep movsb + RET SYM_FUNC_END(__memmove) EXPORT_SYMBOL(__memmove) diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index b2b2366885a2b672960805bbb17a7c33a50952cf..073289a55f8495cb3f175d97d847485b05eb7a8b 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -33,9 +33,9 @@ SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL) UNWIND_HINT_EMPTY ANNOTATE_NOENDBR - ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \ - __stringify(RETPOLINE \reg), X86_FEATURE_RETPOLINE, \ - __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE + ALTERNATIVE_2 __stringify(RETPOLINE \reg), \ + __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE, \ + __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), ALT_NOT(X86_FEATURE_RETPOLINE) .endm @@ -67,3 +67,76 @@ SYM_CODE_END(__x86_indirect_thunk_array) #define GEN(reg) EXPORT_THUNK(reg) #include #undef GEN + +/* + * This function name is magical and is used by -mfunction-return=thunk-extern + * for the compiler to generate JMPs to it. + */ +#ifdef CONFIG_RETHUNK + + .section .text.__x86.return_thunk + +/* + * Safety details here pertain to the AMD Zen{1,2} microarchitecture: + * 1) The RET at __x86_return_thunk must be on a 64 byte boundary, for + * alignment within the BTB. + * 2) The instruction at zen_untrain_ret must contain, and not + * end with, the 0xc3 byte of the RET. + * 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread + * from re-poisioning the BTB prediction. + */ + .align 64 + .skip 63, 0xcc +SYM_FUNC_START_NOALIGN(zen_untrain_ret); + + /* + * As executed from zen_untrain_ret, this is: + * + * TEST $0xcc, %bl + * LFENCE + * JMP __x86_return_thunk + * + * Executing the TEST instruction has a side effect of evicting any BTB + * prediction (potentially attacker controlled) attached to the RET, as + * __x86_return_thunk + 1 isn't an instruction boundary at the moment. + */ + .byte 0xf6 + + /* + * As executed from __x86_return_thunk, this is a plain RET. + * + * As part of the TEST above, RET is the ModRM byte, and INT3 the imm8. + * + * We subsequently jump backwards and architecturally execute the RET. + * This creates a correct BTB prediction (type=ret), but in the + * meantime we suffer Straight Line Speculation (because the type was + * no branch) which is halted by the INT3. + * + * With SMT enabled and STIBP active, a sibling thread cannot poison + * RET's prediction to a type of its choice, but can evict the + * prediction due to competitive sharing. If the prediction is + * evicted, __x86_return_thunk will suffer Straight Line Speculation + * which will be contained safely by the INT3. + */ +SYM_INNER_LABEL(__x86_return_thunk, SYM_L_GLOBAL) + ret + int3 +SYM_CODE_END(__x86_return_thunk) + + /* + * Ensure the TEST decoding / BTB invalidation is complete. + */ + lfence + + /* + * Jump back and execute the RET in the middle of the TEST instruction. + * INT3 is for SLS protection. + */ + jmp __x86_return_thunk + int3 +SYM_FUNC_END(zen_untrain_ret) +__EXPORT_THUNK(zen_untrain_ret) + +EXPORT_SYMBOL(__x86_return_thunk) + +#endif /* CONFIG_RETHUNK */ diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index d8cfce221275e078e06a4371a31acad6467671b9..57ba5502aecf97196d9e40ac28dfbabb8b95b15b 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -77,10 +77,20 @@ static uint8_t __pte2cachemode_tbl[8] = { [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC, }; -/* Check that the write-protect PAT entry is set for write-protect */ +/* + * Check that the write-protect PAT entry is set for write-protect. + * To do this without making assumptions how PAT has been set up (Xen has + * another layout than the kernel), translate the _PAGE_CACHE_MODE_WP cache + * mode via the __cachemode2pte_tbl[] into protection bits (those protection + * bits will select a cache mode of WP or better), and then translate the + * protection bits back into the cache mode using __pte2cm_idx() and the + * __pte2cachemode_tbl[] array. This will return the really used cache mode. + */ bool x86_has_pat_wp(void) { - return __pte2cachemode_tbl[_PAGE_CACHE_MODE_WP] == _PAGE_CACHE_MODE_WP; + uint16_t prot = __cachemode2pte_tbl[_PAGE_CACHE_MODE_WP]; + + return __pte2cachemode_tbl[__pte2cm_idx(prot)] == _PAGE_CACHE_MODE_WP; } enum page_cache_mode pgprot2cachemode(pgprot_t pgprot) diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S index 3d1dba05fce4aea38ffa67c25240eadbbe171b76..9de3d900bc927142b5ec1688551464c26aa60851 100644 --- a/arch/x86/mm/mem_encrypt_boot.S +++ b/arch/x86/mm/mem_encrypt_boot.S @@ -65,7 +65,10 @@ SYM_FUNC_START(sme_encrypt_execute) movq %rbp, %rsp /* Restore original stack pointer */ pop %rbp - RET + /* Offset to __x86_return_thunk would be wrong here */ + ANNOTATE_UNRET_SAFE + ret + int3 SYM_FUNC_END(sme_encrypt_execute) SYM_FUNC_START(__enc_copy) @@ -151,6 +154,9 @@ SYM_FUNC_START(__enc_copy) pop %r12 pop %r15 - RET + /* Offset to __x86_return_thunk would be wrong here */ + ANNOTATE_UNRET_SAFE + ret + int3 .L__enc_copy_end: SYM_FUNC_END(__enc_copy) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index f298b18a9a3d10b70cedf7c43fa7f17bced0f78b..b808c9a80d1bebc627d34828d4782147fcbaa815 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -412,16 +412,30 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip) { u8 *prog = *pprog; -#ifdef CONFIG_RETPOLINE if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) { EMIT_LFENCE(); EMIT2(0xFF, 0xE0 + reg); } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) { OPTIMIZER_HIDE_VAR(reg); emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip); - } else -#endif - EMIT2(0xFF, 0xE0 + reg); + } else { + EMIT2(0xFF, 0xE0 + reg); + } + + *pprog = prog; +} + +static void emit_return(u8 **pprog, u8 *ip) +{ + u8 *prog = *pprog; + + if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) { + emit_jump(&prog, &__x86_return_thunk, ip); + } else { + EMIT1(0xC3); /* ret */ + if (IS_ENABLED(CONFIG_SLS)) + EMIT1(0xCC); /* int3 */ + } *pprog = prog; } @@ -1420,8 +1434,9 @@ st: if (is_imm8(insn->off)) case BPF_JMP | BPF_CALL: func = (u8 *) __bpf_call_base + imm32; if (tail_call_reachable) { + /* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */ EMIT3_off32(0x48, 0x8B, 0x85, - -(bpf_prog->aux->stack_depth + 8)); + -round_up(bpf_prog->aux->stack_depth, 8) - 8); if (!imm32 || emit_call(&prog, func, image + addrs[i - 1] + 7)) return -EINVAL; } else { @@ -1685,7 +1700,7 @@ emit_jmp: ctx->cleanup_addr = proglen; pop_callee_regs(&prog, callee_regs_used); EMIT1(0xC9); /* leave */ - EMIT1(0xC3); /* ret */ + emit_return(&prog, image + addrs[i - 1] + (prog - temp)); break; default: @@ -2188,7 +2203,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i if (flags & BPF_TRAMP_F_SKIP_FRAME) /* skip our return address and return to parent */ EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */ - EMIT1(0xC3); /* ret */ + emit_return(&prog, prog); /* Make sure the trampoline generation logic doesn't overflow */ if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY)) { ret = -EFAULT; diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index a4f43054bc79c1eb62d39653f5d865bd095cf99b..2f82480fd430571895ce194243f37f04b8010071 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -8,7 +8,6 @@ #include #include #include -#include struct pci_root_info { struct acpi_pci_root_info common; @@ -20,7 +19,7 @@ struct pci_root_info { #endif }; -static bool pci_use_e820 = true; +bool pci_use_e820 = true; static bool pci_use_crs = true; static bool pci_ignore_seg; @@ -387,11 +386,6 @@ static int pci_acpi_root_prepare_resources(struct acpi_pci_root_info *ci) status = acpi_pci_probe_root_resources(ci); - if (pci_use_e820) { - resource_list_for_each_entry(entry, &ci->resources) - remove_e820_regions(&device->dev, entry->res); - } - if (pci_use_crs) { resource_list_for_each_entry_safe(entry, tmp, &ci->resources) if (resource_is_pcicfg_ioport(entry->res)) diff --git a/arch/x86/platform/efi/efi_thunk_64.S b/arch/x86/platform/efi/efi_thunk_64.S index 9ffe2bad27d50382cc47062d41a43e9d0825fba1..4e5257a4811b28e980b9bd48f813be1448154273 100644 --- a/arch/x86/platform/efi/efi_thunk_64.S +++ b/arch/x86/platform/efi/efi_thunk_64.S @@ -23,6 +23,7 @@ #include #include #include +#include .text .code64 @@ -75,7 +76,9 @@ STACK_FRAME_NON_STANDARD __efi64_thunk 1: movq 0x20(%rsp), %rsp pop %rbx pop %rbp - RET + ANNOTATE_UNRET_SAFE + ret + int3 .code32 2: pushl $__KERNEL_CS diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index e3297b15701c67fa648fb6085fafef5152ce879c..70fb2ea85e907468df9500091d9b8f27aa279e7d 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1183,15 +1183,19 @@ static void __init xen_domu_set_legacy_features(void) extern void early_xen_iret_patch(void); /* First C function to be called on Xen boot */ -asmlinkage __visible void __init xen_start_kernel(void) +asmlinkage __visible void __init xen_start_kernel(struct start_info *si) { struct physdev_set_iopl set_iopl; unsigned long initrd_start = 0; int rc; - if (!xen_start_info) + if (!si) return; + clear_bss(); + + xen_start_info = si; + __text_gen_insn(&early_xen_iret_patch, JMP32_INSN_OPCODE, &early_xen_iret_patch, &xen_iret, JMP32_INSN_SIZE); diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 81aa46f770c540f1ab0780cc9837c32bcdebeab8..cfa99e8f054be5dba1e339e268c20115acc79d9b 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -918,7 +918,7 @@ void xen_enable_sysenter(void) if (!boot_cpu_has(sysenter_feature)) return; - ret = register_callback(CALLBACKTYPE_sysenter, xen_sysenter_target); + ret = register_callback(CALLBACKTYPE_sysenter, xen_entry_SYSENTER_compat); if(ret != 0) setup_clear_cpu_cap(sysenter_feature); } @@ -927,7 +927,7 @@ void xen_enable_syscall(void) { int ret; - ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target); + ret = register_callback(CALLBACKTYPE_syscall, xen_entry_SYSCALL_64); if (ret != 0) { printk(KERN_ERR "Failed to set syscall callback: %d\n", ret); /* Pretty fatal; 64-bit userspace has no other @@ -936,7 +936,7 @@ void xen_enable_syscall(void) if (boot_cpu_has(X86_FEATURE_SYSCALL32)) { ret = register_callback(CALLBACKTYPE_syscall32, - xen_syscall32_target); + xen_entry_SYSCALL_compat); if (ret != 0) setup_clear_cpu_cap(X86_FEATURE_SYSCALL32); } diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index caa9bc2fa100897cff0d5fa79ec4fb27a249871d..6b4fdf6b95422b3787269f8950d6c10a3ed9b9da 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -121,7 +121,7 @@ SYM_FUNC_END(xen_read_cr2_direct); .macro xen_pv_trap name SYM_CODE_START(xen_\name) - UNWIND_HINT_EMPTY + UNWIND_HINT_ENTRY ENDBR pop %rcx pop %r11 @@ -234,8 +234,8 @@ SYM_CODE_END(xenpv_restore_regs_and_return_to_usermode) */ /* Normal 64-bit system call target */ -SYM_CODE_START(xen_syscall_target) - UNWIND_HINT_EMPTY +SYM_CODE_START(xen_entry_SYSCALL_64) + UNWIND_HINT_ENTRY ENDBR popq %rcx popq %r11 @@ -249,13 +249,13 @@ SYM_CODE_START(xen_syscall_target) movq $__USER_CS, 1*8(%rsp) jmp entry_SYSCALL_64_after_hwframe -SYM_CODE_END(xen_syscall_target) +SYM_CODE_END(xen_entry_SYSCALL_64) #ifdef CONFIG_IA32_EMULATION /* 32-bit compat syscall target */ -SYM_CODE_START(xen_syscall32_target) - UNWIND_HINT_EMPTY +SYM_CODE_START(xen_entry_SYSCALL_compat) + UNWIND_HINT_ENTRY ENDBR popq %rcx popq %r11 @@ -269,11 +269,11 @@ SYM_CODE_START(xen_syscall32_target) movq $__USER32_CS, 1*8(%rsp) jmp entry_SYSCALL_compat_after_hwframe -SYM_CODE_END(xen_syscall32_target) +SYM_CODE_END(xen_entry_SYSCALL_compat) /* 32-bit compat sysenter target */ -SYM_CODE_START(xen_sysenter_target) - UNWIND_HINT_EMPTY +SYM_CODE_START(xen_entry_SYSENTER_compat) + UNWIND_HINT_ENTRY ENDBR /* * NB: Xen is polite and clears TF from EFLAGS for us. This means @@ -291,19 +291,19 @@ SYM_CODE_START(xen_sysenter_target) movq $__USER32_CS, 1*8(%rsp) jmp entry_SYSENTER_compat_after_hwframe -SYM_CODE_END(xen_sysenter_target) +SYM_CODE_END(xen_entry_SYSENTER_compat) #else /* !CONFIG_IA32_EMULATION */ -SYM_CODE_START(xen_syscall32_target) -SYM_CODE_START(xen_sysenter_target) - UNWIND_HINT_EMPTY +SYM_CODE_START(xen_entry_SYSCALL_compat) +SYM_CODE_START(xen_entry_SYSENTER_compat) + UNWIND_HINT_ENTRY ENDBR lea 16(%rsp), %rsp /* strip %rcx, %r11 */ mov $-ENOSYS, %rax pushq $0 jmp hypercall_iret -SYM_CODE_END(xen_sysenter_target) -SYM_CODE_END(xen_syscall32_target) +SYM_CODE_END(xen_entry_SYSENTER_compat) +SYM_CODE_END(xen_entry_SYSCALL_compat) #endif /* CONFIG_IA32_EMULATION */ diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 3a2cd93bf0590e6acb5b2bad7d8a7e282050152f..ffaa62167f6e8ddf1da9fcc439503e5fa1215700 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -26,6 +26,7 @@ SYM_CODE_START(hypercall_page) .rept (PAGE_SIZE / 32) UNWIND_HINT_FUNC ANNOTATE_NOENDBR + ANNOTATE_UNRET_SAFE ret /* * Xen will write the hypercall page, and sort out ENDBR. @@ -48,15 +49,6 @@ SYM_CODE_START(startup_xen) ANNOTATE_NOENDBR cld - /* Clear .bss */ - xor %eax,%eax - mov $__bss_start, %rdi - mov $__bss_stop, %rcx - sub %rdi, %rcx - shr $3, %rcx - rep stosq - - mov %rsi, xen_start_info mov initial_stack(%rip), %rsp /* Set up %gs. @@ -71,6 +63,7 @@ SYM_CODE_START(startup_xen) cdq wrmsr + mov %rsi, %rdi call xen_start_kernel SYM_CODE_END(startup_xen) __FINIT diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index fd0fec6e92f4ca66a828acb32c592777a2f741e9..9a8bb972193d884e097adb1b58d71d7aa9d614e0 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -10,10 +10,10 @@ /* These are code, but not functions. Defined in entry.S */ extern const char xen_failsafe_callback[]; -void xen_sysenter_target(void); +void xen_entry_SYSENTER_compat(void); #ifdef CONFIG_X86_64 -void xen_syscall_target(void); -void xen_syscall32_target(void); +void xen_entry_SYSCALL_64(void); +void xen_entry_SYSCALL_compat(void); #endif extern void *xen_initial_gdt; diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S index e3eae648ba2e924f263f4b0feb4ca4ceb4ecbdff..ab30bcb462903b585a8086401a061598507e469b 100644 --- a/arch/xtensa/kernel/entry.S +++ b/arch/xtensa/kernel/entry.S @@ -2173,7 +2173,7 @@ ENDPROC(ret_from_kernel_thread) #ifdef CONFIG_HIBERNATION - .bss + .section .bss, "aw" .align 4 .Lsaved_regs: #if defined(__XTENSA_WINDOWED_ABI__) diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c index e8ceb1528608188cb1f335b187e2279dde841b3c..16b8a6273772cb2507a38d33e5ad0dc1e352c9ed 100644 --- a/arch/xtensa/kernel/time.c +++ b/arch/xtensa/kernel/time.c @@ -154,6 +154,7 @@ static void __init calibrate_ccount(void) cpu = of_find_compatible_node(NULL, NULL, "cdns,xtensa-cpu"); if (cpu) { clk = of_clk_get(cpu, 0); + of_node_put(cpu); if (!IS_ERR(clk)) { ccount_freq = clk_get_rate(clk); return; diff --git a/arch/xtensa/platforms/xtfpga/setup.c b/arch/xtensa/platforms/xtfpga/setup.c index 538e6748e85a7dbb912bd7df2e04860ff51dbb03..c79c1d09ea8631d84496ca5b0e8f6de90b233ac3 100644 --- a/arch/xtensa/platforms/xtfpga/setup.c +++ b/arch/xtensa/platforms/xtfpga/setup.c @@ -133,6 +133,7 @@ static int __init machine_setup(void) if ((eth = of_find_compatible_node(eth, NULL, "opencores,ethoc"))) update_local_mac(eth); + of_node_put(eth); return 0; } arch_initcall(machine_setup); diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 0d46cb728bbfab177eaf505b2760637ec2e27d97..e6d7e6b01a05c744c4264220b6ae7a9f48c9178b 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -7046,6 +7046,7 @@ static void bfq_exit_queue(struct elevator_queue *e) spin_unlock_irq(&bfqd->lock); #endif + blk_stat_disable_accounting(bfqd->queue); wbt_enable_default(bfqd->queue); kfree(bfqd); @@ -7188,7 +7189,12 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) bfq_init_root_group(bfqd->root_group, bfqd); bfq_init_entity(&bfqd->oom_bfqq.entity, bfqd->root_group); + /* We dispatch from request queue wide instead of hw queue */ + blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q); + wbt_disable_default(q); + blk_stat_enable_accounting(q); + return 0; out_free: diff --git a/block/blk-core.c b/block/blk-core.c index 06ff5bbfe8f6627f13c920e39d9e02dbea13c2d4..27fb1357ad4b81bd5940ede8908a37e56abfa158 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -322,19 +322,6 @@ void blk_cleanup_queue(struct request_queue *q) blk_mq_exit_queue(q); } - /* - * In theory, request pool of sched_tags belongs to request queue. - * However, the current implementation requires tag_set for freeing - * requests, so free the pool now. - * - * Queue has become frozen, there can't be any in-queue requests, so - * it is safe to free requests now. - */ - mutex_lock(&q->sysfs_lock); - if (q->elevator) - blk_mq_sched_free_rqs(q); - mutex_unlock(&q->sysfs_lock); - /* @q is and will stay empty, shutdown and put */ blk_put_queue(q); } diff --git a/block/blk-ia-ranges.c b/block/blk-ia-ranges.c index 56ed48d2954e66ce6d51a032cb3993eef3c28d63..47c89e65b57fac0c11052a364c3738286cd2db3e 100644 --- a/block/blk-ia-ranges.c +++ b/block/blk-ia-ranges.c @@ -144,7 +144,6 @@ int disk_register_independent_access_ranges(struct gendisk *disk, } for (i = 0; i < iars->nr_ia_ranges; i++) { - iars->ia_range[i].queue = q; ret = kobject_init_and_add(&iars->ia_range[i].kobj, &blk_ia_range_ktype, &iars->kobj, "%d", i); diff --git a/block/blk-merge.c b/block/blk-merge.c index 7771dacc99cb7d30328b277ac6866c8b2bf39f44..f5e6527ebc9c11d7a9d5b9641c17a4072ff7e2cb 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -345,6 +345,7 @@ void __blk_queue_split(struct request_queue *q, struct bio **bio, /* there isn't chance to merge the splitted bio */ split->bi_opf |= REQ_NOMERGE; + blkcg_bio_issue_init(split); bio_chain(split, *bio); trace_block_split(split, (*bio)->bi_iter.bi_sector); submit_bio_noacct(*bio); diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 7e4136a60e1cc69d1fddf9fb6f9b8342c5d329f7..4d1ce9ef4318789e968586a3f5e6af4b5b80f3bf 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -711,11 +711,6 @@ void blk_mq_debugfs_register(struct request_queue *q) } } -void blk_mq_debugfs_unregister(struct request_queue *q) -{ - q->sched_debugfs_dir = NULL; -} - static void blk_mq_debugfs_register_ctx(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx) { @@ -746,6 +741,8 @@ void blk_mq_debugfs_register_hctx(struct request_queue *q, void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx) { + if (!hctx->queue->debugfs_dir) + return; debugfs_remove_recursive(hctx->debugfs_dir); hctx->sched_debugfs_dir = NULL; hctx->debugfs_dir = NULL; @@ -773,6 +770,8 @@ void blk_mq_debugfs_register_sched(struct request_queue *q) { struct elevator_type *e = q->elevator->type; + lockdep_assert_held(&q->debugfs_mutex); + /* * If the parent directory has not been created yet, return, we will be * called again later on and the directory/files will be created then. @@ -790,6 +789,8 @@ void blk_mq_debugfs_register_sched(struct request_queue *q) void blk_mq_debugfs_unregister_sched(struct request_queue *q) { + lockdep_assert_held(&q->debugfs_mutex); + debugfs_remove_recursive(q->sched_debugfs_dir); q->sched_debugfs_dir = NULL; } @@ -811,6 +812,10 @@ static const char *rq_qos_id_to_name(enum rq_qos_id id) void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos) { + lockdep_assert_held(&rqos->q->debugfs_mutex); + + if (!rqos->q->debugfs_dir) + return; debugfs_remove_recursive(rqos->debugfs_dir); rqos->debugfs_dir = NULL; } @@ -820,6 +825,8 @@ void blk_mq_debugfs_register_rqos(struct rq_qos *rqos) struct request_queue *q = rqos->q; const char *dir_name = rq_qos_id_to_name(rqos->id); + lockdep_assert_held(&q->debugfs_mutex); + if (rqos->debugfs_dir || !rqos->ops->debugfs_attrs) return; @@ -833,17 +840,13 @@ void blk_mq_debugfs_register_rqos(struct rq_qos *rqos) debugfs_create_files(rqos->debugfs_dir, rqos, rqos->ops->debugfs_attrs); } -void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q) -{ - debugfs_remove_recursive(q->rqos_debugfs_dir); - q->rqos_debugfs_dir = NULL; -} - void blk_mq_debugfs_register_sched_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx) { struct elevator_type *e = q->elevator->type; + lockdep_assert_held(&q->debugfs_mutex); + /* * If the parent debugfs directory has not been created yet, return; * We will be called again later on with appropriate parent debugfs @@ -863,6 +866,10 @@ void blk_mq_debugfs_register_sched_hctx(struct request_queue *q, void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx) { + lockdep_assert_held(&hctx->queue->debugfs_mutex); + + if (!hctx->queue->debugfs_dir) + return; debugfs_remove_recursive(hctx->sched_debugfs_dir); hctx->sched_debugfs_dir = NULL; } diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h index 69918f4170d694de3ea17bf2dd20c591b0031aa5..9c7d4b6117d41ed343dab8c20b1859da2b9c6f60 100644 --- a/block/blk-mq-debugfs.h +++ b/block/blk-mq-debugfs.h @@ -21,7 +21,6 @@ int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq); int blk_mq_debugfs_rq_show(struct seq_file *m, void *v); void blk_mq_debugfs_register(struct request_queue *q); -void blk_mq_debugfs_unregister(struct request_queue *q); void blk_mq_debugfs_register_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx); void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx); @@ -36,16 +35,11 @@ void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx); void blk_mq_debugfs_register_rqos(struct rq_qos *rqos); void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos); -void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q); #else static inline void blk_mq_debugfs_register(struct request_queue *q) { } -static inline void blk_mq_debugfs_unregister(struct request_queue *q) -{ -} - static inline void blk_mq_debugfs_register_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx) { @@ -87,10 +81,6 @@ static inline void blk_mq_debugfs_register_rqos(struct rq_qos *rqos) static inline void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos) { } - -static inline void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q) -{ -} #endif #ifdef CONFIG_BLK_DEBUG_FS_ZONED diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 9e56a69422b655ce433fd93cf534a9d0980e068d..a4f7c101b53b28cbeceb4edac405301bf209e35e 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -564,6 +564,7 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) int ret; if (!e) { + blk_queue_flag_clear(QUEUE_FLAG_SQ_SCHED, q); q->elevator = NULL; q->nr_requests = q->tag_set->queue_depth; return 0; @@ -593,7 +594,9 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) if (ret) goto err_free_map_and_rqs; + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_register_sched(q); + mutex_unlock(&q->debugfs_mutex); queue_for_each_hw_ctx(q, hctx, i) { if (e->ops.init_hctx) { @@ -606,7 +609,9 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) return ret; } } + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_register_sched_hctx(q, hctx); + mutex_unlock(&q->debugfs_mutex); } return 0; @@ -647,14 +652,21 @@ void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e) unsigned int flags = 0; queue_for_each_hw_ctx(q, hctx, i) { + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_unregister_sched_hctx(hctx); + mutex_unlock(&q->debugfs_mutex); + if (e->type->ops.exit_hctx && hctx->sched_data) { e->type->ops.exit_hctx(hctx, i); hctx->sched_data = NULL; } flags = hctx->flags; } + + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_unregister_sched(q); + mutex_unlock(&q->debugfs_mutex); + if (e->type->ops.exit_sched) e->type->ops.exit_sched(e); blk_mq_sched_tags_teardown(q, flags); diff --git a/block/blk-mq.c b/block/blk-mq.c index e9bf950983c71cb979a6dbb61c1b25cd20200ea5..93d9d60980fb524934817ca48b37a3174e882bb1 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -579,6 +579,8 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, if (!blk_mq_hw_queue_mapped(data.hctx)) goto out_queue_exit; cpu = cpumask_first_and(data.hctx->cpumask, cpu_online_mask); + if (cpu >= nr_cpu_ids) + goto out_queue_exit; data.ctx = __blk_mq_get_ctx(q, cpu); if (!q->elevator) @@ -2140,20 +2142,6 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) } EXPORT_SYMBOL(blk_mq_run_hw_queue); -/* - * Is the request queue handled by an IO scheduler that does not respect - * hardware queues when dispatching? - */ -static bool blk_mq_has_sqsched(struct request_queue *q) -{ - struct elevator_queue *e = q->elevator; - - if (e && e->type->ops.dispatch_request && - !(e->type->elevator_features & ELEVATOR_F_MQ_AWARE)) - return true; - return false; -} - /* * Return prefered queue to dispatch from (if any) for non-mq aware IO * scheduler. @@ -2186,7 +2174,7 @@ void blk_mq_run_hw_queues(struct request_queue *q, bool async) unsigned long i; sq_hctx = NULL; - if (blk_mq_has_sqsched(q)) + if (blk_queue_sq_sched(q)) sq_hctx = blk_mq_get_sq_hctx(q); queue_for_each_hw_ctx(q, hctx, i) { if (blk_mq_hctx_stopped(hctx)) @@ -2214,7 +2202,7 @@ void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs) unsigned long i; sq_hctx = NULL; - if (blk_mq_has_sqsched(q)) + if (blk_queue_sq_sched(q)) sq_hctx = blk_mq_get_sq_hctx(q); queue_for_each_hw_ctx(q, hctx, i) { if (blk_mq_hctx_stopped(hctx)) @@ -2777,15 +2765,20 @@ static inline struct request *blk_mq_get_cached_request(struct request_queue *q, return NULL; } - rq_qos_throttle(q, *bio); - if (blk_mq_get_hctx_type((*bio)->bi_opf) != rq->mq_hctx->type) return NULL; if (op_is_flush(rq->cmd_flags) != op_is_flush((*bio)->bi_opf)) return NULL; - rq->cmd_flags = (*bio)->bi_opf; + /* + * If any qos ->throttle() end up blocking, we will have flushed the + * plug and hence killed the cached_rq list as well. Pop this entry + * before we throttle. + */ plug->cached_rq = rq_list_next(rq); + rq_qos_throttle(q, *bio); + + rq->cmd_flags = (*bio)->bi_opf; INIT_LIST_HEAD(&rq->queuelist); return rq; } @@ -3443,8 +3436,9 @@ static void blk_mq_exit_hctx(struct request_queue *q, if (blk_mq_hw_queue_mapped(hctx)) blk_mq_tag_idle(hctx); - blk_mq_clear_flush_rq_mapping(set->tags[hctx_idx], - set->queue_depth, flush_rq); + if (blk_queue_init_done(q)) + blk_mq_clear_flush_rq_mapping(set->tags[hctx_idx], + set->queue_depth, flush_rq); if (set->ops->exit_request) set->ops->exit_request(set, flush_rq, hctx_idx); @@ -4438,12 +4432,14 @@ static bool blk_mq_elv_switch_none(struct list_head *head, if (!qe) return false; + /* q->elevator needs protection from ->sysfs_lock */ + mutex_lock(&q->sysfs_lock); + INIT_LIST_HEAD(&qe->node); qe->q = q; qe->type = q->elevator->type; list_add(&qe->node, head); - mutex_lock(&q->sysfs_lock); /* * After elevator_switch_mq, the previous elevator_queue will be * released by elevator_release. The reference of the io scheduler diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c index e83af7bc759194126a645bea59eef2b841603961..d3a75693adbf4ddf9ffb94b07d4302ffcd2ea0cb 100644 --- a/block/blk-rq-qos.c +++ b/block/blk-rq-qos.c @@ -294,8 +294,6 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data, void rq_qos_exit(struct request_queue *q) { - blk_mq_debugfs_unregister_queue_rqos(q); - while (q->rq_qos) { struct rq_qos *rqos = q->rq_qos; q->rq_qos = rqos->next; diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h index 68267007da1c673efd721a559a7232b5014c69ff..0e46052b018a45222190d3084ec36bc48dcd67db 100644 --- a/block/blk-rq-qos.h +++ b/block/blk-rq-qos.h @@ -104,8 +104,11 @@ static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos) blk_mq_unfreeze_queue(q); - if (rqos->ops->debugfs_attrs) + if (rqos->ops->debugfs_attrs) { + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_register_rqos(rqos); + mutex_unlock(&q->debugfs_mutex); + } } static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos) @@ -129,7 +132,9 @@ static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos) blk_mq_unfreeze_queue(q); + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_unregister_rqos(rqos); + mutex_unlock(&q->debugfs_mutex); } typedef bool (acquire_inflight_cb_t)(struct rq_wait *rqw, void *private_data); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 88bd41d4cb59338d987908fac356d92f5621b81d..9b905e9443e49606e09311757985eddeebc6cbef 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -779,14 +779,6 @@ static void blk_release_queue(struct kobject *kobj) if (queue_is_mq(q)) blk_mq_release(q); - blk_trace_shutdown(q); - mutex_lock(&q->debugfs_mutex); - debugfs_remove_recursive(q->debugfs_dir); - mutex_unlock(&q->debugfs_mutex); - - if (queue_is_mq(q)) - blk_mq_debugfs_unregister(q); - bioset_exit(&q->bio_split); if (blk_queue_has_srcu(q)) @@ -836,17 +828,16 @@ int blk_register_queue(struct gendisk *disk) goto unlock; } + if (queue_is_mq(q)) + __blk_mq_register_dev(dev, q); + mutex_lock(&q->sysfs_lock); + mutex_lock(&q->debugfs_mutex); q->debugfs_dir = debugfs_create_dir(kobject_name(q->kobj.parent), blk_debugfs_root); - mutex_unlock(&q->debugfs_mutex); - - if (queue_is_mq(q)) { - __blk_mq_register_dev(dev, q); + if (queue_is_mq(q)) blk_mq_debugfs_register(q); - } - - mutex_lock(&q->sysfs_lock); + mutex_unlock(&q->debugfs_mutex); ret = disk_register_independent_access_ranges(disk, NULL); if (ret) @@ -948,8 +939,15 @@ void blk_unregister_queue(struct gendisk *disk) /* Now that we've deleted all child objects, we can delete the queue. */ kobject_uevent(&q->kobj, KOBJ_REMOVE); kobject_del(&q->kobj); - mutex_unlock(&q->sysfs_dir_lock); + mutex_lock(&q->debugfs_mutex); + blk_trace_shutdown(q); + debugfs_remove_recursive(q->debugfs_dir); + q->debugfs_dir = NULL; + q->sched_debugfs_dir = NULL; + q->rqos_debugfs_dir = NULL; + mutex_unlock(&q->debugfs_mutex); + kobject_put(&disk_to_dev(disk)->kobj); } diff --git a/block/genhd.c b/block/genhd.c index 27205ae47d593b35076630d514be6baa299055af..278227ba1d531a561a45ef57f86b684edc6c138e 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -623,6 +623,7 @@ void del_gendisk(struct gendisk *disk) * Prevent new I/O from crossing bio_queue_enter(). */ blk_queue_start_drain(q); + blk_mq_freeze_queue_wait(q); if (!(disk->flags & GENHD_FL_HIDDEN)) { sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); @@ -646,12 +647,21 @@ void del_gendisk(struct gendisk *disk) pm_runtime_set_memalloc_noio(disk_to_dev(disk), false); device_del(disk_to_dev(disk)); - blk_mq_freeze_queue_wait(q); - blk_throtl_cancel_bios(disk->queue); blk_sync_queue(q); blk_flush_integrity(); + blk_mq_cancel_work_sync(q); + + blk_mq_quiesce_queue(q); + if (q->elevator) { + mutex_lock(&q->sysfs_lock); + elevator_exit(q); + mutex_unlock(&q->sysfs_lock); + } + rq_qos_exit(q); + blk_mq_unquiesce_queue(q); + /* * Allow using passthrough request again after the queue is torn down. */ @@ -1120,31 +1130,6 @@ static const struct attribute_group *disk_attr_groups[] = { NULL }; -static void disk_release_mq(struct request_queue *q) -{ - blk_mq_cancel_work_sync(q); - - /* - * There can't be any non non-passthrough bios in flight here, but - * requests stay around longer, including passthrough ones so we - * still need to freeze the queue here. - */ - blk_mq_freeze_queue(q); - - /* - * Since the I/O scheduler exit code may access cgroup information, - * perform I/O scheduler exit before disassociating from the block - * cgroup controller. - */ - if (q->elevator) { - mutex_lock(&q->sysfs_lock); - elevator_exit(q); - mutex_unlock(&q->sysfs_lock); - } - rq_qos_exit(q); - __blk_mq_unfreeze_queue(q, true); -} - /** * disk_release - releases all allocated resources of the gendisk * @dev: the device representing this disk @@ -1166,9 +1151,6 @@ static void disk_release(struct device *dev) might_sleep(); WARN_ON_ONCE(disk_live(disk)); - if (queue_is_mq(disk->queue)) - disk_release_mq(disk->queue); - blkcg_exit_queue(disk->queue); disk_release_events(disk); diff --git a/block/holder.c b/block/holder.c index 8d750281a1cd958f9686151f940a27c1e65c7be5..5283bc804cc14c3882f1dee28a90f3a0e45ce6e1 100644 --- a/block/holder.c +++ b/block/holder.c @@ -79,10 +79,6 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) WARN_ON_ONCE(!bdev->bd_holder); - /* FIXME: remove the following once add_disk() handles errors */ - if (WARN_ON(!bdev->bd_holder_dir)) - goto out_unlock; - holder = bd_find_holder_disk(bdev, disk); if (holder) { holder->refcnt++; diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index 70ff2a599ef6168423d2c6edac18889279168099..8f7c745b4a57c650e14a031034e31e7a285b8763 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -421,6 +421,8 @@ static int kyber_init_sched(struct request_queue *q, struct elevator_type *e) blk_stat_enable_accounting(q); + blk_queue_flag_clear(QUEUE_FLAG_SQ_SCHED, q); + eq->elevator_data = kqd; q->elevator = eq; @@ -1033,7 +1035,6 @@ static struct elevator_type kyber_sched = { #endif .elevator_attrs = kyber_sched_attrs, .elevator_name = "kyber", - .elevator_features = ELEVATOR_F_MQ_AWARE, .elevator_owner = THIS_MODULE, }; diff --git a/block/mq-deadline.c b/block/mq-deadline.c index 6ed602b2f80a5904892717bd2bbe19203045260a..1a9e835e816cd96d7494069d1f3767b5b29b502e 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -642,6 +642,9 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e) spin_lock_init(&dd->lock); spin_lock_init(&dd->zone_lock); + /* We dispatch from request queue wide instead of hw queue */ + blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q); + q->elevator = eq; return 0; diff --git a/certs/.gitignore b/certs/.gitignore index 56637aceaf81c62cfe63cfe6a10fe7e82835266b..cec5465f31c1c43f7395091902a70b1184c62e63 100644 --- a/certs/.gitignore +++ b/certs/.gitignore @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -/blacklist_hashes_checked +/blacklist_hash_list /extract-cert /x509_certificate_list /x509_revocation_list diff --git a/certs/Makefile b/certs/Makefile index cb1a9da3fc5816c6c31d198f8e87fa8072aefe0d..88a73b28d254a2da623e3a5e645b44f711d2cf0d 100644 --- a/certs/Makefile +++ b/certs/Makefile @@ -3,26 +3,26 @@ # Makefile for the linux kernel signature checking certificates. # -obj-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += system_keyring.o system_certificates.o common.o -obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist.o common.o +obj-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += system_keyring.o system_certificates.o +obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist.o obj-$(CONFIG_SYSTEM_REVOCATION_LIST) += revocation_certificates.o ifneq ($(CONFIG_SYSTEM_BLACKLIST_HASH_LIST),) -quiet_cmd_check_blacklist_hashes = CHECK $(patsubst "%",%,$(2)) - cmd_check_blacklist_hashes = $(AWK) -f $(srctree)/scripts/check-blacklist-hashes.awk $(2); touch $@ -$(eval $(call config_filename,SYSTEM_BLACKLIST_HASH_LIST)) +$(obj)/blacklist_hashes.o: $(obj)/blacklist_hash_list +CFLAGS_blacklist_hashes.o := -I $(obj) -$(obj)/blacklist_hashes.o: $(obj)/blacklist_hashes_checked +quiet_cmd_check_and_copy_blacklist_hash_list = GEN $@ + cmd_check_and_copy_blacklist_hash_list = \ + $(AWK) -f $(srctree)/scripts/check-blacklist-hashes.awk $(CONFIG_SYSTEM_BLACKLIST_HASH_LIST) >&2; \ + cat $(CONFIG_SYSTEM_BLACKLIST_HASH_LIST) > $@ -CFLAGS_blacklist_hashes.o += -I$(srctree) - -targets += blacklist_hashes_checked -$(obj)/blacklist_hashes_checked: $(SYSTEM_BLACKLIST_HASH_LIST_SRCPREFIX)$(SYSTEM_BLACKLIST_HASH_LIST_FILENAME) scripts/check-blacklist-hashes.awk FORCE - $(call if_changed,check_blacklist_hashes,$(SYSTEM_BLACKLIST_HASH_LIST_SRCPREFIX)$(CONFIG_SYSTEM_BLACKLIST_HASH_LIST)) +$(obj)/blacklist_hash_list: $(CONFIG_SYSTEM_BLACKLIST_HASH_LIST) FORCE + $(call if_changed,check_and_copy_blacklist_hash_list) obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist_hashes.o else obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist_nohashes.o endif +targets += blacklist_hash_list quiet_cmd_extract_certs = CERT $@ cmd_extract_certs = $(obj)/extract-cert $(extract-cert-in) $@ @@ -33,7 +33,7 @@ $(obj)/system_certificates.o: $(obj)/x509_certificate_list $(obj)/x509_certificate_list: $(CONFIG_SYSTEM_TRUSTED_KEYS) $(obj)/extract-cert FORCE $(call if_changed,extract_certs) -targets += x509_certificate_list blacklist_hashes_checked +targets += x509_certificate_list # If module signing is requested, say by allyesconfig, but a key has not been # supplied, then one will need to be generated to make sure the build does not diff --git a/certs/blacklist.c b/certs/blacklist.c index 25094ea7360078946a27ddf6073ff1e87088d569..41f10601cc724f59ccf9d94468383b2b092fbbf9 100644 --- a/certs/blacklist.c +++ b/certs/blacklist.c @@ -15,10 +15,9 @@ #include #include #include -#include +#include #include #include "blacklist.h" -#include "common.h" /* * According to crypto/asymmetric_keys/x509_cert_parser.c:x509_note_pkey_algo(), @@ -365,8 +364,9 @@ static __init int load_revocation_certificate_list(void) if (revocation_certificate_list_size) pr_notice("Loading compiled-in revocation X.509 certificates\n"); - return load_certificate_list(revocation_certificate_list, revocation_certificate_list_size, - blacklist_keyring); + return x509_load_certificate_list(revocation_certificate_list, + revocation_certificate_list_size, + blacklist_keyring); } late_initcall(load_revocation_certificate_list); #endif diff --git a/certs/blacklist_hashes.c b/certs/blacklist_hashes.c index 344892337be0797cc713877ad64014b8cacaec20..86d66fe1134899479caab9c38824fe10d4b7d133 100644 --- a/certs/blacklist_hashes.c +++ b/certs/blacklist_hashes.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "blacklist.h" -const char __initdata *const blacklist_hashes[] = { -#include CONFIG_SYSTEM_BLACKLIST_HASH_LIST +const char __initconst *const blacklist_hashes[] = { +#include "blacklist_hash_list" , NULL }; diff --git a/certs/common.h b/certs/common.h deleted file mode 100644 index abdb5795936b76f6ed5f958eec9b0918a7cac0d6..0000000000000000000000000000000000000000 --- a/certs/common.h +++ /dev/null @@ -1,9 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ - -#ifndef _CERT_COMMON_H -#define _CERT_COMMON_H - -int load_certificate_list(const u8 cert_list[], const unsigned long list_size, - const struct key *keyring); - -#endif diff --git a/certs/system_keyring.c b/certs/system_keyring.c index 05b66ce9d1c9ed1b6a0fc2abc81ac22fca452152..5042cc54fa5ec68d71d54185f438df3959479865 100644 --- a/certs/system_keyring.c +++ b/certs/system_keyring.c @@ -16,7 +16,6 @@ #include #include #include -#include "common.h" static struct key *builtin_trusted_keys; #ifdef CONFIG_SECONDARY_TRUSTED_KEYRING @@ -183,7 +182,8 @@ __init int load_module_cert(struct key *keyring) pr_notice("Loading compiled-in module X.509 certificates\n"); - return load_certificate_list(system_certificate_list, module_cert_size, keyring); + return x509_load_certificate_list(system_certificate_list, + module_cert_size, keyring); } /* @@ -204,7 +204,7 @@ static __init int load_system_certificate_list(void) size = system_certificate_list_size - module_cert_size; #endif - return load_certificate_list(p, size, builtin_trusted_keys); + return x509_load_certificate_list(p, size, builtin_trusted_keys); } late_initcall(load_system_certificate_list); diff --git a/crypto/Kconfig b/crypto/Kconfig index 19197469cfab3d7c6d5c10a53afbf239c88b0044..7b81685b56550f9f87623cfef298e8ae378d07e6 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -15,6 +15,7 @@ source "crypto/async_tx/Kconfig" # menuconfig CRYPTO tristate "Cryptographic API" + select LIB_MEMNEQ help This option provides the core Cryptographic API. @@ -665,6 +666,18 @@ config CRYPTO_CRC32_MIPS CRC32c and CRC32 CRC algorithms implemented using mips crypto instructions, when available. +config CRYPTO_CRC32_S390 + tristate "CRC-32 algorithms" + depends on S390 + select CRYPTO_HASH + select CRC32 + help + Select this option if you want to use hardware accelerated + implementations of CRC algorithms. With this option, you + can optimize the computation of CRC-32 (IEEE 802.3 Ethernet) + and CRC-32C (Castagnoli). + + It is available with IBM z13 or later. config CRYPTO_XXHASH tristate "xxHash hash algorithm" @@ -897,6 +910,16 @@ config CRYPTO_SHA512_SSSE3 Extensions version 1 (AVX1), or Advanced Vector Extensions version 2 (AVX2) instructions, when available. +config CRYPTO_SHA512_S390 + tristate "SHA384 and SHA512 digest algorithm" + depends on S390 + select CRYPTO_HASH + help + This is the s390 hardware accelerated implementation of the + SHA512 secure hash standard. + + It is available as of z10. + config CRYPTO_SHA1_OCTEON tristate "SHA1 digest algorithm (OCTEON)" depends on CPU_CAVIUM_OCTEON @@ -929,6 +952,16 @@ config CRYPTO_SHA1_PPC_SPE SHA-1 secure hash standard (DFIPS 180-4) implemented using powerpc SPE SIMD instruction set. +config CRYPTO_SHA1_S390 + tristate "SHA1 digest algorithm" + depends on S390 + select CRYPTO_HASH + help + This is the s390 hardware accelerated implementation of the + SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2). + + It is available as of z990. + config CRYPTO_SHA256 tristate "SHA224 and SHA256 digest algorithm" select CRYPTO_HASH @@ -969,6 +1002,16 @@ config CRYPTO_SHA256_SPARC64 SHA-256 secure hash standard (DFIPS 180-2) implemented using sparc64 crypto instructions, when available. +config CRYPTO_SHA256_S390 + tristate "SHA256 digest algorithm" + depends on S390 + select CRYPTO_HASH + help + This is the s390 hardware accelerated implementation of the + SHA256 secure hash standard (DFIPS 180-2). + + It is available as of z9. + config CRYPTO_SHA512 tristate "SHA384 and SHA512 digest algorithms" select CRYPTO_HASH @@ -1009,6 +1052,26 @@ config CRYPTO_SHA3 References: http://keccak.noekeon.org/ +config CRYPTO_SHA3_256_S390 + tristate "SHA3_224 and SHA3_256 digest algorithm" + depends on S390 + select CRYPTO_HASH + help + This is the s390 hardware accelerated implementation of the + SHA3_256 secure hash standard. + + It is available as of z14. + +config CRYPTO_SHA3_512_S390 + tristate "SHA3_384 and SHA3_512 digest algorithm" + depends on S390 + select CRYPTO_HASH + help + This is the s390 hardware accelerated implementation of the + SHA3_512 secure hash standard. + + It is available as of z14. + config CRYPTO_SM3 tristate @@ -1069,6 +1132,16 @@ config CRYPTO_GHASH_CLMUL_NI_INTEL This is the x86_64 CLMUL-NI accelerated implementation of GHASH, the hash function used in GCM (Galois/Counter mode). +config CRYPTO_GHASH_S390 + tristate "GHASH hash function" + depends on S390 + select CRYPTO_HASH + help + This is the s390 hardware accelerated implementation of GHASH, + the hash function used in GCM (Galois/Counter mode). + + It is available as of z196. + comment "Ciphers" config CRYPTO_AES @@ -1184,6 +1257,23 @@ config CRYPTO_AES_PPC_SPE architecture specific assembler implementations that work on 1KB tables or 256 bytes S-boxes. +config CRYPTO_AES_S390 + tristate "AES cipher algorithms" + depends on S390 + select CRYPTO_ALGAPI + select CRYPTO_SKCIPHER + help + This is the s390 hardware accelerated implementation of the + AES cipher algorithms (FIPS-197). + + As of z9 the ECB and CBC modes are hardware accelerated + for 128 bit keys. + As of z10 the ECB and CBC modes are hardware accelerated + for all AES key sizes. + As of z196 the CTR mode is hardware accelerated for all AES + key sizes and XTS mode is hardware accelerated for 256 and + 512 bit keys. + config CRYPTO_ANUBIS tristate "Anubis cipher algorithm" depends on CRYPTO_USER_API_ENABLE_OBSOLETE @@ -1414,6 +1504,19 @@ config CRYPTO_DES3_EDE_X86_64 algorithm are provided; regular processing one input block and one that processes three blocks parallel. +config CRYPTO_DES_S390 + tristate "DES and Triple DES cipher algorithms" + depends on S390 + select CRYPTO_ALGAPI + select CRYPTO_SKCIPHER + select CRYPTO_LIB_DES + help + This is the s390 hardware accelerated implementation of the + DES cipher algorithm (FIPS 46-2), and Triple DES EDE (FIPS 46-3). + + As of z990 the ECB and CBC mode are hardware accelerated. + As of z196 the CTR mode is hardware accelerated. + config CRYPTO_FCRYPT tristate "FCrypt cipher algorithm" select CRYPTO_ALGAPI @@ -1473,6 +1576,18 @@ config CRYPTO_CHACHA_MIPS select CRYPTO_SKCIPHER select CRYPTO_ARCH_HAVE_LIB_CHACHA +config CRYPTO_CHACHA_S390 + tristate "ChaCha20 stream cipher" + depends on S390 + select CRYPTO_SKCIPHER + select CRYPTO_LIB_CHACHA_GENERIC + select CRYPTO_ARCH_HAVE_LIB_CHACHA + help + This is the s390 SIMD implementation of the ChaCha20 stream + cipher (RFC 7539). + + It is available as of z13. + config CRYPTO_SEED tristate "SEED cipher algorithm" depends on CRYPTO_USER_API_ENABLE_OBSOLETE diff --git a/crypto/Makefile b/crypto/Makefile index 43bc33e247d19ffb09613f2678a3743364b84d23..ceaaa9f34145ad316f53e46f69c824bc87b132c8 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -4,7 +4,7 @@ # obj-$(CONFIG_CRYPTO) += crypto.o -crypto-y := api.o cipher.o compress.o memneq.o +crypto-y := api.o cipher.o compress.o obj-$(CONFIG_CRYPTO_ENGINE) += crypto_engine.o obj-$(CONFIG_CRYPTO_FIPS) += fips.o diff --git a/crypto/asymmetric_keys/Kconfig b/crypto/asymmetric_keys/Kconfig index 460bc5d0a828ce21be17433e367d9a035f4845a5..3df3fe4ed95fa906ce7283a45018878160ed21ce 100644 --- a/crypto/asymmetric_keys/Kconfig +++ b/crypto/asymmetric_keys/Kconfig @@ -75,4 +75,14 @@ config SIGNED_PE_FILE_VERIFICATION This option provides support for verifying the signature(s) on a signed PE binary. +config FIPS_SIGNATURE_SELFTEST + bool "Run FIPS selftests on the X.509+PKCS7 signature verification" + help + This option causes some selftests to be run on the signature + verification code, using some built in data. This is required + for FIPS. + depends on KEYS + depends on ASYMMETRIC_KEY_TYPE + depends on PKCS7_MESSAGE_PARSER + endif # ASYMMETRIC_KEY_TYPE diff --git a/crypto/asymmetric_keys/Makefile b/crypto/asymmetric_keys/Makefile index c38424f55b08d0a7942921e9e9125fbaa3cf660f..0d1fa1b692c6b23ae7508802b68fcf52d6dd9cd7 100644 --- a/crypto/asymmetric_keys/Makefile +++ b/crypto/asymmetric_keys/Makefile @@ -20,7 +20,9 @@ x509_key_parser-y := \ x509.asn1.o \ x509_akid.asn1.o \ x509_cert_parser.o \ + x509_loader.o \ x509_public_key.o +x509_key_parser-$(CONFIG_FIPS_SIGNATURE_SELFTEST) += selftest.o $(obj)/x509_cert_parser.o: \ $(obj)/x509.asn1.h \ diff --git a/crypto/asymmetric_keys/selftest.c b/crypto/asymmetric_keys/selftest.c new file mode 100644 index 0000000000000000000000000000000000000000..fa0bf7f2428495c14146bddafd838d1144aa1999 --- /dev/null +++ b/crypto/asymmetric_keys/selftest.c @@ -0,0 +1,224 @@ +/* Self-testing for signature checking. + * + * Copyright (C) 2022 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include +#include +#include +#include +#include "x509_parser.h" + +struct certs_test { + const u8 *data; + size_t data_len; + const u8 *pkcs7; + size_t pkcs7_len; +}; + +/* + * Set of X.509 certificates to provide public keys for the tests. These will + * be loaded into a temporary keyring for the duration of the testing. + */ +static const __initconst u8 certs_selftest_keys[] = { + "\x30\x82\x05\x55\x30\x82\x03\x3d\xa0\x03\x02\x01\x02\x02\x14\x73" + "\x98\xea\x98\x2d\xd0\x2e\xa8\xb1\xcf\x57\xc7\xf2\x97\xb3\xe6\x1a" + "\xfc\x8c\x0a\x30\x0d\x06\x09\x2a\x86\x48\x86\xf7\x0d\x01\x01\x0b" + "\x05\x00\x30\x34\x31\x32\x30\x30\x06\x03\x55\x04\x03\x0c\x29\x43" + "\x65\x72\x74\x69\x66\x69\x63\x61\x74\x65\x20\x76\x65\x72\x69\x66" + "\x69\x63\x61\x74\x69\x6f\x6e\x20\x73\x65\x6c\x66\x2d\x74\x65\x73" + "\x74\x69\x6e\x67\x20\x6b\x65\x79\x30\x20\x17\x0d\x32\x32\x30\x35" + "\x31\x38\x32\x32\x33\x32\x34\x31\x5a\x18\x0f\x32\x31\x32\x32\x30" + "\x34\x32\x34\x32\x32\x33\x32\x34\x31\x5a\x30\x34\x31\x32\x30\x30" + "\x06\x03\x55\x04\x03\x0c\x29\x43\x65\x72\x74\x69\x66\x69\x63\x61" + "\x74\x65\x20\x76\x65\x72\x69\x66\x69\x63\x61\x74\x69\x6f\x6e\x20" + "\x73\x65\x6c\x66\x2d\x74\x65\x73\x74\x69\x6e\x67\x20\x6b\x65\x79" + "\x30\x82\x02\x22\x30\x0d\x06\x09\x2a\x86\x48\x86\xf7\x0d\x01\x01" + "\x01\x05\x00\x03\x82\x02\x0f\x00\x30\x82\x02\x0a\x02\x82\x02\x01" + "\x00\xcc\xac\x49\xdd\x3b\xca\xb0\x15\x7e\x84\x6a\xb2\x0a\x69\x5f" + "\x1c\x0a\x61\x82\x3b\x4f\x2c\xa3\x95\x2c\x08\x58\x4b\xb1\x5d\x99" + "\xe0\xc3\xc1\x79\xc2\xb3\xeb\xc0\x1e\x6d\x3e\x54\x1d\xbd\xb7\x92" + "\x7b\x4d\xb5\x95\x58\xb2\x52\x2e\xc6\x24\x4b\x71\x63\x80\x32\x77" + "\xa7\x38\x5e\xdb\x72\xae\x6e\x0d\xec\xfb\xb6\x6d\x01\x7f\xe9\x55" + "\x66\xdf\xbf\x1d\x76\x78\x02\x31\xe8\xe5\x07\xf8\xb7\x82\x5c\x0d" + "\xd4\xbb\xfb\xa2\x59\x0d\x2e\x3a\x78\x95\x3a\x8b\x46\x06\x47\x44" + "\x46\xd7\xcd\x06\x6a\x41\x13\xe3\x19\xf6\xbb\x6e\x38\xf4\x83\x01" + "\xa3\xbf\x4a\x39\x4f\xd7\x0a\xe9\x38\xb3\xf5\x94\x14\x4e\xdd\xf7" + "\x43\xfd\x24\xb2\x49\x3c\xa5\xf7\x7a\x7c\xd4\x45\x3d\x97\x75\x68" + "\xf1\xed\x4c\x42\x0b\x70\xca\x85\xf3\xde\xe5\x88\x2c\xc5\xbe\xb6" + "\x97\x34\xba\x24\x02\xcd\x8b\x86\x9f\xa9\x73\xca\x73\xcf\x92\x81" + "\xee\x75\x55\xbb\x18\x67\x5c\xff\x3f\xb5\xdd\x33\x1b\x0c\xe9\x78" + "\xdb\x5c\xcf\xaa\x5c\x43\x42\xdf\x5e\xa9\x6d\xec\xd7\xd7\xff\xe6" + "\xa1\x3a\x92\x1a\xda\xae\xf6\x8c\x6f\x7b\xd5\xb4\x6e\x06\xe9\x8f" + "\xe8\xde\x09\x31\x89\xed\x0e\x11\xa1\xfa\x8a\xe9\xe9\x64\x59\x62" + "\x53\xda\xd1\x70\xbe\x11\xd4\x99\x97\x11\xcf\x99\xde\x0b\x9d\x94" + "\x7e\xaa\xb8\x52\xea\x37\xdb\x90\x7e\x35\xbd\xd9\xfe\x6d\x0a\x48" + "\x70\x28\xdd\xd5\x0d\x7f\x03\x80\x93\x14\x23\x8f\xb9\x22\xcd\x7c" + "\x29\xfe\xf1\x72\xb5\x5c\x0b\x12\xcf\x9c\x15\xf6\x11\x4c\x7a\x45" + "\x25\x8c\x45\x0a\x34\xac\x2d\x9a\x81\xca\x0b\x13\x22\xcd\xeb\x1a" + "\x38\x88\x18\x97\x96\x08\x81\xaa\xcc\x8f\x0f\x8a\x32\x7b\x76\x68" + "\x03\x68\x43\xbf\x11\xba\x55\x60\xfd\x80\x1c\x0d\x9b\x69\xb6\x09" + "\x72\xbc\x0f\x41\x2f\x07\x82\xc6\xe3\xb2\x13\x91\xc4\x6d\x14\x95" + "\x31\xbe\x19\xbd\xbc\xed\xe1\x4c\x74\xa2\xe0\x78\x0b\xbb\x94\xec" + "\x4c\x53\x3a\xa2\xb5\x84\x1d\x4b\x65\x7e\xdc\xf7\xdb\x36\x7d\xbe" + "\x9e\x3b\x36\x66\x42\x66\x76\x35\xbf\xbe\xf0\xc1\x3c\x7c\xe9\x42" + "\x5c\x24\x53\x03\x05\xa8\x67\x24\x50\x02\x75\xff\x24\x46\x3b\x35" + "\x89\x76\xe6\x70\xda\xc5\x51\x8c\x9a\xe5\x05\xb0\x0b\xd0\x2d\xd4" + "\x7d\x57\x75\x94\x6b\xf9\x0a\xad\x0e\x41\x00\x15\xd0\x4f\xc0\x7f" + "\x90\x2d\x18\x48\x8f\x28\xfe\x5d\xa7\xcd\x99\x9e\xbd\x02\x6c\x8a" + "\x31\xf3\x1c\xc7\x4b\xe6\x93\xcd\x42\xa2\xe4\x68\x10\x47\x9d\xfc" + "\x21\x02\x03\x01\x00\x01\xa3\x5d\x30\x5b\x30\x0c\x06\x03\x55\x1d" + "\x13\x01\x01\xff\x04\x02\x30\x00\x30\x0b\x06\x03\x55\x1d\x0f\x04" + "\x04\x03\x02\x07\x80\x30\x1d\x06\x03\x55\x1d\x0e\x04\x16\x04\x14" + "\xf5\x87\x03\xbb\x33\xce\x1b\x73\xee\x02\xec\xcd\xee\x5b\x88\x17" + "\x51\x8f\xe3\xdb\x30\x1f\x06\x03\x55\x1d\x23\x04\x18\x30\x16\x80" + "\x14\xf5\x87\x03\xbb\x33\xce\x1b\x73\xee\x02\xec\xcd\xee\x5b\x88" + "\x17\x51\x8f\xe3\xdb\x30\x0d\x06\x09\x2a\x86\x48\x86\xf7\x0d\x01" + "\x01\x0b\x05\x00\x03\x82\x02\x01\x00\xc0\x2e\x12\x41\x7b\x73\x85" + "\x16\xc8\xdb\x86\x79\xe8\xf5\xcd\x44\xf4\xc6\xe2\x81\x23\x5e\x47" + "\xcb\xab\x25\xf1\x1e\x58\x3e\x31\x7f\x78\xad\x85\xeb\xfe\x14\x88" + "\x60\xf7\x7f\xd2\x26\xa2\xf4\x98\x2a\xfd\xba\x05\x0c\x20\x33\x12" + "\xcc\x4d\x14\x61\x64\x81\x93\xd3\x33\xed\xc8\xff\xf1\x78\xcc\x5f" + "\x51\x9f\x09\xd7\xbe\x0d\x5c\x74\xfd\x9b\xdf\x52\x4a\xc9\xa8\x71" + "\x25\x33\x04\x10\x67\x36\xd0\xb3\x0b\xc9\xa1\x40\x72\xae\x41\x7b" + "\x68\xe6\xe4\x7b\xd0\x28\xf7\x6d\xe7\x3f\x50\xfc\x91\x7c\x91\x56" + "\xd4\xdf\xa6\xbb\xe8\x4d\x1b\x58\xaa\x28\xfa\xc1\x19\xeb\x11\x2f" + "\x24\x8b\x7c\xc5\xa9\x86\x26\xaa\x6e\xb7\x9b\xd5\xf8\x06\xfb\x02" + "\x52\x7b\x9c\x9e\xa1\xe0\x07\x8b\x5e\xe4\xb8\x55\x29\xf6\x48\x52" + "\x1c\x1b\x54\x2d\x46\xd8\xe5\x71\xb9\x60\xd1\x45\xb5\x92\x89\x8a" + "\x63\x58\x2a\xb3\xc6\xb2\x76\xe2\x3c\x82\x59\x04\xae\x5a\xc4\x99" + "\x7b\x2e\x4b\x46\x57\xb8\x29\x24\xb2\xfd\xee\x2c\x0d\xa4\x83\xfa" + "\x65\x2a\x07\x35\x8b\x97\xcf\xbd\x96\x2e\xd1\x7e\x6c\xc2\x1e\x87" + "\xb6\x6c\x76\x65\xb5\xb2\x62\xda\x8b\xe9\x73\xe3\xdb\x33\xdd\x13" + "\x3a\x17\x63\x6a\x76\xde\x8d\x8f\xe0\x47\x61\x28\x3a\x83\xff\x8f" + "\xe7\xc7\xe0\x4a\xa3\xe5\x07\xcf\xe9\x8c\x35\x35\x2e\xe7\x80\x66" + "\x31\xbf\x91\x58\x0a\xe1\x25\x3d\x38\xd3\xa4\xf0\x59\x34\x47\x07" + "\x62\x0f\xbe\x30\xdd\x81\x88\x58\xf0\x28\xb0\x96\xe5\x82\xf8\x05" + "\xb7\x13\x01\xbc\xfa\xc6\x1f\x86\x72\xcc\xf9\xee\x8e\xd9\xd6\x04" + "\x8c\x24\x6c\xbf\x0f\x5d\x37\x39\xcf\x45\xc1\x93\x3a\xd2\xed\x5c" + "\x58\x79\x74\x86\x62\x30\x7e\x8e\xbb\xdd\x7a\xa9\xed\xca\x40\xcb" + "\x62\x47\xf4\xb4\x9f\x52\x7f\x72\x63\xa8\xf0\x2b\xaf\x45\x2a\x48" + "\x19\x6d\xe3\xfb\xf9\x19\x66\x69\xc8\xcc\x62\x87\x6c\x53\x2b\x2d" + "\x6e\x90\x6c\x54\x3a\x82\x25\x41\xcb\x18\x6a\xa4\x22\xa8\xa1\xc4" + "\x47\xd7\x81\x00\x1c\x15\x51\x0f\x1a\xaf\xef\x9f\xa6\x61\x8c\xbd" + "\x6b\x8b\xed\xe6\xac\x0e\xb6\x3a\x4c\x92\xe6\x0f\x91\x0a\x0f\x71" + "\xc7\xa0\xb9\x0d\x3a\x17\x5a\x6f\x35\xc8\xe7\x50\x4f\x46\xe8\x70" + "\x60\x48\x06\x82\x8b\x66\x58\xe6\x73\x91\x9c\x12\x3d\x35\x8e\x46" + "\xad\x5a\xf5\xb3\xdb\x69\x21\x04\xfd\xd3\x1c\xdf\x94\x9d\x56\xb0" + "\x0a\xd1\x95\x76\x8d\xec\x9e\xdd\x0b\x15\x97\x64\xad\xe5\xf2\x62" + "\x02\xfc\x9e\x5f\x56\x42\x39\x05\xb3" +}; + +/* + * Signed data and detached signature blobs that form the verification tests. + */ +static const __initconst u8 certs_selftest_1_data[] = { + "\x54\x68\x69\x73\x20\x69\x73\x20\x73\x6f\x6d\x65\x20\x74\x65\x73" + "\x74\x20\x64\x61\x74\x61\x20\x75\x73\x65\x64\x20\x66\x6f\x72\x20" + "\x73\x65\x6c\x66\x2d\x74\x65\x73\x74\x69\x6e\x67\x20\x63\x65\x72" + "\x74\x69\x66\x69\x63\x61\x74\x65\x20\x76\x65\x72\x69\x66\x69\x63" + "\x61\x74\x69\x6f\x6e\x2e\x0a" +}; + +static const __initconst u8 certs_selftest_1_pkcs7[] = { + "\x30\x82\x02\xab\x06\x09\x2a\x86\x48\x86\xf7\x0d\x01\x07\x02\xa0" + "\x82\x02\x9c\x30\x82\x02\x98\x02\x01\x01\x31\x0d\x30\x0b\x06\x09" + "\x60\x86\x48\x01\x65\x03\x04\x02\x01\x30\x0b\x06\x09\x2a\x86\x48" + "\x86\xf7\x0d\x01\x07\x01\x31\x82\x02\x75\x30\x82\x02\x71\x02\x01" + "\x01\x30\x4c\x30\x34\x31\x32\x30\x30\x06\x03\x55\x04\x03\x0c\x29" + "\x43\x65\x72\x74\x69\x66\x69\x63\x61\x74\x65\x20\x76\x65\x72\x69" + "\x66\x69\x63\x61\x74\x69\x6f\x6e\x20\x73\x65\x6c\x66\x2d\x74\x65" + "\x73\x74\x69\x6e\x67\x20\x6b\x65\x79\x02\x14\x73\x98\xea\x98\x2d" + "\xd0\x2e\xa8\xb1\xcf\x57\xc7\xf2\x97\xb3\xe6\x1a\xfc\x8c\x0a\x30" + "\x0b\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x01\x30\x0d\x06\x09" + "\x2a\x86\x48\x86\xf7\x0d\x01\x01\x01\x05\x00\x04\x82\x02\x00\xac" + "\xb0\xf2\x07\xd6\x99\x6d\xc0\xc0\xd9\x8d\x31\x0d\x7e\x04\xeb\xc3" + "\x88\x90\xc4\x58\x46\xd4\xe2\xa0\xa3\x25\xe3\x04\x50\x37\x85\x8c" + "\x91\xc6\xfc\xc5\xd4\x92\xfd\x05\xd8\xb8\xa3\xb8\xba\x89\x13\x00" + "\x88\x79\x99\x51\x6b\x5b\x28\x31\xc0\xb3\x1b\x7a\x68\x2c\x00\xdb" + "\x4b\x46\x11\xf3\xfa\x50\x8e\x19\x89\xa2\x4c\xda\x4c\x89\x01\x11" + "\x89\xee\xd3\xc8\xc1\xe7\xa7\xf6\xb2\xa2\xf8\x65\xb8\x35\x20\x33" + "\xba\x12\x62\xd5\xbd\xaa\x71\xe5\x5b\xc0\x6a\x32\xff\x6a\x2e\x23" + "\xef\x2b\xb6\x58\xb1\xfb\x5f\x82\x34\x40\x6d\x9f\xbc\x27\xac\x37" + "\x23\x99\xcf\x7d\x20\xb2\x39\x01\xc0\x12\xce\xd7\x5d\x2f\xb6\xab" + "\xb5\x56\x4f\xef\xf4\x72\x07\x58\x65\xa9\xeb\x1f\x75\x1c\x5f\x0c" + "\x88\xe0\xa4\xe2\xcd\x73\x2b\x9e\xb2\x05\x7e\x12\xf8\xd0\x66\x41" + "\xcc\x12\x63\xd4\xd6\xac\x9b\x1d\x14\x77\x8d\x1c\x57\xd5\x27\xc6" + "\x49\xa2\x41\x43\xf3\x59\x29\xe5\xcb\xd1\x75\xbc\x3a\x97\x2a\x72" + "\x22\x66\xc5\x3b\xc1\xba\xfc\x53\x18\x98\xe2\x21\x64\xc6\x52\x87" + "\x13\xd5\x7c\x42\xe8\xfb\x9c\x9a\x45\x32\xd5\xa5\x22\x62\x9d\xd4" + "\xcb\xa4\xfa\x77\xbb\x50\x24\x0b\x8b\x88\x99\x15\x56\xa9\x1e\x92" + "\xbf\x5d\x94\x77\xb6\xf1\x67\x01\x60\x06\x58\x5c\xdf\x18\x52\x79" + "\x37\x30\x93\x7d\x87\x04\xf1\xe0\x55\x59\x52\xf3\xc2\xb1\x1c\x5b" + "\x12\x7c\x49\x87\xfb\xf7\xed\xdd\x95\x71\xec\x4b\x1a\x85\x08\xb0" + "\xa0\x36\xc4\x7b\xab\x40\xe0\xf1\x98\xcc\xaf\x19\x40\x8f\x47\x6f" + "\xf0\x6c\x84\x29\x7f\x7f\x04\x46\xcb\x08\x0f\xe0\xc1\xc9\x70\x6e" + "\x95\x3b\xa4\xbc\x29\x2b\x53\x67\x45\x1b\x0d\xbc\x13\xa5\x76\x31" + "\xaf\xb9\xd0\xe0\x60\x12\xd2\xf4\xb7\x7c\x58\x7e\xf6\x2d\xbb\x24" + "\x14\x5a\x20\x24\xa8\x12\xdf\x25\xbd\x42\xce\x96\x7c\x2e\xba\x14" + "\x1b\x81\x9f\x18\x45\xa4\xc6\x70\x3e\x0e\xf0\xd3\x7b\x9c\x10\xbe" + "\xb8\x7a\x89\xc5\x9e\xd9\x97\xdf\xd7\xe7\xc6\x1d\xc0\x20\x6c\xb8" + "\x1e\x3a\x63\xb8\x39\x8e\x8e\x62\xd5\xd2\xb4\xcd\xff\x46\xfc\x8e" + "\xec\x07\x35\x0c\xff\xb0\x05\xe6\xf4\xe5\xfe\xa2\xe3\x0a\xe6\x36" + "\xa7\x4a\x7e\x62\x1d\xc4\x50\x39\x35\x4e\x28\xcb\x4a\xfb\x9d\xdb" + "\xdd\x23\xd6\x53\xb1\x74\x77\x12\xf7\x9c\xf0\x9a\x6b\xf7\xa9\x64" + "\x2d\x86\x21\x2a\xcf\xc6\x54\xf5\xc9\xad\xfa\xb5\x12\xb4\xf3\x51" + "\x77\x55\x3c\x6f\x0c\x32\xd3\x8c\x44\x39\x71\x25\xfe\x96\xd2" +}; + +/* + * List of tests to be run. + */ +#define TEST(data, pkcs7) { data, sizeof(data) - 1, pkcs7, sizeof(pkcs7) - 1 } +static const struct certs_test certs_tests[] __initconst = { + TEST(certs_selftest_1_data, certs_selftest_1_pkcs7), +}; + +int __init fips_signature_selftest(void) +{ + struct key *keyring; + int ret, i; + + pr_notice("Running certificate verification selftests\n"); + + keyring = keyring_alloc(".certs_selftest", + GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, current_cred(), + (KEY_POS_ALL & ~KEY_POS_SETATTR) | + KEY_USR_VIEW | KEY_USR_READ | + KEY_USR_SEARCH, + KEY_ALLOC_NOT_IN_QUOTA, + NULL, NULL); + if (IS_ERR(keyring)) + panic("Can't allocate certs selftest keyring: %ld\n", + PTR_ERR(keyring)); + + ret = x509_load_certificate_list(certs_selftest_keys, + sizeof(certs_selftest_keys) - 1, keyring); + if (ret < 0) + panic("Can't allocate certs selftest keyring: %d\n", ret); + + for (i = 0; i < ARRAY_SIZE(certs_tests); i++) { + const struct certs_test *test = &certs_tests[i]; + struct pkcs7_message *pkcs7; + + pkcs7 = pkcs7_parse_message(test->pkcs7, test->pkcs7_len); + if (IS_ERR(pkcs7)) + panic("Certs selftest %d: pkcs7_parse_message() = %d\n", i, ret); + + pkcs7_supply_detached_data(pkcs7, test->data, test->data_len); + + ret = pkcs7_verify(pkcs7, VERIFYING_MODULE_SIGNATURE); + if (ret < 0) + panic("Certs selftest %d: pkcs7_verify() = %d\n", i, ret); + + ret = pkcs7_validate_trust(pkcs7, keyring); + if (ret < 0) + panic("Certs selftest %d: pkcs7_validate_trust() = %d\n", i, ret); + + pkcs7_free_message(pkcs7); + } + + key_put(keyring); + return 0; +} diff --git a/certs/common.c b/crypto/asymmetric_keys/x509_loader.c similarity index 87% rename from certs/common.c rename to crypto/asymmetric_keys/x509_loader.c index 16a220887a53e8d74cb80c4e9f534a013e313c14..1bc169dee22e0e2145d78b4892d111ba78aac12d 100644 --- a/certs/common.c +++ b/crypto/asymmetric_keys/x509_loader.c @@ -2,11 +2,11 @@ #include #include -#include "common.h" +#include -int load_certificate_list(const u8 cert_list[], - const unsigned long list_size, - const struct key *keyring) +int x509_load_certificate_list(const u8 cert_list[], + const unsigned long list_size, + const struct key *keyring) { key_ref_t key; const u8 *p, *end; diff --git a/crypto/asymmetric_keys/x509_parser.h b/crypto/asymmetric_keys/x509_parser.h index 97a886cbe01c3de4271eddbe6e28cf9ff7432389..a299c9c56f409ef4600f5b1c8d6270f0084b16bc 100644 --- a/crypto/asymmetric_keys/x509_parser.h +++ b/crypto/asymmetric_keys/x509_parser.h @@ -40,6 +40,15 @@ struct x509_certificate { bool blacklisted; }; +/* + * selftest.c + */ +#ifdef CONFIG_FIPS_SIGNATURE_SELFTEST +extern int __init fips_signature_selftest(void); +#else +static inline int fips_signature_selftest(void) { return 0; } +#endif + /* * x509_cert_parser.c */ diff --git a/crypto/asymmetric_keys/x509_public_key.c b/crypto/asymmetric_keys/x509_public_key.c index 77ed4e93ad56ff4055b4909487d1fb0f52e7f35c..0b4943a4592b7f9e2b4c69a2f249cc4ae5dfe103 100644 --- a/crypto/asymmetric_keys/x509_public_key.c +++ b/crypto/asymmetric_keys/x509_public_key.c @@ -244,9 +244,15 @@ static struct asymmetric_key_parser x509_key_parser = { /* * Module stuff */ +extern int __init certs_selftest(void); static int __init x509_key_init(void) { - return register_asymmetric_key_parser(&x509_key_parser); + int ret; + + ret = register_asymmetric_key_parser(&x509_key_parser); + if (ret < 0) + return ret; + return fips_signature_selftest(); } static void __exit x509_key_exit(void) diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c index e07782b1fbb68eb6ca3d7a7552387266519f1d83..eaea733b368aeb7cd6ee44a80fce5b1f64d1d841 100644 --- a/drivers/acpi/acpi_video.c +++ b/drivers/acpi/acpi_video.c @@ -73,6 +73,7 @@ module_param(device_id_scheme, bool, 0444); static int only_lcd = -1; module_param(only_lcd, int, 0444); +static bool may_report_brightness_keys; static int register_count; static DEFINE_MUTEX(register_count_mutex); static DEFINE_MUTEX(video_list_lock); @@ -1222,6 +1223,9 @@ acpi_video_bus_get_one_device(struct acpi_device *device, acpi_video_device_bind(video, data); acpi_video_device_find_cap(data); + if (data->cap._BCM && data->cap._BCL) + may_report_brightness_keys = true; + mutex_lock(&video->device_list_lock); list_add_tail(&data->entry, &video->video_device_list); mutex_unlock(&video->device_list_lock); @@ -1689,6 +1693,9 @@ static void acpi_video_device_notify(acpi_handle handle, u32 event, void *data) break; } + if (keycode) + may_report_brightness_keys = true; + acpi_notifier_call_chain(device, event, 0); if (keycode && (report_key_events & REPORT_BRIGHTNESS_KEY_EVENTS)) { @@ -2249,6 +2256,7 @@ void acpi_video_unregister(void) if (register_count) { acpi_bus_unregister_driver(&acpi_video_bus); register_count = 0; + may_report_brightness_keys = false; } mutex_unlock(®ister_count_mutex); } @@ -2270,13 +2278,7 @@ void acpi_video_unregister_backlight(void) bool acpi_video_handles_brightness_key_presses(void) { - bool have_video_busses; - - mutex_lock(&video_list_lock); - have_video_busses = !list_empty(&video_bus_head); - mutex_unlock(&video_list_lock); - - return have_video_busses && + return may_report_brightness_keys && (report_key_events & REPORT_BRIGHTNESS_KEY_EVENTS); } EXPORT_SYMBOL(acpi_video_handles_brightness_key_presses); diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 86fa61a21826c3f25d7eef67255aa065da402bb6..e2db1bdd9dd2580d73a86e0552daae5f8f4ed4fb 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -298,7 +298,7 @@ EXPORT_SYMBOL_GPL(osc_cpc_flexible_adr_space_confirmed); bool osc_sb_native_usb4_support_confirmed; EXPORT_SYMBOL_GPL(osc_sb_native_usb4_support_confirmed); -bool osc_sb_cppc_not_supported; +bool osc_sb_cppc2_support_acked; static u8 sb_uuid_str[] = "0811B06E-4A27-44F9-8D60-3CBBC22E7B48"; static void acpi_bus_osc_negotiate_platform_control(void) @@ -358,11 +358,6 @@ static void acpi_bus_osc_negotiate_platform_control(void) return; } -#ifdef CONFIG_ACPI_CPPC_LIB - osc_sb_cppc_not_supported = !(capbuf_ret[OSC_SUPPORT_DWORD] & - (OSC_SB_CPC_SUPPORT | OSC_SB_CPCV2_SUPPORT)); -#endif - /* * Now run _OSC again with query flag clear and with the caps * supported by both the OS and the platform. @@ -376,6 +371,10 @@ static void acpi_bus_osc_negotiate_platform_control(void) capbuf_ret = context.ret.pointer; if (context.ret.length > OSC_SUPPORT_DWORD) { +#ifdef CONFIG_ACPI_CPPC_LIB + osc_sb_cppc2_support_acked = capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_CPCV2_SUPPORT; +#endif + osc_sb_apei_support_acked = capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_APEI_SUPPORT; osc_pc_lpi_support_confirmed = diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 903528f7e187e4bc1698e4d24ed5d2693f080527..6ff1901d7d4360f901f618bfeb90fcdae5367307 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -577,6 +577,19 @@ bool __weak cpc_ffh_supported(void) return false; } +/** + * cpc_supported_by_cpu() - check if CPPC is supported by CPU + * + * Check if the architectural support for CPPC is present even + * if the _OSC hasn't prescribed it + * + * Return: true for supported, false for not supported + */ +bool __weak cpc_supported_by_cpu(void) +{ + return false; +} + /** * pcc_data_alloc() - Allocate the pcc_data memory for pcc subspace * @@ -684,8 +697,11 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr) acpi_status status; int ret = -ENODATA; - if (osc_sb_cppc_not_supported) - return -ENODEV; + if (!osc_sb_cppc2_support_acked) { + pr_debug("CPPC v2 _OSC not acked\n"); + if (!cpc_supported_by_cpu()) + return -ENODEV; + } /* Parse the ACPI _CPC table for this CPU. */ status = acpi_evaluate_object_typed(handle, "_CPC", NULL, &output, diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c index 0e3ed5eb367b2846f1b36541fb8bf731d8de33ae..0cb20324da164a5bd1086148692c350d995c6c9e 100644 --- a/drivers/amba/bus.c +++ b/drivers/amba/bus.c @@ -493,13 +493,8 @@ static int amba_device_try_add(struct amba_device *dev, struct resource *parent) goto skip_probe; ret = amba_read_periphid(dev); - if (ret) { - if (ret != -EPROBE_DEFER) { - amba_device_put(dev); - goto err_out; - } + if (ret) goto err_release; - } skip_probe: ret = device_add(&dev->dev); @@ -546,6 +541,7 @@ static int amba_deferred_retry(void) continue; list_del_init(&ddev->node); + amba_device_put(ddev->dev); kfree(ddev); } diff --git a/drivers/ata/pata_cs5535.c b/drivers/ata/pata_cs5535.c index 6725931f3c35f571c09912f871b0d89b4c13c47b..c2c3238ff84bfae1b82c7ec2732c94a2d66a930a 100644 --- a/drivers/ata/pata_cs5535.c +++ b/drivers/ata/pata_cs5535.c @@ -90,7 +90,7 @@ static void cs5535_set_piomode(struct ata_port *ap, struct ata_device *adev) static const u16 pio_cmd_timings[5] = { 0xF7F4, 0x53F3, 0x13F1, 0x5131, 0x1131 }; - u32 reg, dummy; + u32 reg, __maybe_unused dummy; struct ata_device *pair = ata_dev_pair(adev); int mode = adev->pio_mode - XFER_PIO_0; @@ -129,7 +129,7 @@ static void cs5535_set_dmamode(struct ata_port *ap, struct ata_device *adev) static const u32 mwdma_timings[3] = { 0x7F0FFFF3, 0x7F035352, 0x7F024241 }; - u32 reg, dummy; + u32 reg, __maybe_unused dummy; int mode = adev->dma_mode; rdmsr(ATAC_CH0D0_DMA + 2 * adev->devno, reg, dummy); diff --git a/drivers/base/core.c b/drivers/base/core.c index 7cd789c4985d40e956fe554929b66b7dc7dccf11..460d6f163e41bf1c849b3b54ef9ef551b75094f3 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -486,7 +486,18 @@ static void device_link_release_fn(struct work_struct *work) /* Ensure that all references to the link object have been dropped. */ device_link_synchronize_removal(); - pm_runtime_release_supplier(link, true); + pm_runtime_release_supplier(link); + /* + * If supplier_preactivated is set, the link has been dropped between + * the pm_runtime_get_suppliers() and pm_runtime_put_suppliers() calls + * in __driver_probe_device(). In that case, drop the supplier's + * PM-runtime usage counter to remove the reference taken by + * pm_runtime_get_suppliers(). + */ + if (link->supplier_preactivated) + pm_runtime_put_noidle(link->supplier); + + pm_request_idle(link->supplier); put_device(link->consumer); put_device(link->supplier); diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 2ef23fce0860c5d780d41c3eb13e51ad6132da26..4c98849577d4ed262292db4ae29d21465e2944fc 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -564,6 +564,18 @@ ssize_t __weak cpu_show_srbds(struct device *dev, return sysfs_emit(buf, "Not affected\n"); } +ssize_t __weak cpu_show_mmio_stale_data(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "Not affected\n"); +} + +ssize_t __weak cpu_show_retbleed(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "Not affected\n"); +} + static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); @@ -573,6 +585,8 @@ static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL); static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL); static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL); static DEVICE_ATTR(srbds, 0444, cpu_show_srbds, NULL); +static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL); +static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, @@ -584,6 +598,8 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_tsx_async_abort.attr, &dev_attr_itlb_multihit.attr, &dev_attr_srbds.attr, + &dev_attr_mmio_stale_data.attr, + &dev_attr_retbleed.attr, NULL }; diff --git a/drivers/base/init.c b/drivers/base/init.c index d8d0fe687111a0e0cec44c388a470df8708bdf44..397eb9880cecb8ed39914880bf64e879a8c94b32 100644 --- a/drivers/base/init.c +++ b/drivers/base/init.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "base.h" @@ -20,6 +21,7 @@ void __init driver_init(void) { /* These are the core pieces */ + bdi_init(&noop_backing_dev_info); devtmpfs_init(); devices_init(); buses_init(); diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 084d67fd55cc8c4ce90299774ba860fbeb1b0bec..bc60c9cd3230821c8b2c34bebbc06b59901b2d24 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -558,7 +558,7 @@ static ssize_t hard_offline_page_store(struct device *dev, if (kstrtoull(buf, 0, &pfn) < 0) return -EINVAL; pfn >>= PAGE_SHIFT; - ret = memory_failure(pfn, 0); + ret = memory_failure(pfn, MF_SW_SIMULATED); if (ret == -EOPNOTSUPP) ret = 0; return ret ? ret : count; diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 676dc72d912d1bd6f091584dd0aef331d2bbb1b6..949907e2e24232925b824bcd3f427a7704c45137 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -308,13 +308,10 @@ static int rpm_get_suppliers(struct device *dev) /** * pm_runtime_release_supplier - Drop references to device link's supplier. * @link: Target device link. - * @check_idle: Whether or not to check if the supplier device is idle. * - * Drop all runtime PM references associated with @link to its supplier device - * and if @check_idle is set, check if that device is idle (and so it can be - * suspended). + * Drop all runtime PM references associated with @link to its supplier device. */ -void pm_runtime_release_supplier(struct device_link *link, bool check_idle) +void pm_runtime_release_supplier(struct device_link *link) { struct device *supplier = link->supplier; @@ -327,9 +324,6 @@ void pm_runtime_release_supplier(struct device_link *link, bool check_idle) while (refcount_dec_not_one(&link->rpm_active) && atomic_read(&supplier->power.usage_count) > 0) pm_runtime_put_noidle(supplier); - - if (check_idle) - pm_request_idle(supplier); } static void __rpm_put_suppliers(struct device *dev, bool try_to_suspend) @@ -337,8 +331,11 @@ static void __rpm_put_suppliers(struct device *dev, bool try_to_suspend) struct device_link *link; list_for_each_entry_rcu(link, &dev->links.suppliers, c_node, - device_links_read_lock_held()) - pm_runtime_release_supplier(link, try_to_suspend); + device_links_read_lock_held()) { + pm_runtime_release_supplier(link); + if (try_to_suspend) + pm_request_idle(link->supplier); + } } static void rpm_put_suppliers(struct device *dev) @@ -1771,7 +1768,6 @@ void pm_runtime_get_suppliers(struct device *dev) if (link->flags & DL_FLAG_PM_RUNTIME) { link->supplier_preactivated = true; pm_runtime_get_sync(link->supplier); - refcount_inc(&link->rpm_active); } device_links_read_unlock(idx); @@ -1791,19 +1787,8 @@ void pm_runtime_put_suppliers(struct device *dev) list_for_each_entry_rcu(link, &dev->links.suppliers, c_node, device_links_read_lock_held()) if (link->supplier_preactivated) { - bool put; - link->supplier_preactivated = false; - - spin_lock_irq(&dev->power.lock); - - put = pm_runtime_status_suspended(dev) && - refcount_dec_not_one(&link->rpm_active); - - spin_unlock_irq(&dev->power.lock); - - if (put) - pm_runtime_put(link->supplier); + pm_runtime_put(link->supplier); } device_links_read_unlock(idx); @@ -1838,7 +1823,8 @@ void pm_runtime_drop_link(struct device_link *link) return; pm_runtime_drop_link_count(link->consumer); - pm_runtime_release_supplier(link, true); + pm_runtime_release_supplier(link); + pm_request_idle(link->supplier); } static bool pm_runtime_need_not_resume(struct device *dev) diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c index 400c7412a7dcf9339b74d7d6f2224aa0b0d8ba68..a6db605707b0007d8b13bbcb891a2a0d4535e24d 100644 --- a/drivers/base/regmap/regmap-irq.c +++ b/drivers/base/regmap/regmap-irq.c @@ -252,6 +252,7 @@ static void regmap_irq_enable(struct irq_data *data) struct regmap_irq_chip_data *d = irq_data_get_irq_chip_data(data); struct regmap *map = d->map; const struct regmap_irq *irq_data = irq_to_regmap_irq(d, data->hwirq); + unsigned int reg = irq_data->reg_offset / map->reg_stride; unsigned int mask, type; type = irq_data->type.type_falling_val | irq_data->type.type_rising_val; @@ -268,14 +269,14 @@ static void regmap_irq_enable(struct irq_data *data) * at the corresponding offset in regmap_irq_set_type(). */ if (d->chip->type_in_mask && type) - mask = d->type_buf[irq_data->reg_offset / map->reg_stride]; + mask = d->type_buf[reg] & irq_data->mask; else mask = irq_data->mask; if (d->chip->clear_on_unmask) d->clear_status = true; - d->mask_buf[irq_data->reg_offset / map->reg_stride] &= ~mask; + d->mask_buf[reg] &= ~mask; } static void regmap_irq_disable(struct irq_data *data) @@ -386,6 +387,7 @@ static inline int read_sub_irq_data(struct regmap_irq_chip_data *data, subreg = &chip->sub_reg_offsets[b]; for (i = 0; i < subreg->num_regs; i++) { unsigned int offset = subreg->offset[i]; + unsigned int index = offset / map->reg_stride; if (chip->not_fixed_stride) ret = regmap_read(map, @@ -394,7 +396,7 @@ static inline int read_sub_irq_data(struct regmap_irq_chip_data *data, else ret = regmap_read(map, chip->status_base + offset, - &data->status_buf[offset]); + &data->status_buf[index]); if (ret) break; diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 2221d98638317a7b1003c5befed3b860f4f94847..c3517ccc315918bbe9c4de099410000bb38688f3 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -1880,8 +1880,7 @@ static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg, */ bool regmap_can_raw_write(struct regmap *map) { - return map->bus && map->bus->write && map->format.format_val && - map->format.format_reg; + return map->write && map->format.format_val && map->format.format_reg; } EXPORT_SYMBOL_GPL(regmap_can_raw_write); @@ -2155,10 +2154,9 @@ int regmap_noinc_write(struct regmap *map, unsigned int reg, size_t write_len; int ret; - if (!map->bus) - return -EINVAL; - if (!map->bus->write) + if (!map->write) return -ENOTSUPP; + if (val_len % map->format.val_bytes) return -EINVAL; if (!IS_ALIGNED(reg, map->reg_stride)) @@ -2278,7 +2276,7 @@ int regmap_bulk_write(struct regmap *map, unsigned int reg, const void *val, * Some devices don't support bulk write, for them we have a series of * single write operations. */ - if (!map->bus || !map->format.parse_inplace) { + if (!map->write || !map->format.parse_inplace) { map->lock(map->lock_arg); for (i = 0; i < val_count; i++) { unsigned int ival; @@ -2904,6 +2902,9 @@ int regmap_noinc_read(struct regmap *map, unsigned int reg, size_t read_len; int ret; + if (!map->read) + return -ENOTSUPP; + if (val_len % map->format.val_bytes) return -EINVAL; if (!IS_ALIGNED(reg, map->reg_stride)) @@ -3017,7 +3018,7 @@ int regmap_bulk_read(struct regmap *map, unsigned int reg, void *val, if (val_count == 0) return -EINVAL; - if (map->format.parse_inplace && (vol || map->cache_type == REGCACHE_NONE)) { + if (map->read && map->format.parse_inplace && (vol || map->cache_type == REGCACHE_NONE)) { ret = regmap_raw_read(map, reg, val, val_bytes * val_count); if (ret != 0) return ret; diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index a88ce4426400e2cfea8806e2c3b1278e67cf1252..3646c0cae672a338bc92d64c649417b328ebe3b6 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -152,6 +152,10 @@ static unsigned int xen_blkif_max_ring_order; module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, 0444); MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring"); +static bool __read_mostly xen_blkif_trusted = true; +module_param_named(trusted, xen_blkif_trusted, bool, 0644); +MODULE_PARM_DESC(trusted, "Is the backend trusted"); + #define BLK_RING_SIZE(info) \ __CONST_RING_SIZE(blkif, XEN_PAGE_SIZE * (info)->nr_ring_pages) @@ -210,6 +214,7 @@ struct blkfront_info unsigned int feature_discard:1; unsigned int feature_secdiscard:1; unsigned int feature_persistent:1; + unsigned int bounce:1; unsigned int discard_granularity; unsigned int discard_alignment; /* Number of 4KB segments handled */ @@ -310,8 +315,8 @@ static int fill_grant_buffer(struct blkfront_ring_info *rinfo, int num) if (!gnt_list_entry) goto out_of_memory; - if (info->feature_persistent) { - granted_page = alloc_page(GFP_NOIO); + if (info->bounce) { + granted_page = alloc_page(GFP_NOIO | __GFP_ZERO); if (!granted_page) { kfree(gnt_list_entry); goto out_of_memory; @@ -330,7 +335,7 @@ out_of_memory: list_for_each_entry_safe(gnt_list_entry, n, &rinfo->grants, node) { list_del(&gnt_list_entry->node); - if (info->feature_persistent) + if (info->bounce) __free_page(gnt_list_entry->page); kfree(gnt_list_entry); i--; @@ -376,7 +381,7 @@ static struct grant *get_grant(grant_ref_t *gref_head, /* Assign a gref to this page */ gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head); BUG_ON(gnt_list_entry->gref == -ENOSPC); - if (info->feature_persistent) + if (info->bounce) grant_foreign_access(gnt_list_entry, info); else { /* Grant access to the GFN passed by the caller */ @@ -400,7 +405,7 @@ static struct grant *get_indirect_grant(grant_ref_t *gref_head, /* Assign a gref to this page */ gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head); BUG_ON(gnt_list_entry->gref == -ENOSPC); - if (!info->feature_persistent) { + if (!info->bounce) { struct page *indirect_page; /* Fetch a pre-allocated page to use for indirect grefs */ @@ -703,7 +708,7 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri .grant_idx = 0, .segments = NULL, .rinfo = rinfo, - .need_copy = rq_data_dir(req) && info->feature_persistent, + .need_copy = rq_data_dir(req) && info->bounce, }; /* @@ -981,11 +986,12 @@ static void xlvbd_flush(struct blkfront_info *info) { blk_queue_write_cache(info->rq, info->feature_flush ? true : false, info->feature_fua ? true : false); - pr_info("blkfront: %s: %s %s %s %s %s\n", + pr_info("blkfront: %s: %s %s %s %s %s %s %s\n", info->gd->disk_name, flush_info(info), "persistent grants:", info->feature_persistent ? "enabled;" : "disabled;", "indirect descriptors:", - info->max_indirect_segments ? "enabled;" : "disabled;"); + info->max_indirect_segments ? "enabled;" : "disabled;", + "bounce buffer:", info->bounce ? "enabled" : "disabled;"); } static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset) @@ -1207,7 +1213,7 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo) if (!list_empty(&rinfo->indirect_pages)) { struct page *indirect_page, *n; - BUG_ON(info->feature_persistent); + BUG_ON(info->bounce); list_for_each_entry_safe(indirect_page, n, &rinfo->indirect_pages, lru) { list_del(&indirect_page->lru); __free_page(indirect_page); @@ -1224,7 +1230,7 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo) NULL); rinfo->persistent_gnts_c--; } - if (info->feature_persistent) + if (info->bounce) __free_page(persistent_gnt->page); kfree(persistent_gnt); } @@ -1245,7 +1251,7 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo) for (j = 0; j < segs; j++) { persistent_gnt = rinfo->shadow[i].grants_used[j]; gnttab_end_foreign_access(persistent_gnt->gref, NULL); - if (info->feature_persistent) + if (info->bounce) __free_page(persistent_gnt->page); kfree(persistent_gnt); } @@ -1428,7 +1434,7 @@ static int blkif_completion(unsigned long *id, data.s = s; num_sg = s->num_sg; - if (bret->operation == BLKIF_OP_READ && info->feature_persistent) { + if (bret->operation == BLKIF_OP_READ && info->bounce) { for_each_sg(s->sg, sg, num_sg, i) { BUG_ON(sg->offset + sg->length > PAGE_SIZE); @@ -1487,7 +1493,7 @@ static int blkif_completion(unsigned long *id, * Add the used indirect page back to the list of * available pages for indirect grefs. */ - if (!info->feature_persistent) { + if (!info->bounce) { indirect_page = s->indirect_grants[i]->page; list_add(&indirect_page->lru, &rinfo->indirect_pages); } @@ -1764,6 +1770,10 @@ static int talk_to_blkback(struct xenbus_device *dev, if (!info) return -ENODEV; + /* Check if backend is trusted. */ + info->bounce = !xen_blkif_trusted || + !xenbus_read_unsigned(dev->nodename, "trusted", 1); + max_page_order = xenbus_read_unsigned(info->xbdev->otherend, "max-ring-page-order", 0); ring_page_order = min(xen_blkif_max_ring_order, max_page_order); @@ -2114,9 +2124,11 @@ static void blkfront_closing(struct blkfront_info *info) return; /* No more blkif_request(). */ - blk_mq_stop_hw_queues(info->rq); - blk_mark_disk_dead(info->gd); - set_capacity(info->gd, 0); + if (info->rq && info->gd) { + blk_mq_stop_hw_queues(info->rq); + blk_mark_disk_dead(info->gd); + set_capacity(info->gd, 0); + } for_each_rinfo(info, rinfo, i) { /* No more gnttab callback work. */ @@ -2171,17 +2183,18 @@ static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo) if (err) goto out_of_memory; - if (!info->feature_persistent && info->max_indirect_segments) { + if (!info->bounce && info->max_indirect_segments) { /* - * We are using indirect descriptors but not persistent - * grants, we need to allocate a set of pages that can be + * We are using indirect descriptors but don't have a bounce + * buffer, we need to allocate a set of pages that can be * used for mapping indirect grefs */ int num = INDIRECT_GREFS(grants) * BLK_RING_SIZE(info); BUG_ON(!list_empty(&rinfo->indirect_pages)); for (i = 0; i < num; i++) { - struct page *indirect_page = alloc_page(GFP_KERNEL); + struct page *indirect_page = alloc_page(GFP_KERNEL | + __GFP_ZERO); if (!indirect_page) goto out_of_memory; list_add(&indirect_page->lru, &rinfo->indirect_pages); @@ -2274,6 +2287,8 @@ static void blkfront_gather_backend_features(struct blkfront_info *info) info->feature_persistent = !!xenbus_read_unsigned(info->xbdev->otherend, "feature-persistent", 0); + if (info->feature_persistent) + info->bounce = true; indirect_segments = xenbus_read_unsigned(info->xbdev->otherend, "feature-max-indirect-segments", 0); @@ -2457,16 +2472,19 @@ static int blkfront_remove(struct xenbus_device *xbdev) dev_dbg(&xbdev->dev, "%s removed", xbdev->nodename); - del_gendisk(info->gd); + if (info->gd) + del_gendisk(info->gd); mutex_lock(&blkfront_mutex); list_del(&info->info_list); mutex_unlock(&blkfront_mutex); blkif_free(info, 0); - xlbd_release_minors(info->gd->first_minor, info->gd->minors); - blk_cleanup_disk(info->gd); - blk_mq_free_tag_set(&info->tag_set); + if (info->gd) { + xlbd_release_minors(info->gd->first_minor, info->gd->minors); + blk_cleanup_disk(info->gd); + blk_mq_free_tag_set(&info->tag_set); + } kfree(info); return 0; @@ -2542,6 +2560,13 @@ static void blkfront_delay_work(struct work_struct *work) struct blkfront_info *info; bool need_schedule_work = false; + /* + * Note that when using bounce buffers but not persistent grants + * there's no need to run blkfront_delay_work because grants are + * revoked in blkif_completion or else an error is reported and the + * connection is closed. + */ + mutex_lock(&blkfront_mutex); list_for_each_entry(info, &info_list, info_list) { diff --git a/drivers/bus/bt1-apb.c b/drivers/bus/bt1-apb.c index b25ff941e7c7f52c34f1050a403f5b28a11d7b2f..63b1b4a76671d837017e5923b4bbdbf0e8d4a841 100644 --- a/drivers/bus/bt1-apb.c +++ b/drivers/bus/bt1-apb.c @@ -175,10 +175,9 @@ static int bt1_apb_request_rst(struct bt1_apb *apb) int ret; apb->prst = devm_reset_control_get_optional_exclusive(apb->dev, "prst"); - if (IS_ERR(apb->prst)) { - dev_warn(apb->dev, "Couldn't get reset control line\n"); - return PTR_ERR(apb->prst); - } + if (IS_ERR(apb->prst)) + return dev_err_probe(apb->dev, PTR_ERR(apb->prst), + "Couldn't get reset control line\n"); ret = reset_control_deassert(apb->prst); if (ret) @@ -199,10 +198,9 @@ static int bt1_apb_request_clk(struct bt1_apb *apb) int ret; apb->pclk = devm_clk_get(apb->dev, "pclk"); - if (IS_ERR(apb->pclk)) { - dev_err(apb->dev, "Couldn't get APB clock descriptor\n"); - return PTR_ERR(apb->pclk); - } + if (IS_ERR(apb->pclk)) + return dev_err_probe(apb->dev, PTR_ERR(apb->pclk), + "Couldn't get APB clock descriptor\n"); ret = clk_prepare_enable(apb->pclk); if (ret) { diff --git a/drivers/bus/bt1-axi.c b/drivers/bus/bt1-axi.c index e7a6744acc7b82e1292edf0138d29ab24a1c1946..70e49a6e53746a4e6f55f7629623f9ca6b77ada3 100644 --- a/drivers/bus/bt1-axi.c +++ b/drivers/bus/bt1-axi.c @@ -135,10 +135,9 @@ static int bt1_axi_request_rst(struct bt1_axi *axi) int ret; axi->arst = devm_reset_control_get_optional_exclusive(axi->dev, "arst"); - if (IS_ERR(axi->arst)) { - dev_warn(axi->dev, "Couldn't get reset control line\n"); - return PTR_ERR(axi->arst); - } + if (IS_ERR(axi->arst)) + return dev_err_probe(axi->dev, PTR_ERR(axi->arst), + "Couldn't get reset control line\n"); ret = reset_control_deassert(axi->arst); if (ret) @@ -159,10 +158,9 @@ static int bt1_axi_request_clk(struct bt1_axi *axi) int ret; axi->aclk = devm_clk_get(axi->dev, "aclk"); - if (IS_ERR(axi->aclk)) { - dev_err(axi->dev, "Couldn't get AXI Interconnect clock\n"); - return PTR_ERR(axi->aclk); - } + if (IS_ERR(axi->aclk)) + return dev_err_probe(axi->dev, PTR_ERR(axi->aclk), + "Couldn't get AXI Interconnect clock\n"); ret = clk_prepare_enable(axi->aclk); if (ret) { diff --git a/drivers/bus/fsl-mc/fsl-mc-bus.c b/drivers/bus/fsl-mc/fsl-mc-bus.c index e81a9700cfd03fbcfb5f14c8520179e1dd74e14f..6143dbf31f3119ac12a95a64d90ef0cfb1be5302 100644 --- a/drivers/bus/fsl-mc/fsl-mc-bus.c +++ b/drivers/bus/fsl-mc/fsl-mc-bus.c @@ -1239,14 +1239,14 @@ error_cleanup_mc_io: static int fsl_mc_bus_remove(struct platform_device *pdev) { struct fsl_mc *mc = platform_get_drvdata(pdev); + struct fsl_mc_io *mc_io; if (!fsl_mc_is_root_dprc(&mc->root_mc_bus_dev->dev)) return -EINVAL; + mc_io = mc->root_mc_bus_dev->mc_io; fsl_mc_device_remove(mc->root_mc_bus_dev); - - fsl_destroy_mc_io(mc->root_mc_bus_dev->mc_io); - mc->root_mc_bus_dev->mc_io = NULL; + fsl_destroy_mc_io(mc_io); bus_unregister_notifier(&fsl_mc_bus_type, &fsl_mc_nb); diff --git a/drivers/char/lp.c b/drivers/char/lp.c index 0e22e3b0a04e7f621f67f919b3c00ce500f62dd6..38aad99ebb61586c9ef4f86f5347920938e96358 100644 --- a/drivers/char/lp.c +++ b/drivers/char/lp.c @@ -1019,7 +1019,7 @@ static struct parport_driver lp_driver = { static int __init lp_init(void) { - int i, err = 0; + int i, err; if (parport_nr[0] == LP_PARPORT_OFF) return 0; diff --git a/drivers/char/random.c b/drivers/char/random.c index 655e327d425ec34bde22f711acb5be7ded9f5f41..a1af90bacc9f8b99a7f3c307711620213be3132a 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -87,7 +87,7 @@ static struct fasync_struct *fasync; /* Control how we warn userspace. */ static struct ratelimit_state urandom_warning = - RATELIMIT_STATE_INIT("warn_urandom_randomness", HZ, 3); + RATELIMIT_STATE_INIT_FLAGS("urandom_warning", HZ, 3, RATELIMIT_MSG_ON_RELEASE); static int ratelimit_disable __read_mostly = IS_ENABLED(CONFIG_WARN_ALL_UNSEEDED_RANDOM); module_param_named(ratelimit_disable, ratelimit_disable, int, 0644); @@ -408,7 +408,7 @@ static ssize_t get_random_bytes_user(struct iov_iter *iter) /* * Immediately overwrite the ChaCha key at index 4 with random - * bytes, in case userspace causes copy_to_user() below to sleep + * bytes, in case userspace causes copy_to_iter() below to sleep * forever, so that we still retain forward secrecy in that case. */ crng_make_state(chacha_state, (u8 *)&chacha_state[4], CHACHA_KEY_SIZE); @@ -1009,7 +1009,7 @@ void add_interrupt_randomness(int irq) if (new_count & MIX_INFLIGHT) return; - if (new_count < 64 && !time_is_before_jiffies(fast_pool->last + HZ)) + if (new_count < 1024 && !time_is_before_jiffies(fast_pool->last + HZ)) return; if (unlikely(!fast_pool->mix.func)) @@ -1174,7 +1174,7 @@ static void __cold entropy_timer(struct timer_list *timer) */ static void __cold try_to_generate_entropy(void) { - enum { NUM_TRIAL_SAMPLES = 8192, MAX_SAMPLES_PER_BIT = 32 }; + enum { NUM_TRIAL_SAMPLES = 8192, MAX_SAMPLES_PER_BIT = HZ / 30 }; struct entropy_timer_state stack; unsigned int i, num_different = 0; unsigned long last = random_get_entropy(); diff --git a/drivers/clk/stm32/reset-stm32.c b/drivers/clk/stm32/reset-stm32.c index 040870130e4b8058086bf011b49aaf5a81f5a320..e89381528af99179887b2e662155697e1c283a66 100644 --- a/drivers/clk/stm32/reset-stm32.c +++ b/drivers/clk/stm32/reset-stm32.c @@ -111,6 +111,7 @@ int stm32_rcc_reset_init(struct device *dev, const struct of_device_id *match, if (!reset_data) return -ENOMEM; + spin_lock_init(&reset_data->lock); reset_data->membase = base; reset_data->rcdev.owner = THIS_MODULE; reset_data->rcdev.ops = &stm32_reset_ops; diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c index ff188ab68496e0d646f5e15ea8c5b0ef2e321c83..bb47610bbd1c4ddee413a626cf8a6eed333b451a 100644 --- a/drivers/clocksource/hyperv_timer.c +++ b/drivers/clocksource/hyperv_timer.c @@ -565,4 +565,3 @@ void __init hv_init_clocksource(void) hv_sched_clock_offset = hv_read_reference_counter(); hv_setup_sched_clock(read_hv_sched_clock_msr); } -EXPORT_SYMBOL_GPL(hv_init_clocksource); diff --git a/drivers/comedi/drivers/vmk80xx.c b/drivers/comedi/drivers/vmk80xx.c index 46023adc539585aaf5e081234845ebe1907c5b4d..4536ed43f65b2763ec4612a000eb4dae70875c17 100644 --- a/drivers/comedi/drivers/vmk80xx.c +++ b/drivers/comedi/drivers/vmk80xx.c @@ -684,7 +684,7 @@ static int vmk80xx_alloc_usb_buffers(struct comedi_device *dev) if (!devpriv->usb_rx_buf) return -ENOMEM; - size = max(usb_endpoint_maxp(devpriv->ep_rx), MIN_BUF_SIZE); + size = max(usb_endpoint_maxp(devpriv->ep_tx), MIN_BUF_SIZE); devpriv->usb_tx_buf = kzalloc(size, GFP_KERNEL); if (!devpriv->usb_tx_buf) return -ENOMEM; diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 7be38bc6a673b09c90d578413b38c731a135dc84..9ac75c1cde9c22fc3bcee6885521477ea1352625 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -566,6 +566,28 @@ static int amd_pstate_cpu_exit(struct cpufreq_policy *policy) return 0; } +static int amd_pstate_cpu_resume(struct cpufreq_policy *policy) +{ + int ret; + + ret = amd_pstate_enable(true); + if (ret) + pr_err("failed to enable amd-pstate during resume, return %d\n", ret); + + return ret; +} + +static int amd_pstate_cpu_suspend(struct cpufreq_policy *policy) +{ + int ret; + + ret = amd_pstate_enable(false); + if (ret) + pr_err("failed to disable amd-pstate during suspend, return %d\n", ret); + + return ret; +} + /* Sysfs attributes */ /* @@ -636,6 +658,8 @@ static struct cpufreq_driver amd_pstate_driver = { .target = amd_pstate_target, .init = amd_pstate_cpu_init, .exit = amd_pstate_cpu_exit, + .suspend = amd_pstate_cpu_suspend, + .resume = amd_pstate_cpu_resume, .set_boost = amd_pstate_set_boost, .name = "amd-pstate", .attr = amd_pstate_attr, diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 96de1536e1cbf922dd063d87c99cdad4a5da055c..2c96de3f2d83ccd1ccc57839f9c447513ba54da7 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -127,6 +127,7 @@ static const struct of_device_id blocklist[] __initconst = { { .compatible = "mediatek,mt8173", }, { .compatible = "mediatek,mt8176", }, { .compatible = "mediatek,mt8183", }, + { .compatible = "mediatek,mt8186", }, { .compatible = "mediatek,mt8365", }, { .compatible = "mediatek,mt8516", }, diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c index 37a1eb20f5ba6dff4634bcf52172b74b2687d3b5..76f6b3884e6b2dc070e20f51f592d48ef04bf15d 100644 --- a/drivers/cpufreq/mediatek-cpufreq.c +++ b/drivers/cpufreq/mediatek-cpufreq.c @@ -439,9 +439,13 @@ static int mtk_cpu_dvfs_info_init(struct mtk_cpu_dvfs_info *info, int cpu) /* Both presence and absence of sram regulator are valid cases. */ info->sram_reg = regulator_get_optional(cpu_dev, "sram"); - if (IS_ERR(info->sram_reg)) + if (IS_ERR(info->sram_reg)) { + ret = PTR_ERR(info->sram_reg); + if (ret == -EPROBE_DEFER) + goto out_free_resources; + info->sram_reg = NULL; - else { + } else { ret = regulator_enable(info->sram_reg); if (ret) { dev_warn(cpu_dev, "cpu%d: failed to enable vsram\n", cpu); diff --git a/drivers/cpufreq/pmac32-cpufreq.c b/drivers/cpufreq/pmac32-cpufreq.c index 20f64a8b0a354d8a7aad6caa12aff6969a20ba38..4b8ee2014da6dbb05a15900e629fea8b63c60a93 100644 --- a/drivers/cpufreq/pmac32-cpufreq.c +++ b/drivers/cpufreq/pmac32-cpufreq.c @@ -470,6 +470,10 @@ static int pmac_cpufreq_init_MacRISC3(struct device_node *cpunode) if (slew_done_gpio_np) slew_done_gpio = read_gpio(slew_done_gpio_np); + of_node_put(volt_gpio_np); + of_node_put(freq_gpio_np); + of_node_put(slew_done_gpio_np); + /* If we use the frequency GPIOs, calculate the min/max speeds based * on the bus frequencies */ diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index 0253731d6d25d20ad132eb4756bb634fda561781..36c79580fba25475de26cd8722625fd7c3ad2d69 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -442,6 +442,9 @@ static int qcom_cpufreq_hw_cpu_online(struct cpufreq_policy *policy) struct platform_device *pdev = cpufreq_get_driver_data(); int ret; + if (data->throttle_irq <= 0) + return 0; + ret = irq_set_affinity_hint(data->throttle_irq, policy->cpus); if (ret) dev_err(&pdev->dev, "Failed to set CPU affinity of %s[%d]\n", @@ -469,6 +472,9 @@ static int qcom_cpufreq_hw_cpu_offline(struct cpufreq_policy *policy) static void qcom_cpufreq_hw_lmh_exit(struct qcom_cpufreq_data *data) { + if (data->throttle_irq <= 0) + return; + free_irq(data->throttle_irq, data); } diff --git a/drivers/cpufreq/qoriq-cpufreq.c b/drivers/cpufreq/qoriq-cpufreq.c index 6b6b20da2bcfc83846d37a3524de5c4d3f7598f2..573b417e1483307ee23599f08fb4bf17a12c070f 100644 --- a/drivers/cpufreq/qoriq-cpufreq.c +++ b/drivers/cpufreq/qoriq-cpufreq.c @@ -275,6 +275,7 @@ static int qoriq_cpufreq_probe(struct platform_device *pdev) np = of_find_matching_node(NULL, qoriq_cpufreq_blacklist); if (np) { + of_node_put(np); dev_info(&pdev->dev, "Disabling due to erratum A-008083"); return -ENODEV; } diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index ee99c02c84e81e516b5e986519790ac044472650..3e6aa319920b708375b56eb4210ab4bc54b078cb 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -133,98 +133,6 @@ config CRYPTO_PAES_S390 Select this option if you want to use the paes cipher for example to use protected key encrypted devices. -config CRYPTO_SHA1_S390 - tristate "SHA1 digest algorithm" - depends on S390 - select CRYPTO_HASH - help - This is the s390 hardware accelerated implementation of the - SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2). - - It is available as of z990. - -config CRYPTO_SHA256_S390 - tristate "SHA256 digest algorithm" - depends on S390 - select CRYPTO_HASH - help - This is the s390 hardware accelerated implementation of the - SHA256 secure hash standard (DFIPS 180-2). - - It is available as of z9. - -config CRYPTO_SHA512_S390 - tristate "SHA384 and SHA512 digest algorithm" - depends on S390 - select CRYPTO_HASH - help - This is the s390 hardware accelerated implementation of the - SHA512 secure hash standard. - - It is available as of z10. - -config CRYPTO_SHA3_256_S390 - tristate "SHA3_224 and SHA3_256 digest algorithm" - depends on S390 - select CRYPTO_HASH - help - This is the s390 hardware accelerated implementation of the - SHA3_256 secure hash standard. - - It is available as of z14. - -config CRYPTO_SHA3_512_S390 - tristate "SHA3_384 and SHA3_512 digest algorithm" - depends on S390 - select CRYPTO_HASH - help - This is the s390 hardware accelerated implementation of the - SHA3_512 secure hash standard. - - It is available as of z14. - -config CRYPTO_DES_S390 - tristate "DES and Triple DES cipher algorithms" - depends on S390 - select CRYPTO_ALGAPI - select CRYPTO_SKCIPHER - select CRYPTO_LIB_DES - help - This is the s390 hardware accelerated implementation of the - DES cipher algorithm (FIPS 46-2), and Triple DES EDE (FIPS 46-3). - - As of z990 the ECB and CBC mode are hardware accelerated. - As of z196 the CTR mode is hardware accelerated. - -config CRYPTO_AES_S390 - tristate "AES cipher algorithms" - depends on S390 - select CRYPTO_ALGAPI - select CRYPTO_SKCIPHER - help - This is the s390 hardware accelerated implementation of the - AES cipher algorithms (FIPS-197). - - As of z9 the ECB and CBC modes are hardware accelerated - for 128 bit keys. - As of z10 the ECB and CBC modes are hardware accelerated - for all AES key sizes. - As of z196 the CTR mode is hardware accelerated for all AES - key sizes and XTS mode is hardware accelerated for 256 and - 512 bit keys. - -config CRYPTO_CHACHA_S390 - tristate "ChaCha20 stream cipher" - depends on S390 - select CRYPTO_SKCIPHER - select CRYPTO_LIB_CHACHA_GENERIC - select CRYPTO_ARCH_HAVE_LIB_CHACHA - help - This is the s390 SIMD implementation of the ChaCha20 stream - cipher (RFC 7539). - - It is available as of z13. - config S390_PRNG tristate "Pseudo random number generator device driver" depends on S390 @@ -238,29 +146,6 @@ config S390_PRNG It is available as of z9. -config CRYPTO_GHASH_S390 - tristate "GHASH hash function" - depends on S390 - select CRYPTO_HASH - help - This is the s390 hardware accelerated implementation of GHASH, - the hash function used in GCM (Galois/Counter mode). - - It is available as of z196. - -config CRYPTO_CRC32_S390 - tristate "CRC-32 algorithms" - depends on S390 - select CRYPTO_HASH - select CRC32 - help - Select this option if you want to use hardware accelerated - implementations of CRC algorithms. With this option, you - can optimize the computation of CRC-32 (IEEE 802.3 Ethernet) - and CRC-32C (Castagnoli). - - It is available with IBM z13 or later. - config CRYPTO_DEV_NIAGARA2 tristate "Niagara2 Stream Processing Unit driver" select CRYPTO_LIB_DES diff --git a/drivers/crypto/ccp/sp-platform.c b/drivers/crypto/ccp/sp-platform.c index 9dba52fbee99770af545be7cd187a3f718bb7c84..7d79a8744f9a6a279307b41ab692b2982f28144d 100644 --- a/drivers/crypto/ccp/sp-platform.c +++ b/drivers/crypto/ccp/sp-platform.c @@ -85,17 +85,9 @@ static int sp_get_irqs(struct sp_device *sp) struct sp_platform *sp_platform = sp->dev_specific; struct device *dev = sp->dev; struct platform_device *pdev = to_platform_device(dev); - unsigned int i, count; int ret; - for (i = 0, count = 0; i < pdev->num_resources; i++) { - struct resource *res = &pdev->resource[i]; - - if (resource_type(res) == IORESOURCE_IRQ) - count++; - } - - sp_platform->irq_count = count; + sp_platform->irq_count = platform_irq_count(pdev); ret = platform_get_irq(pdev, 0); if (ret < 0) { @@ -104,7 +96,7 @@ static int sp_get_irqs(struct sp_device *sp) } sp->psp_irq = ret; - if (count == 1) { + if (sp_platform->irq_count == 1) { sp->ccp_irq = ret; } else { ret = platform_get_irq(pdev, 1); diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index 0e89a7a932d40e18f92be92d3fb0f7a983f9a5dc..bfc8ee876278c9c19765780ab90495332157382a 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -197,7 +197,7 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, else cxld->target_type = CXL_DECODER_ACCELERATOR; - if (is_cxl_endpoint(to_cxl_port(cxld->dev.parent))) + if (is_endpoint_decoder(&cxld->dev)) return 0; target_list.value = diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 54f434733b5623cc35d18c161479e40d8fc372b4..cbf23beebebefaf89df8bf7055b4ac765d5731b0 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -355,11 +355,13 @@ static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd, return -EBUSY; /* Check the input buffer is the expected size */ - if (info->size_in != send_cmd->in.size) + if ((info->size_in != CXL_VARIABLE_PAYLOAD) && + (info->size_in != send_cmd->in.size)) return -ENOMEM; /* Check the output buffer is at least large enough */ - if (send_cmd->out.size < info->size_out) + if ((info->size_out != CXL_VARIABLE_PAYLOAD) && + (send_cmd->out.size < info->size_out)) return -ENOMEM; *mem_cmd = (struct cxl_mem_command) { diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index ea60abda65006715cfdf04275dba6704b024c90b..dbce99bdffabc5e9615da5924290c5e195895229 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -272,7 +272,7 @@ static const struct device_type cxl_decoder_root_type = { .groups = cxl_decoder_root_attribute_groups, }; -static bool is_endpoint_decoder(struct device *dev) +bool is_endpoint_decoder(struct device *dev) { return dev->type == &cxl_decoder_endpoint_type; } diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 140dc3278cde142b2e5374a7d00c7a0ad51d4035..6799b27c7db20c02cf97eccb9eae4c3ddfe64f74 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -340,6 +340,7 @@ struct cxl_dport *cxl_find_dport_by_dev(struct cxl_port *port, struct cxl_decoder *to_cxl_decoder(struct device *dev); bool is_root_decoder(struct device *dev); +bool is_endpoint_decoder(struct device *dev); bool is_cxl_decoder(struct device *dev); struct cxl_decoder *cxl_root_decoder_alloc(struct cxl_port *port, unsigned int nr_targets); diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 60d10ee1e7fcd7bc625400e49672b265633b093d..7df0b053373a39727fdca41d5af74940b193ca92 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -300,13 +300,13 @@ struct cxl_mbox_identify { } __packed; struct cxl_mbox_get_lsa { - u32 offset; - u32 length; + __le32 offset; + __le32 length; } __packed; struct cxl_mbox_set_lsa { - u32 offset; - u32 reserved; + __le32 offset; + __le32 reserved; u8 data[]; } __packed; diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index c310f1fd3db02eea814c15fdb4c597a62fc1a85e..a979d0b484d56a7aa5f4a5f9230d8230ab13becc 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -29,6 +29,7 @@ static int create_endpoint(struct cxl_memdev *cxlmd, { struct cxl_dev_state *cxlds = cxlmd->cxlds; struct cxl_port *endpoint; + int rc; endpoint = devm_cxl_add_port(&parent_port->dev, &cxlmd->dev, cxlds->component_reg_phys, parent_port); @@ -37,13 +38,17 @@ static int create_endpoint(struct cxl_memdev *cxlmd, dev_dbg(&cxlmd->dev, "add: %s\n", dev_name(&endpoint->dev)); + rc = cxl_endpoint_autoremove(cxlmd, endpoint); + if (rc) + return rc; + if (!endpoint->dev.driver) { dev_err(&cxlmd->dev, "%s failed probe\n", dev_name(&endpoint->dev)); return -ENXIO; } - return cxl_endpoint_autoremove(cxlmd, endpoint); + return 0; } static void enable_suspend(void *data) diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index bbeef91e637e9e7f5c3615234f50f0fc3d6b951d..0aaa70b4e0f7173489a7db52840931e68ff5b271 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -108,8 +108,8 @@ static int cxl_pmem_get_config_data(struct cxl_dev_state *cxlds, return -EINVAL; get_lsa = (struct cxl_mbox_get_lsa) { - .offset = cmd->in_offset, - .length = cmd->in_length, + .offset = cpu_to_le32(cmd->in_offset), + .length = cpu_to_le32(cmd->in_length), }; rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_GET_LSA, &get_lsa, @@ -139,7 +139,7 @@ static int cxl_pmem_set_config_data(struct cxl_dev_state *cxlds, return -ENOMEM; *set_lsa = (struct cxl_mbox_set_lsa) { - .offset = cmd->in_offset, + .offset = cpu_to_le32(cmd->in_offset), }; memcpy(set_lsa->data, cmd->in_buf, cmd->in_length); diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 01474daf45483af90a96e353eac87a31bef162f4..9602141bb8ec49d8fbf47a160616c4a107e51720 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -123,7 +123,7 @@ void devfreq_get_freq_range(struct devfreq *devfreq, unsigned long *min_freq, unsigned long *max_freq) { - unsigned long *freq_table = devfreq->profile->freq_table; + unsigned long *freq_table = devfreq->freq_table; s32 qos_min_freq, qos_max_freq; lockdep_assert_held(&devfreq->lock); @@ -133,11 +133,11 @@ void devfreq_get_freq_range(struct devfreq *devfreq, * The devfreq drivers can initialize this in either ascending or * descending order and devfreq core supports both. */ - if (freq_table[0] < freq_table[devfreq->profile->max_state - 1]) { + if (freq_table[0] < freq_table[devfreq->max_state - 1]) { *min_freq = freq_table[0]; - *max_freq = freq_table[devfreq->profile->max_state - 1]; + *max_freq = freq_table[devfreq->max_state - 1]; } else { - *min_freq = freq_table[devfreq->profile->max_state - 1]; + *min_freq = freq_table[devfreq->max_state - 1]; *max_freq = freq_table[0]; } @@ -169,8 +169,8 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq) { int lev; - for (lev = 0; lev < devfreq->profile->max_state; lev++) - if (freq == devfreq->profile->freq_table[lev]) + for (lev = 0; lev < devfreq->max_state; lev++) + if (freq == devfreq->freq_table[lev]) return lev; return -EINVAL; @@ -178,7 +178,6 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq) static int set_freq_table(struct devfreq *devfreq) { - struct devfreq_dev_profile *profile = devfreq->profile; struct dev_pm_opp *opp; unsigned long freq; int i, count; @@ -188,25 +187,22 @@ static int set_freq_table(struct devfreq *devfreq) if (count <= 0) return -EINVAL; - profile->max_state = count; - profile->freq_table = devm_kcalloc(devfreq->dev.parent, - profile->max_state, - sizeof(*profile->freq_table), - GFP_KERNEL); - if (!profile->freq_table) { - profile->max_state = 0; + devfreq->max_state = count; + devfreq->freq_table = devm_kcalloc(devfreq->dev.parent, + devfreq->max_state, + sizeof(*devfreq->freq_table), + GFP_KERNEL); + if (!devfreq->freq_table) return -ENOMEM; - } - for (i = 0, freq = 0; i < profile->max_state; i++, freq++) { + for (i = 0, freq = 0; i < devfreq->max_state; i++, freq++) { opp = dev_pm_opp_find_freq_ceil(devfreq->dev.parent, &freq); if (IS_ERR(opp)) { - devm_kfree(devfreq->dev.parent, profile->freq_table); - profile->max_state = 0; + devm_kfree(devfreq->dev.parent, devfreq->freq_table); return PTR_ERR(opp); } dev_pm_opp_put(opp); - profile->freq_table[i] = freq; + devfreq->freq_table[i] = freq; } return 0; @@ -246,7 +242,7 @@ int devfreq_update_status(struct devfreq *devfreq, unsigned long freq) if (lev != prev_lev) { devfreq->stats.trans_table[ - (prev_lev * devfreq->profile->max_state) + lev]++; + (prev_lev * devfreq->max_state) + lev]++; devfreq->stats.total_trans++; } @@ -835,6 +831,9 @@ struct devfreq *devfreq_add_device(struct device *dev, if (err < 0) goto err_dev; mutex_lock(&devfreq->lock); + } else { + devfreq->freq_table = devfreq->profile->freq_table; + devfreq->max_state = devfreq->profile->max_state; } devfreq->scaling_min_freq = find_available_min_freq(devfreq); @@ -870,8 +869,8 @@ struct devfreq *devfreq_add_device(struct device *dev, devfreq->stats.trans_table = devm_kzalloc(&devfreq->dev, array3_size(sizeof(unsigned int), - devfreq->profile->max_state, - devfreq->profile->max_state), + devfreq->max_state, + devfreq->max_state), GFP_KERNEL); if (!devfreq->stats.trans_table) { mutex_unlock(&devfreq->lock); @@ -880,7 +879,7 @@ struct devfreq *devfreq_add_device(struct device *dev, } devfreq->stats.time_in_state = devm_kcalloc(&devfreq->dev, - devfreq->profile->max_state, + devfreq->max_state, sizeof(*devfreq->stats.time_in_state), GFP_KERNEL); if (!devfreq->stats.time_in_state) { @@ -932,8 +931,9 @@ struct devfreq *devfreq_add_device(struct device *dev, err = devfreq->governor->event_handler(devfreq, DEVFREQ_GOV_START, NULL); if (err) { - dev_err(dev, "%s: Unable to start governor for the device\n", - __func__); + dev_err_probe(dev, err, + "%s: Unable to start governor for the device\n", + __func__); goto err_init; } create_sysfs_files(devfreq, devfreq->governor); @@ -1665,9 +1665,9 @@ static ssize_t available_frequencies_show(struct device *d, mutex_lock(&df->lock); - for (i = 0; i < df->profile->max_state; i++) + for (i = 0; i < df->max_state; i++) count += scnprintf(&buf[count], (PAGE_SIZE - count - 2), - "%lu ", df->profile->freq_table[i]); + "%lu ", df->freq_table[i]); mutex_unlock(&df->lock); /* Truncate the trailing space */ @@ -1690,7 +1690,7 @@ static ssize_t trans_stat_show(struct device *dev, if (!df->profile) return -EINVAL; - max_state = df->profile->max_state; + max_state = df->max_state; if (max_state == 0) return sprintf(buf, "Not Supported.\n"); @@ -1707,19 +1707,17 @@ static ssize_t trans_stat_show(struct device *dev, len += sprintf(buf + len, " :"); for (i = 0; i < max_state; i++) len += sprintf(buf + len, "%10lu", - df->profile->freq_table[i]); + df->freq_table[i]); len += sprintf(buf + len, " time(ms)\n"); for (i = 0; i < max_state; i++) { - if (df->profile->freq_table[i] - == df->previous_freq) { + if (df->freq_table[i] == df->previous_freq) len += sprintf(buf + len, "*"); - } else { + else len += sprintf(buf + len, " "); - } - len += sprintf(buf + len, "%10lu:", - df->profile->freq_table[i]); + + len += sprintf(buf + len, "%10lu:", df->freq_table[i]); for (j = 0; j < max_state; j++) len += sprintf(buf + len, "%10u", df->stats.trans_table[(i * max_state) + j]); @@ -1743,7 +1741,7 @@ static ssize_t trans_stat_store(struct device *dev, if (!df->profile) return -EINVAL; - if (df->profile->max_state == 0) + if (df->max_state == 0) return count; err = kstrtoint(buf, 10, &value); @@ -1751,11 +1749,11 @@ static ssize_t trans_stat_store(struct device *dev, return -EINVAL; mutex_lock(&df->lock); - memset(df->stats.time_in_state, 0, (df->profile->max_state * + memset(df->stats.time_in_state, 0, (df->max_state * sizeof(*df->stats.time_in_state))); memset(df->stats.trans_table, 0, array3_size(sizeof(unsigned int), - df->profile->max_state, - df->profile->max_state)); + df->max_state, + df->max_state)); df->stats.total_trans = 0; df->stats.last_update = get_jiffies_64(); mutex_unlock(&df->lock); diff --git a/drivers/devfreq/event/exynos-ppmu.c b/drivers/devfreq/event/exynos-ppmu.c index 9b849d7811167dfa3675b7f1e86fc89bb8157b5b..a443e7c42dafa050d097dc68446125f43a47ddd7 100644 --- a/drivers/devfreq/event/exynos-ppmu.c +++ b/drivers/devfreq/event/exynos-ppmu.c @@ -519,15 +519,19 @@ static int of_get_devfreq_events(struct device_node *np, count = of_get_child_count(events_np); desc = devm_kcalloc(dev, count, sizeof(*desc), GFP_KERNEL); - if (!desc) + if (!desc) { + of_node_put(events_np); return -ENOMEM; + } info->num_events = count; of_id = of_match_device(exynos_ppmu_id_match, dev); if (of_id) info->ppmu_type = (enum exynos_ppmu_type)of_id->data; - else + else { + of_node_put(events_np); return -EINVAL; + } j = 0; for_each_child_of_node(events_np, node) { diff --git a/drivers/devfreq/exynos-bus.c b/drivers/devfreq/exynos-bus.c index e689101abc93036c0b84a435e1489bdb1c41d1cf..f7dcc44f9414c09009399ccb74b2d794c6c628d6 100644 --- a/drivers/devfreq/exynos-bus.c +++ b/drivers/devfreq/exynos-bus.c @@ -447,9 +447,9 @@ static int exynos_bus_probe(struct platform_device *pdev) } } - max_state = bus->devfreq->profile->max_state; - min_freq = (bus->devfreq->profile->freq_table[0] / 1000); - max_freq = (bus->devfreq->profile->freq_table[max_state - 1] / 1000); + max_state = bus->devfreq->max_state; + min_freq = (bus->devfreq->freq_table[0] / 1000); + max_freq = (bus->devfreq->freq_table[max_state - 1] / 1000); pr_info("exynos-bus: new bus device registered: %s (%6ld KHz ~ %6ld KHz)\n", dev_name(dev), min_freq, max_freq); diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c index 72c67979ebe11217b9abd66c39222d5c37eab9ef..953cf9a1e9f7f93804cc889db38883bf97ae005d 100644 --- a/drivers/devfreq/governor_passive.c +++ b/drivers/devfreq/governor_passive.c @@ -1,4 +1,4 @@ - // SPDX-License-Identifier: GPL-2.0-only +// SPDX-License-Identifier: GPL-2.0-only /* * linux/drivers/devfreq/governor_passive.c * @@ -14,10 +14,9 @@ #include #include #include +#include #include "governor.h" -#define HZ_PER_KHZ 1000 - static struct devfreq_cpu_data * get_parent_cpu_data(struct devfreq_passive_data *p_data, struct cpufreq_policy *policy) @@ -34,6 +33,20 @@ get_parent_cpu_data(struct devfreq_passive_data *p_data, return NULL; } +static void delete_parent_cpu_data(struct devfreq_passive_data *p_data) +{ + struct devfreq_cpu_data *parent_cpu_data, *tmp; + + list_for_each_entry_safe(parent_cpu_data, tmp, &p_data->cpu_data_list, node) { + list_del(&parent_cpu_data->node); + + if (parent_cpu_data->opp_table) + dev_pm_opp_put_opp_table(parent_cpu_data->opp_table); + + kfree(parent_cpu_data); + } +} + static unsigned long get_target_freq_by_required_opp(struct device *p_dev, struct opp_table *p_opp_table, struct opp_table *opp_table, @@ -131,18 +144,18 @@ static int get_target_freq_with_devfreq(struct devfreq *devfreq, goto out; /* Use interpolation if required opps is not available */ - for (i = 0; i < parent_devfreq->profile->max_state; i++) - if (parent_devfreq->profile->freq_table[i] == *freq) + for (i = 0; i < parent_devfreq->max_state; i++) + if (parent_devfreq->freq_table[i] == *freq) break; - if (i == parent_devfreq->profile->max_state) + if (i == parent_devfreq->max_state) return -EINVAL; - if (i < devfreq->profile->max_state) { - child_freq = devfreq->profile->freq_table[i]; + if (i < devfreq->max_state) { + child_freq = devfreq->freq_table[i]; } else { - count = devfreq->profile->max_state; - child_freq = devfreq->profile->freq_table[count - 1]; + count = devfreq->max_state; + child_freq = devfreq->freq_table[count - 1]; } out: @@ -222,8 +235,7 @@ static int cpufreq_passive_unregister_notifier(struct devfreq *devfreq) { struct devfreq_passive_data *p_data = (struct devfreq_passive_data *)devfreq->data; - struct devfreq_cpu_data *parent_cpu_data; - int cpu, ret = 0; + int ret; if (p_data->nb.notifier_call) { ret = cpufreq_unregister_notifier(&p_data->nb, @@ -232,27 +244,9 @@ static int cpufreq_passive_unregister_notifier(struct devfreq *devfreq) return ret; } - for_each_possible_cpu(cpu) { - struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); - if (!policy) { - ret = -EINVAL; - continue; - } - - parent_cpu_data = get_parent_cpu_data(p_data, policy); - if (!parent_cpu_data) { - cpufreq_cpu_put(policy); - continue; - } + delete_parent_cpu_data(p_data); - list_del(&parent_cpu_data->node); - if (parent_cpu_data->opp_table) - dev_pm_opp_put_opp_table(parent_cpu_data->opp_table); - kfree(parent_cpu_data); - cpufreq_cpu_put(policy); - } - - return ret; + return 0; } static int cpufreq_passive_register_notifier(struct devfreq *devfreq) @@ -336,7 +330,6 @@ err_free_cpu_data: err_put_policy: cpufreq_cpu_put(policy); err: - WARN_ON(cpufreq_passive_unregister_notifier(devfreq)); return ret; } @@ -407,8 +400,7 @@ static int devfreq_passive_event_handler(struct devfreq *devfreq, if (!p_data) return -EINVAL; - if (!p_data->this) - p_data->this = devfreq; + p_data->this = devfreq; switch (event) { case DEVFREQ_GOV_START: diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 0cce6e4ec9462c3305d6b55476705658a0dcfe97..205acb2c744ded50d42d2138f288cdce9ef97967 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -343,7 +343,7 @@ void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context, if (old->context != context) continue; - dma_resv_list_set(list, i, replacement, usage); + dma_resv_list_set(list, i, dma_fence_get(replacement), usage); dma_fence_put(old); } } diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c index e7330684d3b8247d3cfbdcbd80bfefb596460aab..9631f2fd2faf7f2d575c2fae4872e7eccecad722 100644 --- a/drivers/dma-buf/udmabuf.c +++ b/drivers/dma-buf/udmabuf.c @@ -32,8 +32,11 @@ static vm_fault_t udmabuf_vm_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct udmabuf *ubuf = vma->vm_private_data; + pgoff_t pgoff = vmf->pgoff; - vmf->page = ubuf->pages[vmf->pgoff]; + if (pgoff >= ubuf->pagecount) + return VM_FAULT_SIGBUS; + vmf->page = ubuf->pages[pgoff]; get_page(vmf->page); return 0; } diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index 3e9d726504e2ed776ddc1155ef43130da5811533..7b3e6030f7b4b007e0ace835bbaeccbda21cb8d3 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -1900,6 +1900,11 @@ static int at_xdmac_alloc_chan_resources(struct dma_chan *chan) for (i = 0; i < init_nr_desc_per_channel; i++) { desc = at_xdmac_alloc_desc(chan, GFP_KERNEL); if (!desc) { + if (i == 0) { + dev_warn(chan2dev(chan), + "can't allocate any descriptors\n"); + return -EIO; + } dev_warn(chan2dev(chan), "only %d descriptors have been allocated\n", i); break; diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index 0a2168a4ccb0cf177ee99b4d7522b1b2c06ae0df..f696246f57fdb3734523930610bc650c52872fdb 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -675,16 +675,10 @@ static int dmatest_func(void *data) /* * src and dst buffers are freed by ourselves below */ - if (params->polled) { + if (params->polled) flags = DMA_CTRL_ACK; - } else { - if (dma_has_cap(DMA_INTERRUPT, dev->cap_mask)) { - flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT; - } else { - pr_err("Channel does not support interrupt!\n"); - goto err_pq_array; - } - } + else + flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT; ktime = ktime_get(); while (!(kthread_should_stop() || @@ -912,7 +906,6 @@ error_unmap_continue: runtime = ktime_to_us(ktime); ret = 0; -err_pq_array: kfree(dma_pq); err_srcs_array: kfree(srcs); diff --git a/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c index e9c9bcb1f5c20ffc2f8eb93bd6c086069e385506..c741da02b67e9831554da2023cb9cc73c0662ad4 100644 --- a/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c +++ b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c @@ -1164,8 +1164,9 @@ static int dma_chan_pause(struct dma_chan *dchan) BIT(chan->id) << DMAC_CHAN_SUSP_WE_SHIFT; axi_dma_iowrite32(chan->chip, DMAC_CHEN, val); } else { - val = BIT(chan->id) << DMAC_CHAN_SUSP2_SHIFT | - BIT(chan->id) << DMAC_CHAN_SUSP2_WE_SHIFT; + val = axi_dma_ioread32(chan->chip, DMAC_CHSUSPREG); + val |= BIT(chan->id) << DMAC_CHAN_SUSP2_SHIFT | + BIT(chan->id) << DMAC_CHAN_SUSP2_WE_SHIFT; axi_dma_iowrite32(chan->chip, DMAC_CHSUSPREG, val); } @@ -1190,12 +1191,13 @@ static inline void axi_chan_resume(struct axi_dma_chan *chan) { u32 val; - val = axi_dma_ioread32(chan->chip, DMAC_CHEN); if (chan->chip->dw->hdata->reg_map_8_channels) { + val = axi_dma_ioread32(chan->chip, DMAC_CHEN); val &= ~(BIT(chan->id) << DMAC_CHAN_SUSP_SHIFT); val |= (BIT(chan->id) << DMAC_CHAN_SUSP_WE_SHIFT); axi_dma_iowrite32(chan->chip, DMAC_CHEN, val); } else { + val = axi_dma_ioread32(chan->chip, DMAC_CHSUSPREG); val &= ~(BIT(chan->id) << DMAC_CHAN_SUSP2_SHIFT); val |= (BIT(chan->id) << DMAC_CHAN_SUSP2_WE_SHIFT); axi_dma_iowrite32(chan->chip, DMAC_CHSUSPREG, val); diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index ff0ea60051f0c90e50463de27df0abf5b53f9e27..5a8cc52c1abfd16938aa3d98ebfc28ebe323045a 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -716,10 +716,7 @@ static void idxd_device_wqs_clear_state(struct idxd_device *idxd) struct idxd_wq *wq = idxd->wqs[i]; mutex_lock(&wq->wq_lock); - if (wq->state == IDXD_WQ_ENABLED) { - idxd_wq_disable_cleanup(wq); - wq->state = IDXD_WQ_DISABLED; - } + idxd_wq_disable_cleanup(wq); idxd_wq_device_reset_cleanup(wq); mutex_unlock(&wq->wq_lock); } diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 355fb3ef4cbfaf1ce6d04892a9030aa21418813f..aa3478257ddb54f73204bc80694f56e8fe288d70 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -512,15 +512,16 @@ static int idxd_probe(struct idxd_device *idxd) dev_dbg(dev, "IDXD reset complete\n"); if (IS_ENABLED(CONFIG_INTEL_IDXD_SVM) && sva) { - if (iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA)) + if (iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA)) { dev_warn(dev, "Unable to turn on user SVA feature.\n"); - else + } else { set_bit(IDXD_FLAG_USER_PASID_ENABLED, &idxd->flags); - if (idxd_enable_system_pasid(idxd)) - dev_warn(dev, "No in-kernel DMA with PASID.\n"); - else - set_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags); + if (idxd_enable_system_pasid(idxd)) + dev_warn(dev, "No in-kernel DMA with PASID.\n"); + else + set_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags); + } } else if (!sva) { dev_warn(dev, "User forced SVA off via module param.\n"); } diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index 8535018ee7a2e14ecf4bbae9878949b0c8ea1335..f37a276f519e6ef91c64a1319d55ce3af9b392fc 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -891,7 +891,7 @@ static void sdma_update_channel_loop(struct sdma_channel *sdmac) * SDMA stops cyclic channel when DMA request triggers a channel and no SDMA * owned buffer is available (i.e. BD_DONE was set too late). */ - if (!is_sdma_channel_enabled(sdmac->sdma, sdmac->channel)) { + if (sdmac->desc && !is_sdma_channel_enabled(sdmac->sdma, sdmac->channel)) { dev_warn(sdmac->sdma->dev, "restart cyclic channel %d\n", sdmac->channel); sdma_enable_channel(sdmac->sdma, sdmac->channel); } @@ -2346,7 +2346,7 @@ MODULE_DESCRIPTION("i.MX SDMA driver"); #if IS_ENABLED(CONFIG_SOC_IMX6Q) MODULE_FIRMWARE("imx/sdma/sdma-imx6q.bin"); #endif -#if IS_ENABLED(CONFIG_SOC_IMX7D) +#if IS_ENABLED(CONFIG_SOC_IMX7D) || IS_ENABLED(CONFIG_SOC_IMX8M) MODULE_FIRMWARE("imx/sdma/sdma-imx7d.bin"); #endif MODULE_LICENSE("GPL"); diff --git a/drivers/dma/lgm/lgm-dma.c b/drivers/dma/lgm/lgm-dma.c index efe8bd3a0e2aa5c14a0f49fd9154d0930fd56196..9b9184f964be397a4f5f5b8a7a1c2410032d6456 100644 --- a/drivers/dma/lgm/lgm-dma.c +++ b/drivers/dma/lgm/lgm-dma.c @@ -1593,11 +1593,12 @@ static int intel_ldma_probe(struct platform_device *pdev) d->core_clk = devm_clk_get_optional(dev, NULL); if (IS_ERR(d->core_clk)) return PTR_ERR(d->core_clk); - clk_prepare_enable(d->core_clk); d->rst = devm_reset_control_get_optional(dev, NULL); if (IS_ERR(d->rst)) return PTR_ERR(d->rst); + + clk_prepare_enable(d->core_clk); reset_control_deassert(d->rst); ret = devm_add_action_or_reset(dev, ldma_clk_disable, d); diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index 858400e42ec05c5541f703bdd7e901552cda77a8..09915a5cba3ea6bda18d6125eefb8a68284e4ac3 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -2589,7 +2589,7 @@ static struct dma_pl330_desc *pl330_get_desc(struct dma_pl330_chan *pch) /* If the DMAC pool is empty, alloc new */ if (!desc) { - DEFINE_SPINLOCK(lock); + static DEFINE_SPINLOCK(lock); LIST_HEAD(pool); if (!add_desc(&pool, &lock, GFP_ATOMIC, 1)) diff --git a/drivers/dma/qcom/bam_dma.c b/drivers/dma/qcom/bam_dma.c index 87f6ca1541cffb0d31ebf0a17bc27c59b330c677..2ff787df513e60ec23f14bb0e888aedf46cc4bbe 100644 --- a/drivers/dma/qcom/bam_dma.c +++ b/drivers/dma/qcom/bam_dma.c @@ -558,14 +558,6 @@ static int bam_alloc_chan(struct dma_chan *chan) return 0; } -static int bam_pm_runtime_get_sync(struct device *dev) -{ - if (pm_runtime_enabled(dev)) - return pm_runtime_get_sync(dev); - - return 0; -} - /** * bam_free_chan - Frees dma resources associated with specific channel * @chan: specified channel @@ -581,7 +573,7 @@ static void bam_free_chan(struct dma_chan *chan) unsigned long flags; int ret; - ret = bam_pm_runtime_get_sync(bdev->dev); + ret = pm_runtime_get_sync(bdev->dev); if (ret < 0) return; @@ -784,7 +776,7 @@ static int bam_pause(struct dma_chan *chan) unsigned long flag; int ret; - ret = bam_pm_runtime_get_sync(bdev->dev); + ret = pm_runtime_get_sync(bdev->dev); if (ret < 0) return ret; @@ -810,7 +802,7 @@ static int bam_resume(struct dma_chan *chan) unsigned long flag; int ret; - ret = bam_pm_runtime_get_sync(bdev->dev); + ret = pm_runtime_get_sync(bdev->dev); if (ret < 0) return ret; @@ -919,7 +911,7 @@ static irqreturn_t bam_dma_irq(int irq, void *data) if (srcs & P_IRQ) tasklet_schedule(&bdev->task); - ret = bam_pm_runtime_get_sync(bdev->dev); + ret = pm_runtime_get_sync(bdev->dev); if (ret < 0) return IRQ_NONE; @@ -1037,7 +1029,7 @@ static void bam_start_dma(struct bam_chan *bchan) if (!vd) return; - ret = bam_pm_runtime_get_sync(bdev->dev); + ret = pm_runtime_get_sync(bdev->dev); if (ret < 0) return; @@ -1374,11 +1366,6 @@ static int bam_dma_probe(struct platform_device *pdev) if (ret) goto err_unregister_dma; - if (!bdev->bamclk) { - pm_runtime_disable(&pdev->dev); - return 0; - } - pm_runtime_irq_safe(&pdev->dev); pm_runtime_set_autosuspend_delay(&pdev->dev, BAM_DMA_AUTOSUSPEND_DELAY); pm_runtime_use_autosuspend(&pdev->dev); @@ -1462,10 +1449,8 @@ static int __maybe_unused bam_dma_suspend(struct device *dev) { struct bam_device *bdev = dev_get_drvdata(dev); - if (bdev->bamclk) { - pm_runtime_force_suspend(dev); - clk_unprepare(bdev->bamclk); - } + pm_runtime_force_suspend(dev); + clk_unprepare(bdev->bamclk); return 0; } @@ -1475,13 +1460,11 @@ static int __maybe_unused bam_dma_resume(struct device *dev) struct bam_device *bdev = dev_get_drvdata(dev); int ret; - if (bdev->bamclk) { - ret = clk_prepare(bdev->bamclk); - if (ret) - return ret; + ret = clk_prepare(bdev->bamclk); + if (ret) + return ret; - pm_runtime_force_resume(dev); - } + pm_runtime_force_resume(dev); return 0; } diff --git a/drivers/dma/ti/dma-crossbar.c b/drivers/dma/ti/dma-crossbar.c index 71d24fc07c0038cdf23e350d913902c8e9d576b6..f744ddbbbad7fa35e41bfd39915feb2d9cfcae73 100644 --- a/drivers/dma/ti/dma-crossbar.c +++ b/drivers/dma/ti/dma-crossbar.c @@ -245,6 +245,7 @@ static void *ti_dra7_xbar_route_allocate(struct of_phandle_args *dma_spec, if (dma_spec->args[0] >= xbar->xbar_requests) { dev_err(&pdev->dev, "Invalid XBAR request number: %d\n", dma_spec->args[0]); + put_device(&pdev->dev); return ERR_PTR(-EINVAL); } @@ -252,12 +253,14 @@ static void *ti_dra7_xbar_route_allocate(struct of_phandle_args *dma_spec, dma_spec->np = of_parse_phandle(ofdma->of_node, "dma-masters", 0); if (!dma_spec->np) { dev_err(&pdev->dev, "Can't get DMA master\n"); + put_device(&pdev->dev); return ERR_PTR(-EINVAL); } map = kzalloc(sizeof(*map), GFP_KERNEL); if (!map) { of_node_put(dma_spec->np); + put_device(&pdev->dev); return ERR_PTR(-ENOMEM); } @@ -268,6 +271,8 @@ static void *ti_dra7_xbar_route_allocate(struct of_phandle_args *dma_spec, mutex_unlock(&xbar->mutex); dev_err(&pdev->dev, "Run out of free DMA requests\n"); kfree(map); + of_node_put(dma_spec->np); + put_device(&pdev->dev); return ERR_PTR(-ENOMEM); } set_bit(map->xbar_out, xbar->dma_inuse); diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c index c9fe5903725a5f841d5947747badbe616b28e762..9c89f7d53e99db123b4f553e49ab35aa3bfa18fe 100644 --- a/drivers/firewire/core-cdev.c +++ b/drivers/firewire/core-cdev.c @@ -1211,7 +1211,7 @@ static int ioctl_get_cycle_timer2(struct client *client, union ioctl_arg *arg) struct fw_cdev_get_cycle_timer2 *a = &arg->get_cycle_timer2; struct fw_card *card = client->device->card; struct timespec64 ts = {0, 0}; - u32 cycle_time; + u32 cycle_time = 0; int ret = 0; local_irq_disable(); diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c index 90ed8fdaba754b5adb494f1cf294239618d5c788..adddd8c45d0c1fe39b919020f8ce343075ad5c61 100644 --- a/drivers/firewire/core-device.c +++ b/drivers/firewire/core-device.c @@ -372,8 +372,7 @@ static ssize_t rom_index_show(struct device *dev, struct fw_device *device = fw_device(dev->parent); struct fw_unit *unit = fw_unit(dev); - return snprintf(buf, PAGE_SIZE, "%d\n", - (int)(unit->directory - device->config_rom)); + return sysfs_emit(buf, "%td\n", unit->directory - device->config_rom); } static struct device_attribute fw_unit_attributes[] = { @@ -403,8 +402,7 @@ static ssize_t guid_show(struct device *dev, int ret; down_read(&fw_device_rwsem); - ret = snprintf(buf, PAGE_SIZE, "0x%08x%08x\n", - device->config_rom[3], device->config_rom[4]); + ret = sysfs_emit(buf, "0x%08x%08x\n", device->config_rom[3], device->config_rom[4]); up_read(&fw_device_rwsem); return ret; diff --git a/drivers/firmware/arm_scmi/base.c b/drivers/firmware/arm_scmi/base.c index 20fba7370f4e517d4d5b954c681339f79aebeed5..a52f084a6a87bbb5349b79fddd8de2fb2ee7dc58 100644 --- a/drivers/firmware/arm_scmi/base.c +++ b/drivers/firmware/arm_scmi/base.c @@ -36,7 +36,7 @@ struct scmi_msg_resp_base_attributes { struct scmi_msg_resp_base_discover_agent { __le32 agent_id; - u8 name[SCMI_MAX_STR_SIZE]; + u8 name[SCMI_SHORT_NAME_MAX_SIZE]; }; @@ -119,7 +119,7 @@ scmi_base_vendor_id_get(const struct scmi_protocol_handle *ph, bool sub_vendor) ret = ph->xops->do_xfer(ph, t); if (!ret) - memcpy(vendor_id, t->rx.buf, size); + strscpy(vendor_id, t->rx.buf, size); ph->xops->xfer_put(ph, t); @@ -221,11 +221,17 @@ scmi_base_implementation_list_get(const struct scmi_protocol_handle *ph, calc_list_sz = (1 + (loop_num_ret - 1) / sizeof(u32)) * sizeof(u32); if (calc_list_sz != real_list_sz) { - dev_err(dev, - "Malformed reply - real_sz:%zd calc_sz:%u\n", - real_list_sz, calc_list_sz); - ret = -EPROTO; - break; + dev_warn(dev, + "Malformed reply - real_sz:%zd calc_sz:%u (loop_num_ret:%d)\n", + real_list_sz, calc_list_sz, loop_num_ret); + /* + * Bail out if the expected list size is bigger than the + * total payload size of the received reply. + */ + if (calc_list_sz > real_list_sz) { + ret = -EPROTO; + break; + } } for (loop = 0; loop < loop_num_ret; loop++) @@ -270,7 +276,7 @@ static int scmi_base_discover_agent_get(const struct scmi_protocol_handle *ph, ret = ph->xops->do_xfer(ph, t); if (!ret) { agent_info = t->rx.buf; - strlcpy(name, agent_info->name, SCMI_MAX_STR_SIZE); + strscpy(name, agent_info->name, SCMI_SHORT_NAME_MAX_SIZE); } ph->xops->xfer_put(ph, t); @@ -369,7 +375,7 @@ static int scmi_base_protocol_init(const struct scmi_protocol_handle *ph) int id, ret; u8 *prot_imp; u32 version; - char name[SCMI_MAX_STR_SIZE]; + char name[SCMI_SHORT_NAME_MAX_SIZE]; struct device *dev = ph->dev; struct scmi_revision_info *rev = scmi_revision_area_get(ph); diff --git a/drivers/firmware/arm_scmi/bus.c b/drivers/firmware/arm_scmi/bus.c index f6fe723ab869e4836ee341d046585dd95a908220..d4e23101448ae370750d4de36138de0680899a70 100644 --- a/drivers/firmware/arm_scmi/bus.c +++ b/drivers/firmware/arm_scmi/bus.c @@ -181,7 +181,7 @@ scmi_device_create(struct device_node *np, struct device *parent, int protocol, return NULL; } - id = ida_simple_get(&scmi_bus_id, 1, 0, GFP_KERNEL); + id = ida_alloc_min(&scmi_bus_id, 1, GFP_KERNEL); if (id < 0) { kfree_const(scmi_dev->name); kfree(scmi_dev); @@ -204,7 +204,7 @@ scmi_device_create(struct device_node *np, struct device *parent, int protocol, put_dev: kfree_const(scmi_dev->name); put_device(&scmi_dev->dev); - ida_simple_remove(&scmi_bus_id, id); + ida_free(&scmi_bus_id, id); return NULL; } @@ -212,7 +212,7 @@ void scmi_device_destroy(struct scmi_device *scmi_dev) { kfree_const(scmi_dev->name); scmi_handle_put(scmi_dev->handle); - ida_simple_remove(&scmi_bus_id, scmi_dev->id); + ida_free(&scmi_bus_id, scmi_dev->id); device_unregister(&scmi_dev->dev); } diff --git a/drivers/firmware/arm_scmi/clock.c b/drivers/firmware/arm_scmi/clock.c index 4d36a9a133d1e03d57a12cf4766d6709c2b55947..3ed7ae0d6781e560a69ed79935acabb1c8266235 100644 --- a/drivers/firmware/arm_scmi/clock.c +++ b/drivers/firmware/arm_scmi/clock.c @@ -153,7 +153,7 @@ static int scmi_clock_attributes_get(const struct scmi_protocol_handle *ph, if (!ret) { u32 latency = 0; attributes = le32_to_cpu(attr->attributes); - strlcpy(clk->name, attr->name, SCMI_MAX_STR_SIZE); + strscpy(clk->name, attr->name, SCMI_SHORT_NAME_MAX_SIZE); /* clock_enable_latency field is present only since SCMI v3.1 */ if (PROTOCOL_REV_MAJOR(version) >= 0x2) latency = le32_to_cpu(attr->clock_enable_latency); @@ -194,6 +194,7 @@ static int rate_cmp_func(const void *_r1, const void *_r2) } struct scmi_clk_ipriv { + struct device *dev; u32 clk_id; struct scmi_clock_info *clk; }; @@ -223,6 +224,29 @@ iter_clk_describe_update_state(struct scmi_iterator_state *st, st->num_returned = NUM_RETURNED(flags); p->clk->rate_discrete = RATE_DISCRETE(flags); + /* Warn about out of spec replies ... */ + if (!p->clk->rate_discrete && + (st->num_returned != 3 || st->num_remaining != 0)) { + dev_warn(p->dev, + "Out-of-spec CLOCK_DESCRIBE_RATES reply for %s - returned:%d remaining:%d rx_len:%zd\n", + p->clk->name, st->num_returned, st->num_remaining, + st->rx_len); + + /* + * A known quirk: a triplet is returned but num_returned != 3 + * Check for a safe payload size and fix. + */ + if (st->num_returned != 3 && st->num_remaining == 0 && + st->rx_len == sizeof(*r) + sizeof(__le32) * 2 * 3) { + st->num_returned = 3; + st->num_remaining = 0; + } else { + dev_err(p->dev, + "Cannot fix out-of-spec reply !\n"); + return -EPROTO; + } + } + return 0; } @@ -255,7 +279,6 @@ iter_clk_describe_process_response(const struct scmi_protocol_handle *ph, *rate = RATE_TO_U64(r->rate[st->loop_idx]); p->clk->list.num_rates++; - //XXX dev_dbg(ph->dev, "Rate %llu Hz\n", *rate); } return ret; @@ -266,9 +289,7 @@ scmi_clock_describe_rates_get(const struct scmi_protocol_handle *ph, u32 clk_id, struct scmi_clock_info *clk) { int ret; - void *iter; - struct scmi_msg_clock_describe_rates *msg; struct scmi_iterator_ops ops = { .prepare_message = iter_clk_describe_prepare_message, .update_state = iter_clk_describe_update_state, @@ -277,11 +298,13 @@ scmi_clock_describe_rates_get(const struct scmi_protocol_handle *ph, u32 clk_id, struct scmi_clk_ipriv cpriv = { .clk_id = clk_id, .clk = clk, + .dev = ph->dev, }; iter = ph->hops->iter_response_init(ph, &ops, SCMI_MAX_NUM_RATES, CLOCK_DESCRIBE_RATES, - sizeof(*msg), &cpriv); + sizeof(struct scmi_msg_clock_describe_rates), + &cpriv); if (IS_ERR(iter)) return PTR_ERR(iter); diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index c1922bd650ae2c65bb64351146c7fe0916341975..8b7ac6663d57de6e8824aac6d0aa4e8509670912 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -1223,6 +1223,7 @@ static int scmi_iterator_run(void *iter) if (ret) break; + st->rx_len = i->t->rx.len; ret = iops->update_state(st, i->resp, i->priv); if (ret) break; diff --git a/drivers/firmware/arm_scmi/optee.c b/drivers/firmware/arm_scmi/optee.c index b503c22cfd326831cb612ae2d1dde004e3b0b0a9..8abace56b95885da4ccb9d52488083996c234e9e 100644 --- a/drivers/firmware/arm_scmi/optee.c +++ b/drivers/firmware/arm_scmi/optee.c @@ -117,6 +117,7 @@ struct scmi_optee_channel { u32 channel_id; u32 tee_session; u32 caps; + u32 rx_len; struct mutex mu; struct scmi_chan_info *cinfo; union { @@ -302,6 +303,9 @@ static int invoke_process_msg_channel(struct scmi_optee_channel *channel, size_t return -EIO; } + /* Save response size */ + channel->rx_len = param[2].u.memref.size; + return 0; } @@ -353,6 +357,7 @@ static int setup_dynamic_shmem(struct device *dev, struct scmi_optee_channel *ch shbuf = tee_shm_get_va(channel->tee_shm, 0); memset(shbuf, 0, msg_size); channel->req.msg = shbuf; + channel->rx_len = msg_size; return 0; } @@ -508,7 +513,7 @@ static void scmi_optee_fetch_response(struct scmi_chan_info *cinfo, struct scmi_optee_channel *channel = cinfo->transport_info; if (channel->tee_shm) - msg_fetch_response(channel->req.msg, SCMI_OPTEE_MAX_MSG_SIZE, xfer); + msg_fetch_response(channel->req.msg, channel->rx_len, xfer); else shmem_fetch_response(channel->req.shmem, xfer); } diff --git a/drivers/firmware/arm_scmi/perf.c b/drivers/firmware/arm_scmi/perf.c index 8f4051aca2200f6c9964a013f876572279d247e3..bbb0331801ff49274d23f2ea61d582f4f0dbdf7b 100644 --- a/drivers/firmware/arm_scmi/perf.c +++ b/drivers/firmware/arm_scmi/perf.c @@ -252,7 +252,7 @@ scmi_perf_domain_attributes_get(const struct scmi_protocol_handle *ph, dom_info->mult_factor = (dom_info->sustained_freq_khz * 1000) / dom_info->sustained_perf_level; - strlcpy(dom_info->name, attr->name, SCMI_MAX_STR_SIZE); + strscpy(dom_info->name, attr->name, SCMI_SHORT_NAME_MAX_SIZE); } ph->xops->xfer_put(ph, t); @@ -332,7 +332,6 @@ scmi_perf_describe_levels_get(const struct scmi_protocol_handle *ph, u32 domain, { int ret; void *iter; - struct scmi_msg_perf_describe_levels *msg; struct scmi_iterator_ops ops = { .prepare_message = iter_perf_levels_prepare_message, .update_state = iter_perf_levels_update_state, @@ -345,7 +344,8 @@ scmi_perf_describe_levels_get(const struct scmi_protocol_handle *ph, u32 domain, iter = ph->hops->iter_response_init(ph, &ops, MAX_OPPS, PERF_DESCRIBE_LEVELS, - sizeof(*msg), &ppriv); + sizeof(struct scmi_msg_perf_describe_levels), + &ppriv); if (IS_ERR(iter)) return PTR_ERR(iter); diff --git a/drivers/firmware/arm_scmi/power.c b/drivers/firmware/arm_scmi/power.c index 964882cc87471210080202e416d2ef9a22c11369..356e83631664d965141511f5481cd5ed18fdc7a1 100644 --- a/drivers/firmware/arm_scmi/power.c +++ b/drivers/firmware/arm_scmi/power.c @@ -122,7 +122,7 @@ scmi_power_domain_attributes_get(const struct scmi_protocol_handle *ph, dom_info->state_set_notify = SUPPORTS_STATE_SET_NOTIFY(flags); dom_info->state_set_async = SUPPORTS_STATE_SET_ASYNC(flags); dom_info->state_set_sync = SUPPORTS_STATE_SET_SYNC(flags); - strlcpy(dom_info->name, attr->name, SCMI_MAX_STR_SIZE); + strscpy(dom_info->name, attr->name, SCMI_SHORT_NAME_MAX_SIZE); } ph->xops->xfer_put(ph, t); diff --git a/drivers/firmware/arm_scmi/protocols.h b/drivers/firmware/arm_scmi/protocols.h index 73304af5ec4ac49118c4946de0f9d069424fd3a0..51c31379f9b3e85bb4fae72e7dbf4c36767b9b58 100644 --- a/drivers/firmware/arm_scmi/protocols.h +++ b/drivers/firmware/arm_scmi/protocols.h @@ -24,8 +24,6 @@ #include -#define SCMI_SHORT_NAME_MAX_SIZE 16 - #define PROTOCOL_REV_MINOR_MASK GENMASK(15, 0) #define PROTOCOL_REV_MAJOR_MASK GENMASK(31, 16) #define PROTOCOL_REV_MAJOR(x) ((u16)(FIELD_GET(PROTOCOL_REV_MAJOR_MASK, (x)))) @@ -181,6 +179,8 @@ struct scmi_protocol_handle { * @max_resources: Maximum acceptable number of items, configured by the caller * depending on the underlying resources that it is querying. * @loop_idx: The iterator loop index in the current multi-part reply. + * @rx_len: Size in bytes of the currenly processed message; it can be used by + * the user of the iterator to verify a reply size. * @priv: Optional pointer to some additional state-related private data setup * by the caller during the iterations. */ @@ -190,6 +190,7 @@ struct scmi_iterator_state { unsigned int num_remaining; unsigned int max_resources; unsigned int loop_idx; + size_t rx_len; void *priv; }; diff --git a/drivers/firmware/arm_scmi/reset.c b/drivers/firmware/arm_scmi/reset.c index a420a91020940e5deb69886f33746277219709b7..673f3eb498f43483c1ece184c98d8c4b3ce5aef0 100644 --- a/drivers/firmware/arm_scmi/reset.c +++ b/drivers/firmware/arm_scmi/reset.c @@ -116,7 +116,7 @@ scmi_reset_domain_attributes_get(const struct scmi_protocol_handle *ph, dom_info->latency_us = le32_to_cpu(attr->latency); if (dom_info->latency_us == U32_MAX) dom_info->latency_us = 0; - strlcpy(dom_info->name, attr->name, SCMI_MAX_STR_SIZE); + strscpy(dom_info->name, attr->name, SCMI_SHORT_NAME_MAX_SIZE); } ph->xops->xfer_put(ph, t); diff --git a/drivers/firmware/arm_scmi/sensors.c b/drivers/firmware/arm_scmi/sensors.c index 21e0ce89b153a75f2b585a316512fe0bf3003294..7288c61178380813cb63fb83e588180c3308ab93 100644 --- a/drivers/firmware/arm_scmi/sensors.c +++ b/drivers/firmware/arm_scmi/sensors.c @@ -338,7 +338,6 @@ static int scmi_sensor_update_intervals(const struct scmi_protocol_handle *ph, struct scmi_sensor_info *s) { void *iter; - struct scmi_msg_sensor_list_update_intervals *msg; struct scmi_iterator_ops ops = { .prepare_message = iter_intervals_prepare_message, .update_state = iter_intervals_update_state, @@ -351,22 +350,28 @@ static int scmi_sensor_update_intervals(const struct scmi_protocol_handle *ph, iter = ph->hops->iter_response_init(ph, &ops, s->intervals.count, SENSOR_LIST_UPDATE_INTERVALS, - sizeof(*msg), &upriv); + sizeof(struct scmi_msg_sensor_list_update_intervals), + &upriv); if (IS_ERR(iter)) return PTR_ERR(iter); return ph->hops->iter_response_run(iter); } +struct scmi_apriv { + bool any_axes_support_extended_names; + struct scmi_sensor_info *s; +}; + static void iter_axes_desc_prepare_message(void *message, const unsigned int desc_index, const void *priv) { struct scmi_msg_sensor_axis_description_get *msg = message; - const struct scmi_sensor_info *s = priv; + const struct scmi_apriv *apriv = priv; /* Set the number of sensors to be skipped/already read */ - msg->id = cpu_to_le32(s->id); + msg->id = cpu_to_le32(apriv->s->id); msg->axis_desc_index = cpu_to_le32(desc_index); } @@ -393,19 +398,21 @@ iter_axes_desc_process_response(const struct scmi_protocol_handle *ph, u32 attrh, attrl; struct scmi_sensor_axis_info *a; size_t dsize = SCMI_MSG_RESP_AXIS_DESCR_BASE_SZ; - struct scmi_sensor_info *s = priv; + struct scmi_apriv *apriv = priv; const struct scmi_axis_descriptor *adesc = st->priv; attrl = le32_to_cpu(adesc->attributes_low); + if (SUPPORTS_EXTENDED_AXIS_NAMES(attrl)) + apriv->any_axes_support_extended_names = true; - a = &s->axis[st->desc_index + st->loop_idx]; + a = &apriv->s->axis[st->desc_index + st->loop_idx]; a->id = le32_to_cpu(adesc->id); a->extended_attrs = SUPPORTS_EXTEND_ATTRS(attrl); attrh = le32_to_cpu(adesc->attributes_high); a->scale = S32_EXT(SENSOR_SCALE(attrh)); a->type = SENSOR_TYPE(attrh); - strscpy(a->name, adesc->name, SCMI_MAX_STR_SIZE); + strscpy(a->name, adesc->name, SCMI_SHORT_NAME_MAX_SIZE); if (a->extended_attrs) { unsigned int ares = le32_to_cpu(adesc->resolution); @@ -444,10 +451,19 @@ iter_axes_extended_name_process_response(const struct scmi_protocol_handle *ph, void *priv) { struct scmi_sensor_axis_info *a; - const struct scmi_sensor_info *s = priv; + const struct scmi_apriv *apriv = priv; struct scmi_sensor_axis_name_descriptor *adesc = st->priv; + u32 axis_id = le32_to_cpu(adesc->axis_id); - a = &s->axis[st->desc_index + st->loop_idx]; + if (axis_id >= st->max_resources) + return -EPROTO; + + /* + * Pick the corresponding descriptor based on the axis_id embedded + * in the reply since the list of axes supporting extended names + * can be a subset of all the axes. + */ + a = &apriv->s->axis[axis_id]; strscpy(a->name, adesc->name, SCMI_MAX_STR_SIZE); st->priv = ++adesc; @@ -458,21 +474,36 @@ static int scmi_sensor_axis_extended_names_get(const struct scmi_protocol_handle *ph, struct scmi_sensor_info *s) { + int ret; void *iter; - struct scmi_msg_sensor_axis_description_get *msg; struct scmi_iterator_ops ops = { .prepare_message = iter_axes_desc_prepare_message, .update_state = iter_axes_extended_name_update_state, .process_response = iter_axes_extended_name_process_response, }; + struct scmi_apriv apriv = { + .any_axes_support_extended_names = false, + .s = s, + }; iter = ph->hops->iter_response_init(ph, &ops, s->num_axis, SENSOR_AXIS_NAME_GET, - sizeof(*msg), s); + sizeof(struct scmi_msg_sensor_axis_description_get), + &apriv); if (IS_ERR(iter)) return PTR_ERR(iter); - return ph->hops->iter_response_run(iter); + /* + * Do not cause whole protocol initialization failure when failing to + * get extended names for axes. + */ + ret = ph->hops->iter_response_run(iter); + if (ret) + dev_warn(ph->dev, + "Failed to get axes extended names for %s (ret:%d).\n", + s->name, ret); + + return 0; } static int scmi_sensor_axis_description(const struct scmi_protocol_handle *ph, @@ -481,12 +512,15 @@ static int scmi_sensor_axis_description(const struct scmi_protocol_handle *ph, { int ret; void *iter; - struct scmi_msg_sensor_axis_description_get *msg; struct scmi_iterator_ops ops = { .prepare_message = iter_axes_desc_prepare_message, .update_state = iter_axes_desc_update_state, .process_response = iter_axes_desc_process_response, }; + struct scmi_apriv apriv = { + .any_axes_support_extended_names = false, + .s = s, + }; s->axis = devm_kcalloc(ph->dev, s->num_axis, sizeof(*s->axis), GFP_KERNEL); @@ -495,7 +529,8 @@ static int scmi_sensor_axis_description(const struct scmi_protocol_handle *ph, iter = ph->hops->iter_response_init(ph, &ops, s->num_axis, SENSOR_AXIS_DESCRIPTION_GET, - sizeof(*msg), s); + sizeof(struct scmi_msg_sensor_axis_description_get), + &apriv); if (IS_ERR(iter)) return PTR_ERR(iter); @@ -503,7 +538,8 @@ static int scmi_sensor_axis_description(const struct scmi_protocol_handle *ph, if (ret) return ret; - if (PROTOCOL_REV_MAJOR(version) >= 0x3) + if (PROTOCOL_REV_MAJOR(version) >= 0x3 && + apriv.any_axes_support_extended_names) ret = scmi_sensor_axis_extended_names_get(ph, s); return ret; @@ -598,7 +634,7 @@ iter_sens_descr_process_response(const struct scmi_protocol_handle *ph, SUPPORTS_AXIS(attrh) ? SENSOR_AXIS_NUMBER(attrh) : 0, SCMI_MAX_NUM_SENSOR_AXIS); - strscpy(s->name, sdesc->name, SCMI_MAX_STR_SIZE); + strscpy(s->name, sdesc->name, SCMI_SHORT_NAME_MAX_SIZE); /* * If supported overwrite short name with the extended diff --git a/drivers/firmware/arm_scmi/voltage.c b/drivers/firmware/arm_scmi/voltage.c index 9d195d8719ab3d4d7a2059ac97e88802875af40b..eaa8d944926a36ce5d40a08bf9bda48811285f06 100644 --- a/drivers/firmware/arm_scmi/voltage.c +++ b/drivers/firmware/arm_scmi/voltage.c @@ -180,7 +180,6 @@ static int scmi_voltage_levels_get(const struct scmi_protocol_handle *ph, { int ret; void *iter; - struct scmi_msg_cmd_describe_levels *msg; struct scmi_iterator_ops ops = { .prepare_message = iter_volt_levels_prepare_message, .update_state = iter_volt_levels_update_state, @@ -193,7 +192,8 @@ static int scmi_voltage_levels_get(const struct scmi_protocol_handle *ph, iter = ph->hops->iter_response_init(ph, &ops, v->num_levels, VOLTAGE_DESCRIBE_LEVELS, - sizeof(*msg), &vpriv); + sizeof(struct scmi_msg_cmd_describe_levels), + &vpriv); if (IS_ERR(iter)) return PTR_ERR(iter); @@ -225,15 +225,14 @@ static int scmi_voltage_descriptors_get(const struct scmi_protocol_handle *ph, /* Retrieve domain attributes at first ... */ put_unaligned_le32(dom, td->tx.buf); - ret = ph->xops->do_xfer(ph, td); /* Skip domain on comms error */ - if (ret) + if (ph->xops->do_xfer(ph, td)) continue; v = vinfo->domains + dom; v->id = dom; attributes = le32_to_cpu(resp_dom->attr); - strlcpy(v->name, resp_dom->name, SCMI_MAX_STR_SIZE); + strscpy(v->name, resp_dom->name, SCMI_SHORT_NAME_MAX_SIZE); /* * If supported overwrite short name with the extended one; @@ -249,12 +248,8 @@ static int scmi_voltage_descriptors_get(const struct scmi_protocol_handle *ph, v->async_level_set = true; } - ret = scmi_voltage_levels_get(ph, v); /* Skip invalid voltage descriptors */ - if (ret) - continue; - - ph->xops->reset_rx_to_maxsz(ph, td); + scmi_voltage_levels_get(ph, v); } ph->xops->xfer_put(ph, td); diff --git a/drivers/firmware/efi/reboot.c b/drivers/firmware/efi/reboot.c index 73089a24f04b637c886b5177cdd852791ae04d83..ceae84c19d226d911fbed1e891fdb87183a1bd4a 100644 --- a/drivers/firmware/efi/reboot.c +++ b/drivers/firmware/efi/reboot.c @@ -6,7 +6,7 @@ #include #include -static void (*orig_pm_power_off)(void); +static struct sys_off_handler *efi_sys_off_handler; int efi_reboot_quirk_mode = -1; @@ -51,15 +51,11 @@ bool __weak efi_poweroff_required(void) return false; } -static void efi_power_off(void) +static int efi_power_off(struct sys_off_data *data) { efi.reset_system(EFI_RESET_SHUTDOWN, EFI_SUCCESS, 0, NULL); - /* - * The above call should not return, if it does fall back to - * the original power off method (typically ACPI poweroff). - */ - if (orig_pm_power_off) - orig_pm_power_off(); + + return NOTIFY_DONE; } static int __init efi_shutdown_init(void) @@ -68,8 +64,13 @@ static int __init efi_shutdown_init(void) return -ENODEV; if (efi_poweroff_required()) { - orig_pm_power_off = pm_power_off; - pm_power_off = efi_power_off; + /* SYS_OFF_PRIO_FIRMWARE + 1 so that it runs before acpi_power_off */ + efi_sys_off_handler = + register_sys_off_handler(SYS_OFF_MODE_POWER_OFF, + SYS_OFF_PRIO_FIRMWARE + 1, + efi_power_off, NULL); + if (IS_ERR(efi_sys_off_handler)) + return PTR_ERR(efi_sys_off_handler); } return 0; diff --git a/drivers/firmware/efi/sysfb_efi.c b/drivers/firmware/efi/sysfb_efi.c index 4c7c9dd7733f92a8c2c9e7d15badb92289003e58..7882d4b3f2be4ba298df766fb1d108cf717c5f88 100644 --- a/drivers/firmware/efi/sysfb_efi.c +++ b/drivers/firmware/efi/sysfb_efi.c @@ -26,8 +26,6 @@ #include #include